Coverage Report

Created: 2026-06-04 10:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-util/src/fs_util.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::future::Future;
16
use core::pin::Pin;
17
use std::fs::Metadata;
18
use std::path::{Path, PathBuf};
19
20
use nativelink_error::{Code, Error, ResultExt, error_if, make_err};
21
use tokio::fs;
22
#[cfg(target_os = "macos")]
23
use tracing::debug;
24
25
/// Identifies the kernel facility that produced a materialized destination
/// tree.
///
/// [`hardlink_directory_tree`] hands this value back so callers can emit
/// per-hit telemetry and notice when the fast path quietly degrades — for
/// example when a cross-volume cache layout makes clonefile fall through to
/// per-file hardlinks.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum CloneMethod {
    /// The tree came from one APFS `clonefile(2)` call, which costs O(1) no
    /// matter how large the tree is. Only produced on macOS.
    Clonefile,
    /// The tree came from a walk that calls `fs::hard_link` once per file,
    /// which is O(N) in the number of files. This is always the result on
    /// Linux/Windows, and the result on macOS whenever clonefile fell through.
    Hardlink,
}
39
40
/// Materializes an entire directory tree from source to destination using the
41
/// fastest method the host filesystem supports.
42
///
43
/// # Arguments
44
/// * `src_dir` - Source directory path (must exist)
45
/// * `dst_dir` - Destination directory path (must NOT exist; parent will be created)
46
///
47
/// # Returns
48
/// * `Ok(CloneMethod)` indicating which kernel mechanism was used
49
/// * `Err` if materialization fails (e.g., cross-filesystem, unsupported filesystem)
50
///
51
/// # Platform Support
52
/// - macOS: Tries APFS `clonefile(2)` first (O(1), copy-on-write). On failure
53
///   (e.g., cross-volume EXDEV, or any unexpected errno) falls back to per-file
54
///   `fs::hard_link`. `clonefile(2)` copies the source's modes verbatim, so
55
///   the destination's directory/file modes mirror the source. For a directory
56
///   cache entry locked down by [`set_readonly_recursive`], that means
57
///   directories are writable (0o755) and files are read-only (0o555): the
58
///   worker can create the action's declared outputs at any nested path, but
59
///   the hardlinked input files stay immutable. This matches the hermeticity
60
///   contract enforced by Bazel's local sandbox and the REAPI
61
///   `Action.output_files` semantics: actions can only write to declared
62
///   outputs, never mutate inputs. The COW semantics of `clonefile(2)` mean
63
///   any writes the worker does make to the destination do not affect the
64
///   source. The destination root is additionally chmod'd to 0o755 as a
65
///   defensive guarantee for callers that did not pre-mark the source.
66
/// - Linux: Per-file `fs::hard_link` (directory hardlinks are not supported on
67
///   ext4/btrfs without root). Directories at the destination are created
68
///   fresh by this walk, so they are writable regardless of the source's
69
///   directory modes; files are hardlinked and keep the source inode's mode.
70
///   Always returns `CloneMethod::Hardlink`.
71
/// - Windows: Per-file `fs::hard_link` (requires NTFS). Always returns
72
///   `CloneMethod::Hardlink`.
73
///
74
/// # Errors
75
/// - Source directory doesn't exist
76
/// - Destination already exists
77
/// - Cross-filesystem materialization attempted and fallback also fails
78
/// - Filesystem doesn't support hardlinks (Linux/Windows fallback)
79
/// - Permission denied
80
36
pub async fn hardlink_directory_tree(src_dir: &Path, dst_dir: &Path) -> Result<CloneMethod, Error> {
81
35
    error_if!(
82
36
        !src_dir.exists(),
83
        "Source directory does not exist: {}",
84
1
        src_dir.display()
85
    );
86
87
1
    error_if!(
88
35
        dst_dir.exists(),
89
        "Destination directory already exists: {}",
90
1
        dst_dir.display()
91
    );
92
93
    #[cfg(target_os = "macos")]
94
    {
95
        // clonefile(2) requires dst's parent to exist but dst itself must NOT
96
        // exist. Make sure the parent is present without creating dst. The
97
        // non-macOS fallback path below creates dst (and any missing parents)
98
        // itself via `fs::create_dir_all(dst_dir)`, so this pre-step is only
99
        // needed for the clonefile case.
100
        if let Some(parent) = dst_dir.parent() {
101
            fs::create_dir_all(parent).await.err_tip(|| {
102
                format!(
103
                    "Failed to create parent of destination: {}",
104
                    parent.display()
105
                )
106
            })?;
107
        }
108
109
        match try_clonefile(src_dir, dst_dir).await {
110
            Ok(()) => {
111
                // `clonefile(2)` copies the source's modes verbatim. A
112
                // directory cache entry locked down by
113
                // `set_readonly_recursive` already has writable directories
114
                // (0o755) and read-only files (0o555), so the clone is
115
                // immediately usable: the worker can create declared outputs
116
                // at any nested path and the hardlinked inputs stay
117
                // immutable. No per-directory chmod walk is needed. The root
118
                // is still chmod'd here as a defensive guarantee for callers
119
                // that pass a source whose root was not pre-marked writable.
120
                chmod_dir_writable(dst_dir)
121
                    .await
122
                    .err_tip(|| "Failed to chmod cloned tree root")?;
123
                return Ok(CloneMethod::Clonefile);
124
            }
125
            Err(e) => {
126
                debug!(
127
                    src = %src_dir.display(),
128
                    dst = %dst_dir.display(),
129
                    error = %e,
130
                    "clonefile failed, falling back to per-file hardlinks"
131
                );
132
                // clonefile(2) is atomic — on failure dst should not exist —
133
                // but be defensive in case a partial tree was left behind.
134
                let _cleanup = fs::remove_dir_all(dst_dir).await;
135
            }
136
        }
137
    }
138
139
    // Create the root destination directory
140
34
    fs::create_dir_all(dst_dir).await.err_tip(|| 
{0
141
0
        format!(
142
            "Failed to create destination directory: {}",
143
0
            dst_dir.display()
144
        )
145
0
    })?;
146
147
    // Recursively hardlink the directory tree
148
34
    hardlink_directory_tree_recursive(src_dir, dst_dir).await
?0
;
149
34
    Ok(CloneMethod::Hardlink)
150
36
}
151
152
/// Clones the tree rooted at `src` to `dst` with a single APFS `clonefile(2)`
/// call. On success the destination shares data blocks with the source via
/// copy-on-write, and the cost is O(1) regardless of how many files the tree
/// holds.
///
/// Any non-zero return from `clonefile` — EXDEV (cross-volume), ENOTSUP
/// (filesystem cannot clone), or anything else — is reported as `Err`, as is
/// a path containing an interior NUL byte or a failure to join the blocking
/// task. Callers are expected to fall back to per-file hardlinks.
#[cfg(target_os = "macos")]
async fn try_clonefile(src: &Path, dst: &Path) -> std::io::Result<()> {
    use std::ffi::CString;
    use std::os::unix::ffi::OsStrExt;

    // From <sys/clonefile.h>: don't follow symlinks at the top level. Symlinks
    // *within* the cloned tree are cloned as symlinks regardless. The `libc`
    // crate exposes `clonefile` but not this flag constant.
    const CLONE_NOFOLLOW: u32 = 0x0001;

    // Converts a path to the NUL-terminated form the syscall needs, reporting
    // `nul_msg` if the path itself contains a NUL byte.
    fn to_c_path(path: &Path, nul_msg: &'static str) -> std::io::Result<CString> {
        CString::new(path.as_os_str().as_bytes())
            .map_err(|_| std::io::Error::new(std::io::ErrorKind::InvalidInput, nul_msg))
    }

    let c_src = to_c_path(src, "src path contains interior NUL byte")?;
    let c_dst = to_c_path(dst, "dst path contains interior NUL byte")?;

    let joined = crate::spawn_blocking!("clonefile", move || {
        // SAFETY: clonefile(2) takes two NUL-terminated C strings and a flag
        // word. Both CStrings are owned by this closure for the duration of
        // the call, so the pointers stay valid.
        let rc = unsafe { libc::clonefile(c_src.as_ptr(), c_dst.as_ptr(), CLONE_NOFOLLOW) };
        if rc != 0 {
            return Err(std::io::Error::last_os_error());
        }
        Ok(())
    })
    .await;

    match joined {
        Ok(clone_outcome) => clone_outcome,
        Err(join_err) => Err(std::io::Error::other(join_err)),
    }
}
196
197
/// Forces the mode of `dir` itself to 0o755 so new entries can be created
/// inside it. Nothing beneath `dir` is touched.
///
/// `hardlink_directory_tree` calls this on the destination root after a
/// successful `clonefile(2)`. A source prepared by [`set_readonly_recursive`]
/// already has writable directories, so for those callers the call changes
/// nothing; it exists so the root is writable even when the source root was
/// not pre-marked. Entries inside `dir` deliberately keep their cloned
/// permissions.
#[cfg(target_os = "macos")]
async fn chmod_dir_writable(dir: &Path) -> Result<(), Error> {
    use std::fs::Permissions;
    use std::os::unix::fs::PermissionsExt;

    let writable = Permissions::from_mode(0o755);
    let outcome = fs::set_permissions(dir, writable).await;
    outcome.err_tip(|| format!("Failed to chmod {} to 0o755", dir.display()))
}
212
213
/// Internal recursive function to hardlink directory contents
214
42
fn hardlink_directory_tree_recursive<'a>(
215
42
    src: &'a Path,
216
42
    dst: &'a Path,
217
42
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>> {
218
42
    Box::pin(async move {
219
42
        let mut entries = fs::read_dir(src)
220
42
            .await
221
42
            .err_tip(|| 
format!0
("Failed to read directory: {}",
src0
.
display0
()))
?0
;
222
223
91
        while let Some(
entry49
) = entries
224
91
            .next_entry()
225
91
            .await
226
91
            .err_tip(|| 
format!0
("Failed to get next entry in: {}",
src0
.
display0
()))
?0
227
        {
228
49
            let entry_path = entry.path();
229
49
            let file_name = entry.file_name().into_string().map_err(|os_str| 
{0
230
0
                make_err!(
231
0
                    Code::InvalidArgument,
232
                    "Invalid UTF-8 in filename: {:?}",
233
                    os_str
234
                )
235
0
            })?;
236
237
49
            let dst_path = dst.join(&file_name);
238
            // `DirEntry::metadata` does NOT traverse symlinks (it has
239
            // `symlink_metadata`/lstat semantics), so `is_symlink()` below
240
            // correctly identifies symlink entries and the symlink branch
241
            // recreates them as symlinks rather than dereferencing them.
242
49
            let metadata = entry
243
49
                .metadata()
244
49
                .await
245
49
                .err_tip(|| 
format!0
("Failed to get metadata for: {}",
entry_path.display()0
))
?0
;
246
247
49
            if metadata.is_symlink() {
248
                // Recreate the symlink as a symlink. Checked BEFORE `is_dir()`
249
                // / `is_file()` so a symlink that resolves to a directory is
250
                // never treated as a real directory and recursed *through*
251
                // (which would dereference the link and potentially escape
252
                // the tree).
253
5
                let target = fs::read_link(&entry_path)
254
5
                    .await
255
5
                    .err_tip(|| 
format!0
("Failed to read symlink: {}",
entry_path.display()0
))
?0
;
256
257
                #[cfg(unix)]
258
5
                fs::symlink(&target, &dst_path)
259
5
                    .await
260
5
                    .err_tip(|| 
format!0
("Failed to create symlink: {}",
dst_path.display()0
))
?0
;
261
262
                #[cfg(windows)]
263
                {
264
                    if target.is_dir() {
265
                        fs::symlink_dir(&target, &dst_path).await.err_tip(|| {
266
                            format!("Failed to create directory symlink: {}", dst_path.display())
267
                        })?;
268
                    } else {
269
                        fs::symlink_file(&target, &dst_path).await.err_tip(|| {
270
                            format!("Failed to create file symlink: {}", dst_path.display())
271
                        })?;
272
                    }
273
                }
274
44
            } else if metadata.is_dir() {
275
                // Create subdirectory and recurse
276
8
                fs::create_dir(&dst_path)
277
8
                    .await
278
8
                    .err_tip(|| 
format!0
("Failed to create directory: {}",
dst_path.display()0
))
?0
;
279
280
8
                hardlink_directory_tree_recursive(&entry_path, &dst_path).await
?0
;
281
36
            } else if metadata.is_file() {
282
                // Hardlink the file
283
36
                fs::hard_link(&entry_path, &dst_path)
284
36
                    .await
285
36
                    .err_tip(|| 
{0
286
0
                        format!(
287
                            "Failed to hardlink {} to {}. This may occur if the source and destination are on different filesystems",
288
0
                            entry_path.display(),
289
0
                            dst_path.display()
290
                        )
291
0
                    })?;
292
0
            }
293
        }
294
295
42
        Ok(())
296
42
    })
297
42
}
298
299
/// Locks down a directory tree as an immutable cache entry: every **file** is
300
/// made read-only, every **directory** is left writable.
301
///
302
/// This is used by the worker's directory cache after it constructs a cache
303
/// entry. Files must be read-only because they are hardlinked into the CAS
304
/// (`FilesystemStore`) — keeping them immutable preserves the hermeticity
305
/// contract (actions cannot mutate inputs) and avoids mutating the shared
306
/// inode's mode for other in-flight actions.
307
///
308
/// Directories are deliberately left writable (0o755). Directories are *not*
309
/// hardlink-shared between cache entries — only file content inodes are — so a
310
/// writable directory mode is safe. Keeping cache-entry directories writable
311
/// means the materialized destination tree (an APFS `clonefile(2)` clone,
312
/// which copies modes verbatim, or a per-file hardlink walk, which creates
313
/// fresh directories) already has writable directories. Bazel actions declare
314
/// outputs at paths nested inside input subdirectories, so every directory in
315
/// the materialized tree must be writable for the worker to create those
316
/// outputs; doing it here, once per cache entry, removes the need for a
317
/// separate per-materialization recursive chmod walk.
318
///
319
/// # Arguments
320
/// * `dir` - Directory tree to lock down
321
///
322
/// # Platform Notes
323
/// - Unix: files get 0o555 (r-xr-xr-x); directories get 0o755 (rwxr-xr-x).
324
/// - Windows: files get `FILE_ATTRIBUTE_READONLY`; directories are left
325
///   writable.
326
///
327
/// Symlink entries in the tree are skipped (their own mode is not meaningful
328
/// and `chmod` would follow the link) - see `set_perms_recursive_impl`.
329
6
pub async fn set_readonly_recursive(dir: &Path) -> Result<(), Error> {
330
6
    error_if!(!dir.exists(), "Directory does not exist: {}", 
dir0
.
display0
());
331
332
6
    set_perms_recursive_impl(dir.to_path_buf(), set_readonly_one_path).await
333
6
}
334
335
/// Sets only the **directories** in a tree to writable for the current user,
336
/// leaving files untouched. This is the safe variant for cleanup paths that
337
/// need to delete a tree containing CAS-hardlinked files.
338
///
339
/// On unix, write permission on the parent directory is sufficient to unlink
340
/// files inside it — the files' own modes are irrelevant for unlinking. Chmoding
341
/// a CAS-hardlinked file would silently mutate the shared inode's permissions
342
/// for every other in-flight action that has hardlinked the same blob, leading
343
/// to EACCES on exec or EPERM on open in unrelated actions.
344
///
345
/// # Arguments
346
/// * `dir` - Directory whose directories should be made writable
347
///
348
/// # Platform Notes
349
/// - Unix: Sets directory permissions to 0o755 (rwxr-xr-x); files are NOT touched.
350
/// - Windows: Clears `FILE_ATTRIBUTE_READONLY` on directories only; files are NOT touched.
351
///
352
/// Symlink entries in the tree are skipped (their own mode is not meaningful
353
/// and `chmod` would follow the link) - see `set_perms_recursive_impl`.
354
5
pub async fn set_dir_writable_recursive(dir: &Path) -> Result<(), Error> {
355
5
    error_if!(!dir.exists(), "Directory does not exist: {}", 
dir0
.
display0
());
356
357
5
    set_perms_recursive_impl(dir.to_path_buf(), set_dir_writable_one_path).await
358
4
}
359
360
23
fn set_readonly_one_path(
361
23
    path: PathBuf,
362
23
    metadata: Metadata,
363
23
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send>> {
364
23
    Box::pin(async move {
365
        // Directories are left writable on purpose. They are not
366
        // hardlink-shared between cache entries — only file content inodes
367
        // are — so a writable directory mode cannot corrupt anything. Keeping
368
        // them writable means the materialized destination tree already
369
        // accepts the nested output files Bazel actions declare, with no
370
        // separate per-materialization chmod walk.
371
23
        if metadata.is_dir() {
372
            #[cfg(unix)]
373
            {
374
                use std::os::unix::fs::PermissionsExt;
375
12
                let mut perms = metadata.permissions();
376
12
                perms.set_mode(0o755);
377
378
12
                fs::set_permissions(&path, perms)
379
12
                    .await
380
12
                    .err_tip(|| 
format!0
("Failed to set permissions for: {}",
path.display()0
))
?0
;
381
            }
382
383
            // On Windows directories are already writable; clearing the
384
            // read-only attribute here would be a no-op, so leave them alone.
385
386
12
            return Ok(());
387
11
        }
388
389
        // Set the file to read-only.
390
        #[cfg(unix)]
391
        {
392
            use std::os::unix::fs::PermissionsExt;
393
11
            let mut perms = metadata.permissions();
394
395
            // Files get r-xr-xr-x (0o555): read and execute for everyone,
396
            // write for no one. Files use 0o555 rather than 0o444 so the
397
            // execute bit survives on cached executables — a stripped +x bit
398
            // makes an action's interpreter or wrapper script fail with
399
            // EACCES once the tree is materialized into a workspace. The
400
            // write bit stays cleared, so the hermeticity contract (inputs
401
            // are immutable) is unchanged.
402
11
            perms.set_mode(0o555);
403
404
11
            fs::set_permissions(&path, perms)
405
11
                .await
406
11
                .err_tip(|| 
format!0
("Failed to set permissions for: {}",
path.display()0
))
?0
;
407
        }
408
409
        #[cfg(windows)]
410
        {
411
            let mut perms = metadata.permissions();
412
            perms.set_readonly(true);
413
414
            fs::set_permissions(&path, perms)
415
                .await
416
                .err_tip(|| format!("Failed to set permissions for: {}", path.display()))?;
417
        }
418
419
11
        Ok(())
420
23
    })
421
23
}
422
423
14
fn set_dir_writable_one_path(
424
14
    path: PathBuf,
425
14
    metadata: Metadata,
426
14
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send>> {
427
14
    Box::pin(async move {
428
        // Files are intentionally skipped here. They may be hardlinked into
429
        // the CAS (FilesystemStore); chmoding them would corrupt the shared
430
        // inode's mode for every other in-flight action.
431
14
        if !metadata.is_dir() {
432
6
            return Ok(());
433
8
        }
434
435
        #[cfg(unix)]
436
        {
437
            use std::os::unix::fs::PermissionsExt;
438
8
            let mut perms = metadata.permissions();
439
8
            perms.set_mode(0o755);
440
441
8
            fs::set_permissions(&path, perms)
442
8
                .await
443
8
                .err_tip(|| 
format!0
("Failed to set permissions for: {}",
path.display()0
))
?0
;
444
        }
445
446
        #[cfg(windows)]
447
        {
448
            let mut perms = metadata.permissions();
449
            perms.set_readonly(false);
450
451
            fs::set_permissions(&path, perms)
452
                .await
453
                .err_tip(|| format!("Failed to set permissions for: {}", path.display()))?;
454
        }
455
456
8
        Ok(())
457
14
    })
458
14
}
459
460
51
fn set_perms_recursive_impl<'a, F>(
461
51
    path: PathBuf,
462
51
    perms_fn: F,
463
51
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>>
464
51
where
465
51
    F: Fn(PathBuf, Metadata) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send>>
466
51
        + Send
467
51
        + Copy
468
51
        + 'a,
469
{
470
51
    Box::pin(async move {
471
        // Use `symlink_metadata` (lstat) rather than `metadata` (stat) so the
472
        // walk inspects the entry *itself*, never the target a symlink points
473
        // at. This matters for input trees containing symlinks - e.g.
474
        // `.venv/bin/python3` created by rules_python / rules_apple venv
475
        // tooling. With plain `stat`, a symlink to a directory reports
476
        // `is_dir() == true` and the walk would recurse *through* the link
477
        // (escaping the tree, or descending into an unrelated directory), and
478
        // a symlink to a file would have `chmod` applied to it - and `chmod`
479
        // follows symlinks, so it mutates the target. A symlink whose target
480
        // does not exist (a dangling link, common when a venv points outside
481
        // the action's input set) then fails the whole walk with ENOENT -
482
        // the cause of directory-cache actions falling back to the slow
483
        // download path.
484
51
        let metadata = fs::symlink_metadata(&path)
485
51
            .await
486
51
            .err_tip(|| 
format!0
("Failed to get metadata for: {}",
path.display()0
))
?0
;
487
488
        // Symlinks are skipped entirely: their own mode is not meaningful, a
489
        // `chmod` on the link path would follow it and touch the target, and
490
        // descending into a symlinked directory would walk outside the tree.
491
        // The symlink entry itself is left exactly as created.
492
51
        if metadata.is_symlink() {
493
13
            return Ok(());
494
38
        }
495
496
38
        if metadata.is_dir() {
497
21
            let 
mut entries20
= fs::read_dir(&path)
498
21
                .await
499
20
                .err_tip(|| 
format!0
("Failed to read directory: {}",
path.display()0
))
?0
;
500
501
60
            while let Some(
entry40
) = entries
502
60
                .next_entry()
503
60
                .await
504
60
                .err_tip(|| 
format!0
("Failed to get next entry in: {}",
path.display()0
))
?0
505
            {
506
40
                set_perms_recursive_impl(entry.path(), perms_fn).await
?0
;
507
            }
508
17
        }
509
37
        perms_fn(path, metadata).await
510
50
    })
511
51
}
512
513
/// Calculates the total size of a directory tree in bytes.
514
/// Used for cache size tracking and LRU eviction.
515
///
516
/// # Arguments
517
/// * `dir` - Directory to calculate size for
518
///
519
/// # Returns
520
/// Total size in bytes, or Error if directory cannot be read
521
1
pub async fn calculate_directory_size(dir: &Path) -> Result<u64, Error> {
522
1
    error_if!(!dir.exists(), "Directory does not exist: {}", 
dir0
.
display0
());
523
524
1
    calculate_directory_size_impl(dir).await
525
1
}
526
527
4
fn calculate_directory_size_impl<'a>(
528
4
    path: &'a Path,
529
4
) -> Pin<Box<dyn Future<Output = Result<u64, Error>> + Send + 'a>> {
530
4
    Box::pin(async move {
531
4
        let metadata = fs::metadata(path)
532
4
            .await
533
4
            .err_tip(|| 
format!0
("Failed to get metadata for: {}",
path0
.
display0
()))
?0
;
534
535
4
        if metadata.is_file() {
536
2
            return Ok(metadata.len());
537
2
        }
538
539
2
        if !metadata.is_dir() {
540
0
            return Ok(0);
541
2
        }
542
543
2
        let mut total_size = 0u64;
544
2
        let mut entries = fs::read_dir(path)
545
2
            .await
546
2
            .err_tip(|| 
format!0
("Failed to read directory: {}",
path0
.
display0
()))
?0
;
547
548
5
        while let Some(
entry3
) = entries
549
5
            .next_entry()
550
5
            .await
551
5
            .err_tip(|| 
format!0
("Failed to get next entry in: {}",
path0
.
display0
()))
?0
552
        {
553
3
            total_size += calculate_directory_size_impl(&entry.path()).await
?0
;
554
        }
555
556
2
        Ok(total_size)
557
4
    })
558
4
}
559
560
#[cfg(test)]
561
mod tests {
562
    use std::path::PathBuf;
563
564
    use nativelink_macro::nativelink_test;
565
    use tempfile::TempDir;
566
    use tokio::io::AsyncWriteExt;
567
568
    use super::*;
569
570
8
    async fn create_test_directory() -> Result<(TempDir, PathBuf), Error> {
571
8
        let temp_dir = TempDir::new().err_tip(|| "Failed to create temp directory")
?0
;
572
8
        let test_dir = temp_dir.path().join("test_src");
573
574
8
        fs::create_dir(&test_dir).await
?0
;
575
576
        // Create a file
577
8
        let file1 = test_dir.join("file1.txt");
578
8
        let mut f = fs::File::create(&file1).await
?0
;
579
8
        f.write_all(b"Hello, World!").await
?0
;
580
8
        f.sync_all().await
?0
;
581
8
        drop(f);
582
583
        // Create a subdirectory with a file
584
8
        let subdir = test_dir.join("subdir");
585
8
        fs::create_dir(&subdir).await
?0
;
586
587
8
        let file2 = subdir.join("file2.txt");
588
8
        let mut f = fs::File::create(&file2).await
?0
;
589
8
        f.write_all(b"Nested file").await
?0
;
590
8
        f.sync_all().await
?0
;
591
8
        drop(f);
592
593
8
        Ok((temp_dir, test_dir))
594
8
    }
595
596
    #[nativelink_test("crate")]
597
    async fn test_hardlink_directory_tree() -> Result<(), Error> {
598
        let (temp_dir, src_dir) = create_test_directory().await?;
599
        let dst_dir = temp_dir.path().join("test_dst");
600
601
        // Hardlink the directory
602
        let method = hardlink_directory_tree(&src_dir, &dst_dir).await?;
603
604
        #[cfg(target_os = "macos")]
605
        assert_eq!(method, CloneMethod::Clonefile, "macOS should use clonefile");
606
        #[cfg(not(target_os = "macos"))]
607
        assert_eq!(
608
            method,
609
            CloneMethod::Hardlink,
610
            "non-macOS should use per-file hardlinks"
611
        );
612
613
        // Verify structure
614
        assert!(dst_dir.join("file1.txt").exists());
615
        assert!(dst_dir.join("subdir").is_dir());
616
        assert!(dst_dir.join("subdir/file2.txt").exists());
617
618
        // Verify contents
619
        let content1 = fs::read_to_string(dst_dir.join("file1.txt")).await?;
620
        assert_eq!(content1, "Hello, World!");
621
622
        let content2 = fs::read_to_string(dst_dir.join("subdir/file2.txt")).await?;
623
        assert_eq!(content2, "Nested file");
624
625
        // Linux: per-file hardlinks share inodes with the source.
626
        #[cfg(all(unix, not(target_os = "macos")))]
627
        {
628
            use std::os::unix::fs::MetadataExt;
629
            let src_meta = fs::metadata(src_dir.join("file1.txt")).await?;
630
            let dst_meta = fs::metadata(dst_dir.join("file1.txt")).await?;
631
            assert_eq!(
632
                src_meta.ino(),
633
                dst_meta.ino(),
634
                "Files should have same inode (hardlinked)"
635
            );
636
        }
637
638
        // macOS: clonefile(2) creates distinct inodes that share data via COW.
639
        #[cfg(target_os = "macos")]
640
        {
641
            use std::os::unix::fs::MetadataExt;
642
            let src_meta = fs::metadata(src_dir.join("file1.txt")).await?;
643
            let dst_meta = fs::metadata(dst_dir.join("file1.txt")).await?;
644
            assert_ne!(
645
                src_meta.ino(),
646
                dst_meta.ino(),
647
                "clonefile should create distinct inodes from source"
648
            );
649
        }
650
651
        Ok(())
652
    }
653
654
    /// Clones a tree that was locked down like a directory cache entry and
    /// checks that directories come out writable, files come out read-only,
    /// and the source's modes are unchanged.
    #[cfg(target_os = "macos")]
    #[nativelink_test("crate")]
    async fn test_clonefile_dirs_writable_files_readonly() -> Result<(), Error> {
        use std::os::unix::fs::PermissionsExt;

        // Permission bits (lower nine) of whatever `path` resolves to.
        async fn mode_bits(path: impl AsRef<Path>) -> Result<u32, Error> {
            let full_mode = fs::metadata(path).await?.permissions().mode();
            Ok(full_mode & 0o777)
        }

        let (temp_dir, src_dir) = create_test_directory().await?;
        // Source mimics a directory cache entry: writable dirs (0o755),
        // read-only files (0o555).
        set_readonly_recursive(&src_dir).await?;

        let dst_dir = temp_dir.path().join("clone_dst");
        hardlink_directory_tree(&src_dir, &dst_dir).await?;

        // Root: must accept the action's declared outputs.
        assert_eq!(
            mode_bits(&dst_dir).await?,
            0o755,
            "destination root must be writable"
        );

        // Nested subdir: writable as well. The clone copies the source's
        // modes and `set_readonly_recursive` left the source's directories
        // writable, so outputs nested inside input subdirectories can be
        // created with no extra chmod walk.
        assert_eq!(
            mode_bits(dst_dir.join("subdir")).await?,
            0o755,
            "cloned subdirs must be writable so nested outputs can be created"
        );

        // Existing file: still read-only. Inputs must not be writable —
        // actions may only write declared outputs.
        assert_eq!(
            mode_bits(dst_dir.join("file1.txt")).await?,
            0o555,
            "cloned files must inherit source read-only mode"
        );

        // The source is untouched by the clone: dirs writable, files
        // read-only.
        assert_eq!(
            mode_bits(src_dir.join("subdir")).await?,
            0o755,
            "source dir should still be writable after clone"
        );
        assert_eq!(
            mode_bits(src_dir.join("file1.txt")).await?,
            0o555,
            "source file should still be read-only after clone"
        );

        Ok(())
    }
723
724
    /// Declared outputs are written at the root of the action's working
    /// directory, so the materialized root must accept brand-new files.
    /// Input *files* in the clone are read-only (0o555); the directories
    /// that hold them are left writable by `set_readonly_recursive`.
    #[cfg(target_os = "macos")]
    #[nativelink_test("crate")]
    async fn test_clonefile_root_accepts_new_files() -> Result<(), Error> {
        const PAYLOAD: &[u8] = b"action output";

        let (temp_dir, src_dir) = create_test_directory().await?;
        set_readonly_recursive(&src_dir).await?;

        let clone_root = temp_dir.path().join("clone_dst");
        hardlink_directory_tree(&src_dir, &clone_root).await?;

        // Create a file that did not exist in the source, directly at the
        // root of the materialized tree, and read it back.
        let output_path = clone_root.join("new_output.bin");
        fs::write(&output_path, PAYLOAD).await?;
        let written = fs::read(&output_path).await?;
        assert_eq!(written, PAYLOAD);

        Ok(())
    }
742
743
    /// Hermeticity check: an action must not be able to modify its inputs.
    /// A write to an input file inside the materialized tree has to be
    /// rejected, and the source file it came from must keep its content.
    #[cfg(target_os = "macos")]
    #[nativelink_test("crate")]
    async fn test_clonefile_input_mutation_fails() -> Result<(), Error> {
        let (temp_dir, src_dir) = create_test_directory().await?;
        set_readonly_recursive(&src_dir).await?;

        let clone_root = temp_dir.path().join("clone_dst");
        hardlink_directory_tree(&src_dir, &clone_root).await?;

        // The input file carries no write bit (0o555), so the write is
        // expected to be refused with a permission error — the same outcome
        // Bazel's linux-sandbox / darwin-sandbox would produce.
        let write_result = fs::write(clone_root.join("file1.txt"), b"mutated").await;
        let err = write_result
            .expect_err("input file write should fail (file is 0o555, no write bit)");
        assert_eq!(err.kind(), std::io::ErrorKind::PermissionDenied);

        // The source side is unchanged.
        let original_text = fs::read_to_string(src_dir.join("file1.txt")).await?;
        assert_eq!(original_text, "Hello, World!");

        Ok(())
    }
767
768
    /// Writing to a file through the materialized tree must not change the
    /// corresponding file in the source tree. The source is deliberately
    /// *not* locked down here, so the write to the clone is permitted.
    #[cfg(target_os = "macos")]
    #[nativelink_test("crate")]
    async fn test_clonefile_cow_isolation() -> Result<(), Error> {
        let (temp_dir, src_dir) = create_test_directory().await?;

        let clone_root = temp_dir.path().join("clone_dst");
        hardlink_directory_tree(&src_dir, &clone_root).await?;

        // Overwrite a file via the clone only.
        let cloned_file = clone_root.join("file1.txt");
        fs::write(&cloned_file, b"mutated by clone").await?;

        // The source still holds its original bytes...
        let source_text = fs::read_to_string(src_dir.join("file1.txt")).await?;
        assert_eq!(
            source_text, "Hello, World!",
            "source must be untouched after writing to clone (COW)"
        );

        // ...while the clone reflects the new content.
        let clone_text = fs::read_to_string(&cloned_file).await?;
        assert_eq!(clone_text, "mutated by clone");

        Ok(())
    }
791
792
    /// Bazel actions may declare outputs at paths nested inside input
    /// subdirectories. `set_readonly_recursive` leaves directories writable
    /// and `clonefile(2)` copies modes verbatim, so the materialized tree
    /// accepts a nested output file without any extra
    /// `set_dir_writable_recursive` pass — the redundant work that
    /// `prepare_action_inputs` no longer performs.
    #[cfg(target_os = "macos")]
    #[nativelink_test("crate")]
    async fn test_clonefile_nested_output_without_dir_writable_walk() -> Result<(), Error> {
        use std::os::unix::fs::PermissionsExt;

        let (temp_dir, src_dir) = create_test_directory().await?;
        // Same lock-down the directory cache applies once a cache entry has
        // been built: directories writable, files read-only.
        set_readonly_recursive(&src_dir).await?;

        let clone_root = temp_dir.path().join("clone_dst");
        hardlink_directory_tree(&src_dir, &clone_root).await?;

        let cloned_subdir = clone_root.join("subdir");

        // An output nested inside a cloned subdirectory can be created
        // immediately, with no recursive chmod beforehand.
        let nested_output = cloned_subdir.join("nested_output.o");
        fs::write(&nested_output, b"action output").await?;
        assert_eq!(fs::read(&nested_output).await?, b"action output");

        // Input files keep their read-only mode, so hermeticity and the
        // CAS-hardlink inode invariant both hold.
        let input_file = cloned_subdir.join("file2.txt");
        let input_meta = fs::metadata(&input_file).await?;
        let file_mode = input_meta.permissions().mode() & 0o777;
        assert_eq!(file_mode, 0o555, "input files must remain read-only");

        // Attempting to overwrite an input is still refused.
        let err = fs::write(&input_file, b"mutated")
            .await
            .expect_err("input file write must fail (file is 0o555)");
        assert_eq!(err.kind(), std::io::ErrorKind::PermissionDenied);

        Ok(())
    }
836
837
    /// `set_readonly_recursive` locks a tree down as a cache entry: every
838
    /// file is made read-only, every directory is left writable. Directories
839
    /// stay writable because they are not hardlink-shared between cache
840
    /// entries, and a writable directory mode lets the materialized
841
    /// destination tree accept nested action outputs without a separate
842
    /// chmod walk.
843
    #[nativelink_test("crate")]
844
    async fn test_set_readonly_recursive() -> Result<(), Error> {
845
        let (_temp_dir, test_dir) = create_test_directory().await?;
846
847
        set_readonly_recursive(&test_dir).await?;
848
849
        // Files are read-only.
850
        let metadata = fs::metadata(test_dir.join("file1.txt")).await?;
851
        assert!(metadata.permissions().readonly());
852
853
        let metadata = fs::metadata(test_dir.join("subdir/file2.txt")).await?;
854
        assert!(metadata.permissions().readonly());
855
856
        // Directories are left writable — root and every nested subdir.
857
        #[cfg(unix)]
858
        {
859
            use std::os::unix::fs::PermissionsExt;
860
            for dir in [test_dir.clone(), test_dir.join("subdir")] {
861
                let mode = fs::metadata(&dir).await?.permissions().mode() & 0o777;
862
                assert_eq!(mode, 0o755, "{} must stay writable", dir.display());
863
            }
864
        }
865
        #[cfg(windows)]
866
        {
867
            // On Windows directories carry no read-only attribute that would
868
            // block creating children; assert they are not marked read-only.
869
            for dir in [test_dir.clone(), test_dir.join("subdir")] {
870
                assert!(
871
                    !fs::metadata(&dir).await?.permissions().readonly(),
872
                    "{} must stay writable",
873
                    dir.display()
874
                );
875
            }
876
        }
877
878
        Ok(())
879
    }
880
881
    /// `set_dir_writable_recursive` must make *every* directory in a tree
882
    /// writable — including nested subdirs — so the eviction cleanup path can
883
    /// `remove_dir_all` a cache entry. Files are left read-only because they
884
    /// may share a CAS inode via hardlink. This walk runs on already-read-only
885
    /// directory trees too, so the test first sets every file read-only with
886
    /// `set_readonly_recursive`.
887
    #[cfg(unix)]
888
    #[nativelink_test("crate")]
889
    async fn test_set_dir_writable_recursive_walks_nested_dirs() -> Result<(), Error> {
890
        use std::os::unix::fs::PermissionsExt;
891
892
        let (_temp_dir, test_dir) = create_test_directory().await?;
893
        // Lock files down, then explicitly force every directory read-only so
894
        // the walk has real work to do (the directory cache leaves dirs
895
        // writable, but the eviction path must cope with any mode).
896
        set_readonly_recursive(&test_dir).await?;
897
        for dir in [test_dir.clone(), test_dir.join("subdir")] {
898
            fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o555)).await?;
899
        }
900
901
        set_dir_writable_recursive(&test_dir).await?;
902
903
        // Every directory — the root and the nested subdir — must be writable.
904
        for dir in [test_dir.clone(), test_dir.join("subdir")] {
905
            let mode = fs::metadata(&dir).await?.permissions().mode() & 0o777;
906
            assert_eq!(mode, 0o755, "{} must be writable", dir.display());
907
        }
908
909
        // Files stay read-only — chmoding them would corrupt a shared CAS inode.
910
        let file_mode = fs::metadata(test_dir.join("subdir/file2.txt"))
911
            .await?
912
            .permissions()
913
            .mode()
914
            & 0o777;
915
        assert_eq!(file_mode, 0o555, "files must remain read-only");
916
917
        Ok(())
918
    }
919
920
    /// Regression test for the directory-cache fallback bug: input trees
921
    /// produced by `rules_python` / `rules_apple` venv tooling contain
922
    /// symlinks (e.g. `.venv/bin/python3`). `set_readonly_recursive` walks the
923
    /// materialized tree with `chmod`; `chmod` follows symlinks, so a symlink
924
    /// to a file would mutate the target and a *dangling* symlink (target
925
    /// outside the action's input set) would fail the whole walk with ENOENT
926
    /// — pushing the action onto the slow `download_to_directory` fallback.
927
    /// The walk must `lstat` and skip the symlink, leaving it intact.
928
    #[cfg(unix)]
929
    #[nativelink_test("crate")]
930
    async fn test_set_readonly_recursive_skips_symlinks() -> Result<(), Error> {
931
        let (_temp_dir, test_dir) = create_test_directory().await?;
932
933
        // A symlink to a path *inside* the same tree (the realistic
934
        // `.venv/bin/python3 -> ../../file1.txt` shape).
935
        let internal_link = test_dir.join("link_to_file1");
936
        fs::symlink("file1.txt", &internal_link).await?;
937
938
        // A symlink with a *relative* target that does not resolve (dangling).
939
        // This is the case that previously failed the walk with ENOENT.
940
        let dangling_link = test_dir.join("dangling_link");
941
        fs::symlink("../does/not/exist", &dangling_link).await?;
942
943
        // A symlink that points at a directory inside the tree. With `stat`
944
        // the walk would recurse *through* this link; with `lstat` it must
945
        // not.
946
        let dir_link = test_dir.join("link_to_subdir");
947
        fs::symlink("subdir", &dir_link).await?;
948
949
        // The walk must succeed despite the symlinks.
950
        set_readonly_recursive(&test_dir).await?;
951
952
        // Every symlink is preserved as a symlink with its target intact.
953
        for (link, expected_target) in [
954
            (&internal_link, "file1.txt"),
955
            (&dangling_link, "../does/not/exist"),
956
            (&dir_link, "subdir"),
957
        ] {
958
            let link_meta = fs::symlink_metadata(link).await?;
959
            assert!(
960
                link_meta.is_symlink(),
961
                "{} must still be a symlink after the walk",
962
                link.display()
963
            );
964
            assert_eq!(
965
                fs::read_link(link).await?,
966
                PathBuf::from(expected_target),
967
                "{} target must be unchanged",
968
                link.display()
969
            );
970
        }
971
972
        // The real files were still made read-only.
973
        assert!(
974
            fs::metadata(test_dir.join("file1.txt"))
975
                .await?
976
                .permissions()
977
                .readonly()
978
        );
979
980
        Ok(())
981
    }
982
983
    /// Companion to the read-only test: `set_dir_writable_recursive` must also
984
    /// be symlink-safe. It must not `chmod` a symlink (which would follow the
985
    /// link) and must not recurse through a symlinked directory.
986
    #[cfg(unix)]
987
    #[nativelink_test("crate")]
988
    async fn test_set_dir_writable_recursive_skips_symlinks() -> Result<(), Error> {
989
        use std::os::unix::fs::PermissionsExt;
990
991
        let (_temp_dir, test_dir) = create_test_directory().await?;
992
993
        // Symlink to a file inside the tree, a dangling relative symlink, and
994
        // a symlink pointing at a directory inside the tree.
995
        fs::symlink("file1.txt", test_dir.join("link_to_file1")).await?;
996
        fs::symlink("../does/not/exist", test_dir.join("dangling_link")).await?;
997
        fs::symlink("subdir", test_dir.join("link_to_subdir")).await?;
998
999
        // Mirror the directory cache's post-construction sequence.
1000
        set_readonly_recursive(&test_dir).await?;
1001
        set_dir_writable_recursive(&test_dir).await?;
1002
1003
        // Symlinks survive both walks untouched.
1004
        for (link, expected_target) in [
1005
            ("link_to_file1", "file1.txt"),
1006
            ("dangling_link", "../does/not/exist"),
1007
            ("link_to_subdir", "subdir"),
1008
        ] {
1009
            let link_path = test_dir.join(link);
1010
            assert!(
1011
                fs::symlink_metadata(&link_path).await?.is_symlink(),
1012
                "{} must still be a symlink",
1013
                link_path.display()
1014
            );
1015
            assert_eq!(
1016
                fs::read_link(&link_path).await?,
1017
                PathBuf::from(expected_target),
1018
                "{} target must be unchanged",
1019
                link_path.display()
1020
            );
1021
        }
1022
1023
        // Real directories were made writable; real files stayed read-only.
1024
        let dir_mode = fs::metadata(test_dir.join("subdir"))
1025
            .await?
1026
            .permissions()
1027
            .mode()
1028
            & 0o777;
1029
        assert_eq!(dir_mode, 0o755, "real subdir must be writable");
1030
        let file_mode = fs::metadata(test_dir.join("subdir/file2.txt"))
1031
            .await?
1032
            .permissions()
1033
            .mode()
1034
            & 0o777;
1035
        assert_eq!(file_mode, 0o555, "real files must stay read-only");
1036
1037
        Ok(())
1038
    }
1039
1040
    /// `hardlink_directory_tree` must recreate symlink entries as symlinks at
1041
    /// the destination (not dereference them), and the subsequent
1042
    /// `set_readonly_recursive` walk over the materialized tree must succeed.
1043
    /// This is the end-to-end shape `DirectoryCache::get_or_create` runs.
1044
    #[cfg(unix)]
1045
    #[nativelink_test("crate")]
1046
    async fn test_hardlink_directory_tree_preserves_symlinks() -> Result<(), Error> {
1047
        let (temp_dir, src_dir) = create_test_directory().await?;
1048
1049
        // Symlink to a sibling file, a dangling relative symlink, and a
1050
        // symlink to a subdirectory — all inside the source tree.
1051
        fs::symlink("file1.txt", src_dir.join("link_to_file1")).await?;
1052
        fs::symlink("../does/not/exist", src_dir.join("dangling_link")).await?;
1053
        fs::symlink("subdir", src_dir.join("link_to_subdir")).await?;
1054
1055
        let dst_dir = temp_dir.path().join("test_dst");
1056
        hardlink_directory_tree(&src_dir, &dst_dir).await?;
1057
1058
        // Each symlink is materialized as a symlink with its target intact.
1059
        for (link, expected_target) in [
1060
            ("link_to_file1", "file1.txt"),
1061
            ("dangling_link", "../does/not/exist"),
1062
            ("link_to_subdir", "subdir"),
1063
        ] {
1064
            let link_path = dst_dir.join(link);
1065
            assert!(
1066
                fs::symlink_metadata(&link_path).await?.is_symlink(),
1067
                "{} must be a symlink in the materialized tree",
1068
                link_path.display()
1069
            );
1070
            assert_eq!(
1071
                fs::read_link(&link_path).await?,
1072
                PathBuf::from(expected_target),
1073
                "{} target must be preserved",
1074
                link_path.display()
1075
            );
1076
        }
1077
1078
        // The read-only walk over the materialized tree must not choke on the
1079
        // symlinks (this is the operation that previously failed the cache).
1080
        set_readonly_recursive(&dst_dir).await?;
1081
1082
        Ok(())
1083
    }
1084
1085
    #[nativelink_test("crate")]
1086
    async fn test_calculate_directory_size() -> Result<(), Error> {
1087
        let (_temp_dir, test_dir) = create_test_directory().await?;
1088
1089
        let size = calculate_directory_size(&test_dir).await?;
1090
1091
        // "Hello, World!" = 13 bytes
1092
        // "Nested file" = 11 bytes
1093
        // Total = 24 bytes
1094
        assert_eq!(size, 24);
1095
1096
        Ok(())
1097
    }
1098
1099
    #[nativelink_test("crate")]
1100
    async fn test_hardlink_nonexistent_source() {
1101
        let temp_dir = TempDir::new().unwrap();
1102
        let src = temp_dir.path().join("nonexistent");
1103
        let dst = temp_dir.path().join("dest");
1104
1105
        let result = hardlink_directory_tree(&src, &dst).await;
1106
        assert!(result.is_err());
1107
    }
1108
1109
    #[nativelink_test("crate")]
1110
    async fn test_hardlink_existing_destination() -> Result<(), Error> {
1111
        let (temp_dir, src_dir) = create_test_directory().await?;
1112
        let dst_dir = temp_dir.path().join("existing");
1113
1114
        fs::create_dir(&dst_dir).await?;
1115
1116
        let result = hardlink_directory_tree(&src_dir, &dst_dir).await;
1117
        assert!(result.is_err());
1118
1119
        Ok(())
1120
    }
1121
}