Coverage Report

Created: 2026-06-04 10:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-worker/src/directory_cache.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::future::Future;
16
use core::pin::Pin;
17
use core::sync::atomic::{AtomicU64, Ordering};
18
use std::collections::HashMap;
19
use std::path::{Path, PathBuf};
20
use std::sync::Arc;
21
use std::time::SystemTime;
22
23
use nativelink_error::{Code, Error, ResultExt, make_err};
24
use nativelink_proto::build::bazel::remote::execution::v2::{
25
    Directory as ProtoDirectory, DirectoryNode, FileNode, SymlinkNode,
26
};
27
use nativelink_store::ac_utils::get_and_decode_digest;
28
use nativelink_store::cas_utils::is_zero_digest;
29
use nativelink_store::fast_slow_store::FastSlowStore;
30
use nativelink_store::filesystem_store::{FileEntry, FilesystemStore};
31
use nativelink_util::background_spawn;
32
use nativelink_util::common::DigestInfo;
33
use nativelink_util::fs_util::{CloneMethod, hardlink_directory_tree, set_dir_writable_recursive};
34
use nativelink_util::store_trait::{StoreKey, StoreLike};
35
use tokio::fs;
36
use tokio::sync::{Mutex, RwLock};
37
use tracing::{debug, trace, warn};
38
39
/// Tunables controlling how much the directory cache may hold and where it
/// keeps its entries on disk.
#[derive(Debug, Clone)]
pub struct DirectoryCacheConfig {
    /// Upper bound on the number of directory trees kept in the cache.
    pub max_entries: usize,
    /// Upper bound on the combined size of all cached trees, in bytes.
    /// A value of `0` disables the size limit.
    pub max_size_bytes: u64,
    /// Directory under which every cached tree is stored.
    pub cache_root: PathBuf,
}

impl Default for DirectoryCacheConfig {
    /// Defaults to 1000 entries and 10 GiB, stored in a
    /// `nativelink_directory_cache` folder inside the system temp directory.
    fn default() -> Self {
        const GIB: u64 = 1024 * 1024 * 1024;
        let cache_root = std::env::temp_dir().join("nativelink_directory_cache");
        Self {
            max_entries: 1000,
            max_size_bytes: 10 * GIB,
            cache_root,
        }
    }
}
59
60
/// Bookkeeping record kept in memory for one cached directory tree.
#[derive(Debug, Clone)]
struct CachedDirectoryMetadata {
    /// On-disk location of the cached tree.
    path: PathBuf,
    /// Total size of the tree in bytes.
    size: u64,
    /// Timestamp of the most recent access; the LRU eviction key.
    last_access: SystemTime,
    /// Number of callers currently using the entry.
    ref_count: usize,
}
72
73
/// High-performance directory cache that uses hardlinks to avoid repeated
74
/// directory reconstruction from the CAS.
75
///
76
/// When actions need input directories, instead of fetching and reconstructing
77
/// files from the CAS each time, we:
78
/// 1. Check if we've already constructed this exact directory (by digest)
79
/// 2. If yes, hardlink the entire tree to the action's workspace
80
/// 3. If no, construct it once and cache for future use
81
///
82
/// This dramatically reduces I/O and improves action startup time.
83
#[derive(Debug)]
84
pub struct DirectoryCache {
85
    /// Configuration
86
    config: DirectoryCacheConfig,
87
    /// Cache mapping digest -> metadata
88
    cache: Arc<RwLock<HashMap<DigestInfo, CachedDirectoryMetadata>>>,
89
    /// Lock for cache construction to prevent stampedes
90
    construction_locks: Arc<Mutex<HashMap<DigestInfo, Arc<Mutex<()>>>>>,
91
    /// CAS store for fetching directory protos and (fallback) file content.
92
    cas_store: Arc<FastSlowStore>,
93
    /// The `FastSlowStore`'s fast tier, if it is a `FilesystemStore`. When
94
    /// present, CAS blobs can be hardlinked directly into the cache entry
95
    /// (zero-copy) instead of fetched into RAM and rewritten. When absent
96
    /// (e.g. an unusual store layout) the cache falls back to fetch+write.
97
    filesystem_store: Option<Arc<FilesystemStore>>,
98
    /// Count of materializations that used APFS `clonefile(2)` (macOS only;
99
    /// always zero on other platforms).
100
    clonefile_hits: AtomicU64,
101
    /// Count of materializations that used per-file `fs::hard_link`.
102
    hardlink_hits: AtomicU64,
103
}
104
105
impl DirectoryCache {
106
    /// Creates a new `DirectoryCache`.
107
    ///
108
    /// `cas_store` is the worker's `FastSlowStore`. Its fast tier is expected
109
    /// to be a `FilesystemStore`; when it is, `construct_directory` hardlinks
110
    /// CAS blobs directly into the cache entry instead of copying them.
111
15
    pub async fn new(
112
15
        config: DirectoryCacheConfig,
113
15
        cas_store: Arc<FastSlowStore>,
114
15
    ) -> Result<Self, Error> {
115
        // Ensure cache root exists
116
15
        fs::create_dir_all(&config.cache_root).await.err_tip(|| 
{0
117
0
            format!(
118
                "Failed to create cache root: {}",
119
0
                config.cache_root.display()
120
            )
121
0
        })?;
122
123
        // Mirror RunningActionsManagerImpl: the fast tier is normally a
124
        // FilesystemStore. If the downcast fails the cache still works — it
125
        // just falls back to the fetch+write path for every file.
126
15
        let filesystem_store = cas_store
127
15
            .fast_store()
128
15
            .downcast_ref::<FilesystemStore>(None)
129
15
            .and_then(FilesystemStore::get_arc);
130
15
        if filesystem_store.is_none() {
131
0
            warn!(
132
                "DirectoryCache fast store is not a FilesystemStore; \
133
                 CAS blobs will be copied instead of hardlinked"
134
            );
135
15
        }
136
137
15
        Ok(Self {
138
15
            config,
139
15
            cache: Arc::new(RwLock::new(HashMap::new())),
140
15
            construction_locks: Arc::new(Mutex::new(HashMap::new())),
141
15
            cas_store,
142
15
            filesystem_store,
143
15
            clonefile_hits: AtomicU64::new(0),
144
15
            hardlink_hits: AtomicU64::new(0),
145
15
        })
146
15
    }
147
148
    /// Records which kernel mechanism materialized a tree, for observability.
149
32
    fn record_clone_method(&self, method: CloneMethod) {
150
32
        let counter = match method {
151
0
            CloneMethod::Clonefile => &self.clonefile_hits,
152
32
            CloneMethod::Hardlink => &self.hardlink_hits,
153
        };
154
32
        counter.fetch_add(1, Ordering::Relaxed);
155
32
    }
156
157
    /// Gets or creates a directory in the cache, then hardlinks it to the destination
158
    ///
159
    /// # Arguments
160
    /// * `digest` - Digest of the root Directory proto
161
    /// * `dest_path` - Where to hardlink/create the directory
162
    ///
163
    /// # Returns
164
    /// * `Ok(true)` - Cache hit (directory was hardlinked)
165
    /// * `Ok(false)` - Cache miss (directory was constructed)
166
    /// * `Err` - Error during construction or hardlinking
167
33
    pub async fn get_or_create(&self, digest: DigestInfo, dest_path: &Path) -> Result<bool, Error> {
168
        // Fast path: check if already in cache.
169
        //
170
        // The cache write lock is held only long enough to bump `ref_count`
171
        // and snapshot the entry's path — NOT across the syscall-heavy
172
        // `hardlink_directory_tree`. A global lock held across that
173
        // materialization serializes every concurrent `get_or_create`. The
174
        // `ref_count` bump is what makes releasing the lock safe: eviction
175
        // skips any entry with `ref_count > 0`, so the cache path cannot be
176
        // deleted out from under the unlocked materialization.
177
33
        if let Some(
cache_path2
) = self.acquire_entry(&digest).await {
178
2
            debug!(?digest, ?cache_path, "Directory cache HIT");
179
2
            let result = hardlink_directory_tree(&cache_path, dest_path).await;
180
2
            self.release_entry(&digest).await;
181
2
            match result {
182
2
                Ok(method) => {
183
2
                    self.record_clone_method(method);
184
2
                    return Ok(true);
185
                }
186
0
                Err(e) => {
187
0
                    warn!(
188
                        ?digest,
189
                        error = ?e,
190
                        "Failed to hardlink from cache, will reconstruct"
191
                    );
192
                    // Fall through to reconstruction.
193
                }
194
            }
195
31
        }
196
197
31
        debug!(?digest, "Directory cache MISS");
198
199
        // Single-flight: only one task constructs a given digest at a time.
200
        // Concurrent callers for the same digest block on this per-digest
201
        // mutex; when the constructor finishes and inserts the entry, the
202
        // waiters wake, find it in the cache, and materialize their own
203
        // destination from the single shared cache entry.
204
31
        let construction_lock = {
205
31
            let mut locks = self.construction_locks.lock().await;
206
31
            locks
207
31
                .entry(digest)
208
31
                .or_insert_with(|| 
Arc::new16
(
Mutex::new16
(
()16
)))
209
31
                .clone()
210
        };
211
31
        let _guard = construction_lock.lock().await;
212
213
        // Run the construction/materialization under the per-digest guard,
214
        // then drop the per-digest mutex from the stampede map regardless of
215
        // outcome so it cannot grow unbounded. The guard (`_guard`) is still
216
        // held until the end of this function — `forget_construction_lock`
217
        // only unmaps the Arc; any waiter already cloned it before blocking.
218
31
        let result = self.construct_and_materialize(digest, dest_path).await;
219
31
        self.forget_construction_lock(&digest).await;
220
31
        result
221
33
    }
222
223
    /// The cache-miss body, run while holding the per-digest construction
224
    /// guard. Split out so `get_or_create` can unconditionally clean up the
225
    /// construction-lock map entry afterwards on every exit path.
226
31
    async fn construct_and_materialize(
227
31
        &self,
228
31
        digest: DigestInfo,
229
31
        dest_path: &Path,
230
31
    ) -> Result<bool, Error> {
231
        // Re-check: another task may have just constructed this digest while
232
        // we waited on the construction lock.
233
31
        if let Some(
cache_path15
) = self.acquire_entry(&digest).await {
234
15
            let result = hardlink_directory_tree(&cache_path, dest_path).await;
235
15
            self.release_entry(&digest).await;
236
15
            match result {
237
15
                Ok(method) => {
238
15
                    self.record_clone_method(method);
239
15
                    return Ok(true);
240
                }
241
0
                Err(e) => {
242
0
                    warn!(
243
                        ?digest,
244
                        error = ?e,
245
                        "Failed to hardlink after construction"
246
                    );
247
                    // Construct directly at dest_path as a last resort.
248
0
                    self.construct_directory(digest, dest_path).await?;
249
0
                    return Ok(false);
250
                }
251
            }
252
16
        }
253
254
        // Construct the directory in cache. `construct_directory` returns the
255
        // total tree size accumulated from `FileNode.digest.size_bytes` as it
256
        // builds — no post-hoc filesystem walk is needed. It also sets every
257
        // cache-entry directory's mode at creation time (0o755), so no
258
        // separate permission-fixup walk is needed either.
259
        //
260
        // The cache entry's *files* are deliberately never chmod'd here:
261
        // non-executable files are hardlinks to FilesystemStore CAS blobs (see
262
        // `create_file`), and chmoding such a file mutates the inode shared
263
        // with the CAS and every other in-flight action that hardlinked the
264
        // same blob — the inode-corruption bug PR #2347 fixed.
265
16
        let cache_path = self.get_cache_path(&digest);
266
16
        let 
size15
= self.construct_directory(digest, &cache_path).await
?1
;
267
268
        // Insert into the cache. Only the in-memory map mutation runs under
269
        // the write lock: `evict_if_needed` selects victims and removes them
270
        // from the map here, but their filesystem deletion is dispatched off
271
        // the lock so eviction I/O never serializes other callers.
272
        //
273
        // The new entry is inserted with `ref_count: 1` — pinned. The
274
        // hardlink-to-destination below runs unlocked, and a concurrent
275
        // `get_or_create` for an unrelated digest could otherwise pick this
276
        // brand-new, last-accessed-now entry as an eviction victim and delete
277
        // its tree mid-hardlink. The pin blocks that; `release_entry` drops it
278
        // to 0 once the hardlink is done.
279
15
        let evicted = {
280
15
            let mut cache = self.cache.write().await;
281
15
            let evicted = self.evict_if_needed(size, &mut cache);
282
15
            cache.insert(
283
15
                digest,
284
15
                CachedDirectoryMetadata {
285
15
                    path: cache_path.clone(),
286
15
                    size,
287
15
                    last_access: SystemTime::now(),
288
15
                    ref_count: 1,
289
15
                },
290
15
            );
291
15
            evicted
292
        };
293
15
        Self::dispatch_evictions(evicted);
294
295
        // Hardlink to destination (unlocked). The entry is pinned
296
        // (`ref_count == 1`) so it cannot be evicted from under this hardlink.
297
15
        let result = hardlink_directory_tree(&cache_path, dest_path).await;
298
15
        self.release_entry(&digest).await;
299
15
        let method = result.err_tip(|| "Failed to hardlink newly cached directory")
?0
;
300
15
        self.record_clone_method(method);
301
302
15
        Ok(false)
303
31
    }
304
305
    /// If `digest` is cached, bumps its `ref_count` (pinning it against
306
    /// eviction) and returns a snapshot of its on-disk path. The cache write
307
    /// lock is released before returning, so the caller can perform unlocked
308
    /// I/O against the returned path. Every successful `acquire_entry` MUST be
309
    /// balanced by exactly one `release_entry`.
310
64
    async fn acquire_entry(&self, digest: &DigestInfo) -> Option<PathBuf> {
311
64
        let mut cache = self.cache.write().await;
312
64
        let 
metadata17
= cache.get_mut(digest)
?47
;
313
17
        metadata.last_access = SystemTime::now();
314
17
        metadata.ref_count += 1;
315
17
        Some(metadata.path.clone())
316
64
    }
317
318
    /// Releases a pin taken by [`Self::acquire_entry`]. Tolerates the entry
319
    /// having been removed from the map in the interim (it cannot have been,
320
    /// because a non-zero `ref_count` blocks eviction, but be defensive).
321
32
    async fn release_entry(&self, digest: &DigestInfo) {
322
32
        let mut cache = self.cache.write().await;
323
32
        if let Some(metadata) = cache.get_mut(digest) {
324
32
            metadata.ref_count = metadata.ref_count.saturating_sub(1);
325
32
        
}0
326
32
    }
327
328
    /// Drops the per-digest construction mutex from the stampede map once
329
    /// construction (or the post-construction recheck) for `digest` is done.
330
    /// Without this the map grows unbounded over the worker's lifetime.
331
    ///
332
    /// Safe to call while holding the construction guard: a concurrent waiter
333
    /// already cloned the `Arc<Mutex>` before blocking, so removing the map
334
    /// entry only prevents *future* callers from joining this exact mutex —
335
    /// they will create a fresh one, re-check the cache, find the entry, and
336
    /// take the fast hardlink path. It never causes a redundant construct.
337
31
    async fn forget_construction_lock(&self, digest: &DigestInfo) {
338
31
        self.construction_locks.lock().await.remove(digest);
339
31
    }
340
341
    /// Constructs a directory from the CAS at the given path and returns the
342
    /// total size of the materialized tree in bytes.
343
    ///
344
    /// The size is accumulated from `FileNode.digest.size_bytes` in the
345
    /// `Directory` protos as the tree is built, rather than walking the
346
    /// filesystem afterwards with `fs::metadata` per file. Symlinks contribute
347
    /// nothing — a symlink's own inode is negligible and following it could
348
    /// double-count a file already counted via its `FileNode`.
349
    ///
350
    /// Each directory's final mode (0o755) is set at creation time, so no
351
    /// separate recursive permission pass is needed after construction.
352
21
    fn construct_directory<'a>(
353
21
        &'a self,
354
21
        digest: DigestInfo,
355
21
        dest_path: &'a Path,
356
21
    ) -> Pin<Box<dyn Future<Output = Result<u64, Error>> + Send + 'a>> {
357
21
        Box::pin(async move {
358
21
            debug!(?digest, ?dest_path, "Constructing directory");
359
360
            // Fetch the Directory proto
361
20
            let directory: ProtoDirectory =
362
21
                get_and_decode_digest(self.cas_store.as_ref(), digest.into())
363
21
                    .await
364
21
                    .err_tip(|| 
format!1
("Failed to fetch directory digest: {digest:?}"))
?1
;
365
366
            // Create the destination directory. It must be writable while it
367
            // is being populated; 0o755 is its final mode too, so set it now
368
            // (umask-independent) — no post-construction permission walk.
369
20
            self.create_dir_writable(dest_path).await
?0
;
370
371
20
            let mut total_size: u64 = 0;
372
373
            // Process files
374
20
            for 
file14
in &directory.files {
375
14
                self.create_file(dest_path, file).await
?0
;
376
14
                if let Some(file_digest) = &file.digest {
377
14
                    // size_bytes is non-negative; clamp defensively.
378
14
                    total_size += u64::try_from(file_digest.size_bytes).unwrap_or(0);
379
14
                
}0
380
            }
381
382
            // Process subdirectories recursively
383
20
            for 
dir_node5
in &directory.directories {
384
5
                total_size += self.create_subdirectory(dest_path, dir_node).await
?0
;
385
            }
386
387
            // Process symlinks
388
20
            for 
symlink2
in &directory.symlinks {
389
2
                self.create_symlink(dest_path, symlink).await
?0
;
390
            }
391
392
20
            Ok(total_size)
393
21
        })
394
21
    }
395
396
    /// Creates `dir` (and any missing parents) and sets its mode to 0o755 so
397
    /// that it is writable while the cache entry is being populated and stays
398
    /// at a stable, umask-independent final mode afterwards.
399
20
    async fn create_dir_writable(&self, dir: &Path) -> Result<(), Error> {
400
20
        fs::create_dir_all(dir)
401
20
            .await
402
20
            .err_tip(|| 
format!0
("Failed to create directory: {}",
dir0
.
display0
()))
?0
;
403
        #[cfg(unix)]
404
        {
405
            use std::os::unix::fs::PermissionsExt;
406
20
            fs::set_permissions(dir, std::fs::Permissions::from_mode(0o755))
407
20
                .await
408
20
                .err_tip(|| 
format!0
("Failed to set directory mode: {}",
dir0
.
display0
()))
?0
;
409
        }
410
20
        Ok(())
411
20
    }
412
413
    /// Creates a file from a `FileNode` inside a cache entry.
414
    ///
415
    /// The fast path hardlinks the `FilesystemStore` CAS blob directly into the
416
    /// cache entry — zero-copy, metadata-only — exactly like
417
    /// `download_to_directory`. A hardlinked file shares its inode with the CAS
418
    /// store (and every other action that hardlinked the same blob), so it MUST
419
    /// NOT be chmod'd: doing so is the inode-corruption bug PR #2347 fixed.
420
    ///
421
    /// This imposes two correctness rules, both handled here:
422
    ///  * Executable files (`FileNode.is_executable`) need the `+x` bit, which
423
    ///    cannot be applied to a shared CAS inode. They are given their own
424
    ///    private inode via fetch+write and then chmod'd — never hardlinked.
425
    ///  * If the blob is not locally hardlinkable (the fast tier is not a
426
    ///    `FilesystemStore`, or the blob is not present in it / was evicted),
427
    ///    fall back to fetch+write for that file rather than failing.
428
14
    async fn create_file(&self, parent: &Path, file_node: &FileNode) -> Result<(), Error> {
429
14
        let file_path = parent.join(&file_node.name);
430
14
        let digest = DigestInfo::try_from(
431
14
            file_node
432
14
                .digest
433
14
                .clone()
434
14
                .ok_or_else(|| 
make_err!0
(
Code::InvalidArgument0
, "File node missing digest"))
?0
,
435
        )
436
14
        .err_tip(|| "Invalid file digest")
?0
;
437
438
14
        trace!(?file_path, ?digest, "Creating file");
439
440
        // Zero-byte files (digest af1349b9...-0) are not stored in
441
        // FilesystemStore / many CAS backends, so fetching here returns
442
        // NotFound. In Bazel-style trees these show up frequently as empty
443
        // marker / config files (.linksearchpaths, empty .env, .toml, etc.),
444
        // and a single failure aborts the whole DirectoryCache construction.
445
        // Short-circuit and write the empty file directly.
446
14
        if is_zero_digest(digest) {
447
1
            fs::write(&file_path, b"")
448
1
                .await
449
1
                .err_tip(|| 
format!0
("Failed to write empty file: {}",
file_path.display()0
))
?0
;
450
1
            return Ok(());
451
13
        }
452
453
        // Executable files need their own inode to carry the +x bit without
454
        // mutating the shared CAS blob — copy, never hardlink.
455
13
        if file_node.is_executable {
456
1
            return self.copy_file_to(&digest, &file_path, true).await;
457
12
        }
458
459
        // Non-executable file: try to hardlink the CAS blob directly.
460
12
        if let Some(filesystem_store) = &self.filesystem_store {
461
12
            match self
462
12
                .hardlink_cas_blob(filesystem_store, &digest, &file_path)
463
12
                .await
464
            {
465
12
                Ok(()) => return Ok(()),
466
0
                Err(e) if e.code == Code::NotFound => {
467
                    // The blob is not in the filesystem tier (e.g. it lives
468
                    // only in the slow store, or was evicted). Fall through
469
                    // to fetch+write rather than failing the whole build.
470
0
                    trace!(
471
                        ?digest,
472
                        ?file_path,
473
                        "CAS blob not locally hardlinkable, copying instead"
474
                    );
475
                }
476
0
                Err(e) => return Err(e),
477
            }
478
0
        }
479
480
        // Fallback: fetch the blob and write a private copy. Non-executable,
481
        // but still made read-only (0o444) by copy_file_to so the materialized
482
        // input is immutable like the hardlink path.
483
0
        self.copy_file_to(&digest, &file_path, false).await
484
14
    }
485
486
    /// Hardlinks the `FilesystemStore` CAS blob for `digest` into `file_path`.
487
    /// Mirrors `download_to_directory`: populate the fast store, resolve the
488
    /// blob's on-disk path under the entry lock, then `fs::hard_link`.
489
    ///
490
    /// Returns a `NotFound` error if the blob is not present in the filesystem
491
    /// tier; callers fall back to fetch+write in that case.
492
12
    async fn hardlink_cas_blob(
493
12
        &self,
494
12
        filesystem_store: &FilesystemStore,
495
12
        digest: &DigestInfo,
496
12
        file_path: &Path,
497
12
    ) -> Result<(), Error> {
498
        // Ensure the blob is in the fast (filesystem) tier so it has an
499
        // on-disk file we can hardlink.
500
12
        self.cas_store
501
12
            .populate_fast_store(StoreKey::Digest(*digest))
502
12
            .await
503
12
            .err_tip(|| 
format!0
("Failed to populate fast store for {digest}"))
?0
;
504
505
12
        let file_entry = filesystem_store
506
12
            .get_file_entry_for_digest(digest)
507
12
            .await
508
12
            .err_tip(|| "Resolving CAS file entry for hardlink")
?0
;
509
510
12
        let file_path = file_path.to_path_buf();
511
12
        file_entry
512
12
            .get_file_path_locked(move |src| async move {
513
12
                fs::hard_link(&src, &file_path).await.err_tip(|| 
{0
514
0
                    format!(
515
                        "Failed to hardlink CAS blob into cache entry: {}",
516
0
                        file_path.display()
517
                    )
518
0
                })
519
24
            })
520
12
            .await
521
12
    }
522
523
    /// Fetches the blob for `digest` from the CAS and writes a private copy at
524
    /// `file_path`, then chmods it read-only (`0o555` when `executable`, else
525
    /// `0o444`). This is safe because the copy has its own inode, unshared with
526
    /// the CAS, so the chmod cannot mutate a shared blob.
527
1
    async fn copy_file_to(
528
1
        &self,
529
1
        digest: &DigestInfo,
530
1
        file_path: &Path,
531
1
        executable: bool,
532
1
    ) -> Result<(), Error> {
533
1
        let data = self
534
1
            .cas_store
535
1
            .get_part_unchunked(StoreKey::Digest(*digest), 0, None)
536
1
            .await
537
1
            .err_tip(|| 
format!0
("Failed to fetch file: {}",
file_path0
.
display0
()))
?0
;
538
539
1
        fs::write(file_path, data.as_ref())
540
1
            .await
541
1
            .err_tip(|| 
format!0
("Failed to write file: {}",
file_path0
.
display0
()))
?0
;
542
543
        #[cfg(unix)]
544
        {
545
            use std::os::unix::fs::PermissionsExt;
546
            // Read-only, matching the hermeticity contract for materialized
547
            // inputs: 0o555 (r-xr-xr-x) for executables so the +x bit survives,
548
            // 0o444 (r--r--r--) for data files. This file has its own private
549
            // inode (just written above), so chmoding it cannot affect any CAS
550
            // blob or another action's hardlink — unlike the hardlink path,
551
            // where the shared read-only blob must never be chmod'd.
552
1
            let mode = if executable { 0o555 } else { 
0o4440
};
553
1
            fs::set_permissions(file_path, std::fs::Permissions::from_mode(mode))
554
1
                .await
555
1
                .err_tip(|| 
format!0
("Failed to set permissions: {}",
file_path0
.
display0
()))
?0
;
556
        }
557
        #[cfg(not(unix))]
558
        let _ = executable;
559
560
1
        Ok(())
561
1
    }
562
563
    /// Creates a subdirectory from a `DirectoryNode`, returning the total size
564
    /// of the subtree it materializes.
565
5
    async fn create_subdirectory(
566
5
        &self,
567
5
        parent: &Path,
568
5
        dir_node: &DirectoryNode,
569
5
    ) -> Result<u64, Error> {
570
5
        let dir_path = parent.join(&dir_node.name);
571
5
        let digest =
572
5
            DigestInfo::try_from(dir_node.digest.clone().ok_or_else(|| 
{0
573
0
                make_err!(Code::InvalidArgument, "Directory node missing digest")
574
0
            })?)
575
5
            .err_tip(|| "Invalid directory digest")
?0
;
576
577
5
        trace!(?dir_path, ?digest, "Creating subdirectory");
578
579
        // Recursively construct subdirectory
580
5
        self.construct_directory(digest, &dir_path).await
581
5
    }
582
583
    /// Creates a symlink from a `SymlinkNode`
584
2
    async fn create_symlink(&self, parent: &Path, symlink: &SymlinkNode) -> Result<(), Error> {
585
2
        let link_path = parent.join(&symlink.name);
586
2
        let target = Path::new(&symlink.target);
587
588
2
        trace!(?link_path, ?target, "Creating symlink");
589
590
        #[cfg(unix)]
591
2
        fs::symlink(&target, &link_path)
592
2
            .await
593
2
            .err_tip(|| 
format!0
("Failed to create symlink: {}",
link_path.display()0
))
?0
;
594
595
        #[cfg(windows)]
596
        {
597
            // On Windows, we need to know if target is a directory
598
            // For now, assume files (can be improved later)
599
            fs::symlink_file(&target, &link_path)
600
                .await
601
                .err_tip(|| format!("Failed to create symlink: {}", link_path.display()))?;
602
        }
603
604
2
        Ok(())
605
2
    }
606
607
    /// Selects and removes victim entries from the in-memory `cache` map until
608
    /// it is within the entry-count and size budgets, and returns the on-disk
609
    /// paths of the removed entries.
610
    ///
611
    /// This is a pure in-memory operation — it does NO filesystem I/O and is
612
    /// not `async`. The caller runs it under the cache write lock and then,
613
    /// after releasing the lock, dispatches the returned paths for deletion
614
    /// via [`Self::dispatch_evictions`]. Keeping eviction's `remove_dir_all`
615
    /// off the write lock prevents one caller's eviction I/O from serializing
616
    /// every other concurrent `get_or_create`.
617
15
    fn evict_if_needed(
618
15
        &self,
619
15
        incoming_size: u64,
620
15
        cache: &mut HashMap<DigestInfo, CachedDirectoryMetadata>,
621
15
    ) -> Vec<PathBuf> {
622
15
        let mut evicted_paths = Vec::new();
623
624
        // Check entry count
625
17
        while cache.len() >= self.config.max_entries {
626
3
            let Some((
_size2
,
path2
)) = Self::evict_lru(cache) else {
627
                // nothing evictable (all entries pinned) — have to exit
628
1
                warn!(
629
1
                    current_items = cache.len(),
630
                    max_entries = self.config.max_entries,
631
                    "Unable to evict anything from directory_cache, will exceed max entries"
632
                );
633
1
                break;
634
            };
635
2
            evicted_paths.push(path);
636
        }
637
638
        // Check total size
639
15
        if self.config.max_size_bytes > 0 {
640
15
            let current_size: u64 = cache.values().map(|m| m.size).sum();
641
15
            let mut size_after = current_size + incoming_size;
642
643
15
            while size_after > self.config.max_size_bytes {
644
0
                let Some((e_size, path)) = Self::evict_lru(cache) else {
645
                    // nothing evictable (all entries pinned) — have to exit
646
0
                    warn!(
647
                        size_after,
648
                        max_size_bytes = self.config.max_size_bytes,
649
                        "Unable to evict anything from directory_cache, will exceed max size"
650
                    );
651
0
                    break;
652
                };
653
0
                size_after -= e_size;
654
0
                evicted_paths.push(path);
655
            }
656
0
        }
657
658
15
        evicted_paths
659
15
    }
660
661
    /// Removes the least-recently-used unpinned entry from the in-memory map
662
    /// and returns its `(size, path)`. Entries with `ref_count > 0` are
663
    /// in-flight materializations and are never selected — their on-disk tree
664
    /// must not be deleted while a caller is hardlinking from it.
665
    ///
666
    /// Pure in-memory; the actual filesystem deletion is the caller's job.
667
3
    fn evict_lru(
668
3
        cache: &mut HashMap<DigestInfo, CachedDirectoryMetadata>,
669
3
    ) -> Option<(u64, PathBuf)> {
670
3
        let 
to_evict2
= cache
671
3
            .iter()
672
3
            .filter(|(_, m)| 
m.ref_count2
== 0)
673
3
            .min_by_key(|(_, m)| m.last_access)
674
3
            .map(|(digest, _)| *digest)
?1
;
675
2
        let metadata = cache.remove(&to_evict)
?0
;
676
2
        debug!(
677
            digest = ?to_evict,
678
            size = metadata.size,
679
            "Evicting cached directory"
680
        );
681
2
        Some((metadata.size, metadata.path))
682
3
    }
683
684
    /// Dispatches filesystem deletion of evicted cache-entry trees onto a
685
    /// background task, so eviction I/O never runs under the cache write lock.
686
    ///
687
    /// Each tree's directories are chmod'd writable first
688
    /// (`set_dir_writable_recursive`) — never its files: a cache-entry file
689
    /// shares an inode with the `FilesystemStore` CAS blob and every action
690
    /// that hardlinked it, so chmoding it would corrupt that shared inode (the
691
    /// PR #2347 bug). Directory write permission alone is sufficient to unlink
692
    /// files on unix.
693
15
    fn dispatch_evictions(paths: Vec<PathBuf>) {
694
15
        if paths.is_empty() {
695
13
            return;
696
2
        }
697
2
        background_spawn!("directory_cache_evict", async move {
698
2
            for path in paths {
699
2
                if !path.exists() {
700
                    // Already gone (e.g. a prior cleanup, or the cache root
701
                    // was torn down). Nothing to do, and not an error.
702
0
                    continue;
703
2
                }
704
2
                if let Err(
e0
) = set_dir_writable_recursive(&path).await {
705
0
                    warn!(
706
                        ?path,
707
                        error = ?e,
708
                        "Unable to mark evicted directory writable, removal may fail"
709
                    );
710
1
                }
711
1
                if let Err(
e0
) = fs::remove_dir_all(&path).await {
712
0
                    warn!(
713
                        ?path,
714
                        error = ?e,
715
                        "Failed to remove evicted directory from disk"
716
                    );
717
0
                }
718
            }
719
0
        });
720
15
    }
721
722
    /// Gets the cache path for a digest
723
19
    fn get_cache_path(&self, digest: &DigestInfo) -> PathBuf {
724
19
        self.config.cache_root.join(format!("{digest}"))
725
19
    }
726
727
    /// Returns cache statistics
728
3
    pub async fn stats(&self) -> CacheStats {
729
3
        let cache = self.cache.read().await;
730
3
        let total_size: u64 = cache.values().map(|m| m.size).sum();
731
3
        let in_use = cache.values().filter(|m| m.ref_count > 0).count();
732
733
3
        CacheStats {
734
3
            entries: cache.len(),
735
3
            total_size_bytes: total_size,
736
3
            in_use_entries: in_use,
737
3
            clonefile_hits: self.clonefile_hits.load(Ordering::Relaxed),
738
3
            hardlink_hits: self.hardlink_hits.load(Ordering::Relaxed),
739
3
        }
740
3
    }
741
}
742
743
/// Statistics about the directory cache.
///
/// A plain value snapshot: every field is a counter, so the type is `Copy`,
/// comparable, and has an all-zero `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CacheStats {
    /// Number of entries currently held in the cache.
    pub entries: usize,
    /// Sum of the recorded sizes of all cached entries, in bytes.
    pub total_size_bytes: u64,
    /// Number of entries whose reference count is greater than zero.
    pub in_use_entries: usize,
    /// Materializations that used APFS `clonefile(2)` (macOS).
    pub clonefile_hits: u64,
    /// Materializations that used per-file `fs::hard_link`.
    pub hardlink_hits: u64,
}
754
755
#[cfg(test)]
756
mod tests {
757
    use nativelink_config::stores::{
758
        FastSlowSpec, FilesystemSpec, MemorySpec, StoreDirection, StoreSpec,
759
    };
760
    use nativelink_macro::nativelink_test;
761
    use nativelink_store::memory_store::MemoryStore;
762
    use nativelink_util::store_trait::Store;
763
    use prost::Message;
764
    use tempfile::TempDir;
765
766
    use super::*;
767
768
    /// Builds a `FastSlowStore` whose fast tier is a real `FilesystemStore`
769
    /// and whose slow tier is a `MemoryStore` — the same shape the worker
770
    /// wires up. Returns the `FastSlowStore` plus the slow `Store` handle so
771
    /// tests can seed blobs/protos into the slow tier.
772
9
    async fn make_fast_slow_store(temp_dir: &TempDir) -> (Arc<FastSlowStore>, Store) {
773
9
        let fast_spec = FilesystemSpec {
774
9
            content_path: temp_dir
775
9
                .path()
776
9
                .join("cas_content")
777
9
                .to_string_lossy()
778
9
                .into_owned(),
779
9
            temp_path: temp_dir
780
9
                .path()
781
9
                .join("cas_temp")
782
9
                .to_string_lossy()
783
9
                .into_owned(),
784
9
            eviction_policy: None,
785
9
            ..Default::default()
786
9
        };
787
9
        let slow_spec = MemorySpec::default();
788
9
        let fast_store: Arc<FilesystemStore> = FilesystemStore::new(&fast_spec).await.unwrap();
789
9
        let slow_store = MemoryStore::new(&slow_spec);
790
9
        let cas_store = FastSlowStore::new(
791
9
            &FastSlowSpec {
792
9
                fast: StoreSpec::Filesystem(fast_spec),
793
9
                slow: StoreSpec::Memory(slow_spec),
794
9
                fast_direction: StoreDirection::default(),
795
9
                slow_direction: StoreDirection::default(),
796
9
            },
797
9
            Store::new(fast_store),
798
9
            Store::new(slow_store.clone()),
799
        );
800
9
        (cas_store, Store::new(slow_store))
801
9
    }
802
803
    /// Uploads `content` to `store` under a digest derived from `tag`, returns
804
    /// the digest. `FastSlowStore`/`MemoryStore`/`FilesystemStore` do not
805
    /// verify content hashes, so a synthetic-but-unique digest is sufficient.
806
22
    async fn upload_blob(store: &Store, tag: u8, content: &[u8]) -> DigestInfo {
807
22
        let digest = DigestInfo::new([tag; 32], content.len() as u64);
808
22
        store
809
22
            .as_store_driver_pin()
810
22
            .update_oneshot(digest.into(), content.to_vec().into())
811
22
            .await
812
22
            .unwrap();
813
22
        digest
814
22
    }
815
816
    /// Seeds a one-file directory ("test.txt" = "Hello, World!") into the slow
817
    /// store and returns the `FastSlowStore` + the root directory digest.
818
4
    async fn setup_test_store(temp_dir: &TempDir) -> (Arc<FastSlowStore>, DigestInfo) {
819
4
        let (cas_store, slow_store) = make_fast_slow_store(temp_dir).await;
820
821
4
        let file_digest = upload_blob(&slow_store, 1, b"Hello, World!").await;
822
823
4
        let directory = ProtoDirectory {
824
4
            files: vec![FileNode {
825
4
                name: "test.txt".to_string(),
826
4
                digest: Some(file_digest.into()),
827
4
                is_executable: false,
828
4
                ..Default::default()
829
4
            }],
830
4
            directories: vec![],
831
4
            symlinks: vec![],
832
4
            ..Default::default()
833
4
        };
834
4
        let mut dir_data = Vec::new();
835
4
        directory.encode(&mut dir_data).unwrap();
836
4
        let dir_digest = upload_blob(&slow_store, 2, &dir_data).await;
837
838
4
        (cas_store, dir_digest)
839
4
    }
840
841
    #[nativelink_test]
842
    async fn test_directory_cache_basic() -> Result<(), Error> {
843
        let temp_dir = TempDir::new().unwrap();
844
        let cache_root = temp_dir.path().join("cache");
845
        let (store, dir_digest) = setup_test_store(&temp_dir).await;
846
847
        let config = DirectoryCacheConfig {
848
            max_entries: 10,
849
            max_size_bytes: 1024 * 1024,
850
            cache_root,
851
        };
852
853
        let cache = DirectoryCache::new(config, store).await?;
854
855
        // First access - cache miss
856
        let dest1 = temp_dir.path().join("dest1");
857
        let hit = cache.get_or_create(dir_digest, &dest1).await?;
858
        assert!(!hit, "First access should be cache miss");
859
        assert!(dest1.join("test.txt").exists());
860
        assert_eq!(
861
            fs::read(dest1.join("test.txt")).await.unwrap(),
862
            b"Hello, World!",
863
            "materialized file content must be byte-identical to the CAS blob"
864
        );
865
866
        // Second access - cache hit
867
        let dest2 = temp_dir.path().join("dest2");
868
        let hit = cache.get_or_create(dir_digest, &dest2).await?;
869
        assert!(hit, "Second access should be cache hit");
870
        assert!(dest2.join("test.txt").exists());
871
        assert_eq!(
872
            fs::read(dest2.join("test.txt")).await.unwrap(),
873
            b"Hello, World!",
874
            "cache-hit materialized content must be byte-identical"
875
        );
876
877
        // Verify stats
878
        let stats = cache.stats().await;
879
        assert_eq!(stats.entries, 1);
880
881
        // Two get_or_create calls succeeded → two materializations were
882
        // recorded. On macOS both should be clonefile; on Linux both hardlink.
883
        #[cfg(target_os = "macos")]
884
        {
885
            assert_eq!(stats.clonefile_hits, 2, "macOS should record 2 clones");
886
            assert_eq!(stats.hardlink_hits, 0);
887
        }
888
        #[cfg(not(target_os = "macos"))]
889
        {
890
            assert_eq!(stats.clonefile_hits, 0);
891
            assert_eq!(
892
                stats.hardlink_hits, 2,
893
                "non-macOS should record 2 hardlinks"
894
            );
895
        }
896
897
        Ok(())
898
    }
899
900
    /// A Directory containing a zero-byte file must be constructible even when
901
    /// the CAS has no entry for the zero-byte digest. In production CAS
902
    /// backends (`FilesystemStore` in particular) refuse to store zero-byte
903
    /// blobs, so without the short-circuit this is a `NotFound` error and 30%+
904
    /// of cache constructions fail (per PR #2243).
905
    #[nativelink_test]
906
    async fn test_directory_cache_zero_byte_file() -> Result<(), Error> {
907
        let temp_dir = TempDir::new().unwrap();
908
        let cache_root = temp_dir.path().join("cache");
909
        let (store, slow_store) = make_fast_slow_store(&temp_dir).await;
910
911
        // RFC 6234 / Bazel zero-byte SHA-256 digest, hash for b"".
912
        let zero_digest = DigestInfo::try_new(
913
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
914
            0,
915
        )
916
        .unwrap();
917
        // Deliberately do NOT upload the zero-byte blob — that's the whole
918
        // point: real CAS backends won't have it.
919
920
        let directory = ProtoDirectory {
921
            files: vec![FileNode {
922
                name: "empty.txt".to_string(),
923
                digest: Some(zero_digest.into()),
924
                is_executable: false,
925
                ..Default::default()
926
            }],
927
            directories: vec![],
928
            symlinks: vec![],
929
            ..Default::default()
930
        };
931
        let mut dir_data = Vec::new();
932
        directory.encode(&mut dir_data).unwrap();
933
        let dir_digest = upload_blob(&slow_store, 3, &dir_data).await;
934
935
        let config = DirectoryCacheConfig {
936
            max_entries: 10,
937
            max_size_bytes: 1024 * 1024,
938
            cache_root,
939
        };
940
        let cache = DirectoryCache::new(config, store).await?;
941
942
        let dest = temp_dir.path().join("dest_empty");
943
        let hit = cache.get_or_create(dir_digest, &dest).await?;
944
        assert!(!hit, "First construction should be a cache miss");
945
946
        let empty_path = dest.join("empty.txt");
947
        assert!(empty_path.exists(), "zero-byte file should be created");
948
        let metadata = fs::metadata(&empty_path).await.unwrap();
949
        assert_eq!(metadata.len(), 0, "zero-byte file must be 0 bytes");
950
951
        Ok(())
952
    }
953
954
    /// Regression test for CAS inode corruption during directory cache
955
    /// eviction.
956
    ///
957
    /// Background: a cached directory's files share an inode (via hardlink)
958
    /// with both the `FilesystemStore` CAS entry and every action workspace
959
    /// that has consumed this cached directory. The cleanup path that runs
960
    /// before `fs::remove_dir_all` used to call `set_readwrite_recursive`,
961
    /// which chmods every file in the tree to 0o644 — silently mutating the
962
    /// shared inode's mode for every in-flight action holding a hardlink to
963
    /// the same blob. In production this surfaced as EACCES on exec for
964
    /// `cc_wrapper.sh` (whose CAS mode is 0o555, but eviction turned it into
965
    /// 0o644, dropping the +x bit).
966
    ///
967
    /// This test models the real scenario: a "cached" directory tree whose
968
    /// files are hardlinked to a still-active "action workspace" file. The
969
    /// eviction cleanup runs on the cached tree, and we assert the active
970
    /// workspace file's mode is untouched. Before the fix this test fails
971
    /// because `set_readwrite_recursive` chmods the cached-side file, and
972
    /// the same inode underlies the workspace file.
973
    #[cfg(unix)]
974
    #[nativelink_test]
975
    async fn test_eviction_cleanup_preserves_hardlinked_file_mode() -> Result<(), Error> {
976
        use std::os::unix::fs::{MetadataExt, PermissionsExt};
977
978
        use nativelink_util::fs_util::{set_dir_writable_recursive, set_readonly_recursive};
979
980
        let temp_dir = TempDir::new().unwrap();
981
982
        // Build a "cached" directory tree mimicking what DirectoryCache
983
        // produces: a top-level dir with a nested subdir and an executable
984
        // file inside. Mode 0o555 matches the CAS mode of an executable like
985
        // `cc_wrapper.sh`.
986
        let cache_entry_dir = temp_dir.path().join("cache_entry");
987
        let nested_dir = cache_entry_dir.join("nested");
988
        fs::create_dir_all(&nested_dir).await.unwrap();
989
        let cached_file = nested_dir.join("cc_wrapper.sh");
990
        fs::write(&cached_file, b"#!/bin/sh\necho hi\n")
991
            .await
992
            .unwrap();
993
        fs::set_permissions(&cached_file, std::fs::Permissions::from_mode(0o555))
994
            .await
995
            .unwrap();
996
997
        // Lock the cached tree down the way DirectoryCache does after
998
        // construction (set_readonly_recursive). This makes every file
999
        // read-only (0o555) and leaves every directory writable (0o755).
1000
        set_readonly_recursive(&cache_entry_dir).await?;
1001
1002
        // Simulate an in-flight action workspace that has hardlinked the
1003
        // cached file. This is what `hardlink_directory_tree` does in
1004
        // `get_or_create` after the cached tree is built and locked down.
1005
        let action_workspace = temp_dir.path().join("action_workspace");
1006
        fs::create_dir_all(&action_workspace).await.unwrap();
1007
        let workspace_file = action_workspace.join("cc_wrapper.sh");
1008
        fs::hard_link(&cached_file, &workspace_file).await.unwrap();
1009
1010
        // Sanity check: cached file and workspace file share the same inode.
1011
        let cached_ino = fs::metadata(&cached_file).await.unwrap().ino();
1012
        let workspace_ino = fs::metadata(&workspace_file).await.unwrap().ino();
1013
        assert_eq!(
1014
            cached_ino, workspace_ino,
1015
            "workspace file must share inode with cached file (hardlinked)",
1016
        );
1017
        let workspace_mode_before = fs::metadata(&workspace_file)
1018
            .await
1019
            .unwrap()
1020
            .permissions()
1021
            .mode()
1022
            & 0o777;
1023
        assert_eq!(workspace_mode_before, 0o555);
1024
1025
        // Run the cleanup that `evict_lru` runs before removing the tree.
1026
        // After the fix this only chmods directories; before the fix this
1027
        // chmoded every file to 0o644, mutating the shared inode.
1028
        set_dir_writable_recursive(&cache_entry_dir).await?;
1029
1030
        // Critical assertion: the action workspace file's mode (i.e. the
1031
        // shared inode's mode) MUST be unchanged. If this fails, the cleanup
1032
        // path corrupted the inode for an in-flight action — that is the
1033
        // bug we are guarding against.
1034
        let workspace_mode_after = fs::metadata(&workspace_file)
1035
            .await
1036
            .unwrap()
1037
            .permissions()
1038
            .mode()
1039
            & 0o777;
1040
        assert_eq!(
1041
            workspace_mode_after, workspace_mode_before,
1042
            "eviction cleanup mutated the inode mode of an active workspace \
1043
             file (was 0o{workspace_mode_before:o}, now 0o{workspace_mode_after:o}); \
1044
             this is the CAS inode corruption bug",
1045
        );
1046
1047
        // We should still be able to remove the cached tree. This proves
1048
        // directory writability alone is sufficient to unlink files on unix.
1049
        fs::remove_dir_all(&cache_entry_dir).await.unwrap();
1050
        assert!(!cache_entry_dir.exists());
1051
1052
        // The workspace's file is still intact and the mode survives even
1053
        // after the cached tree is gone.
1054
        assert!(workspace_file.exists());
1055
        let workspace_mode_after_remove = fs::metadata(&workspace_file)
1056
            .await
1057
            .unwrap()
1058
            .permissions()
1059
            .mode()
1060
            & 0o777;
1061
        assert_eq!(workspace_mode_after_remove, workspace_mode_before);
1062
1063
        Ok(())
1064
    }
1065
1066
    /// Builds a nested directory tree in the CAS: a root directory containing
1067
    /// one file plus a subdirectory, and the subdirectory in turn containing a
1068
    /// file. Returns the `FastSlowStore` and the root directory's digest. Uses
1069
    /// the same `make_fast_slow_store` + `upload_blob` shape as the other tests
1070
    /// so the store matches `DirectoryCache::new`'s `Arc<FastSlowStore>` arg.
1071
    ///
1072
    /// Only used by `test_materialized_tree_dirs_writable_files_readonly`,
1073
    /// which is `#[cfg(unix)]`; gated to match so non-unix builds (Windows)
1074
    /// do not flag this helper as dead code.
1075
    #[cfg(unix)]
1076
1
    async fn setup_nested_test_store(temp_dir: &TempDir) -> (Arc<FastSlowStore>, DigestInfo) {
1077
1
        let (cas_store, slow_store) = make_fast_slow_store(temp_dir).await;
1078
1079
        // A file shared by both the root and the nested subdirectory.
1080
1
        let file_digest = upload_blob(&slow_store, 10, b"Hello, World!").await;
1081
1082
        // The nested subdirectory: contains a single file.
1083
1
        let subdir = ProtoDirectory {
1084
1
            files: vec![FileNode {
1085
1
                name: "nested.txt".to_string(),
1086
1
                digest: Some(file_digest.into()),
1087
1
                is_executable: false,
1088
1
                ..Default::default()
1089
1
            }],
1090
1
            directories: vec![],
1091
1
            symlinks: vec![],
1092
1
            ..Default::default()
1093
1
        };
1094
1
        let mut subdir_data = Vec::new();
1095
1
        subdir.encode(&mut subdir_data).unwrap();
1096
1
        let subdir_digest = upload_blob(&slow_store, 11, &subdir_data).await;
1097
1098
        // The root directory: one file plus the subdirectory above.
1099
1
        let root = ProtoDirectory {
1100
1
            files: vec![FileNode {
1101
1
                name: "root.txt".to_string(),
1102
1
                digest: Some(file_digest.into()),
1103
1
                is_executable: false,
1104
1
                ..Default::default()
1105
1
            }],
1106
1
            directories: vec![DirectoryNode {
1107
1
                name: "subdir".to_string(),
1108
1
                digest: Some(subdir_digest.into()),
1109
1
            }],
1110
1
            symlinks: vec![],
1111
1
            ..Default::default()
1112
1
        };
1113
1
        let mut root_data = Vec::new();
1114
1
        root.encode(&mut root_data).unwrap();
1115
1
        let root_digest = upload_blob(&slow_store, 12, &root_data).await;
1116
1117
1
        (cas_store, root_digest)
1118
1
    }
1119
1120
    /// Asserts every directory in `root` (the root itself and every nested
1121
    /// subdirectory) is writable and every file is read-only.
1122
    #[cfg(unix)]
1123
8
    fn assert_dirs_writable_files_readonly(
1124
8
        root: &Path,
1125
8
    ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + '_>> {
1126
8
        Box::pin(async move {
1127
            use std::os::unix::fs::PermissionsExt;
1128
1129
8
            let metadata = fs::symlink_metadata(root)
1130
8
                .await
1131
8
                .err_tip(|| 
format!0
("metadata for {}",
root0
.
display0
()))
?0
;
1132
8
            let mode = metadata.permissions().mode() & 0o777;
1133
1134
8
            if metadata.is_dir() {
1135
4
                assert_eq!(
1136
4
                    mode & 0o200,
1137
                    0o200,
1138
                    "directory {} must be writable (mode 0o{mode:o})",
1139
0
                    root.display(),
1140
                );
1141
4
                let mut entries = fs::read_dir(root).await
?0
;
1142
10
                while let Some(
entry6
) = entries.next_entry().await
?0
{
1143
6
                    assert_dirs_writable_files_readonly(&entry.path()).await
?0
;
1144
                }
1145
4
            } else if metadata.is_file() {
1146
4
                assert_eq!(
1147
4
                    mode & 0o222,
1148
                    0,
1149
                    "file {} must be read-only (mode 0o{mode:o})",
1150
0
                    root.display(),
1151
                );
1152
0
            }
1153
            // Symlinks: mode is not meaningful, skip.
1154
1155
8
            Ok(())
1156
8
        })
1157
8
    }
1158
1159
    /// After `get_or_create` materializes a tree — on both the fresh
1160
    /// cache-miss path and the cache-hit path — every directory in the
1161
    /// destination must be writable (so Bazel actions can create outputs at
1162
    /// nested declared paths) and every file must be read-only (the file
1163
    /// inodes are CAS-hardlinked; chmoding them would corrupt the shared
1164
    /// inode). `prepare_action_inputs` relies on this so it no longer needs a
1165
    /// separate `set_dir_writable_recursive` post-walk.
1166
    #[cfg(unix)]
1167
    #[nativelink_test]
1168
    async fn test_materialized_tree_dirs_writable_files_readonly() -> Result<(), Error> {
1169
        let temp_dir = TempDir::new().unwrap();
1170
        let cache_root = temp_dir.path().join("cache");
1171
        let (store, root_digest) = setup_nested_test_store(&temp_dir).await;
1172
1173
        let config = DirectoryCacheConfig {
1174
            max_entries: 10,
1175
            max_size_bytes: 1024 * 1024,
1176
            cache_root,
1177
        };
1178
        let cache = DirectoryCache::new(config, store).await?;
1179
1180
        // Fresh-materialize path (cache miss).
1181
        let miss_dest = temp_dir.path().join("dest_miss");
1182
        let hit = cache.get_or_create(root_digest, &miss_dest).await?;
1183
        assert!(!hit, "first access must be a cache miss");
1184
        assert!(miss_dest.join("subdir").join("nested.txt").exists());
1185
        assert_dirs_writable_files_readonly(&miss_dest).await?;
1186
1187
        // A nested output can be created with no separate chmod walk.
1188
        let nested_output = miss_dest.join("subdir").join("output.o");
1189
        fs::write(&nested_output, b"action output").await.err_tip(
1190
            || "creating a nested output must succeed without set_dir_writable_recursive",
1191
        )?;
1192
1193
        // Cache-hit path: a second materialization of the same digest.
1194
        let hit_dest = temp_dir.path().join("dest_hit");
1195
        let hit = cache.get_or_create(root_digest, &hit_dest).await?;
1196
        assert!(hit, "second access must be a cache hit");
1197
        assert!(hit_dest.join("subdir").join("nested.txt").exists());
1198
        assert_dirs_writable_files_readonly(&hit_dest).await?;
1199
1200
        // The cache-hit destination also accepts a nested output directly.
1201
        fs::write(hit_dest.join("subdir").join("output.o"), b"action output")
1202
            .await
1203
            .err_tip(|| "cache-hit destination must accept a nested output directly")?;
1204
1205
        Ok(())
1206
    }
1207
1208
    /// OPT #1: a non-executable file in a cache entry must be a hardlink to
1209
    /// the `FilesystemStore` CAS blob — sharing the same inode — rather than a
1210
    /// fresh copy. This is the zero-copy materialization the optimization
1211
    /// delivers.
1212
    #[cfg(unix)]
1213
    #[nativelink_test]
1214
    async fn test_construct_hardlinks_cas_blob() -> Result<(), Error> {
1215
        use std::os::unix::fs::MetadataExt;
1216
1217
        let temp_dir = TempDir::new().unwrap();
1218
        let cache_root = temp_dir.path().join("cache");
1219
        let (store, dir_digest) = setup_test_store(&temp_dir).await;
1220
1221
        // Resolve the filesystem-tier CAS blob path for the file before
1222
        // construction so we can compare inodes afterwards.
1223
        let filesystem_store = store
1224
            .fast_store()
1225
            .downcast_ref::<FilesystemStore>(None)
1226
            .unwrap()
1227
            .get_arc()
1228
            .unwrap();
1229
        // Pull the blob into the fast tier (construction does this too).
1230
        store
1231
            .populate_fast_store(StoreKey::Digest(DigestInfo::new([1u8; 32], 13)))
1232
            .await?;
1233
        let cas_ino = filesystem_store
1234
            .get_file_entry_for_digest(&DigestInfo::new([1u8; 32], 13))
1235
            .await?
1236
2
            .get_file_path_locked(|p| async move 
{1
Ok(
fs::metadata1
(&p).await
?0
.
ino1
()) })
1237
            .await?;
1238
1239
        let config = DirectoryCacheConfig {
1240
            max_entries: 10,
1241
            max_size_bytes: 1024 * 1024,
1242
            cache_root,
1243
        };
1244
        let cache = DirectoryCache::new(config, store).await?;
1245
1246
        let dest = temp_dir.path().join("dest");
1247
        let hit = cache.get_or_create(dir_digest, &dest).await?;
1248
        assert!(!hit, "first access is a miss");
1249
1250
        // The cache entry's file (not yet the dest, which is a clone on macOS)
1251
        // must share the CAS inode. The cache entry path is cache_root/<digest>.
1252
        let cache_entry_file = cache.get_cache_path(&dir_digest).join("test.txt");
1253
        let entry_ino = fs::metadata(&cache_entry_file).await?.ino();
1254
        assert_eq!(
1255
            entry_ino, cas_ino,
1256
            "cache-entry file must be hardlinked to the CAS blob inode (zero-copy)"
1257
        );
1258
1259
        // Content must still be byte-identical.
1260
        assert_eq!(
1261
            fs::read(&cache_entry_file).await?,
1262
            b"Hello, World!",
1263
            "hardlinked file content must match the CAS blob"
1264
        );
1265
1266
        Ok(())
1267
    }
1268
1269
    /// OPT #1 correctness: an executable file must NOT be hardlinked to the
1270
    /// shared CAS blob (chmoding it would corrupt the inode shared with the
1271
    /// CAS and every other action — the PR #2347 bug). It must instead get
1272
    /// its own private inode AND carry the +x bit.
1273
    #[cfg(unix)]
1274
    #[nativelink_test]
1275
    async fn test_construct_executable_gets_private_inode() -> Result<(), Error> {
1276
        use std::os::unix::fs::{MetadataExt, PermissionsExt};
1277
1278
        let temp_dir = TempDir::new().unwrap();
1279
        let cache_root = temp_dir.path().join("cache");
1280
        let (cas_store, slow_store) = make_fast_slow_store(&temp_dir).await;
1281
1282
        let script = b"#!/bin/sh\necho ran\n";
1283
        let file_digest = upload_blob(&slow_store, 7, script).await;
1284
1285
        let directory = ProtoDirectory {
1286
            files: vec![FileNode {
1287
                name: "run.sh".to_string(),
1288
                digest: Some(file_digest.into()),
1289
                is_executable: true,
1290
                ..Default::default()
1291
            }],
1292
            ..Default::default()
1293
        };
1294
        let mut dir_data = Vec::new();
1295
        directory.encode(&mut dir_data).unwrap();
1296
        let dir_digest = upload_blob(&slow_store, 8, &dir_data).await;
1297
1298
        // Resolve the CAS blob inode for the executable.
1299
        cas_store
1300
            .populate_fast_store(StoreKey::Digest(file_digest))
1301
            .await?;
1302
        let filesystem_store = cas_store
1303
            .fast_store()
1304
            .downcast_ref::<FilesystemStore>(None)
1305
            .unwrap()
1306
            .get_arc()
1307
            .unwrap();
1308
        let (cas_ino, cas_mode) = filesystem_store
1309
            .get_file_entry_for_digest(&file_digest)
1310
            .await?
1311
1
            .get_file_path_locked(|p| async move {
1312
1
                let m = fs::metadata(&p).await
?0
;
1313
1
                Ok((m.ino(), m.permissions().mode() & 0o777))
1314
2
            })
1315
            .await?;
1316
1317
        let config = DirectoryCacheConfig {
1318
            max_entries: 10,
1319
            max_size_bytes: 1024 * 1024,
1320
            cache_root,
1321
        };
1322
        let cache = DirectoryCache::new(config, cas_store).await?;
1323
1324
        let dest = temp_dir.path().join("dest");
1325
        cache.get_or_create(dir_digest, &dest).await?;
1326
1327
        let cache_entry_file = cache.get_cache_path(&dir_digest).join("run.sh");
1328
        let entry_meta = fs::metadata(&cache_entry_file).await?;
1329
        let entry_mode = entry_meta.permissions().mode() & 0o777;
1330
1331
        // Private inode: distinct from the shared CAS blob.
1332
        assert_ne!(
1333
            entry_meta.ino(),
1334
            cas_ino,
1335
            "executable must have its own inode, not the shared CAS blob inode"
1336
        );
1337
        // The +x bit is set on the cache entry.
1338
        assert_ne!(entry_mode & 0o111, 0, "executable bit must be set");
1339
        // Content byte-identical.
1340
        assert_eq!(fs::read(&cache_entry_file).await?, script);
1341
        // The CAS blob's mode was NOT mutated by the chmod of the private copy.
1342
        let cas_mode_after = filesystem_store
1343
            .get_file_entry_for_digest(&file_digest)
1344
            .await?
1345
1
            .get_file_path_locked(|p| async move {
1346
1
                Ok(fs::metadata(&p).await
?0
.permissions().mode() & 0o777)
1347
2
            })
1348
            .await?;
1349
        assert_eq!(
1350
            cas_mode_after, cas_mode,
1351
            "CAS blob mode must be untouched by the executable's private chmod"
1352
        );
1353
1354
        Ok(())
1355
    }
1356
1357
    /// OPT #1 fallback: when the CAS blob lives only in the slow tier and is
1358
    /// not locally hardlinkable, construction must still succeed by copying.
1359
    /// `populate_fast_store` resolves this in practice, but the fetch+write
1360
    /// fallback must remain correct and produce identical content.
1361
    #[nativelink_test]
1362
    async fn test_construct_file_content_roundtrip() -> Result<(), Error> {
1363
        let temp_dir = TempDir::new().unwrap();
1364
        let cache_root = temp_dir.path().join("cache");
1365
        let (store, dir_digest) = setup_test_store(&temp_dir).await;
1366
1367
        let config = DirectoryCacheConfig {
1368
            max_entries: 10,
1369
            max_size_bytes: 1024 * 1024,
1370
            cache_root,
1371
        };
1372
        let cache = DirectoryCache::new(config, store).await?;
1373
1374
        let dest = temp_dir.path().join("dest");
1375
        cache.get_or_create(dir_digest, &dest).await?;
1376
        assert_eq!(
1377
            fs::read(dest.join("test.txt")).await?,
1378
            b"Hello, World!",
1379
            "materialized content must round-trip the CAS blob exactly"
1380
        );
1381
1382
        Ok(())
1383
    }
1384
1385
    /// OPT #2: the cache entry's recorded size must equal the sum of
1386
    /// `FileNode.digest.size_bytes` across the whole (nested) tree —
1387
    /// accumulated during construction, with no post-hoc filesystem walk.
1388
    #[nativelink_test]
1389
    async fn test_size_accounting_from_digest_sizes() -> Result<(), Error> {
1390
        let temp_dir = TempDir::new().unwrap();
1391
        let cache_root = temp_dir.path().join("cache");
1392
        let (cas_store, slow_store) = make_fast_slow_store(&temp_dir).await;
1393
1394
        // Two files at the root, one file in a nested subdir.
1395
        let f1 = upload_blob(&slow_store, 10, b"aaaaaaaa").await; // 8 bytes
1396
        let f2 = upload_blob(&slow_store, 11, b"bbb").await; // 3 bytes
1397
        let f3 = upload_blob(&slow_store, 12, b"ccccc").await; // 5 bytes
1398
1399
        let sub = ProtoDirectory {
1400
            files: vec![FileNode {
1401
                name: "nested.bin".to_string(),
1402
                digest: Some(f3.into()),
1403
                is_executable: false,
1404
                ..Default::default()
1405
            }],
1406
            ..Default::default()
1407
        };
1408
        let mut sub_data = Vec::new();
1409
        sub.encode(&mut sub_data).unwrap();
1410
        let sub_digest = upload_blob(&slow_store, 13, &sub_data).await;
1411
1412
        let root = ProtoDirectory {
1413
            files: vec![
1414
                FileNode {
1415
                    name: "a.bin".to_string(),
1416
                    digest: Some(f1.into()),
1417
                    is_executable: false,
1418
                    ..Default::default()
1419
                },
1420
                FileNode {
1421
                    name: "b.bin".to_string(),
1422
                    digest: Some(f2.into()),
1423
                    is_executable: false,
1424
                    ..Default::default()
1425
                },
1426
            ],
1427
            directories: vec![DirectoryNode {
1428
                name: "sub".to_string(),
1429
                digest: Some(sub_digest.into()),
1430
            }],
1431
            ..Default::default()
1432
        };
1433
        let mut root_data = Vec::new();
1434
        root.encode(&mut root_data).unwrap();
1435
        let root_digest = upload_blob(&slow_store, 14, &root_data).await;
1436
1437
        let config = DirectoryCacheConfig {
1438
            max_entries: 10,
1439
            max_size_bytes: 1024 * 1024,
1440
            cache_root,
1441
        };
1442
        let cache = DirectoryCache::new(config, cas_store).await?;
1443
1444
        let dest = temp_dir.path().join("dest");
1445
        cache.get_or_create(root_digest, &dest).await?;
1446
1447
        let stats = cache.stats().await;
1448
        assert_eq!(
1449
            stats.total_size_bytes,
1450
            8 + 3 + 5,
1451
            "cache size must be the sum of all FileNode digest sizes (incl. nested)"
1452
        );
1453
1454
        Ok(())
1455
    }
1456
1457
    /// OPT #2: every directory in a cache entry — root and nested — must be
1458
    /// left at mode 0o755, set at creation time without a separate walk.
1459
    #[cfg(unix)]
1460
    #[nativelink_test]
1461
    async fn test_cache_entry_dirs_are_writable() -> Result<(), Error> {
1462
        use std::os::unix::fs::PermissionsExt;
1463
1464
        let temp_dir = TempDir::new().unwrap();
1465
        let cache_root = temp_dir.path().join("cache");
1466
        let (cas_store, slow_store) = make_fast_slow_store(&temp_dir).await;
1467
1468
        let f = upload_blob(&slow_store, 20, b"data").await;
1469
        let sub = ProtoDirectory {
1470
            files: vec![FileNode {
1471
                name: "leaf.txt".to_string(),
1472
                digest: Some(f.into()),
1473
                is_executable: false,
1474
                ..Default::default()
1475
            }],
1476
            ..Default::default()
1477
        };
1478
        let mut sub_data = Vec::new();
1479
        sub.encode(&mut sub_data).unwrap();
1480
        let sub_digest = upload_blob(&slow_store, 21, &sub_data).await;
1481
1482
        let root = ProtoDirectory {
1483
            directories: vec![DirectoryNode {
1484
                name: "sub".to_string(),
1485
                digest: Some(sub_digest.into()),
1486
            }],
1487
            ..Default::default()
1488
        };
1489
        let mut root_data = Vec::new();
1490
        root.encode(&mut root_data).unwrap();
1491
        let root_digest = upload_blob(&slow_store, 22, &root_data).await;
1492
1493
        let config = DirectoryCacheConfig {
1494
            max_entries: 10,
1495
            max_size_bytes: 1024 * 1024,
1496
            cache_root,
1497
        };
1498
        let cache = DirectoryCache::new(config, cas_store).await?;
1499
        let dest = temp_dir.path().join("dest");
1500
        cache.get_or_create(root_digest, &dest).await?;
1501
1502
        let entry_root = cache.get_cache_path(&root_digest);
1503
        for dir in [entry_root.clone(), entry_root.join("sub")] {
1504
            let mode = fs::metadata(&dir).await?.permissions().mode() & 0o777;
1505
            assert_eq!(
1506
                mode,
1507
                0o755,
1508
                "cache-entry directory {} must be 0o755",
1509
                dir.display()
1510
            );
1511
        }
1512
1513
        Ok(())
1514
    }
1515
1516
    /// OPT #5: many concurrent `get_or_create` calls for the *same* digest
1517
    /// must be single-flighted — the directory is constructed exactly once
1518
    /// and every caller materializes its own destination from that single
1519
    /// cache entry. Single-flight is observable through the hit/miss return:
1520
    /// exactly one call sees a miss (`false` — it did the construct), all
1521
    /// others see a hit (`true`). Every destination must also be correct.
1522
    ///
1523
    /// Runs on a multi-threaded runtime so the calls truly race.
1524
    #[nativelink_test(flavor = "multi_thread", worker_threads = 4)]
1525
    async fn test_concurrent_get_or_create_single_flight() -> Result<(), Error> {
1526
        use futures::future::join_all;
1527
1528
        let temp_dir = TempDir::new().unwrap();
1529
        let cache_root = temp_dir.path().join("cache");
1530
        let (store, dir_digest) = setup_test_store(&temp_dir).await;
1531
1532
        let config = DirectoryCacheConfig {
1533
            max_entries: 10,
1534
            max_size_bytes: 1024 * 1024,
1535
            cache_root,
1536
        };
1537
        let cache = Arc::new(DirectoryCache::new(config, store).await?);
1538
1539
        // Fire 16 concurrent requests for the same digest, each to its own
1540
        // destination.
1541
        #[allow(clippy::items_after_statements)]
1542
        const N: usize = 16;
1543
        let dests: Vec<PathBuf> = (0..N)
1544
16
            .map(|i| temp_dir.path().join(format!("dest_{i}")))
1545
            .collect();
1546
16
        let futures = dests.iter().map(|dest| {
1547
16
            let cache = Arc::clone(&cache);
1548
16
            let dest = dest.clone();
1549
16
            async move { cache.get_or_create(dir_digest, &dest).await }
1550
16
        });
1551
        let results: Vec<bool> = join_all(futures)
1552
            .await
1553
            .into_iter()
1554
            .collect::<Result<_, _>>()?;
1555
1556
        // Exactly one construction (one miss); the rest are cache hits.
1557
16
        let misses = results.iter().filter(|hit| !**hit).count();
1558
        assert_eq!(
1559
            misses, 1,
1560
            "exactly one caller should construct the directory (single-flight)"
1561
        );
1562
        assert_eq!(results.iter().filter(|hit| **hit).count(), N - 1);
1563
1564
        // The cache holds exactly one entry, and every destination has the
1565
        // correct, byte-identical content.
1566
        let stats = cache.stats().await;
1567
        assert_eq!(stats.entries, 1, "digest must be cached exactly once");
1568
        for dest in &dests {
1569
            assert_eq!(
1570
                fs::read(dest.join("test.txt")).await?,
1571
                b"Hello, World!",
1572
                "every concurrently-materialized destination must be correct"
1573
            );
1574
        }
1575
1576
        Ok(())
1577
    }
1578
}