Coverage Report

Created: 2025-10-21 10:53

/build/source/nativelink-store/src/fast_slow_store.rs
Line
Count
Source
1
// Copyright 2024-2025 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::borrow::BorrowMut;
16
use core::cmp::{max, min};
17
use core::ops::Range;
18
use core::pin::Pin;
19
use core::sync::atomic::{AtomicU64, Ordering};
20
use std::collections::HashMap;
21
use std::ffi::OsString;
22
use std::sync::{Arc, Weak};
23
24
use async_trait::async_trait;
25
use futures::{FutureExt, join};
26
use nativelink_config::stores::FastSlowSpec;
27
use nativelink_error::{Code, Error, ResultExt, make_err};
28
use nativelink_metric::MetricsComponent;
29
use nativelink_util::buf_channel::{
30
    DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair,
31
};
32
use nativelink_util::fs;
33
use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator};
34
use nativelink_util::store_trait::{
35
    RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, StoreOptimizations,
36
    UploadSizeInfo, slow_update_store_with_file,
37
};
38
use parking_lot::Mutex;
39
use tokio::sync::OnceCell;
40
41
// TODO(palfrey) This store needs to be evaluated for more efficient memory usage;
42
// there are many copies happening internally.
43
44
type Loader = Arc<OnceCell<()>>;
45
46
// TODO(palfrey) We should consider copying the data in the background to allow the
47
// client to hang up while the data is buffered. An alternative is to possibly make a
48
// "BufferedStore" that could be placed on the "slow" store that would hang up early
49
// if data is in the buffer.
50
#[derive(Debug, MetricsComponent)]
51
pub struct FastSlowStore {
52
    #[metric(group = "fast_store")]
53
    fast_store: Store,
54
    #[metric(group = "slow_store")]
55
    slow_store: Store,
56
    weak_self: Weak<Self>,
57
    #[metric]
58
    metrics: FastSlowStoreMetrics,
59
    // De-duplicate requests for the fast store: only the first request streams; others
60
    // are blocked.  This may feel like it's causing a slow down of tasks, but
61
    // actually it's faster because we're not downloading the file multiple
62
    // times or doing loads of duplicate IO.
63
    populating_digests: Mutex<HashMap<StoreKey<'static>, Loader>>,
64
}
65
66
// This guard ensures that the populating_digests is cleared even if the future
67
// is dropped, making it cancel-safe.
68
struct LoaderGuard<'a> {
69
    weak_store: Weak<FastSlowStore>,
70
    key: StoreKey<'a>,
71
    loader: Option<Loader>,
72
}
73
74
impl LoaderGuard<'_> {
75
12
    async fn get_or_try_init<E, F, Fut>(&self, f: F) -> Result<(), E>
76
12
    where
77
12
        F: FnOnce() -> Fut,
78
12
        Fut: Future<Output = Result<(), E>>,
79
12
    {
80
12
        if let Some(loader) = &self.loader {
  Branch (80:16): [True: 8, False: 0]
  Branch (80:16): [True: 4, False: 0]
  Branch (80:16): [Folded - Ignored]
81
12
            loader.get_or_try_init(f).await.map(|&()| ())
82
        } else {
83
            // This should be unreachable, but fall back to calling f() directly anyway.
84
0
            f().await
85
        }
86
12
    }
87
}
88
89
impl Drop for LoaderGuard<'_> {
90
12
    fn drop(&mut self) {
91
12
        let Some(store) = self.weak_store.upgrade() else {
  Branch (91:13): [True: 12, False: 0]
  Branch (91:13): [Folded - Ignored]
92
            // The store has already gone away, nothing to remove from.
93
0
            return;
94
        };
95
12
        let Some(loader) = self.loader.take() else {
  Branch (95:13): [True: 12, False: 0]
  Branch (95:13): [Folded - Ignored]
96
            // This should never happen, but we do it to be safe.
97
0
            return;
98
        };
99
100
12
        let mut guard = store.populating_digests.lock();
101
12
        if let std::collections::hash_map::Entry::Occupied(occupied_entry) =
  Branch (101:16): [True: 12, False: 0]
  Branch (101:16): [Folded - Ignored]
102
12
            guard.entry(self.key.borrow().into_owned())
103
        {
104
12
            if Arc::ptr_eq(occupied_entry.get(), &loader) {
  Branch (104:16): [True: 12, False: 0]
  Branch (104:16): [Folded - Ignored]
105
12
                drop(loader);
106
12
                if Arc::strong_count(occupied_entry.get()) == 1 {
  Branch (106:20): [True: 12, False: 0]
  Branch (106:20): [Folded - Ignored]
107
12
                    // This is the last loader, so remove it.
108
12
                    occupied_entry.remove();
109
12
                }
110
0
            }
111
0
        }
112
12
    }
113
}
114
115
impl FastSlowStore {
116
33
    pub fn new(_spec: &FastSlowSpec, fast_store: Store, slow_store: Store) -> Arc<Self> {
117
33
        Arc::new_cyclic(|weak_self| Self {
118
33
            fast_store,
119
33
            slow_store,
120
33
            weak_self: weak_self.clone(),
121
33
            metrics: FastSlowStoreMetrics::default(),
122
33
            populating_digests: Mutex::new(HashMap::new()),
123
33
        })
124
33
    }
125
126
0
    pub const fn fast_store(&self) -> &Store {
127
0
        &self.fast_store
128
0
    }
129
130
0
    pub const fn slow_store(&self) -> &Store {
131
0
        &self.slow_store
132
0
    }
133
134
2
    pub fn get_arc(&self) -> Option<Arc<Self>> {
135
2
        self.weak_self.upgrade()
136
2
    }
137
138
12
    fn get_loader<'a>(&self, key: StoreKey<'a>) -> LoaderGuard<'a> {
139
        // Get a single loader instance that's used to populate the fast store
140
        // for this digest.  If another request comes in then it's de-duplicated.
141
12
        let loader = match self
142
12
            .populating_digests
143
12
            .lock()
144
12
            .entry(key.borrow().into_owned())
145
        {
146
0
            std::collections::hash_map::Entry::Occupied(occupied_entry) => {
147
0
                occupied_entry.get().clone()
148
            }
149
12
            std::collections::hash_map::Entry::Vacant(vacant_entry) => {
150
12
                vacant_entry.insert(Arc::new(OnceCell::new())).clone()
151
            }
152
        };
153
12
        LoaderGuard {
154
12
            weak_store: self.weak_self.clone(),
155
12
            key,
156
12
            loader: Some(loader),
157
12
        }
158
12
    }
159
160
12
    async fn populate_and_maybe_stream(
161
12
        self: Pin<&Self>,
162
12
        key: StoreKey<'_>,
163
12
        maybe_writer: Option<&mut DropCloserWriteHalf>,
164
12
        offset: u64,
165
12
        length: Option<u64>,
166
12
    ) -> Result<(), Error> {
167
12
        let sz = self
168
12
            .slow_store
169
12
            .has(key.borrow())
170
12
            .await
171
12
            .err_tip(|| "Failed to run has() on slow store")
?0
172
12
            .ok_or_else(|| {
173
0
                make_err!(
174
0
                    Code::NotFound,
175
                    "Object {} not found in either fast or slow store. \
176
                    If using multiple workers, ensure all workers share the same CAS storage path.",
177
0
                    key.as_str()
178
                )
179
0
            })?;
180
181
12
        self.metrics
182
12
            .slow_store_hit_count
183
12
            .fetch_add(1, Ordering::Acquire);
184
185
12
        let send_range = offset..length.map_or(u64::MAX, |length| length + offset);
186
12
        let mut bytes_received: u64 = 0;
187
188
12
        let (mut fast_tx, fast_rx) = make_buf_channel_pair();
189
12
        let (slow_tx, mut slow_rx) = make_buf_channel_pair();
190
12
        let data_stream_fut = async move {
191
12
            let mut maybe_writer_pin = maybe_writer.map(Pin::new);
192
            loop {
193
23
                let output_buf = slow_rx
194
23
                    .recv()
195
23
                    .await
196
23
                    .err_tip(|| "Failed to read data data buffer from slow store")
?0
;
197
23
                if output_buf.is_empty() {
  Branch (197:20): [True: 8, False: 7]
  Branch (197:20): [True: 4, False: 4]
  Branch (197:20): [Folded - Ignored]
198
                    // Write out our EOF.
199
                    // We are dropped as soon as we send_eof to writer_pin, so
200
                    // we wait until we've finished all of our joins to do that.
201
12
                    let fast_res = fast_tx.send_eof();
202
12
                    return Ok::<_, Error>((fast_res, maybe_writer_pin));
203
11
                }
204
11
                let output_buf_len = u64::try_from(output_buf.len())
205
11
                    .err_tip(|| "Could not output_buf.len() to u64")
?0
;
206
11
                self.metrics
207
11
                    .slow_store_downloaded_bytes
208
11
                    .fetch_add(output_buf_len, Ordering::Acquire);
209
210
11
                let writer_fut = Self::calculate_range(
211
11
                    &(bytes_received..bytes_received + output_buf_len),
212
11
                    &send_range,
213
0
                )?
214
11
                .zip(maybe_writer_pin.as_mut())
215
11
                .map_or_else(
216
4
                    || futures::future::ready(Ok(())).left_future(),
217
7
                    |(range, writer_pin)| writer_pin.send(output_buf.slice(range)).right_future(),
218
                );
219
220
11
                bytes_received += output_buf_len;
221
222
11
                let (fast_tx_res, writer_res) = join!(fast_tx.send(output_buf), writer_fut);
223
11
                fast_tx_res.err_tip(|| "Failed to write to fast store in fast_slow store")?;
224
11
                writer_res.err_tip(|| "Failed to write result to writer in fast_slow store")?;
225
            }
226
12
        };
227
228
12
        let slow_store_fut = self.slow_store.get(key.borrow(), slow_tx);
229
12
        let fast_store_fut =
230
12
            self.fast_store
231
12
                .update(key.borrow(), fast_rx, UploadSizeInfo::ExactSize(sz));
232
233
12
        let (data_stream_res, slow_res, fast_res) =
234
12
            join!(data_stream_fut, slow_store_fut, fast_store_fut);
235
12
        match data_stream_res {
236
12
            Ok((fast_eof_res, maybe_writer_pin)) =>
237
            // Sending the EOF will drop us almost immediately in bytestream_server
238
            // so we perform it as the very last action in this method.
239
            {
240
12
                fast_eof_res.merge(fast_res).merge(slow_res).merge(
241
12
                    if let Some(mut writer_pin) = maybe_writer_pin {
  Branch (241:28): [True: 8, False: 0]
  Branch (241:28): [True: 0, False: 4]
  Branch (241:28): [Folded - Ignored]
242
8
                        writer_pin.send_eof()
243
                    } else {
244
4
                        Ok(())
245
                    },
246
                )
247
            }
248
0
            Err(err) => fast_res.merge(slow_res).merge(Err(err)),
249
        }
250
12
    }
251
252
    /// Ensures our fast store is populated. This should remain a low-cost
253
    /// function: since the data itself is shared and not copied, it is fairly
254
    /// cheap to just discard the data, though it does cost a few mutex locks while
255
    /// streaming.
256
4
    pub async fn populate_fast_store(&self, key: StoreKey<'_>) -> Result<(), Error> {
257
4
        let maybe_size_info = self
258
4
            .fast_store
259
4
            .has(key.borrow())
260
4
            .await
261
4
            .err_tip(|| "While querying in populate_fast_store")
?0
;
262
4
        if maybe_size_info.is_some() {
  Branch (262:12): [Folded - Ignored]
  Branch (262:12): [True: 0, False: 4]
  Branch (262:12): [Folded - Ignored]
263
0
            return Ok(());
264
4
        }
265
4
        let loader = self.get_loader(key.borrow());
266
4
        loader
267
4
            .get_or_try_init(|| {
268
4
                Pin::new(self).populate_and_maybe_stream(key.borrow(), None, 0, None)
269
4
            })
270
4
            .await
271
4
            .err_tip(|| "Failed to populate()")
272
4
    }
273
274
    /// Returns the range of bytes that should be sent given a slice's bounds,
275
    /// offset so that the output range maps `received_range.start` to 0.
276
    // TODO(palfrey) This should be put into utils, as this logic is used
277
    // elsewhere in the code.
278
21
    pub fn calculate_range(
279
21
        received_range: &Range<u64>,
280
21
        send_range: &Range<u64>,
281
21
    ) -> Result<Option<Range<usize>>, Error> {
282
        // Protect against subtraction overflow.
283
21
        if received_range.start >= received_range.end {
  Branch (283:12): [True: 0, False: 21]
  Branch (283:12): [Folded - Ignored]
284
0
            return Ok(None);
285
21
        }
286
287
21
        let start = max(received_range.start, send_range.start);
288
21
        let end = min(received_range.end, send_range.end);
289
21
        if received_range.contains(&start) && received_range.contains(&(end - 1)) {
  Branch (289:12): [True: 19, False: 2]
  Branch (289:47): [True: 17, False: 2]
  Branch (289:12): [Folded - Ignored]
  Branch (289:47): [Folded - Ignored]
290
            // Offset both to the start of the received_range.
291
17
            let calculated_range_start = usize::try_from(start - received_range.start)
292
17
                .err_tip(|| "Could not convert (start - received_range.start) to usize")
?0
;
293
17
            let calculated_range_end = usize::try_from(end - received_range.start)
294
17
                .err_tip(|| "Could not convert (end - received_range.start) to usize")
?0
;
295
17
            Ok(Some(calculated_range_start..calculated_range_end))
296
        } else {
297
4
            Ok(None)
298
        }
299
21
    }
300
}
301
302
#[async_trait]
303
impl StoreDriver for FastSlowStore {
304
    async fn has_with_results(
305
        self: Pin<&Self>,
306
        key: &[StoreKey<'_>],
307
        results: &mut [Option<u64>],
308
12
    ) -> Result<(), Error> {
309
        // If our slow store is a noop store, it'll always return a 404,
310
        // so only check the fast store in such case.
311
        let slow_store = self.slow_store.inner_store::<StoreKey<'_>>(None);
312
        if slow_store.optimized_for(StoreOptimizations::NoopDownloads) {
313
            return self.fast_store.has_with_results(key, results).await;
314
        }
315
        // Only check the slow store because if it's not there, then something
316
        // downstream might be unable to get it.  This should not affect
317
        // workers as they only use get() and a CAS can use an
318
        // ExistenceCacheStore to avoid the bottleneck.
319
        self.slow_store.has_with_results(key, results).await
320
12
    }
321
322
    async fn update(
323
        self: Pin<&Self>,
324
        key: StoreKey<'_>,
325
        mut reader: DropCloserReadHalf,
326
        size_info: UploadSizeInfo,
327
84
    ) -> Result<(), Error> {
328
        // If either one of our stores is a noop store, bypass the multiplexing
329
        // and just use the store that is not a noop store.
330
        let slow_store = self.slow_store.inner_store(Some(key.borrow()));
331
        if slow_store.optimized_for(StoreOptimizations::NoopUpdates) {
332
            return self.fast_store.update(key, reader, size_info).await;
333
        }
334
        let fast_store = self.fast_store.inner_store(Some(key.borrow()));
335
        if fast_store.optimized_for(StoreOptimizations::NoopUpdates) {
336
            return self.slow_store.update(key, reader, size_info).await;
337
        }
338
339
        let (mut fast_tx, fast_rx) = make_buf_channel_pair();
340
        let (mut slow_tx, slow_rx) = make_buf_channel_pair();
341
342
84
        let data_stream_fut = async move {
343
            loop {
344
136
                let buffer = reader
345
136
                    .recv()
346
136
                    .await
347
136
                    .err_tip(|| "Failed to read buffer in fastslow store")
?0
;
348
136
                if buffer.is_empty() {
  Branch (348:20): [True: 84, False: 52]
  Branch (348:20): [Folded - Ignored]
349
                    // EOF received.
350
84
                    fast_tx.send_eof().err_tip(
351
                        || "Failed to write eof to fast store in fast_slow store update",
352
0
                    )?;
353
84
                    slow_tx
354
84
                        .send_eof()
355
84
                        .err_tip(|| "Failed to write eof to writer in fast_slow store update")
?0
;
356
84
                    return Result::<(), Error>::Ok(());
357
52
                }
358
359
52
                let (fast_result, slow_result) =
360
52
                    join!(fast_tx.send(buffer.clone()), slow_tx.send(buffer));
361
52
                fast_result
362
52
                    .map_err(|e| {
363
0
                        make_err!(
364
0
                            Code::Internal,
365
                            "Failed to send message to fast_store in fast_slow_store {:?}",
366
                            e
367
                        )
368
0
                    })
369
52
                    .merge(slow_result.map_err(|e| {
370
0
                        make_err!(
371
0
                            Code::Internal,
372
                            "Failed to send message to slow_store in fast_slow store {:?}",
373
                            e
374
                        )
375
0
                    }))?;
376
            }
377
84
        };
378
379
        let fast_store_fut = self.fast_store.update(key.borrow(), fast_rx, size_info);
380
        let slow_store_fut = self.slow_store.update(key.borrow(), slow_rx, size_info);
381
382
        let (data_stream_res, fast_res, slow_res) =
383
            join!(data_stream_fut, fast_store_fut, slow_store_fut);
384
        data_stream_res.merge(fast_res).merge(slow_res)?;
385
        Ok(())
386
84
    }
387
388
    /// FastSlowStore has optimizations for dealing with files.
389
0
    fn optimized_for(&self, optimization: StoreOptimizations) -> bool {
390
0
        optimization == StoreOptimizations::FileUpdates
391
0
    }
392
393
    /// Optimized variation to consume the file if one of the stores is a
394
    /// filesystem store. This makes the operation a move instead of a copy,
395
    /// dramatically increasing performance for large files.
396
    async fn update_with_whole_file(
397
        self: Pin<&Self>,
398
        key: StoreKey<'_>,
399
        path: OsString,
400
        mut file: fs::FileSlot,
401
        upload_size: UploadSizeInfo,
402
5
    ) -> Result<Option<fs::FileSlot>, Error> {
403
        if self
404
            .fast_store
405
            .optimized_for(StoreOptimizations::FileUpdates)
406
        {
407
            if !self
408
                .slow_store
409
                .optimized_for(StoreOptimizations::NoopUpdates)
410
            {
411
                slow_update_store_with_file(
412
                    self.slow_store.as_store_driver_pin(),
413
                    key.borrow(),
414
                    &mut file,
415
                    upload_size,
416
                )
417
                .await
418
                .err_tip(|| "In FastSlowStore::update_with_whole_file slow_store")?;
419
            }
420
            return self
421
                .fast_store
422
                .update_with_whole_file(key, path, file, upload_size)
423
                .await;
424
        }
425
426
        if self
427
            .slow_store
428
            .optimized_for(StoreOptimizations::FileUpdates)
429
        {
430
            if !self
431
                .fast_store
432
                .optimized_for(StoreOptimizations::NoopUpdates)
433
            {
434
                slow_update_store_with_file(
435
                    self.fast_store.as_store_driver_pin(),
436
                    key.borrow(),
437
                    &mut file,
438
                    upload_size,
439
                )
440
                .await
441
                .err_tip(|| "In FastSlowStore::update_with_whole_file fast_store")?;
442
            }
443
            return self
444
                .slow_store
445
                .update_with_whole_file(key, path, file, upload_size)
446
                .await;
447
        }
448
449
        slow_update_store_with_file(self, key, &mut file, upload_size)
450
            .await
451
            .err_tip(|| "In FastSlowStore::update_with_whole_file")?;
452
        Ok(Some(file))
453
5
    }
454
455
    async fn get_part(
456
        self: Pin<&Self>,
457
        key: StoreKey<'_>,
458
        writer: &mut DropCloserWriteHalf,
459
        offset: u64,
460
        length: Option<u64>,
461
66
    ) -> Result<(), Error> {
462
        // TODO(palfrey) Investigate if we should maybe ignore errors here instead of
463
        // forwarding them up.
464
        if self.fast_store.has(key.borrow()).await?.is_some() {
465
            self.metrics
466
                .fast_store_hit_count
467
                .fetch_add(1, Ordering::Acquire);
468
            self.fast_store
469
                .get_part(key, writer.borrow_mut(), offset, length)
470
                .await?;
471
            self.metrics
472
                .fast_store_downloaded_bytes
473
                .fetch_add(writer.get_bytes_written(), Ordering::Acquire);
474
            return Ok(());
475
        }
476
477
        let loader = self.get_loader(key.borrow());
478
        let mut writer = Some(writer);
479
        loader
480
8
            .get_or_try_init(|| {
481
8
                writer
482
8
                    .take()
483
8
                    .map(|writer| {
484
8
                        self.populate_and_maybe_stream(key.borrow(), Some(writer), offset, length)
485
8
                    })
486
8
                    .expect("writer somehow became None")
487
8
            })
488
            .await?;
489
        drop(loader);
490
491
        // If we didn't stream then re-enter which will stream from the fast
492
        // store, or retry the download.  We should not get in a loop here
493
        // because OnceCell has the good sense to retry for all callers so in
494
        // order to get here the fast store will have been populated.  There's
495
        // an outside chance it was evicted, but that's slim.
496
        if let Some(writer) = writer.take() {
497
            self.get_part(key, writer, offset, length).await
498
        } else {
499
            // This was the thread that did the streaming already, lucky duck.
500
            Ok(())
501
        }
502
66
    }
503
504
2
    fn inner_store(&self, _key: Option<StoreKey>) -> &dyn StoreDriver {
505
2
        self
506
2
    }
507
508
2
    fn as_any<'a>(&'a self) -> &'a (dyn core::any::Any + Sync + Send + 'static) {
509
2
        self
510
2
    }
511
512
0
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn core::any::Any + Sync + Send + 'static> {
513
0
        self
514
0
    }
515
516
0
    fn register_remove_callback(
517
0
        self: Arc<Self>,
518
0
        callback: Arc<dyn RemoveItemCallback>,
519
0
    ) -> Result<(), Error> {
520
0
        self.fast_store.register_remove_callback(callback.clone())?;
521
0
        self.slow_store.register_remove_callback(callback)?;
522
0
        Ok(())
523
0
    }
524
}
525
526
#[derive(Debug, Default, MetricsComponent)]
527
struct FastSlowStoreMetrics {
528
    #[metric(help = "Hit count for the fast store")]
529
    fast_store_hit_count: AtomicU64,
530
    #[metric(help = "Downloaded bytes from the fast store")]
531
    fast_store_downloaded_bytes: AtomicU64,
532
    #[metric(help = "Hit count for the slow store")]
533
    slow_store_hit_count: AtomicU64,
534
    #[metric(help = "Downloaded bytes from the slow store")]
535
    slow_store_downloaded_bytes: AtomicU64,
536
}
537
538
default_health_status_indicator!(FastSlowStore);
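
Two supplementary sketches follow. They are not part of the measured source file: names such as calculate_range_sketch, Deduper, and populate_once are illustrative only, and error handling is simplified.

FastSlowStore::calculate_range clips the requested send_range to the chunk's received_range and re-bases the result so that index 0 is the start of the received chunk. A minimal standalone sketch of that math with a few worked cases, omitting the usize-conversion error handling of the real function:

use core::cmp::{max, min};
use core::ops::Range;

// Standalone sketch of the intersect-then-offset math used by
// FastSlowStore::calculate_range (overflow/conversion errors omitted).
fn calculate_range_sketch(received: &Range<u64>, send: &Range<u64>) -> Option<Range<usize>> {
    // Protect against subtraction overflow on an empty received chunk.
    if received.start >= received.end {
        return None;
    }
    let start = max(received.start, send.start);
    let end = min(received.end, send.end);
    if received.contains(&start) && received.contains(&(end - 1)) {
        // Re-base both bounds so the result indexes into the received chunk.
        Some((start - received.start) as usize..(end - received.start) as usize)
    } else {
        None
    }
}

fn main() {
    // Chunk covering bytes 0..1024, client asked for bytes 100..200:
    // the whole requested window is inside this chunk.
    assert_eq!(calculate_range_sketch(&(0..1024), &(100..200)), Some(100..200));

    // The next chunk (bytes 1024..2048) lies entirely past the requested
    // window, so nothing from it is forwarded.
    assert_eq!(calculate_range_sketch(&(1024..2048), &(100..200)), None);

    // Chunk 1000..2000 with an open-ended read starting at offset 1500:
    // slice the chunk from its local offset 500 to its end.
    assert_eq!(
        calculate_range_sketch(&(1000..2000), &(1500..u64::MAX)),
        Some(500..1000)
    );
}

The populating_digests map plus LoaderGuard de-duplicate fast-store population per key on top of tokio::sync::OnceCell: the first request runs the population future, and concurrent requests for the same key await the same cell. Below is a minimal sketch of that pattern using a hypothetical Deduper/populate_once wrapper (assuming tokio with the sync, macros, and rt features); the real store additionally removes the map entry in LoaderGuard::drop and re-checks the fast store afterwards:

use std::collections::HashMap;
use std::future::Future;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex};

use tokio::sync::OnceCell;

// Per-key de-duplication: all concurrent callers for the same key share one
// Arc<OnceCell<()>>, so the population closure runs at most once while the
// others simply await its completion.
#[derive(Default)]
struct Deduper {
    in_flight: Mutex<HashMap<String, Arc<OnceCell<()>>>>,
}

impl Deduper {
    async fn populate_once<F, Fut>(&self, key: &str, populate: F) -> Result<(), String>
    where
        F: FnOnce() -> Fut,
        Fut: Future<Output = Result<(), String>>,
    {
        // Grab (or create) the shared cell for this key; the lock is released
        // before any await point.
        let cell = self
            .in_flight
            .lock()
            .unwrap()
            .entry(key.to_string())
            .or_insert_with(|| Arc::new(OnceCell::new()))
            .clone();
        // OnceCell runs the closure for exactly one caller per cell (and lets
        // another caller retry if it failed), mirroring LoaderGuard above.
        cell.get_or_try_init(populate).await.map(|&()| ())
    }
}

#[tokio::main]
async fn main() {
    let deduper = Deduper::default();
    let runs = Arc::new(AtomicU32::new(0));

    // Two concurrent requests for the same digest: the populate closure runs once.
    let (a, b) = tokio::join!(
        deduper.populate_once("digest-abc", || {
            let runs = runs.clone();
            async move {
                runs.fetch_add(1, Ordering::SeqCst);
                Ok::<(), String>(())
            }
        }),
        deduper.populate_once("digest-abc", || {
            let runs = runs.clone();
            async move {
                runs.fetch_add(1, Ordering::SeqCst);
                Ok::<(), String>(())
            }
        }),
    );

    assert!(a.is_ok() && b.is_ok());
    assert_eq!(runs.load(Ordering::SeqCst), 1);
}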