/build/source/nativelink-store/src/filesystem_store.rs
Line | Count | Source |
1 | | // Copyright 2024 The NativeLink Authors. All rights reserved. |
2 | | // |
3 | | // Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // See LICENSE file for details |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | use core::fmt::{Debug, Formatter}; |
16 | | use core::pin::Pin; |
17 | | use core::sync::atomic::{AtomicU64, Ordering}; |
18 | | use std::borrow::Cow; |
19 | | use std::ffi::{OsStr, OsString}; |
20 | | use std::sync::{Arc, Weak}; |
21 | | use std::time::SystemTime; |
22 | | |
23 | | use async_lock::RwLock; |
24 | | use async_trait::async_trait; |
25 | | use bytes::{Bytes, BytesMut}; |
26 | | use futures::stream::{StreamExt, TryStreamExt}; |
27 | | use futures::{Future, TryFutureExt}; |
28 | | use nativelink_config::stores::FilesystemSpec; |
29 | | use nativelink_error::{Code, Error, ResultExt, make_err, make_input_err}; |
30 | | use nativelink_metric::MetricsComponent; |
31 | | use nativelink_util::background_spawn; |
32 | | use nativelink_util::buf_channel::{ |
33 | | DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair, |
34 | | }; |
35 | | use nativelink_util::common::{DigestInfo, fs}; |
36 | | use nativelink_util::evicting_map::{EvictingMap, LenEntry}; |
37 | | use nativelink_util::health_utils::{HealthRegistryBuilder, HealthStatus, HealthStatusIndicator}; |
38 | | use nativelink_util::store_trait::{ |
39 | | RemoveItemCallback, StoreDriver, StoreKey, StoreKeyBorrow, StoreOptimizations, UploadSizeInfo, |
40 | | }; |
41 | | use tokio::io::{AsyncReadExt, AsyncWriteExt, Take}; |
42 | | use tokio::sync::Semaphore; |
43 | | use tokio_stream::wrappers::ReadDirStream; |
44 | | use tracing::{debug, error, info, trace, warn}; |
45 | | |
46 | | use crate::callback_utils::RemoveItemCallbackHolder; |
47 | | use crate::cas_utils::is_zero_digest; |
48 | | |
49 | | // Default size of the memory buffer allocated when reading files. |
50 | | const DEFAULT_BUFF_SIZE: usize = 32 * 1024; |
51 | | // Default block size of all major filesystems is 4KB |
52 | | const DEFAULT_BLOCK_SIZE: u64 = 4 * 1024; |
53 | | |
54 | | pub const STR_FOLDER: &str = "s"; |
55 | | pub const DIGEST_FOLDER: &str = "d"; |
56 | | |
57 | | #[derive(Clone, Copy, Debug)] |
58 | | pub enum FileType { |
59 | | Digest, |
60 | | String, |
61 | | } |
62 | | |
63 | | #[derive(Debug, MetricsComponent)] |
64 | | pub struct SharedContext { |
65 | | // Used in testing to know how many active drop() spawns are running. |
66 | | // TODO(palfrey) It is probably a good idea to use a spin lock during |
67 | | // destruction of the store to ensure that all files are actually |
68 | | // deleted (similar to how it is done in tests). |
69 | | #[metric(help = "Number of active drop spawns")] |
70 | | pub active_drop_spawns: AtomicU64, |
71 | | #[metric(help = "Path to the configured temp path")] |
72 | | temp_path: String, |
73 | | #[metric(help = "Path to the configured content path")] |
74 | | content_path: String, |
75 | | } |
76 | | |
77 | | #[derive(Eq, PartialEq, Debug)] |
78 | | enum PathType { |
79 | | Content, |
80 | | Temp, |
81 | | Custom(OsString), |
82 | | } |
83 | | |
84 | | /// [`EncodedFilePath`] stores the path to the file |
85 | | /// including the context, path type and key to the file. |
86 | | /// The whole [`StoreKey`] is stored as opposed to solely |
87 | | /// the [`DigestInfo`] so that it is more usable for things |
88 | | /// such as BEP - see Issue #1108. |
89 | | #[derive(Debug)] |
90 | | pub struct EncodedFilePath { |
91 | | shared_context: Arc<SharedContext>, |
92 | | path_type: PathType, |
93 | | key: StoreKey<'static>, |
94 | | } |
95 | | |
96 | | impl EncodedFilePath { |
97 | | #[inline] |
98 | 250 | fn get_file_path(&self) -> Cow<'_, OsStr> { |
99 | 250 | get_file_path_raw(&self.path_type, self.shared_context.as_ref(), &self.key) |
100 | 250 | } |
101 | | } |
102 | | |
103 | | #[inline] |
104 | 341 | fn get_file_path_raw<'a>( |
105 | 341 | path_type: &'a PathType, |
106 | 341 | shared_context: &SharedContext, |
107 | 341 | key: &StoreKey<'a>, |
108 | 341 | ) -> Cow<'a, OsStr> { |
109 | 341 | let folder334 = match path_type { |
110 | 155 | PathType::Content => &shared_context.content_path, |
111 | 179 | PathType::Temp => &shared_context.temp_path, |
112 | 7 | PathType::Custom(path) => return Cow::Borrowed(path), |
113 | | }; |
114 | 334 | Cow::Owned(to_full_path_from_key(folder, key)) |
115 | 341 | } |
116 | | |
117 | | impl Drop for EncodedFilePath { |
118 | 94 | fn drop(&mut self) { |
119 | | // `drop()` can be called during shutdown, so we use the `path_type` flag to know if the |
120 | | // file actually needs to be deleted. |
121 | 94 | if self.path_type == PathType::Content { Branch (121:12): [True: 85, False: 9]
Branch (121:12): [Folded - Ignored]
|
122 | 85 | return; |
123 | 9 | } |
124 | | |
125 | 9 | let file_path = self.get_file_path().to_os_string(); |
126 | 9 | let shared_context = self.shared_context.clone(); |
127 | | // .fetch_add returns previous value, so we add one to get approximate current value |
128 | 9 | let current_active_drop_spawns = shared_context |
129 | 9 | .active_drop_spawns |
130 | 9 | .fetch_add(1, Ordering::Relaxed) |
131 | 9 | + 1; |
132 | 9 | debug!( |
133 | | %current_active_drop_spawns, |
134 | | ?file_path, |
135 | 9 | "Spawned a filesystem_delete_file" |
136 | | ); |
137 | 9 | background_spawn!("filesystem_delete_file", async move {8 |
138 | 8 | let result5 = fs::remove_file(&file_path) |
139 | 8 | .await |
140 | 5 | .err_tip(|| format!("Failed to remove file {}"0 , file_path.display()0 )); |
141 | 5 | if let Err(err0 ) = result { Branch (141:20): [True: 0, False: 5]
Branch (141:20): [Folded - Ignored]
|
142 | 0 | error!(?file_path, ?err, "Failed to delete file",); |
143 | | } else { |
144 | 5 | debug!(?file_path, "File deleted",); |
145 | | } |
146 | | // .fetch_sub returns previous value, so we subtract one to get approximate current value |
147 | 5 | let current_active_drop_spawns = shared_context |
148 | 5 | .active_drop_spawns |
149 | 5 | .fetch_sub(1, Ordering::Relaxed) |
150 | 5 | - 1; |
151 | 5 | debug!( |
152 | | ?current_active_drop_spawns, |
153 | | ?file_path, |
154 | 5 | "Dropped a filesystem_delete_file" |
155 | | ); |
156 | 5 | }); |
157 | 94 | } |
158 | | } |
159 | | |
160 | | /// This creates the file path from the [`StoreKey`]. If |
161 | | /// it is a string, the string, prefixed with [`STR_FOLDER`] |
162 | | /// for backwards compatibility, is stored. |
163 | | /// |
164 | | /// If it is a [`DigestInfo`], it is prefixed by [`DIGEST_FOLDER`] |
165 | | /// followed by the string representation of a digest - the hash in hex, |
166 | | /// a hyphen then the size in bytes. |
167 | | /// |
168 | | /// Previously, only the string representation of the [`DigestInfo`] was |
169 | | /// used with no prefix |
170 | | #[inline] |
171 | 343 | fn to_full_path_from_key(folder: &str, key: &StoreKey<'_>) -> OsString { |
172 | 343 | match key { |
173 | 3 | StoreKey::Str(str) => format!("{folder}/{STR_FOLDER}/{str}"), |
174 | 340 | StoreKey::Digest(digest_info) => format!("{folder}/{DIGEST_FOLDER}/{digest_info}"), |
175 | | } |
176 | 343 | .into() |
177 | 343 | } |
178 | | |
179 | | pub trait FileEntry: LenEntry + Send + Sync + Debug + 'static { |
180 | | /// Responsible for creating the underlying `FileEntry`. |
181 | | fn create(data_size: u64, block_size: u64, encoded_file_path: RwLock<EncodedFilePath>) -> Self; |
182 | | |
183 | | /// Creates a (usually) temp file, opens it and returns the path to the temp file. |
184 | | fn make_and_open_file( |
185 | | block_size: u64, |
186 | | encoded_file_path: EncodedFilePath, |
187 | | ) -> impl Future<Output = Result<(Self, fs::FileSlot, OsString), Error>> + Send |
188 | | where |
189 | | Self: Sized; |
190 | | |
191 | | /// Returns the underlying reference to the size of the data in bytes |
192 | | fn data_size_mut(&mut self) -> &mut u64; |
193 | | |
194 | | /// Returns the actual size of the underlying file on the disk after accounting for filesystem block size. |
195 | | fn size_on_disk(&self) -> u64; |
196 | | |
197 | | /// Gets the underlying `EncodedFilePath`. |
198 | | fn get_encoded_file_path(&self) -> &RwLock<EncodedFilePath>; |
199 | | |
200 | | /// Returns a reader that will read part of the underlying file. |
201 | | fn read_file_part( |
202 | | &self, |
203 | | offset: u64, |
204 | | length: u64, |
205 | | ) -> impl Future<Output = Result<Take<fs::FileSlot>, Error>> + Send; |
206 | | |
207 | | /// This function is a safe way to extract the file name of the underlying file. To protect users from |
208 | | /// accidentally creating undefined behavior we encourage users to do the logic they need to do with |
209 | | /// the filename inside this function instead of extracting the filename and doing the logic outside. |
210 | | /// This is because the filename is not guaranteed to exist after this function returns; however, inside |
211 | | /// the callback the file is always guaranteed to exist and be immutable. |
212 | | /// DO NOT USE THIS FUNCTION TO EXTRACT THE FILENAME AND STORE IT FOR LATER USE. |
213 | | fn get_file_path_locked< |
214 | | T, |
215 | | Fut: Future<Output = Result<T, Error>> + Send, |
216 | | F: FnOnce(OsString) -> Fut + Send, |
217 | | >( |
218 | | &self, |
219 | | handler: F, |
220 | | ) -> impl Future<Output = Result<T, Error>> + Send; |
221 | | } |
222 | | |
223 | | pub struct FileEntryImpl { |
224 | | data_size: u64, |
225 | | block_size: u64, |
226 | | // We lock around this as it gets rewritten when we move between temp and content types |
227 | | encoded_file_path: RwLock<EncodedFilePath>, |
228 | | } |
229 | | |
230 | | impl FileEntryImpl { |
231 | 11 | pub fn get_shared_context_for_test(&mut self) -> Arc<SharedContext> { |
232 | 11 | self.encoded_file_path.get_mut().shared_context.clone() |
233 | 11 | } |
234 | | } |
235 | | |
236 | | impl FileEntry for FileEntryImpl { |
237 | 94 | fn create(data_size: u64, block_size: u64, encoded_file_path: RwLock<EncodedFilePath>) -> Self { |
238 | 94 | Self { |
239 | 94 | data_size, |
240 | 94 | block_size, |
241 | 94 | encoded_file_path, |
242 | 94 | } |
243 | 94 | } |
244 | | |
245 | | /// This encapsulates the logic for the edge case where the file fails to be created: |
246 | | /// the cleanup of the file is handled without creating a `FileEntry`, which would |
247 | | /// try to clean up the file as well during `drop()`. |
248 | 85 | async fn make_and_open_file( |
249 | 85 | block_size: u64, |
250 | 85 | encoded_file_path: EncodedFilePath, |
251 | 85 | ) -> Result<(Self, fs::FileSlot, OsString), Error> { |
252 | 85 | let temp_full_path = encoded_file_path.get_file_path().to_os_string(); |
253 | 85 | let temp_file_result = fs::create_file(temp_full_path.clone()) |
254 | 85 | .or_else(|mut err| async {0 |
255 | 0 | let remove_result = fs::remove_file(&temp_full_path).await.err_tip(|| { |
256 | 0 | format!( |
257 | 0 | "Failed to remove file {} in filesystem store", |
258 | 0 | temp_full_path.display() |
259 | | ) |
260 | 0 | }); |
261 | 0 | if let Err(remove_err) = remove_result { Branch (261:24): [True: 0, False: 0]
Branch (261:24): [True: 0, False: 0]
Branch (261:24): [True: 0, False: 0]
Branch (261:24): [True: 0, False: 0]
Branch (261:24): [Folded - Ignored]
Branch (261:24): [True: 0, False: 0]
|
262 | 0 | err = err.merge(remove_err); |
263 | 0 | } |
264 | 0 | warn!(?err, ?block_size, ?temp_full_path, "Failed to create file",); |
265 | 0 | Err(err).err_tip(|| { |
266 | 0 | format!( |
267 | 0 | "Failed to create {} in filesystem store", |
268 | 0 | temp_full_path.display() |
269 | | ) |
270 | 0 | }) |
271 | 0 | }) |
272 | 85 | .await?0 ; |
273 | | |
274 | 85 | Ok(( |
275 | 85 | <Self as FileEntry>::create( |
276 | 85 | 0, /* Unknown yet, we will fill it in later */ |
277 | 85 | block_size, |
278 | 85 | RwLock::new(encoded_file_path), |
279 | 85 | ), |
280 | 85 | temp_file_result, |
281 | 85 | temp_full_path, |
282 | 85 | )) |
283 | 85 | } |
284 | | |
285 | 85 | fn data_size_mut(&mut self) -> &mut u64 { |
286 | 85 | &mut self.data_size |
287 | 85 | } |
288 | | |
289 | 159 | fn size_on_disk(&self) -> u64 { |
290 | 159 | self.data_size.div_ceil(self.block_size) * self.block_size |
291 | 159 | } |
292 | | |
293 | 148 | fn get_encoded_file_path(&self) -> &RwLock<EncodedFilePath> { |
294 | 148 | &self.encoded_file_path |
295 | 148 | } |
296 | | |
297 | 50 | fn read_file_part( |
298 | 50 | &self, |
299 | 50 | offset: u64, |
300 | 50 | length: u64, |
301 | 50 | ) -> impl Future<Output = Result<Take<fs::FileSlot>, Error>> + Send { |
302 | 50 | self.get_file_path_locked(move |full_content_path| async move { |
303 | 50 | let file48 = fs::open_file(&full_content_path, offset, length) |
304 | 50 | .await |
305 | 50 | .err_tip(|| {2 |
306 | 2 | format!( |
307 | 2 | "Failed to open file in filesystem store {}", |
308 | 2 | full_content_path.display() |
309 | | ) |
310 | 2 | })?; |
311 | 48 | Ok(file) |
312 | 100 | }) |
313 | 50 | } |
314 | | |
315 | 56 | async fn get_file_path_locked< |
316 | 56 | T, |
317 | 56 | Fut: Future<Output = Result<T, Error>> + Send, |
318 | 56 | F: FnOnce(OsString) -> Fut + Send, |
319 | 56 | >( |
320 | 56 | &self, |
321 | 56 | handler: F, |
322 | 56 | ) -> Result<T, Error> { |
323 | 56 | let encoded_file_path = self.get_encoded_file_path().read().await; |
324 | 56 | handler(encoded_file_path.get_file_path().to_os_string()).await |
325 | 56 | } |
326 | | } |
327 | | |
328 | | impl Debug for FileEntryImpl { |
329 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { |
330 | 0 | f.debug_struct("FileEntryImpl") |
331 | 0 | .field("data_size", &self.data_size) |
332 | 0 | .field("encoded_file_path", &"<behind mutex>") |
333 | 0 | .finish() |
334 | 0 | } |
335 | | } |
336 | | |
337 | 94 | fn make_temp_digest(mut digest: DigestInfo) -> DigestInfo { |
338 | | static DELETE_FILE_COUNTER: AtomicU64 = AtomicU64::new(0); |
339 | 94 | let mut hash = *digest.packed_hash(); |
340 | 94 | hash[24..].clone_from_slice( |
341 | 94 | &DELETE_FILE_COUNTER |
342 | 94 | .fetch_add(1, Ordering::Relaxed) |
343 | 94 | .to_le_bytes(), |
344 | | ); |
345 | 94 | digest.set_packed_hash(*hash); |
346 | 94 | digest |
347 | 94 | } |
348 | | |
349 | 94 | fn make_temp_key(key: &StoreKey) -> StoreKey<'static> { |
350 | 94 | StoreKey::Digest(make_temp_digest(key.borrow().into_digest())) |
351 | 94 | } |
352 | | |
353 | | impl LenEntry for FileEntryImpl { |
354 | | #[inline] |
355 | 157 | fn len(&self) -> u64 { |
356 | 157 | self.size_on_disk() |
357 | 157 | } |
358 | | |
359 | 1 | fn is_empty(&self) -> bool { |
360 | 1 | self.data_size == 0 |
361 | 1 | } |
362 | | |
363 | | // unref() only triggers when an item is removed from the eviction_map. It is possible |
364 | | // that another place in code has a reference to `FileEntryImpl` and may later read the |
365 | | // file. To support this edge case, we first move the file to a temp file and point |
366 | | // target file location to the new temp file. `unref()` should only ever be called once. |
367 | | #[inline] |
368 | 11 | async fn unref(&self) { |
369 | 11 | let mut encoded_file_path = self.encoded_file_path.write().await; |
370 | 11 | if encoded_file_path.path_type == PathType::Temp { Branch (370:12): [True: 0, False: 0]
Branch (370:12): [True: 2, False: 9]
Branch (370:12): [True: 0, False: 0]
Branch (370:12): [True: 0, False: 0]
Branch (370:12): [True: 0, False: 0]
Branch (370:12): [Folded - Ignored]
Branch (370:12): [True: 0, False: 0]
|
371 | | // We are already a temp file that is now marked for deletion on drop. |
372 | | // This is very rare, but most likely the rename into the content path failed. |
373 | 2 | warn!( |
374 | 2 | key = ?encoded_file_path.key, |
375 | 2 | "File is already a temp file", |
376 | | ); |
377 | 2 | return; |
378 | 9 | } |
379 | 9 | let from_path = encoded_file_path.get_file_path(); |
380 | 9 | let new_key = make_temp_key(&encoded_file_path.key); |
381 | | |
382 | 9 | let to_path = to_full_path_from_key(&encoded_file_path.shared_context.temp_path, &new_key); |
383 | | |
384 | 9 | if let Err(err2 ) = fs::rename(&from_path, &to_path).await { Branch (384:16): [True: 0, False: 0]
Branch (384:16): [True: 2, False: 7]
Branch (384:16): [True: 0, False: 0]
Branch (384:16): [True: 0, False: 0]
Branch (384:16): [True: 0, False: 0]
Branch (384:16): [Folded - Ignored]
Branch (384:16): [True: 0, False: 0]
|
385 | 2 | warn!( |
386 | 2 | key = ?encoded_file_path.key, |
387 | | ?from_path, |
388 | | ?to_path, |
389 | | ?err, |
390 | 2 | "Failed to rename file", |
391 | | ); |
392 | | } else { |
393 | 7 | debug!( |
394 | 7 | key = ?encoded_file_path.key, |
395 | | ?from_path, |
396 | | ?to_path, |
397 | 7 | "Renamed file (unref)", |
398 | | ); |
399 | 7 | encoded_file_path.path_type = PathType::Temp; |
400 | 7 | encoded_file_path.key = new_key; |
401 | | } |
402 | 11 | } |
403 | | } |
404 | | |
405 | | #[inline] |
406 | 2 | fn digest_from_filename(file_name: &str) -> Result<DigestInfo, Error> { |
407 | 2 | let (hash, size) = file_name.split_once('-').err_tip(|| "")?0 ; |
408 | 2 | let size = size.parse::<i64>()?0 ; |
409 | 2 | DigestInfo::try_new(hash, size) |
410 | 2 | } |
411 | | |
412 | 2 | pub fn key_from_file(file_name: &str, file_type: FileType) -> Result<StoreKey<'_>, Error> { |
413 | 2 | match file_type { |
414 | 0 | FileType::String => Ok(StoreKey::new_str(file_name)), |
415 | 2 | FileType::Digest => digest_from_filename(file_name).map(StoreKey::Digest), |
416 | | } |
417 | 2 | } |
418 | | |
419 | | /// The number of files to read the metadata for at the same time when running |
420 | | /// `add_files_to_cache`. |
421 | | const SIMULTANEOUS_METADATA_READS: usize = 200; |
422 | | |
423 | | type FsEvictingMap<'a, Fe> = |
424 | | EvictingMap<StoreKeyBorrow, StoreKey<'a>, Arc<Fe>, SystemTime, RemoveItemCallbackHolder>; |
425 | | |
426 | 52 | async fn add_files_to_cache<Fe: FileEntry>( |
427 | 52 | evicting_map: &FsEvictingMap<'_, Fe>, |
428 | 52 | anchor_time: &SystemTime, |
429 | 52 | shared_context: &Arc<SharedContext>, |
430 | 52 | block_size: u64, |
431 | 52 | rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>, |
432 | 52 | ) -> Result<(), Error> { |
433 | | #[expect(clippy::too_many_arguments)] |
434 | 1 | async fn process_entry<Fe: FileEntry>( |
435 | 1 | evicting_map: &FsEvictingMap<'_, Fe>, |
436 | 1 | file_name: &str, |
437 | 1 | file_type: FileType, |
438 | 1 | atime: SystemTime, |
439 | 1 | data_size: u64, |
440 | 1 | block_size: u64, |
441 | 1 | anchor_time: &SystemTime, |
442 | 1 | shared_context: &Arc<SharedContext>, |
443 | 1 | ) -> Result<(), Error> { |
444 | 1 | let key = key_from_file(file_name, file_type)?0 ; |
445 | | |
446 | 1 | let file_entry = Fe::create( |
447 | 1 | data_size, |
448 | 1 | block_size, |
449 | 1 | RwLock::new(EncodedFilePath { |
450 | 1 | shared_context: shared_context.clone(), |
451 | 1 | path_type: PathType::Content, |
452 | 1 | key: key.borrow().into_owned(), |
453 | 1 | }), |
454 | | ); |
455 | 1 | let time_since_anchor = anchor_time |
456 | 1 | .duration_since(atime) |
457 | 1 | .map_err(|_| make_input_err!("File access time newer than now"))?0 ; |
458 | 1 | evicting_map |
459 | 1 | .insert_with_time( |
460 | 1 | key.into_owned().into(), |
461 | 1 | Arc::new(file_entry), |
462 | 1 | i32::try_from(time_since_anchor.as_secs()).unwrap_or(i32::MAX), |
463 | 1 | ) |
464 | 1 | .await; |
465 | 1 | Ok(()) |
466 | 1 | } |
467 | | |
468 | 156 | async fn read_files( |
469 | 156 | folder: Option<&str>, |
470 | 156 | shared_context: &SharedContext, |
471 | 156 | ) -> Result<Vec<(String, SystemTime, u64, bool)>, Error> { |
472 | | // Note: In Dec 2024 this is for backwards compatibility with the old |
473 | | // way files were stored on disk. Previously all files were in a single |
474 | | // folder regardless of the StoreKey type. This allows old versions of |
475 | | // nativelink file layout to be upgraded at startup time. |
476 | | // This logic can be removed once more time has passed. |
477 | 156 | let read_dir = folder.map_or_else( |
478 | 52 | || format!("{}/", shared_context.content_path), |
479 | 104 | |folder| format!("{}/{folder}/", shared_context.content_path), |
480 | | ); |
481 | | |
482 | 156 | let (_permit, dir_handle) = fs::read_dir(read_dir) |
483 | 156 | .await |
484 | 156 | .err_tip(|| "Failed opening content directory for iterating in filesystem store")?0 |
485 | 156 | .into_inner(); |
486 | | |
487 | 156 | let read_dir_stream = ReadDirStream::new(dir_handle); |
488 | 156 | read_dir_stream |
489 | 156 | .map(|dir_entry| async move {105 |
490 | 105 | let dir_entry = dir_entry.unwrap(); |
491 | 105 | let file_name = dir_entry.file_name().into_string().unwrap(); |
492 | 105 | let metadata = dir_entry |
493 | 105 | .metadata() |
494 | 105 | .await |
495 | 105 | .err_tip(|| "Failed to get metadata in filesystem store")?0 ; |
496 | | // We need to filter out folders - we do not want to try to cache the s and d folders. |
497 | 105 | let is_file = |
498 | 105 | metadata.is_file() || !(file_name == STR_FOLDER104 || file_name == DIGEST_FOLDER52 ); Branch (498:21): [True: 0, False: 0]
Branch (498:45): [True: 0, False: 0]
Branch (498:21): [True: 1, False: 44]
Branch (498:45): [True: 22, False: 22]
Branch (498:21): [True: 0, False: 4]
Branch (498:45): [True: 2, False: 2]
Branch (498:21): [True: 0, False: 6]
Branch (498:45): [True: 3, False: 3]
Branch (498:21): [Folded - Ignored]
Branch (498:45): [Folded - Ignored]
Branch (498:21): [True: 0, False: 50]
Branch (498:45): [True: 25, False: 25]
|
499 | | // Using access time is not perfect, but better than random. We do not update the |
500 | | // atime when a file is actually "touched", we rely on whatever the filesystem does |
501 | | // when we read the file (usually update on read). |
502 | 105 | let atime = metadata |
503 | 105 | .accessed() |
504 | 105 | .or_else(|_| metadata0 .modified0 ()) |
505 | 105 | .unwrap_or(SystemTime::UNIX_EPOCH); |
506 | 105 | Result::<(String, SystemTime, u64, bool), Error>::Ok(( |
507 | 105 | file_name, |
508 | 105 | atime, |
509 | 105 | metadata.len(), |
510 | 105 | is_file, |
511 | 105 | )) |
512 | 210 | }) |
513 | 156 | .buffer_unordered(SIMULTANEOUS_METADATA_READS) |
514 | 156 | .try_collect() |
515 | 156 | .await |
516 | 156 | } |
517 | | |
518 | | /// Note: In Dec 2024 this is for backwards compatibility with the old |
519 | | /// way files were stored on disk. Previously all files were in a single |
520 | | /// folder regardless of the [`StoreKey`] type. This moves files from the old cache |
521 | | /// location to the new cache location, under [`DIGEST_FOLDER`]. |
522 | 52 | async fn move_old_cache( |
523 | 52 | shared_context: &Arc<SharedContext>, |
524 | 52 | rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>, |
525 | 52 | ) -> Result<(), Error> { |
526 | 52 | let file_infos = read_files(None, shared_context).await?0 ; |
527 | | |
528 | 52 | let from_path = shared_context.content_path.to_string(); |
529 | | |
530 | 52 | let to_path = format!("{}/{DIGEST_FOLDER}", shared_context.content_path); |
531 | | |
532 | 52 | for (file_name0 , _, _, _) in file_infos.into_iter().filter(|x| x.3) { |
533 | 0 | let from_file: OsString = format!("{from_path}/{file_name}").into(); |
534 | 0 | let to_file: OsString = format!("{to_path}/{file_name}").into(); |
535 | | |
536 | 0 | if let Err(err) = rename_fn(&from_file, &to_file) { Branch (536:20): [True: 0, False: 0]
Branch (536:20): [True: 0, False: 0]
Branch (536:20): [True: 0, False: 0]
Branch (536:20): [True: 0, False: 0]
Branch (536:20): [Folded - Ignored]
Branch (536:20): [True: 0, False: 0]
|
537 | 0 | warn!(?from_file, ?to_file, ?err, "Failed to rename file",); |
538 | | } else { |
539 | 0 | debug!(?from_file, ?to_file, "Renamed file (old cache)",); |
540 | | } |
541 | | } |
542 | 52 | Ok(()) |
543 | 52 | } |
544 | | |
545 | 104 | async fn add_files_to_cache<Fe: FileEntry>( |
546 | 104 | evicting_map: &FsEvictingMap<'_, Fe>, |
547 | 104 | anchor_time: &SystemTime, |
548 | 104 | shared_context: &Arc<SharedContext>, |
549 | 104 | block_size: u64, |
550 | 104 | folder: &str, |
551 | 104 | ) -> Result<(), Error> { |
552 | 104 | let file_infos = read_files(Some(folder), shared_context).await?0 ; |
553 | 104 | let file_type = match folder { |
554 | 104 | STR_FOLDER => FileType::String52 , |
555 | 52 | DIGEST_FOLDER => FileType::Digest, |
556 | 0 | _ => panic!("Invalid folder type"), |
557 | | }; |
558 | | |
559 | 104 | let path_root = format!("{}/{folder}", shared_context.content_path); |
560 | | |
561 | 104 | for (file_name1 , atime1 , data_size1 , _) in file_infos.into_iter().filter(|x| x.3) { |
562 | 1 | let result = process_entry( |
563 | 1 | evicting_map, |
564 | 1 | &file_name, |
565 | 1 | file_type, |
566 | 1 | atime, |
567 | 1 | data_size, |
568 | 1 | block_size, |
569 | 1 | anchor_time, |
570 | 1 | shared_context, |
571 | 1 | ) |
572 | 1 | .await; |
573 | 1 | if let Err(err0 ) = result { Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 1]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [True: 0, False: 0]
Branch (573:20): [Folded - Ignored]
Branch (573:20): [True: 0, False: 0]
|
574 | 0 | warn!(?file_name, ?err, "Failed to add file to eviction cache",); |
575 | | // Ignore result. |
576 | 0 | drop(fs::remove_file(format!("{path_root}/{file_name}")).await); |
577 | 1 | } |
578 | | } |
579 | 104 | Ok(()) |
580 | 104 | } |
581 | | |
582 | 52 | move_old_cache(shared_context, rename_fn).await?0 ; |
583 | | |
584 | 52 | add_files_to_cache( |
585 | 52 | evicting_map, |
586 | 52 | anchor_time, |
587 | 52 | shared_context, |
588 | 52 | block_size, |
589 | 52 | DIGEST_FOLDER, |
590 | 52 | ) |
591 | 52 | .await?0 ; |
592 | | |
593 | 52 | add_files_to_cache( |
594 | 52 | evicting_map, |
595 | 52 | anchor_time, |
596 | 52 | shared_context, |
597 | 52 | block_size, |
598 | 52 | STR_FOLDER, |
599 | 52 | ) |
600 | 52 | .await?0 ; |
601 | 52 | Ok(()) |
602 | 52 | } |
603 | | |
604 | 52 | async fn prune_temp_path(temp_path: &str) -> Result<(), Error> { |
605 | 104 | async fn prune_temp_inner(temp_path: &str, subpath: &str) -> Result<(), Error> { |
606 | 104 | let (_permit, dir_handle) = fs::read_dir(format!("{temp_path}/{subpath}")) |
607 | 104 | .await |
608 | 104 | .err_tip( |
609 | | || "Failed opening temp directory to prune partial downloads in filesystem store", |
610 | 0 | )? |
611 | 104 | .into_inner(); |
612 | | |
613 | 104 | let mut read_dir_stream = ReadDirStream::new(dir_handle); |
614 | 104 | while let Some(dir_entry0 ) = read_dir_stream.next().await { Branch (614:19): [True: 0, False: 0]
Branch (614:19): [True: 0, False: 44]
Branch (614:19): [True: 0, False: 4]
Branch (614:19): [True: 0, False: 6]
Branch (614:19): [Folded - Ignored]
Branch (614:19): [True: 0, False: 50]
|
615 | 0 | let path = dir_entry?.path(); |
616 | 0 | if let Err(err) = fs::remove_file(&path).await { Branch (616:20): [True: 0, False: 0]
Branch (616:20): [True: 0, False: 0]
Branch (616:20): [True: 0, False: 0]
Branch (616:20): [True: 0, False: 0]
Branch (616:20): [Folded - Ignored]
Branch (616:20): [True: 0, False: 0]
|
617 | 0 | warn!(?path, ?err, "Failed to delete file",); |
618 | 0 | } |
619 | | } |
620 | 104 | Ok(()) |
621 | 104 | } |
622 | | |
623 | 52 | prune_temp_inner(temp_path, STR_FOLDER).await?0 ; |
624 | 52 | prune_temp_inner(temp_path, DIGEST_FOLDER).await?0 ; |
625 | 52 | Ok(()) |
626 | 52 | } |
627 | | |
628 | | #[derive(Debug, MetricsComponent)] |
629 | | pub struct FilesystemStore<Fe: FileEntry = FileEntryImpl> { |
630 | | #[metric] |
631 | | shared_context: Arc<SharedContext>, |
632 | | #[metric(group = "evicting_map")] |
633 | | evicting_map: Arc<FsEvictingMap<'static, Fe>>, |
634 | | #[metric(help = "Block size of the configured filesystem")] |
635 | | block_size: u64, |
636 | | #[metric(help = "Size of the configured read buffer size")] |
637 | | read_buffer_size: usize, |
638 | | weak_self: Weak<Self>, |
639 | | rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>, |
640 | | /// Limits concurrent write operations to prevent disk I/O saturation. |
641 | | write_semaphore: Option<Semaphore>, |
642 | | } |
643 | | |
644 | | impl<Fe: FileEntry> FilesystemStore<Fe> { |
645 | 40 | pub async fn new(spec: &FilesystemSpec) -> Result<Arc<Self>, Error> { |
646 | 81 | Self::new_with_timeout_and_rename_fn40 (spec40 , |from, to| std::fs::rename(from, to)).await40 |
647 | 40 | } |
648 | | |
649 | 52 | pub async fn new_with_timeout_and_rename_fn( |
650 | 52 | spec: &FilesystemSpec, |
651 | 52 | rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>, |
652 | 52 | ) -> Result<Arc<Self>, Error> { |
653 | 104 | async fn create_subdirs(path: &str) -> Result<(), Error> { |
654 | 104 | fs::create_dir_all(format!("{path}/{STR_FOLDER}")) |
655 | 104 | .await |
656 | 104 | .err_tip(|| format!("Failed to create directory {path}/{STR_FOLDER}"0 ))?0 ; |
657 | 104 | fs::create_dir_all(format!("{path}/{DIGEST_FOLDER}")) |
658 | 104 | .await |
659 | 104 | .err_tip(|| format!("Failed to create directory {path}/{DIGEST_FOLDER}"0 )) |
660 | 104 | } |
661 | | |
662 | 52 | let now = SystemTime::now(); |
663 | | |
664 | 52 | let empty_policy = nativelink_config::stores::EvictionPolicy::default(); |
665 | 52 | let eviction_policy = spec.eviction_policy.as_ref().unwrap_or(&empty_policy); |
666 | 52 | let evicting_map = Arc::new(EvictingMap::new(eviction_policy, now)); |
667 | | |
668 | | // Create temp and content directories and the s and d subdirectories. |
669 | | |
670 | 52 | create_subdirs(&spec.temp_path).await?0 ; |
671 | 52 | create_subdirs(&spec.content_path).await?0 ; |
672 | | |
673 | 52 | let shared_context = Arc::new(SharedContext { |
674 | 52 | active_drop_spawns: AtomicU64::new(0), |
675 | 52 | temp_path: spec.temp_path.clone(), |
676 | 52 | content_path: spec.content_path.clone(), |
677 | 52 | }); |
678 | | |
679 | 52 | let block_size = if spec.block_size == 0 { Branch (679:29): [True: 0, False: 0]
Branch (679:29): [True: 1, False: 0]
Branch (679:29): [True: 0, False: 1]
Branch (679:29): [True: 0, False: 1]
Branch (679:29): [True: 1, False: 0]
Branch (679:29): [True: 0, False: 1]
Branch (679:29): [True: 1, False: 0]
Branch (679:29): [True: 15, False: 1]
Branch (679:29): [True: 2, False: 0]
Branch (679:29): [True: 3, False: 0]
Branch (679:29): [Folded - Ignored]
Branch (679:29): [True: 25, False: 0]
|
680 | 48 | DEFAULT_BLOCK_SIZE |
681 | | } else { |
682 | 4 | spec.block_size |
683 | | }; |
684 | 52 | add_files_to_cache( |
685 | 52 | evicting_map.as_ref(), |
686 | 52 | &now, |
687 | 52 | &shared_context, |
688 | 52 | block_size, |
689 | 52 | rename_fn, |
690 | 52 | ) |
691 | 52 | .await?0 ; |
692 | 52 | prune_temp_path(&shared_context.temp_path).await?0 ; |
693 | | |
694 | 52 | let read_buffer_size = if spec.read_buffer_size == 0 { Branch (694:35): [True: 0, False: 0]
Branch (694:35): [True: 0, False: 1]
Branch (694:35): [True: 1, False: 0]
Branch (694:35): [True: 0, False: 1]
Branch (694:35): [True: 1, False: 0]
Branch (694:35): [True: 0, False: 1]
Branch (694:35): [True: 1, False: 0]
Branch (694:35): [True: 5, False: 11]
Branch (694:35): [True: 2, False: 0]
Branch (694:35): [True: 3, False: 0]
Branch (694:35): [Folded - Ignored]
Branch (694:35): [True: 25, False: 0]
|
695 | 38 | DEFAULT_BUFF_SIZE |
696 | | } else { |
697 | 14 | spec.read_buffer_size as usize |
698 | | }; |
699 | 52 | let write_semaphore = if spec.max_concurrent_writes > 0 { Branch (699:34): [True: 0, False: 0]
Branch (699:34): [True: 0, False: 1]
Branch (699:34): [True: 0, False: 1]
Branch (699:34): [True: 0, False: 1]
Branch (699:34): [True: 0, False: 1]
Branch (699:34): [True: 0, False: 1]
Branch (699:34): [True: 0, False: 1]
Branch (699:34): [True: 0, False: 16]
Branch (699:34): [True: 0, False: 2]
Branch (699:34): [True: 0, False: 3]
Branch (699:34): [Folded - Ignored]
Branch (699:34): [True: 0, False: 25]
|
700 | 0 | Some(Semaphore::new(spec.max_concurrent_writes)) |
701 | | } else { |
702 | 52 | None |
703 | | }; |
704 | 52 | Ok(Arc::new_cyclic(|weak_self| Self { |
705 | 52 | shared_context, |
706 | 52 | evicting_map, |
707 | 52 | block_size, |
708 | 52 | read_buffer_size, |
709 | 52 | weak_self: weak_self.clone(), |
710 | 52 | rename_fn, |
711 | 52 | write_semaphore, |
712 | 52 | })) |
713 | 52 | } |
714 | | |
715 | 26 | pub fn get_arc(&self) -> Option<Arc<Self>> { |
716 | 26 | self.weak_self.upgrade() |
717 | 26 | } |
718 | | |
719 | 10 | pub async fn get_file_entry_for_digest(&self, digest: &DigestInfo) -> Result<Arc<Fe>, Error> { |
720 | 10 | if is_zero_digest(digest) { Branch (720:12): [Folded - Ignored]
Branch (720:12): [True: 1, False: 5]
Branch (720:12): [True: 0, False: 4]
Branch (720:12): [Folded - Ignored]
|
721 | 1 | return Ok(Arc::new(Fe::create( |
722 | 1 | 0, |
723 | 1 | 0, |
724 | 1 | RwLock::new(EncodedFilePath { |
725 | 1 | shared_context: self.shared_context.clone(), |
726 | 1 | path_type: PathType::Content, |
727 | 1 | key: digest.into(), |
728 | 1 | }), |
729 | 1 | ))); |
730 | 9 | } |
731 | 9 | self.evicting_map |
732 | 9 | .get(&digest.into()) |
733 | 9 | .await |
734 | 9 | .ok_or_else(|| make_err!(Code::NotFound0 , "{digest} not found in filesystem store. This may indicate the file was evicted due to cache pressure. Consider increasing 'max_bytes' in your filesystem store's eviction_policy configuration.")) |
735 | 10 | } |
736 | | |
737 | 63 | async fn update_file( |
738 | 63 | self: Pin<&Self>, |
739 | 63 | mut entry: Fe, |
740 | 63 | mut temp_file: fs::FileSlot, |
741 | 63 | final_key: StoreKey<'static>, |
742 | 63 | mut reader: DropCloserReadHalf, |
743 | 63 | ) -> Result<(), Error> { |
744 | 63 | let mut data_size = 0; |
745 | | loop { |
746 | 131 | let mut data = reader |
747 | 131 | .recv() |
748 | 131 | .await |
749 | 131 | .err_tip(|| "Failed to receive data in filesystem store")?0 ; |
750 | 131 | let data_len = data.len(); |
751 | 131 | if data_len == 0 { Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 2, False: 8]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 1, False: 1]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [True: 0, False: 0]
Branch (751:16): [Folded - Ignored]
Branch (751:16): [True: 60, False: 59]
|
752 | 63 | break; // EOF. |
753 | 68 | } |
754 | 68 | temp_file |
755 | 68 | .write_all_buf(&mut data) |
756 | 68 | .await |
757 | 68 | .err_tip(|| "Failed to write data into filesystem store")?0 ; |
758 | 68 | data_size += data_len as u64; |
759 | | } |
760 | | |
761 | 63 | let permit = if let Some(sem0 ) = &self.write_semaphore { Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 2]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 1]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [True: 0, False: 0]
Branch (761:29): [Folded - Ignored]
Branch (761:29): [True: 0, False: 60]
|
762 | | Some( |
763 | 0 | sem.acquire() |
764 | 0 | .await |
765 | 0 | .map_err(|_| make_err!(Code::Internal, "Write semaphore closed"))?, |
766 | | ) |
767 | | } else { |
768 | 63 | None |
769 | | }; |
770 | | |
771 | 63 | temp_file |
772 | 63 | .as_ref() |
773 | 63 | .sync_all() |
774 | 63 | .await |
775 | 63 | .err_tip(|| "Failed to sync_data in filesystem store")?0 ; |
776 | | |
777 | 63 | drop(permit); |
778 | | |
779 | 63 | temp_file.advise_dontneed(); |
780 | 63 | trace!(?temp_file, "Dropping file to update_file"); |
781 | 63 | drop(temp_file); |
782 | | |
783 | 63 | *entry.data_size_mut() = data_size; |
784 | 63 | self.emplace_file(final_key, Arc::new(entry)).await |
785 | 63 | } |
786 | | |
787 | 92 | async fn emplace_file(&self, key: StoreKey<'static>, entry: Arc<Fe>) -> Result<(), Error> { |
788 | | // This sequence of events is quite tricky to understand due to the amount of triggers that |
789 | | // happen, async'ness of it and the locking. So here is a breakdown of what happens: |
790 | | // 1. Here will hold a write lock on any file operations of this FileEntry. |
791 | | // 2. Then insert the entry into the evicting map. This may trigger an eviction of other |
792 | | // entries. |
793 | | // 3. Eviction triggers `unref()`, which grabs a write lock on the evicted FileEntry |
794 | | // during the rename. |
795 | | // 4. It should be impossible for items to be added while eviction is happening, so there |
796 | | // should not be a deadlock possibility. However, it is possible for the new FileEntry |
797 | | // to be evicted before the file is moved into place. Eviction of the newly inserted |
798 | | // item is not possible within the `insert()` call because the write lock inside the |
799 | | // eviction map. If an eviction of new item happens after `insert()` but before |
800 | | // `rename()` then we get to finish our operation because the `unref()` of the new item |
801 | | // will be blocked on us because we currently have the lock. |
802 | | // 5. Move the file into place. Since we hold a write lock still anyone that gets our new |
803 | | // FileEntry (which has not yet been placed on disk) will not be able to read the file's |
804 | | // contents until we release the lock. |
805 | 92 | let evicting_map = self.evicting_map.clone(); |
806 | 92 | let rename_fn = self.rename_fn; |
807 | | |
808 | | // We need to guarantee that this will get to the end even if the parent future is dropped. |
809 | | // See: https://github.com/TraceMachina/nativelink/issues/495 |
810 | 92 | background_spawn!("filesystem_store_emplace_file", async move { |
811 | 92 | evicting_map |
812 | 92 | .insert(key.borrow().into_owned().into(), entry.clone()) |
813 | 92 | .await; |
814 | | |
815 | | // The insert might have resulted in an eviction/unref so we need to check |
816 | | // it still exists in there. But first, get the lock... |
817 | 92 | let mut encoded_file_path = entry.get_encoded_file_path().write().await; |
818 | | // Then check it's still in there... |
819 | 92 | if evicting_map.get(&key).await.is_none() { Branch (819:16): [True: 0, False: 0]
Branch (819:16): [True: 0, False: 2]
Branch (819:16): [True: 0, False: 2]
Branch (819:16): [True: 0, False: 2]
Branch (819:16): [True: 0, False: 2]
Branch (819:16): [True: 0, False: 2]
Branch (819:16): [True: 0, False: 1]
Branch (819:16): [True: 1, False: 13]
Branch (819:16): [True: 0, False: 0]
Branch (819:16): [True: 0, False: 2]
Branch (819:16): [Folded - Ignored]
Branch (819:16): [True: 0, False: 65]
|
820 | 1 | info!(%key, "Got eviction while emplacing, dropping"); |
821 | 1 | return Ok(()); |
822 | 91 | } |
823 | | |
824 | 91 | let final_path = get_file_path_raw( |
825 | 91 | &PathType::Content, |
826 | 91 | encoded_file_path.shared_context.as_ref(), |
827 | 91 | &key, |
828 | | ); |
829 | | |
830 | 91 | let from_path = encoded_file_path.get_file_path(); |
831 | | // Internally tokio spawns fs commands onto a blocking thread anyways. |
832 | | // Since we are already on a blocking thread, we just need the `fs` wrapper to manage |
833 | | // an open-file permit (ensure we don't open too many files at once). |
834 | 91 | let result = (rename_fn)(&from_path, &final_path).err_tip(|| {1 |
835 | 1 | format!( |
836 | 1 | "Failed to rename temp file to final path {}", |
837 | 1 | final_path.display() |
838 | | ) |
839 | 1 | }); |
840 | | |
841 | | // In the event our move from temp file to final file fails we need to ensure we remove |
842 | | // the entry from our map. |
843 | | // Remember: At this point it is possible for another thread to have a reference to |
844 | | // `entry`, so we can't delete the file, only drop() should ever delete files. |
845 | 91 | if let Err(err1 ) = result { Branch (845:20): [True: 0, False: 0]
Branch (845:20): [True: 0, False: 2]
Branch (845:20): [True: 0, False: 2]
Branch (845:20): [True: 0, False: 2]
Branch (845:20): [True: 0, False: 2]
Branch (845:20): [True: 0, False: 2]
Branch (845:20): [True: 1, False: 0]
Branch (845:20): [True: 0, False: 13]
Branch (845:20): [True: 0, False: 0]
Branch (845:20): [True: 0, False: 2]
Branch (845:20): [Folded - Ignored]
Branch (845:20): [True: 0, False: 65]
|
846 | 1 | error!(?err, ?from_path, ?final_path, "Failed to rename file",); |
847 | | // Warning: To prevent deadlock we need to release our lock or during `remove_if()` |
848 | | // it will call `unref()`, which triggers a write-lock on `encoded_file_path`. |
849 | 1 | drop(encoded_file_path); |
850 | | // It is possible that the item in our map is no longer the item we inserted, |
851 | | // So, we need to conditionally remove it only if the pointers are the same. |
852 | | |
853 | 1 | evicting_map |
854 | 1 | .remove_if(&key, |map_entry| Arc::<Fe>::ptr_eq(map_entry, &entry)) |
855 | 1 | .await; |
856 | 1 | return Err(err); |
857 | 90 | } |
858 | 90 | encoded_file_path.path_type = PathType::Content; |
859 | 90 | encoded_file_path.key = key; |
860 | 90 | Ok(()) |
861 | 92 | }) |
862 | 92 | .await |
863 | 91 | .err_tip(|| "Failed to create spawn in filesystem store update_file")?0 |
864 | 91 | } |
865 | | } |
866 | | |
867 | | #[async_trait] |
868 | | impl<Fe: FileEntry> StoreDriver for FilesystemStore<Fe> { |
869 | | async fn has_with_results( |
870 | | self: Pin<&Self>, |
871 | | keys: &[StoreKey<'_>], |
872 | | results: &mut [Option<u64>], |
873 | 86 | ) -> Result<(), Error> { |
874 | | let own_keys = keys |
875 | | .iter() |
876 | 86 | .map(|sk| sk.borrow().into_owned()) |
877 | | .collect::<Vec<_>>(); |
878 | | self.evicting_map |
879 | | .sizes_for_keys(own_keys.iter(), results, false /* peek */) |
880 | | .await; |
881 | | // We need to do a special pass to ensure our zero files exist. |
882 | | // If our results failed and the result was a zero file, we need to |
883 | | // create the file by spec. |
884 | | for (key, result) in keys.iter().zip(results.iter_mut()) { |
885 | | if result.is_some() || !is_zero_digest(key.borrow()) { |
886 | | continue; |
887 | | } |
888 | | let (mut tx, rx) = make_buf_channel_pair(); |
889 | | let send_eof_result = tx.send_eof(); |
890 | | self.update(key.borrow(), rx, UploadSizeInfo::ExactSize(0)) |
891 | | .await |
892 | 0 | .err_tip(|| format!("Failed to create zero file for key {}", key.as_str())) |
893 | | .merge( |
894 | | send_eof_result |
895 | | .err_tip(|| "Failed to send zero file EOF in filesystem store has"), |
896 | | )?; |
897 | | |
898 | | *result = Some(0); |
899 | | } |
900 | | Ok(()) |
901 | 86 | } |
902 | | |
903 | | async fn update( |
904 | | self: Pin<&Self>, |
905 | | key: StoreKey<'_>, |
906 | | mut reader: DropCloserReadHalf, |
907 | | _upload_size: UploadSizeInfo, |
908 | 128 | ) -> Result<(), Error> { |
909 | | if is_zero_digest(key.borrow()) { |
910 | | // don't need to add, because zero length files are just assumed to exist |
911 | | return Ok(()); |
912 | | } |
913 | | |
914 | | let temp_key = make_temp_key(&key); |
915 | | |
916 | | // There's a possibility of deadlock here where we take all of the |
917 | | // file semaphores with make_and_open_file and the semaphores for |
918 | | // whatever is populating reader is exhasted on the threads that |
919 | | // have the FileSlots and not on those which can't. To work around |
920 | | // this we don't take the FileSlot until there's something on the |
921 | | // reader available to know that the populator is active. |
922 | | reader.peek().await?; |
923 | | |
924 | | let (entry, temp_file, temp_full_path) = Fe::make_and_open_file( |
925 | | self.block_size, |
926 | | EncodedFilePath { |
927 | | shared_context: self.shared_context.clone(), |
928 | | path_type: PathType::Temp, |
929 | | key: temp_key, |
930 | | }, |
931 | | ) |
932 | | .await?; |
933 | | |
934 | | self.update_file(entry, temp_file, key.into_owned(), reader) |
935 | | .await |
936 | 1 | .err_tip(|| { |
937 | 1 | format!( |
938 | 1 | "While processing with temp file {}", |
939 | 1 | temp_full_path.display() |
940 | | ) |
941 | 1 | }) |
942 | 128 | } |
943 | | |
944 | 97 | fn optimized_for(&self, optimization: StoreOptimizations) -> bool { |
945 | 92 | matches!( |
946 | 97 | optimization, |
947 | | StoreOptimizations::FileUpdates | StoreOptimizations::SubscribesToUpdateOneshot |
948 | | ) |
949 | 97 | } |
950 | | |
951 | 23 | async fn update_oneshot(self: Pin<&Self>, key: StoreKey<'_>, data: Bytes) -> Result<(), Error> { |
952 | | if is_zero_digest(key.borrow()) { |
953 | | return Ok(()); |
954 | | } |
955 | | |
956 | | let temp_key = make_temp_key(&key); |
957 | | let (mut entry, mut temp_file, temp_full_path) = Fe::make_and_open_file( |
958 | | self.block_size, |
959 | | EncodedFilePath { |
960 | | shared_context: self.shared_context.clone(), |
961 | | path_type: PathType::Temp, |
962 | | key: temp_key, |
963 | | }, |
964 | | ) |
965 | | .await |
966 | | .err_tip(|| "Failed to create temp file in filesystem store update_oneshot")?; |
967 | | |
968 | | // Write directly without channel overhead |
969 | | if !data.is_empty() { |
970 | | temp_file |
971 | | .write_all(&data) |
972 | | .await |
973 | 0 | .err_tip(|| format!("Failed to write data to {}", temp_full_path.display()))?; |
974 | | } |
975 | | |
976 | | let _permit = if let Some(sem) = &self.write_semaphore { |
977 | | Some( |
978 | | sem.acquire() |
979 | | .await |
980 | 0 | .map_err(|_| make_err!(Code::Internal, "Write semaphore closed"))?, |
981 | | ) |
982 | | } else { |
983 | | None |
984 | | }; |
985 | | |
986 | | temp_file |
987 | | .as_ref() |
988 | | .sync_all() |
989 | | .await |
990 | | .err_tip(|| "Failed to sync_data in filesystem store update_oneshot")?; |
991 | | |
992 | | drop(_permit); |
993 | | |
994 | | temp_file.advise_dontneed(); |
995 | | drop(temp_file); |
996 | | |
997 | | *entry.data_size_mut() = data.len() as u64; |
998 | | self.emplace_file(key.into_owned(), Arc::new(entry)).await |
999 | 23 | } |
1000 | | |
1001 | | async fn update_with_whole_file( |
1002 | | self: Pin<&Self>, |
1003 | | key: StoreKey<'_>, |
1004 | | path: OsString, |
1005 | | file: fs::FileSlot, |
1006 | | upload_size: UploadSizeInfo, |
1007 | 8 | ) -> Result<Option<fs::FileSlot>, Error> { |
1008 | | let file_size = match upload_size { |
1009 | | UploadSizeInfo::ExactSize(size) => size, |
1010 | | UploadSizeInfo::MaxSize(_) => file |
1011 | | .as_ref() |
1012 | | .metadata() |
1013 | | .await |
1014 | 0 | .err_tip(|| format!("While reading metadata for {}", path.display()))? |
1015 | | .len(), |
1016 | | }; |
1017 | | if file_size == 0 { |
1018 | | // don't need to add, because zero length files are just assumed to exist |
1019 | | return Ok(None); |
1020 | | } |
1021 | | let entry = Fe::create( |
1022 | | file_size, |
1023 | | self.block_size, |
1024 | | RwLock::new(EncodedFilePath { |
1025 | | shared_context: self.shared_context.clone(), |
1026 | | path_type: PathType::Custom(path), |
1027 | | key: key.borrow().into_owned(), |
1028 | | }), |
1029 | | ); |
1030 | | // We are done with the file, if we hold a reference to the file here, it could |
1031 | | // result in a deadlock if `emplace_file()` also needs file descriptors. |
1032 | | trace!(?file, "Dropping file to to update_with_whole_file"); |
1033 | | file.advise_dontneed(); |
1034 | | drop(file); |
1035 | | self.emplace_file(key.into_owned(), Arc::new(entry)) |
1036 | | .await |
1037 | | .err_tip(|| "Could not move file into store in upload_file_to_store, maybe dest is on different volume?")?; |
1038 | | return Ok(None); |
1039 | 8 | } |
1040 | | |
1041 | | async fn get_part( |
1042 | | self: Pin<&Self>, |
1043 | | key: StoreKey<'_>, |
1044 | | writer: &mut DropCloserWriteHalf, |
1045 | | offset: u64, |
1046 | | length: Option<u64>, |
1047 | 66 | ) -> Result<(), Error> { |
1048 | | if is_zero_digest(key.borrow()) { |
1049 | | self.has(key.borrow()) |
1050 | | .await |
1051 | | .err_tip(|| "Failed to check if zero digest exists in filesystem store")?; |
1052 | | writer |
1053 | | .send_eof() |
1054 | | .err_tip(|| "Failed to send zero EOF in filesystem store get_part")?; |
1055 | | return Ok(()); |
1056 | | } |
1057 | | let owned_key = key.into_owned(); |
1058 | 2 | let entry = self.evicting_map.get(&owned_key).await.ok_or_else(|| { |
1059 | 2 | make_err!( |
1060 | 2 | Code::NotFound, |
1061 | | "{} not found in filesystem store here", |
1062 | 2 | owned_key.as_str() |
1063 | | ) |
1064 | 2 | })?; |
1065 | | let read_limit = length.unwrap_or(u64::MAX); |
1066 | 2 | let mut temp_file = entry.read_file_part(offset, read_limit).or_else(|err| async move { |
1067 | | // If the file is not found, we need to remove it from the eviction map. |
1068 | 2 | if err.code == Code::NotFound { Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 2, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [True: 0, False: 0]
Branch (1068:16): [Folded - Ignored]
Branch (1068:16): [True: 0, False: 0]
|
1069 | 2 | error!( |
1070 | | ?err, |
1071 | | key = ?owned_key, |
1072 | 2 | "Entry was in our map, but not found on disk. Removing from map as a precaution, but process probably need restarted." |
1073 | | ); |
1074 | 2 | self.evicting_map.remove(&owned_key).await; |
1075 | 0 | } |
1076 | 2 | Err(err) |
1077 | 4 | }).await?; |
1078 | | |
1079 | | loop { |
1080 | | let mut buf = BytesMut::with_capacity(self.read_buffer_size); |
1081 | | temp_file |
1082 | | .read_buf(&mut buf) |
1083 | | .await |
1084 | | .err_tip(|| "Failed to read data in filesystem store")?; |
1085 | | if buf.is_empty() { |
1086 | | break; // EOF. |
1087 | | } |
1088 | | writer |
1089 | | .send(buf.freeze()) |
1090 | | .await |
1091 | | .err_tip(|| "Failed to send chunk in filesystem store get_part")?; |
1092 | | } |
1093 | | temp_file.get_ref().advise_dontneed(); |
1094 | | writer |
1095 | | .send_eof() |
1096 | | .err_tip(|| "Filed to send EOF in filesystem store get_part")?; |
1097 | | |
1098 | | Ok(()) |
1099 | 66 | } |
1100 | | |
1101 | 118 | fn inner_store(&self, _digest: Option<StoreKey>) -> &dyn StoreDriver { |
1102 | 118 | self |
1103 | 118 | } |
1104 | | |
1105 | 26 | fn as_any<'a>(&'a self) -> &'a (dyn core::any::Any + Sync + Send + 'static) { |
1106 | 26 | self |
1107 | 26 | } |
1108 | | |
1109 | 0 | fn as_any_arc(self: Arc<Self>) -> Arc<dyn core::any::Any + Sync + Send + 'static> { |
1110 | 0 | self |
1111 | 0 | } |
1112 | | |
1113 | 0 | fn register_health(self: Arc<Self>, registry: &mut HealthRegistryBuilder) { |
1114 | 0 | registry.register_indicator(self); |
1115 | 0 | } |
1116 | | |
1117 | 0 | fn register_remove_callback( |
1118 | 0 | self: Arc<Self>, |
1119 | 0 | callback: Arc<dyn RemoveItemCallback>, |
1120 | 0 | ) -> Result<(), Error> { |
1121 | 0 | self.evicting_map |
1122 | 0 | .add_remove_callback(RemoveItemCallbackHolder::new(callback)); |
1123 | 0 | Ok(()) |
1124 | 0 | } |
1125 | | } |
1126 | | |
1127 | | #[async_trait] |
1128 | | impl<Fe: FileEntry> HealthStatusIndicator for FilesystemStore<Fe> { |
1129 | 0 | fn get_name(&self) -> &'static str { |
1130 | 0 | "FilesystemStore" |
1131 | 0 | } |
1132 | | |
1133 | 0 | async fn check_health(&self, namespace: Cow<'static, str>) -> HealthStatus { |
1134 | | StoreDriver::check_health(Pin::new(self), namespace).await |
1135 | 0 | } |
1136 | | } |