/build/source/nativelink-config/src/stores.rs
Line | Count | Source |
1 | | // Copyright 2024 The NativeLink Authors. All rights reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | use serde::{Deserialize, Serialize}; |
16 | | |
17 | | use crate::serde_utils::{ |
18 | | convert_data_size_with_shellexpand, convert_duration_with_shellexpand, |
19 | | convert_numeric_with_shellexpand, convert_optional_string_with_shellexpand, |
20 | | convert_string_with_shellexpand, convert_vec_string_with_shellexpand, |
21 | | }; |
22 | | |
23 | | /// Name of the store. This type is used when referencing a store |
24 | | /// by the key it was given in the `CasConfig::stores` map. |
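 | | /// |
 | | /// As a rough sketch (store names and nesting are illustrative, drawing on |
 | | /// the `memory`, `completeness_checking`, and `ref_store` specs documented |
 | | /// below), a store is declared once under a name in the `stores` map and |
 | | /// referenced elsewhere by that name: |
 | | /// ```json |
 | | /// "stores": { |
 | | ///   "CAS_MAIN_STORE": { |
 | | ///     "memory": { |
 | | ///       "eviction_policy": { "max_bytes": 10000000 } |
 | | ///     } |
 | | ///   }, |
 | | ///   "AC_MAIN_STORE": { |
 | | ///     "completeness_checking": { |
 | | ///       "backend": { "memory": {} }, |
 | | ///       "cas_store": { "ref_store": { "name": "CAS_MAIN_STORE" } } |
 | | ///     } |
 | | ///   } |
 | | /// } |
 | | /// ``` |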
25 | | pub type StoreRefName = String; |
26 | | |
27 | | #[allow(non_camel_case_types)] |
28 | 0 | #[derive(Serialize, Deserialize, Debug, Clone, Copy)] |
29 | | pub enum ConfigDigestHashFunction { |
30 | | /// Use the sha256 hash function. |
31 | | /// <https://en.wikipedia.org/wiki/SHA-2> |
32 | | sha256, |
33 | | |
34 | | /// Use the blake3 hash function. |
35 | | /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)> |
36 | | blake3, |
37 | | } |
38 | | |
39 | | #[allow(non_camel_case_types)] |
40 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
41 | | pub enum StoreSpec { |
42 | | /// Memory store will store all data in a hashmap in memory. |
43 | | /// |
44 | | /// **Example JSON Config:** |
45 | | /// ```json |
46 | | /// "memory": { |
47 | | /// "eviction_policy": { |
48 | | /// // 10mb. |
49 | | /// "max_bytes": 10000000, |
50 | | /// } |
51 | | /// } |
53 | | /// ``` |
54 | | /// |
55 | | memory(MemorySpec), |
56 | | |
57 | | /// S3 store will use Amazon's S3 service as a backend to store |
58 | | /// the files. This configuration can be used to share files |
59 | | /// across multiple instances. |
60 | | /// |
61 | | /// This configuration will never delete files, so you are |
62 | | /// responsible for purging old files in other ways. |
63 | | /// |
64 | | /// **Example JSON Config:** |
65 | | /// ```json |
66 | | /// "experimental_s3_store": { |
67 | | /// "region": "eu-north-1", |
68 | | /// "bucket": "crossplane-bucket-af79aeca9", |
69 | | /// "key_prefix": "test-prefix-index/", |
70 | | /// "retry": { |
71 | | /// "max_retries": 6, |
72 | | /// "delay": 0.3, |
73 | | /// "jitter": 0.5 |
74 | | /// }, |
75 | | /// "multipart_max_concurrent_uploads": 10 |
76 | | /// } |
77 | | /// ``` |
78 | | /// |
79 | | experimental_s3_store(S3Spec), |
80 | | |
81 | | /// Verify store is used to apply verifications to an underlying |
82 | | /// store implementation. It is strongly encouraged to validate |
83 | | /// as much data as you can before accepting data from a client; |
84 | | /// failing to do so may populate the store with invalid data, |
85 | | /// causing all kinds of problems. |
86 | | /// |
87 | | /// The suggested configuration is to have the CAS validate the |
88 | | /// hash and size and the AC validate nothing. |
89 | | /// |
90 | | /// **Example JSON Config:** |
91 | | /// ```json |
92 | | /// "verify": { |
93 | | ///   "backend": { |
94 | | ///     "memory": { |
95 | | ///       "eviction_policy": { "max_bytes": 500000000 } // 500mb. |
96 | | ///     } |
97 | | ///   }, |
98 | | ///   "verify_size": true, |
99 | | ///   "verify_hash": true |
100 | | /// } |
101 | | /// ``` |
102 | | /// |
103 | | verify(Box<VerifySpec>), |
104 | | |
105 | | /// Completeness checking store verifies if the |
106 | | /// output files & folders exist in the CAS before forwarding |
107 | | /// the request to the underlying store. |
108 | | /// Note: This store should only be used on AC stores. |
109 | | /// |
110 | | /// **Example JSON Config:** |
111 | | /// ```json |
112 | | /// "completeness_checking": { |
113 | | /// "backend": { |
114 | | /// "filesystem": { |
115 | | /// "content_path": "~/.cache/nativelink/content_path-ac", |
116 | | /// "temp_path": "~/.cache/nativelink/tmp_path-ac", |
117 | | /// "eviction_policy": { |
118 | | /// // 500mb. |
119 | | /// "max_bytes": 500000000, |
120 | | /// } |
121 | | /// } |
122 | | /// }, |
123 | | /// "cas_store": { |
124 | | /// "ref_store": { |
125 | | /// "name": "CAS_MAIN_STORE" |
126 | | /// } |
127 | | /// } |
128 | | /// } |
129 | | /// ``` |
130 | | /// |
131 | | completeness_checking(Box<CompletenessCheckingSpec>), |
132 | | |
133 | | /// A compression store that will compress the data inbound and |
134 | | /// outbound. Compressing and decompressing the data has a |
135 | | /// non-trivial cost, but when the backing store requires network |
136 | | /// transport and/or storage space is a concern, it is often faster |
137 | | /// and more efficient to place this store in front of those |
138 | | /// stores. |
139 | | /// |
140 | | /// **Example JSON Config:** |
141 | | /// ```json |
142 | | /// "compression": { |
143 | | /// "compression_algorithm": { |
144 | | /// "lz4": {} |
145 | | /// }, |
146 | | /// "backend": { |
147 | | /// "filesystem": { |
148 | | /// "content_path": "/tmp/nativelink/data/content_path-cas", |
149 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-cas", |
150 | | /// "eviction_policy": { |
151 | | /// // 2gb. |
152 | | /// "max_bytes": 2000000000, |
153 | | /// } |
154 | | /// } |
155 | | /// } |
156 | | /// } |
157 | | /// ``` |
158 | | /// |
159 | | compression(Box<CompressionSpec>), |
160 | | |
161 | | /// A dedup store will run a rolling hash algorithm on the input to |
162 | | /// slice it into smaller parts, then compute a sha256 digest of |
163 | | /// each slice and, if the object does not already exist, upload |
164 | | /// the slice to the `content_store` using a new digest of just |
165 | | /// the slice. Once all parts exist, an Action-Cache-like index |
166 | | /// will be built and uploaded to the `index_store`, which will |
167 | | /// contain a reference to each chunk/digest of the uploaded |
168 | | /// file. A download request will first grab the index from the |
169 | | /// `index_store`, then stream the content of each chunk back as |
170 | | /// if it were one file. |
171 | | /// |
172 | | /// This store is exceptionally good when the following conditions |
173 | | /// are met: |
174 | | /// * Content is mostly the same (inserts, updates, deletes are ok) |
175 | | /// * Content is not compressed or encrypted |
176 | | /// * Uploading or downloading from `content_store` is the bottleneck. |
177 | | /// |
178 | | /// Note: This store pairs well when used with `CompressionSpec` as |
179 | | /// the `content_store`, but never put `DedupSpec` as the backend of |
180 | | /// `CompressionSpec` as it will negate all the gains. |
181 | | /// |
182 | | /// Note: When running `.has()` on this store, it will only check |
183 | | /// to see if the entry exists in the `index_store` and not check |
184 | | /// if the individual chunks exist in the `content_store`. |
185 | | /// |
186 | | /// **Example JSON Config:** |
187 | | /// ```json |
188 | | /// "dedup": { |
189 | | ///   "index_store": { |
190 | | ///     "memory": { |
191 | | ///       "eviction_policy": { "max_bytes": 1000000000 } // 1gb. |
192 | | ///     } |
193 | | ///   }, |
194 | | ///   "content_store": { |
195 | | ///     "compression": { |
196 | | ///       "compression_algorithm": { |
197 | | ///         "lz4": {} |
198 | | ///       }, |
199 | | ///       "backend": { |
200 | | ///         "fast_slow": { |
201 | | ///           "fast": { |
202 | | ///             "memory": { |
203 | | ///               "eviction_policy": { "max_bytes": 500000000 } // 500mb. |
204 | | ///             } |
205 | | ///           }, |
206 | | ///           "slow": { |
207 | | ///             "filesystem": { |
208 | | ///               "content_path": "/tmp/nativelink/data/content_path-content", |
209 | | ///               "temp_path": "/tmp/nativelink/data/tmp_path-content", |
210 | | ///               "eviction_policy": { |
211 | | ///                 "max_bytes": 2000000000 // 2gb. |
212 | | ///               } |
213 | | ///             } |
214 | | ///           } |
215 | | ///         } |
216 | | ///       } |
217 | | ///     } |
218 | | ///   } |
219 | | /// } |
222 | | /// ``` |
223 | | /// |
224 | | dedup(Box<DedupSpec>), |
225 | | |
226 | | /// Existence store will wrap around another store and cache calls |
227 | | /// to `.has()` so that subsequent `has_with_results` calls will |
228 | | /// be faster. This is useful for cases when the underlying store |
229 | | /// is slow to respond to existence checks. |
230 | | /// Note: This store should only be used on CAS stores. |
231 | | /// |
232 | | /// **Example JSON Config:** |
233 | | /// ```json |
234 | | /// "existence_cache": { |
235 | | /// "backend": { |
236 | | /// "memory": { |
237 | | /// "eviction_policy": { |
238 | | /// // 500mb. |
239 | | /// "max_bytes": 500000000, |
240 | | /// } |
241 | | /// } |
242 | | ///   } |
243 | | /// } |
249 | | /// ``` |
250 | | /// |
251 | | existence_cache(Box<ExistenceCacheSpec>), |
252 | | |
253 | | /// `FastSlow` store will first try to fetch the data from the `fast` |
254 | | /// store and then if it does not exist try the `slow` store. |
255 | | /// When the object does exist in the `slow` store, it will copy |
256 | | /// the data to the `fast` store while returning the data. |
257 | | /// This store should be thought of as a store that "buffers" |
258 | | /// the data to the `fast` store. |
259 | | /// On uploads it will mirror data to both `fast` and `slow` stores. |
260 | | /// |
261 | | /// WARNING: If you need data to always exist in the `slow` store |
262 | | /// for something like remote execution, be careful: this store |
263 | | /// will never check whether an object exists in the `slow` store |
264 | | /// if it already exists in the `fast` store (ie: it assumes that |
265 | | /// if an object exists in the `fast` store it also exists in the |
266 | | /// `slow` store). |
267 | | /// |
268 | | /// **Example JSON Config:** |
269 | | /// ```json |
270 | | /// "fast_slow": { |
271 | | /// "fast": { |
272 | | /// "filesystem": { |
273 | | /// "content_path": "/tmp/nativelink/data/content_path-index", |
274 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-index", |
275 | | /// "eviction_policy": { |
276 | | /// // 500mb. |
277 | | /// "max_bytes": 500000000, |
278 | | /// } |
279 | | /// } |
280 | | /// }, |
281 | | /// "slow": { |
282 | | /// "filesystem": { |
283 | | /// "content_path": "/tmp/nativelink/data/content_path-index", |
284 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-index", |
285 | | /// "eviction_policy": { |
286 | | /// // 500mb. |
287 | | /// "max_bytes": 500000000, |
288 | | /// } |
289 | | /// } |
290 | | /// } |
291 | | /// } |
292 | | /// ``` |
293 | | /// |
294 | | fast_slow(Box<FastSlowSpec>), |
295 | | |
296 | | /// Shards the data to multiple stores. This is useful for cases |
297 | | /// when you want to distribute the load across multiple stores. |
298 | | /// The digest hash is used to determine which store to send the |
299 | | /// data to. |
300 | | /// |
301 | | /// **Example JSON Config:** |
302 | | /// ```json |
303 | | /// "shard": { |
304 | | ///   "stores": [{ |
305 | | ///     "store": { |
306 | | ///       "memory": { |
307 | | ///         // 10mb. |
308 | | ///         "eviction_policy": { "max_bytes": 10000000 } |
309 | | ///       } |
310 | | ///     }, |
311 | | ///     "weight": 1 |
312 | | ///   }] |
313 | | /// } |
314 | | /// ``` |
315 | | /// |
316 | | shard(ShardSpec), |
317 | | |
318 | | /// Stores the data on the filesystem. This store is designed for |
319 | | /// local persistent storage. Restarts of this program should restore |
320 | | /// the previous state, meaning anything uploaded will be persistent |
321 | | /// as long as the filesystem integrity holds. This store uses the |
322 | | /// filesystem's `atime` (access time) to hold the last touched time |
323 | | /// of the file(s). |
324 | | /// |
325 | | /// **Example JSON Config:** |
326 | | /// ```json |
327 | | /// "filesystem": { |
328 | | /// "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", |
329 | | /// "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", |
330 | | /// "eviction_policy": { |
331 | | /// // 10gb. |
332 | | /// "max_bytes": 10000000000, |
333 | | /// } |
334 | | /// } |
335 | | /// ``` |
336 | | /// |
337 | | filesystem(FilesystemSpec), |
338 | | |
339 | | /// Store used to reference a store in the root store manager. |
340 | | /// This is useful for cases when you want to share a store across different |
341 | | /// nested stores. For example, you may want to share the same memory store |
342 | | /// used for the action cache, but use a `FastSlowSpec` and have the fast |
343 | | /// store also share the memory store for efficiency. |
344 | | /// |
345 | | /// **Example JSON Config:** |
346 | | /// ```json |
347 | | /// "ref_store": { |
348 | | /// "name": "FS_CONTENT_STORE" |
349 | | /// } |
350 | | /// ``` |
351 | | /// |
352 | | ref_store(RefSpec), |
353 | | |
354 | | /// Uses the size field of the digest to determine which store to send |
355 | | /// the data to. This is useful for cases when you'd like to put small |
356 | | /// objects in one store and large objects in another store. This should |
357 | | /// only be used if the size field is the real size of the content; in |
358 | | /// other words, don't use it on AC (Action Cache) stores. Any store where |
359 | | /// you can safely use `VerifySpec.verify_size = true` (ie: CAS stores) |
360 | | /// is safe to use with this store. |
361 | | /// |
362 | | /// **Example JSON Config:** |
363 | | /// ```json |
364 | | /// "size_partitioning": { |
365 | | /// "size": 134217728, // 128mib. |
366 | | /// "lower_store": { |
367 | | /// "memory": { |
368 | | /// "eviction_policy": { |
369 | | /// "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100000000}" |
370 | | /// } |
371 | | /// } |
372 | | /// }, |
373 | | /// "upper_store": { |
374 | | ///     // This store discards data larger than 128mib. |
375 | | /// "noop": {} |
376 | | /// } |
377 | | /// } |
378 | | /// ``` |
379 | | /// |
380 | | size_partitioning(Box<SizePartitioningSpec>), |
381 | | |
382 | | /// This store will pass-through calls to another GRPC store. This store |
383 | | /// is not designed to be used as a sub-store of another store, but it |
384 | | /// does satisfy the interface and will likely work. |
385 | | /// |
386 | | /// One major GOTCHA is that some stores use a special function on this |
387 | | /// store to get the size of the underlying object, which is only reliable |
388 | | /// when this store is serving a CAS store, not an AC store. Using this |
389 | | /// store directly, without it being a child of any other store, has no |
390 | | /// side effects and is the most efficient way to use it. |
391 | | /// |
392 | | /// **Example JSON Config:** |
393 | | /// ```json |
394 | | /// "grpc": { |
395 | | /// "instance_name": "main", |
396 | | /// "endpoints": [ |
397 | | /// {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"} |
398 | | /// ], |
399 | | /// "store_type": "ac" |
400 | | /// } |
401 | | /// ``` |
402 | | /// |
403 | | grpc(GrpcSpec), |
404 | | |
405 | | /// Stores data in any store compatible with the Redis API. |
406 | | /// |
407 | | /// Pairs well with `SizePartitioning` and/or `FastSlow` stores. |
408 | | /// Ideal for accepting small object sizes, as most hosted Redis |
409 | | /// services have a maximum upload size of between 256MB and 512MB. |
410 | | /// |
411 | | /// **Example JSON Config:** |
412 | | /// ```json |
413 | | /// "redis_store": { |
414 | | /// "addresses": [ |
415 | | /// "redis://127.0.0.1:6379/", |
416 | | /// ] |
417 | | /// } |
418 | | /// ``` |
419 | | /// |
420 | | redis_store(RedisSpec), |
421 | | |
422 | | /// Noop store is a store that sends streams into the void and all data |
423 | | /// retrieval will return 404 (`NotFound`). This can be useful for cases |
424 | | /// where you may need to partition your data and part of your data needs |
425 | | /// to be discarded. |
426 | | /// |
427 | | /// **Example JSON Config:** |
428 | | /// ```json |
429 | | /// "noop": {} |
430 | | /// ``` |
431 | | /// |
432 | | noop(NoopSpec), |
433 | | } |
434 | | |
435 | | /// Configuration for an individual shard of the store. |
436 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
437 | | #[serde(deny_unknown_fields)] |
438 | | pub struct ShardConfig { |
439 | | /// Store to shard the data to. |
440 | | pub store: StoreSpec, |
441 | | |
442 | | /// The weight of the store. This is used to determine how much data |
443 | | /// should be sent to the store. The fraction of data routed to a store |
444 | | /// is the individual store's weight divided by the sum of all weights. |
445 | | /// |
446 | | /// Default: 1 |
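 | | /// |
 | | /// For illustration (weights and stores are arbitrary), a two-way shard |
 | | /// where the first store receives roughly three quarters of the data: |
 | | /// ```json |
 | | /// "shard": { |
 | | ///   "stores": [ |
 | | ///     { "store": { "memory": {} }, "weight": 3 }, // 3 / (3 + 1) = 75% |
 | | ///     { "store": { "memory": {} }, "weight": 1 }  // 1 / (3 + 1) = 25% |
 | | ///   ] |
 | | /// } |
 | | /// ``` |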
447 | | pub weight: Option<u32>, |
448 | | } |
449 | | |
450 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
451 | | #[serde(deny_unknown_fields)] |
452 | | pub struct ShardSpec { |
453 | | /// Stores to shard the data to. |
454 | | pub stores: Vec<ShardConfig>, |
455 | | } |
456 | | |
457 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
458 | | #[serde(deny_unknown_fields)] |
459 | | pub struct SizePartitioningSpec { |
460 | | /// Size to partition the data on. |
461 | | #[serde(deserialize_with = "convert_data_size_with_shellexpand")] |
462 | | pub size: u64, |
463 | | |
464 | | /// Store to send data when object is < (less than) size. |
465 | | pub lower_store: StoreSpec, |
466 | | |
467 | | /// Store to send data when object is >= (greater than or equal to) size. |
468 | | pub upper_store: StoreSpec, |
469 | | } |
470 | | |
471 | 0 | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
472 | | #[serde(deny_unknown_fields)] |
473 | | pub struct RefSpec { |
474 | | /// Name of the store under the root "stores" config object. |
475 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
476 | | pub name: String, |
477 | | } |
478 | | |
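 | | /// A rough example of a filesystem store tuned with the optional fields |
 | | /// below (paths and sizes are placeholders): |
 | | /// ```json |
 | | /// "filesystem": { |
 | | ///   "content_path": "/var/lib/nativelink/content_path-cas", |
 | | ///   "temp_path": "/var/lib/nativelink/tmp_path-cas", |
 | | ///   "read_buffer_size": 32768, |
 | | ///   "block_size": 4096, |
 | | ///   "eviction_policy": { |
 | | ///     "max_bytes": 10000000000 // 10gb. |
 | | ///   } |
 | | /// } |
 | | /// ``` |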
479 | 0 | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
480 | | #[serde(deny_unknown_fields)] |
481 | | pub struct FilesystemSpec { |
482 | | /// Path on the system where to store the actual content. This is where |
483 | | /// the bulk of the data will be placed. |
484 | | /// On service bootup this folder will be scanned and all files will be |
485 | | /// added to the cache. In the event one of the files doesn't match the |
486 | | /// criteria, the file will be deleted. |
487 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
488 | | pub content_path: String, |
489 | | |
490 | | /// A temporary location of where files that are being uploaded or |
491 | | /// deleted will be placed while the content cannot be guaranteed to be |
492 | | /// accurate. This location must be on the same block device as |
493 | | /// `content_path` so atomic moves can happen (ie: move without copy). |
494 | | /// All files in this folder will be deleted on every startup. |
495 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
496 | | pub temp_path: String, |
497 | | |
498 | | /// Buffer size to use when reading files. Generally this should be left |
499 | | /// to the default value except for testing. |
500 | | /// Default: 32k. |
501 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
502 | | pub read_buffer_size: u32, |
503 | | |
504 | | /// Policy used to evict items out of the store. Failure to set this |
505 | | /// value will cause items to never be removed from the store causing |
506 | | /// infinite memory usage. |
507 | | pub eviction_policy: Option<EvictionPolicy>, |
508 | | |
509 | | /// The block size of the filesystem that the running machine is using. |
510 | | /// This value is used to determine an entry's actual size consumed on |
511 | | /// disk. For a 4KB block size filesystem, a 1B file actually consumes 4KB. |
512 | | /// Default: 4096 |
513 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
514 | | pub block_size: u64, |
515 | | } |
516 | | |
517 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
518 | | #[serde(deny_unknown_fields)] |
519 | | pub struct FastSlowSpec { |
520 | | /// Fast store that will be attempted to be contacted before reaching |
521 | | /// out to the `slow` store. |
522 | | pub fast: StoreSpec, |
523 | | |
524 | | /// If the object does not exist in the `fast` store it will try to |
525 | | /// get it from this store. |
526 | | pub slow: StoreSpec, |
527 | | } |
528 | | |
529 | 0 | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
530 | | #[serde(deny_unknown_fields)] |
531 | | pub struct MemorySpec { |
532 | | /// Policy used to evict items out of the store. Failure to set this |
533 | | /// value will cause items to never be removed from the store causing |
534 | | /// infinite memory usage. |
535 | | pub eviction_policy: Option<EvictionPolicy>, |
536 | | } |
537 | | |
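 | | /// A rough sketch of tuning the chunking parameters (the stores and sizes |
 | | /// are illustrative; see the field docs below for the defaults): |
 | | /// ```json |
 | | /// "dedup": { |
 | | ///   "index_store": { "memory": {} }, |
 | | ///   "content_store": { "memory": {} }, |
 | | ///   "min_size": 65536,      // 64k. |
 | | ///   "normal_size": 262144,  // 256k. |
 | | ///   "max_size": 524288,     // 512k. |
 | | ///   "max_concurrent_fetch_per_get": 10 |
 | | /// } |
 | | /// ``` |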
538 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
539 | | #[serde(deny_unknown_fields)] |
540 | | pub struct DedupSpec { |
541 | | /// Store used to store the index of each dedup slice. This store |
542 | | /// should generally be fast and small. |
543 | | pub index_store: StoreSpec, |
544 | | |
545 | | /// The store where the individual chunks will be uploaded. This |
546 | | /// store should generally be the slower & larger store. |
547 | | pub content_store: StoreSpec, |
548 | | |
549 | | /// Minimum size that a chunk will be when slicing up the content. |
550 | | /// Note: Increasing this setting can improve performance because the |
551 | | /// slicer does not need to examine the first `min_size` bytes of a |
552 | | /// chunk when deciding where to partition the data. |
553 | | /// |
554 | | /// Default: 65536 (64k) |
555 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
556 | | pub min_size: u32, |
557 | | |
558 | | /// A best-effort attempt will be made to keep the average size |
559 | | /// of the chunks close to this number; it is a target, not a |
560 | | /// guarantee. |
561 | | /// |
562 | | /// This value is also roughly the threshold used to decide whether |
563 | | /// to even attempt to dedup the entry or to forward it directly |
564 | | /// to the `content_store` without an index. The actual |
565 | | /// value will be about `normal_size * 1.3` due to implementation |
566 | | /// details. |
567 | | /// |
568 | | /// Default: 262144 (256k) |
569 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
570 | | pub normal_size: u32, |
571 | | |
572 | | /// Maximum size a chunk is allowed to be. |
573 | | /// |
574 | | /// Default: 524288 (512k) |
575 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
576 | | pub max_size: u32, |
577 | | |
578 | | /// Due to an implementation detail, we prefer to download the |
579 | | /// first chunks of the file so we can stream the content out |
580 | | /// and free up some of our buffers. This configuration |
581 | | /// will be used to restrict the number of concurrent chunk |
582 | | /// downloads at a time per `get()` request. |
583 | | /// |
584 | | /// This setting will also affect how much memory might be used |
585 | | /// per `get()` request. Estimated worst case memory per `get()` |
586 | | /// request is: `max_concurrent_fetch_per_get * max_size`. |
587 | | /// |
588 | | /// Default: 10 |
589 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
590 | | pub max_concurrent_fetch_per_get: u32, |
591 | | } |
592 | | |
593 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
594 | | #[serde(deny_unknown_fields)] |
595 | | pub struct ExistenceCacheSpec { |
596 | | /// The underlying store to wrap around. All content will first flow |
597 | | /// through self before forwarding to backend. In the event there |
598 | | /// is an error detected in self, the connection to the backend |
599 | | /// will be terminated, and early termination should always cause |
600 | | /// updates to fail on the backend. |
601 | | pub backend: StoreSpec, |
602 | | |
603 | | /// Policy used to evict items out of the store. Failure to set this |
604 | | /// value will cause items to never be removed from the store causing |
605 | | /// infinite memory usage. |
606 | | pub eviction_policy: Option<EvictionPolicy>, |
607 | | } |
608 | | |
609 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
610 | | #[serde(deny_unknown_fields)] |
611 | | pub struct VerifySpec { |
612 | | /// The underlying store to wrap around. All content will first flow |
613 | | /// through self before forwarding to backend. In the event there |
614 | | /// is an error detected in self, the connection to the backend |
615 | | /// will be terminated, and early termination should always cause |
616 | | /// updates to fail on the backend. |
617 | | pub backend: StoreSpec, |
618 | | |
619 | | /// If set the store will verify the size of the data before accepting |
620 | | /// an upload of data. |
621 | | /// |
622 | | /// This should be set to false for AC, but true for CAS stores. |
623 | | #[serde(default)] |
624 | | pub verify_size: bool, |
625 | | |
626 | | /// If set, the data will be hashed and the key verified against the |
627 | | /// computed hash. The hash function is automatically determined based |
628 | | /// on the request and, if not set, the global default is used. |
629 | | /// |
630 | | /// This should be set to false for AC, but true for CAS stores. |
631 | | #[serde(default)] |
632 | | pub verify_hash: bool, |
633 | | } |
634 | | |
635 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
636 | | #[serde(deny_unknown_fields)] |
637 | | pub struct CompletenessCheckingSpec { |
638 | | /// The underlying store that will have its results validated before sending to the client. |
639 | | pub backend: StoreSpec, |
640 | | |
641 | | /// When a request is made, the results are decoded and all output digests/files are verified |
642 | | /// to exist in this CAS store before returning success. |
643 | | pub cas_store: StoreSpec, |
644 | | } |
645 | | |
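 | | /// An illustrative compression store using lz4 with an explicit block |
 | | /// size (the backend and sizes are placeholders): |
 | | /// ```json |
 | | /// "compression": { |
 | | ///   "compression_algorithm": { |
 | | ///     "lz4": { |
 | | ///       "block_size": 65536, |
 | | ///       "max_decode_block_size": 262144 |
 | | ///     } |
 | | ///   }, |
 | | ///   "backend": { "memory": {} } |
 | | /// } |
 | | /// ``` |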
646 | 0 | #[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone, Copy)] |
647 | | #[serde(deny_unknown_fields)] |
648 | | pub struct Lz4Config { |
649 | | /// Size of the blocks to compress. |
650 | | /// Higher values require more ram, but might yield slightly better |
651 | | /// compression ratios. |
652 | | /// |
653 | | /// Default: 65536 (64k). |
654 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
655 | | pub block_size: u32, |
656 | | |
657 | | /// Maximum size allowed to attempt to deserialize data into. |
658 | | /// This is needed because the `block_size` is embedded into the data |
659 | | /// so if there was a bad actor, they could upload an extremely large |
660 | | /// `block_size`'ed entry and we'd allocate a large amount of memory |
661 | | /// when retrieving the data. To prevent this from happening, we |
662 | | /// allow you to specify the maximum that we'll attempt deserialize. |
663 | | /// |
664 | | /// Default: value in `block_size`. |
665 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
666 | | pub max_decode_block_size: u32, |
667 | | } |
668 | | |
669 | | #[allow(non_camel_case_types)] |
670 | 0 | #[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] |
671 | | pub enum CompressionAlgorithm { |
672 | | /// LZ4 compression algorithm is extremely fast for compression and |
673 | | /// decompression, however does not perform very well in compression |
674 | | /// ratio. In most cases build artifacts are highly compressible, however |
675 | | /// lz4 is quite good at aborting early if the data is not deemed very |
676 | | /// compressible. |
677 | | /// |
678 | | /// see: <https://lz4.github.io/lz4/> |
679 | | lz4(Lz4Config), |
680 | | } |
681 | | |
682 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
683 | | #[serde(deny_unknown_fields)] |
684 | | pub struct CompressionSpec { |
685 | | /// The underlying store to wrap around. All content will first flow |
686 | | /// through self before forwarding to backend. In the event there |
687 | | /// is an error detected in self, the connection to the backend |
688 | | /// will be terminated, and early termination should always cause |
689 | | /// updates to fail on the backend. |
690 | | pub backend: StoreSpec, |
691 | | |
692 | | /// The compression algorithm to use. |
693 | | pub compression_algorithm: CompressionAlgorithm, |
694 | | } |
695 | | |
696 | | /// Eviction policy always works on LRU (Least Recently Used). Any time an entry |
697 | | /// is touched it updates the timestamp. Inserts and updates will execute the |
698 | | /// eviction policy removing any expired entries and/or the oldest entries |
699 | | /// until the store size becomes smaller than `max_bytes`. |
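 | | /// |
 | | /// As a sketch (the limits are arbitrary), evicting once the store exceeds |
 | | /// 1gb, down to 900mb, and dropping anything untouched for a week: |
 | | /// ```json |
 | | /// "eviction_policy": { |
 | | ///   "max_bytes": 1000000000, |
 | | ///   "evict_bytes": 100000000, |
 | | ///   "max_seconds": 604800, |
 | | ///   "max_count": 0 |
 | | /// } |
 | | /// ``` |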
700 | 0 | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
701 | | #[serde(deny_unknown_fields)] |
702 | | pub struct EvictionPolicy { |
703 | | /// Maximum number of bytes before eviction takes place. |
704 | | /// Default: 0. Zero means never evict based on size. |
705 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
706 | | pub max_bytes: usize, |
707 | | |
708 | | /// When eviction starts based on hitting `max_bytes`, continue until |
709 | | /// `max_bytes - evict_bytes` is met to create a low watermark. This stops |
710 | | /// operations from thrashing when the store is close to the limit. |
711 | | /// Default: 0 |
712 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
713 | | pub evict_bytes: usize, |
714 | | |
715 | | /// Maximum number of seconds for an entry to live since it was last |
716 | | /// accessed before it is evicted. |
717 | | /// Default: 0. Zero means never evict based on time. |
718 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
719 | | pub max_seconds: u32, |
720 | | |
721 | | /// Maximum number of entries in the store before an eviction takes place. |
722 | | /// Default: 0. Zero means never evict based on count. |
723 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
724 | | pub max_count: u64, |
725 | | } |
726 | | |
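 | | /// A rough example of an S3 store using the optional fields below (the |
 | | /// bucket, prefix, and thresholds are placeholders): |
 | | /// ```json |
 | | /// "experimental_s3_store": { |
 | | ///   "region": "us-east-1", |
 | | ///   "bucket": "my-nativelink-bucket", |
 | | ///   "key_prefix": "cas/", |
 | | ///   "retry": { |
 | | ///     "max_retries": 6, |
 | | ///     "delay": 0.3, |
 | | ///     "jitter": 0.5 |
 | | ///   }, |
 | | ///   "consider_expired_after_s": 259200, // 3 days. |
 | | ///   "multipart_max_concurrent_uploads": 10 |
 | | /// } |
 | | /// ``` |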
727 | 0 | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
728 | | #[serde(deny_unknown_fields)] |
729 | | pub struct S3Spec { |
730 | | /// S3 region. Usually us-east-1, us-west-2, af-south-1, etc. |
731 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
732 | | pub region: String, |
733 | | |
734 | | /// Bucket name to use as the backend. |
735 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
736 | | pub bucket: String, |
737 | | |
738 | | /// Optional prefix to prepend to the object keys in S3. If None, no prefix will be used. |
739 | | #[serde(default)] |
740 | | pub key_prefix: Option<String>, |
741 | | |
742 | | /// Retry configuration to use when a network request fails. |
743 | | #[serde(default)] |
744 | | pub retry: Retry, |
745 | | |
746 | | /// If the number of seconds since the `last_modified` time of the object |
747 | | /// is greater than this value, the object will not be considered |
748 | | /// "existing". This allows for external tools to delete objects that |
749 | | /// have not been uploaded in a long time. If a client receives a `NotFound` |
750 | | /// the client should re-upload the object. |
751 | | /// |
752 | | /// There should be sufficient buffer time between how long the expiration |
753 | | /// configuration of the external tool is and this value. Keeping items |
754 | | /// around for a few days is generally a good idea. |
755 | | /// |
756 | | /// Default: 0. Zero means never consider an object expired. |
757 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
758 | | pub consider_expired_after_s: u32, |
759 | | |
760 | | /// The maximum buffer size to retain in case of a retryable error |
761 | | /// during upload. Setting this to zero will disable upload buffering; |
762 | | /// this means that in the event of a failure during upload, the entire |
763 | | /// upload will be aborted and the client will likely receive an error. |
764 | | /// |
765 | | /// Default: 5MB. |
766 | | pub max_retry_buffer_per_request: Option<usize>, |
767 | | |
768 | | /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`. |
769 | | /// |
770 | | /// Default: 10. |
771 | | pub multipart_max_concurrent_uploads: Option<usize>, |
772 | | |
773 | | /// Allow unencrypted HTTP connections. Only use this for local testing. |
774 | | /// |
775 | | /// Default: false |
776 | | #[serde(default)] |
777 | | pub insecure_allow_http: bool, |
778 | | |
779 | | /// Disable http/2 connections and only use http/1.1. Default client |
780 | | /// configuration will have http/1.1 and http/2 enabled for connection |
781 | | /// schemes. Http/2 should be disabled if environments have poor support |
782 | | /// or performance related to http/2. Safe to keep default unless |
783 | | /// underlying network environment or S3 API servers specify otherwise. |
784 | | /// |
785 | | /// Default: false |
786 | | #[serde(default)] |
787 | | pub disable_http2: bool, |
788 | | } |
789 | | |
790 | | #[allow(non_camel_case_types)] |
791 | 0 | #[derive(Serialize, Deserialize, Debug, Clone, Copy)] |
792 | | pub enum StoreType { |
793 | | /// The store is content addressable storage. |
794 | | cas, |
795 | | /// The store is an action cache. |
796 | | ac, |
797 | | } |
798 | | |
799 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
800 | | pub struct ClientTlsConfig { |
801 | | /// Path to the certificate authority to use to validate the remote. |
802 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
803 | | pub ca_file: String, |
804 | | |
805 | | /// Path to the certificate file for client authentication. |
806 | | #[serde(deserialize_with = "convert_optional_string_with_shellexpand")] |
807 | | pub cert_file: Option<String>, |
808 | | |
809 | | /// Path to the private key file for client authentication. |
810 | | #[serde(deserialize_with = "convert_optional_string_with_shellexpand")] |
811 | | pub key_file: Option<String>, |
812 | | } |
813 | | |
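 | | /// An illustrative TLS-enabled endpoint entry (the address and file |
 | | /// paths are placeholders): |
 | | /// ```json |
 | | /// { |
 | | ///   "address": "grpcs://cas.example.com:443", |
 | | ///   "tls_config": { |
 | | ///     "ca_file": "/etc/ssl/certs/ca.pem", |
 | | ///     "cert_file": "/etc/nativelink/client.pem", |
 | | ///     "key_file": "/etc/nativelink/client.key" |
 | | ///   }, |
 | | ///   "concurrency_limit": 16 |
 | | /// } |
 | | /// ``` |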
814 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
815 | | #[serde(deny_unknown_fields)] |
816 | | pub struct GrpcEndpoint { |
817 | | /// The endpoint address (e.g. grpc(s)://example.com:443). |
818 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
819 | | pub address: String, |
820 | | /// The TLS configuration to use to connect to the endpoint (if grpcs). |
821 | | pub tls_config: Option<ClientTlsConfig>, |
822 | | /// The maximum concurrency to allow on this endpoint. |
823 | | pub concurrency_limit: Option<usize>, |
824 | | } |
825 | | |
826 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
827 | | #[serde(deny_unknown_fields)] |
828 | | pub struct GrpcSpec { |
829 | | /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this. |
830 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
831 | | pub instance_name: String, |
832 | | |
833 | | /// The endpoint of the grpc connection. |
834 | | pub endpoints: Vec<GrpcEndpoint>, |
835 | | |
836 | | /// The type of the upstream store, this ensures that the correct server calls are made. |
837 | | pub store_type: StoreType, |
838 | | |
839 | | /// Retry configuration to use when a network request fails. |
840 | | #[serde(default)] |
841 | | pub retry: Retry, |
842 | | |
843 | | /// Limit the number of simultaneous upstream requests to this many. A |
844 | | /// value of zero is treated as unlimited. If the limit is reached the |
845 | | /// request is queued. |
846 | | #[serde(default)] |
847 | | pub max_concurrent_requests: usize, |
848 | | |
849 | | /// The number of connections to make to each specified endpoint to balance |
850 | | /// the load over multiple TCP connections. Default 1. |
851 | | #[serde(default)] |
852 | | pub connections_per_endpoint: usize, |
853 | | } |
854 | | |
855 | | /// The possible error codes that might occur on an upstream request. |
856 | 0 | #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] |
857 | | pub enum ErrorCode { |
858 | | Cancelled = 1, |
859 | | Unknown = 2, |
860 | | InvalidArgument = 3, |
861 | | DeadlineExceeded = 4, |
862 | | NotFound = 5, |
863 | | AlreadyExists = 6, |
864 | | PermissionDenied = 7, |
865 | | ResourceExhausted = 8, |
866 | | FailedPrecondition = 9, |
867 | | Aborted = 10, |
868 | | OutOfRange = 11, |
869 | | Unimplemented = 12, |
870 | | Internal = 13, |
871 | | Unavailable = 14, |
872 | | DataLoss = 15, |
873 | | Unauthenticated = 16, |
874 | | // Note: This list is duplicated from nativelink-error/lib.rs. |
875 | | } |
876 | | |
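 | | /// A rough example of a Redis store with a few of the optional fields |
 | | /// below set (the address, prefix, and sizes are placeholders): |
 | | /// ```json |
 | | /// "redis_store": { |
 | | ///   "addresses": ["redis://127.0.0.1:6379/"], |
 | | ///   "mode": "standard", |
 | | ///   "key_prefix": "nativelink:", |
 | | ///   "connection_pool_size": 3, |
 | | ///   "read_chunk_size": 65536, |
 | | ///   "max_chunk_uploads_per_update": 10 |
 | | /// } |
 | | /// ``` |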
877 | 0 | #[derive(Serialize, Deserialize, Debug, Clone)] |
878 | | pub struct RedisSpec { |
879 | | /// The address(es) of the Redis server(s). |
880 | | /// Ex: `["redis://username:password@redis-server-url:6380/99"]` |
881 | | /// where 99 represents the database ID and 6380 the port. |
882 | | #[serde(deserialize_with = "convert_vec_string_with_shellexpand")] |
883 | | pub addresses: Vec<String>, |
884 | | |
885 | | /// The response timeout for the Redis connection in seconds. |
886 | | /// |
887 | | /// Default: 10 |
888 | | #[serde(default)] |
889 | | pub response_timeout_s: u64, |
890 | | |
891 | | /// The connection timeout for the Redis connection in seconds. |
892 | | /// |
893 | | /// Default: 10 |
894 | | #[serde(default)] |
895 | | pub connection_timeout_s: u64, |
896 | | |
897 | | /// An optional and experimental Redis channel to publish write events to. |
898 | | /// |
899 | | /// If set, every time a write operation is made to a Redis node |
900 | | /// then an event will be published to a Redis channel with the given name. |
901 | | /// If unset, the writes will still be made, |
902 | | /// but the write events will not be published. |
903 | | /// |
904 | | /// Default: (Empty String / No Channel) |
905 | | #[serde(default)] |
906 | | pub experimental_pub_sub_channel: Option<String>, |
907 | | |
908 | | /// An optional prefix to prepend to all keys in this store. |
909 | | /// |
910 | | /// Setting this value can make it convenient to query or |
911 | | /// organize your data according to the shared prefix. |
912 | | /// |
913 | | /// Default: (Empty String / No Prefix) |
914 | | #[serde(default)] |
915 | | pub key_prefix: String, |
916 | | |
917 | | /// Set the mode Redis is operating in. |
918 | | /// |
919 | | /// Available options are "cluster" for |
920 | | /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/), |
921 | | /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/), |
922 | | /// or "standard" if Redis is operating in neither cluster nor sentinel mode. |
923 | | /// |
924 | | /// Default: standard |
925 | | #[serde(default)] |
926 | | pub mode: RedisMode, |
927 | | |
928 | | /// When using pubsub interface, this is the maximum number of items to keep |
929 | | /// queued up before dropping old items. |
930 | | /// |
931 | | /// Default: 4096 |
932 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
933 | | pub broadcast_channel_capacity: usize, |
934 | | |
935 | | /// The amount of time in milliseconds until the redis store considers the |
936 | | /// command to be timed out. This will trigger a retry of the command and |
937 | | /// potentially a reconnection to the redis server. |
938 | | /// |
939 | | /// Default: 10000 (10 seconds) |
940 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
941 | | pub command_timeout_ms: u64, |
942 | | |
943 | | /// The amount of time in milliseconds until the redis store considers the |
944 | | /// connection to be unresponsive. This will trigger a reconnection to the |
945 | | /// redis server. |
946 | | /// |
947 | | /// Default: 3000 (3 seconds) |
948 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
949 | | pub connection_timeout_ms: u64, |
950 | | |
951 | | /// The amount of data to read from the redis server at a time. |
952 | | /// This is used to limit the amount of memory used when reading |
953 | | /// large objects from the redis server as well as limiting the |
954 | | /// amount of time a single read operation can take. |
955 | | /// |
956 | | /// IMPORTANT: If this value is too high, the `command_timeout_ms` |
957 | | /// might be triggered if the latency or throughput to the redis |
958 | | /// server is too low. |
959 | | /// |
960 | | /// Default: 64KiB |
961 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
962 | | pub read_chunk_size: usize, |
963 | | |
964 | | /// The number of connections to keep open to the redis server(s). |
965 | | /// |
966 | | /// Default: 3 |
967 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
968 | | pub connection_pool_size: usize, |
969 | | |
970 | | /// The maximum number of upload chunks to allow per update. |
971 | | /// This is used to limit the amount of memory used when uploading |
972 | | /// large objects to the redis server. A good rule of thumb is to |
973 | | /// think of the data as: |
974 | | /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THEORETICAL_MAX_CONCURRENT_UPLOADS` |
975 | | /// (note: it is a good idea to divide `AVAIL_MEMORY` by ~10 to account for other memory usage) |
976 | | /// |
977 | | /// Default: 10 |
978 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
979 | | pub max_chunk_uploads_per_update: usize, |
980 | | |
981 | | /// Retry configuration to use when a network request fails. |
982 | | /// See the `Retry` struct for more information. |
983 | | /// |
984 | | /// ```txt |
985 | | /// Default: Retry { |
986 | | /// max_retries: 0, /* unlimited */ |
987 | | /// delay: 0.1, /* 100ms */ |
988 | | /// jitter: 0.5, /* 50% */ |
989 | | /// retry_on_errors: None, /* not used in redis store */ |
990 | | /// } |
991 | | /// ``` |
992 | | #[serde(default)] |
993 | | pub retry: Retry, |
994 | | } |
995 | | |
996 | 0 | #[derive(Debug, Default, Deserialize, Serialize, Clone, PartialEq, Eq)] |
997 | | #[serde(rename_all = "lowercase")] |
998 | | pub enum RedisMode { |
999 | | Cluster, |
1000 | | Sentinel, |
1001 | | #[default] |
1002 | | Standard, |
1003 | | } |
1004 | | |
1005 | 0 | #[derive(Clone, Debug, Default, Deserialize, Serialize)] |
1006 | | pub struct NoopSpec {} |
1007 | | |
1008 | | /// Retry configuration. This configuration is exponential and each iteration |
1009 | | /// a jitter as a percentage is applied of the calculated delay. For example: |
1010 | | /// ```haskell |
1011 | | /// Retry{ |
1012 | | /// max_retries: 7, |
1013 | | /// delay: 0.1, |
1014 | | /// jitter: 0.5, |
1015 | | /// } |
1016 | | /// ``` |
1017 | | /// will result in: |
1018 | | /// Attempt - Delay |
1019 | | /// 1 0ms |
1020 | | /// 2 75ms - 125ms |
1021 | | /// 3 150ms - 250ms |
1022 | | /// 4 300ms - 500ms |
1023 | | /// 5 600ms - 1s |
1024 | | /// 6 1.2s - 2s |
1025 | | /// 7 2.4s - 4s |
1026 | | /// 8 4.8s - 8s |
1027 | | /// Remember that the delays are additive, meaning a single request that |
1028 | | /// exhausts all retries above would wait a total of 9.525s - 15.875s. |
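 | | /// |
 | | /// As a sketch, a config corresponding to the example above, retrying |
 | | /// only on a couple of error codes (the list is illustrative): |
 | | /// ```json |
 | | /// "retry": { |
 | | ///   "max_retries": 7, |
 | | ///   "delay": 0.1, |
 | | ///   "jitter": 0.5, |
 | | ///   "retry_on_errors": ["Unavailable", "DeadlineExceeded"] |
 | | /// } |
 | | /// ``` |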
1029 | 0 | #[derive(Serialize, Deserialize, Clone, Debug, Default)] |
1030 | | #[serde(deny_unknown_fields)] |
1031 | | pub struct Retry { |
1032 | | /// Maximum number of retries until retrying stops. |
1033 | | /// Setting this to zero will make a single attempt with no retries. |
1034 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1035 | | pub max_retries: usize, |
1036 | | |
1037 | | /// Delay in seconds for exponential back off. |
1038 | | #[serde(default)] |
1039 | | pub delay: f32, |
1040 | | |
1041 | | /// Amount of jitter to add as a percentage in decimal form. This will |
1042 | | /// change the formula like: |
1043 | | /// ```haskell |
1044 | | /// random( |
1045 | | /// (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), |
1046 | | /// (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), |
1047 | | /// ) |
1048 | | /// ``` |
1049 | | #[serde(default)] |
1050 | | pub jitter: f32, |
1051 | | |
1052 | | /// A list of error codes to retry on, if this is not set then the default |
1053 | | /// error codes to retry on are used. These default codes are the most |
1054 | | /// likely to be non-permanent. |
1055 | | /// - `Unknown` |
1056 | | /// - `Cancelled` |
1057 | | /// - `DeadlineExceeded` |
1058 | | /// - `ResourceExhausted` |
1059 | | /// - `Aborted` |
1060 | | /// - `Internal` |
1061 | | /// - `Unavailable` |
1062 | | /// - `DataLoss` |
1063 | | #[serde(default)] |
1064 | | pub retry_on_errors: Option<Vec<ErrorCode>>, |
1065 | | } |