/build/source/nativelink-config/src/stores.rs
Line | Count | Source |
1 | | // Copyright 2024 The NativeLink Authors. All rights reserved. |
2 | | // |
3 | | // Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // See LICENSE file for details |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | use core::time::Duration; |
16 | | use std::sync::Arc; |
17 | | |
18 | | use rand::Rng; |
19 | | use serde::{Deserialize, Serialize}; |
20 | | |
21 | | use crate::serde_utils::{ |
22 | | convert_data_size_with_shellexpand, convert_duration_with_shellexpand, |
23 | | convert_numeric_with_shellexpand, convert_optional_numeric_with_shellexpand, |
24 | | convert_optional_string_with_shellexpand, convert_string_with_shellexpand, |
25 | | convert_vec_string_with_shellexpand, |
26 | | }; |
27 | | |
28 | | /// Name of the store. This type is used when referencing a store |
29 | | /// as a key in the `CasConfig::stores` map. |
30 | | pub type StoreRefName = String; |
31 | | |
32 | | #[derive(Serialize, Deserialize, Debug, Clone, Copy)] |
33 | | #[serde(rename_all = "snake_case")] |
34 | | pub enum ConfigDigestHashFunction { |
35 | | /// Use the sha256 hash function. |
36 | | /// <https://en.wikipedia.org/wiki/SHA-2> |
37 | | Sha256, |
38 | | |
39 | | /// Use the blake3 hash function. |
40 | | /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)> |
41 | | Blake3, |
42 | | } |
43 | | |
44 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
45 | | #[serde(rename_all = "snake_case")] |
46 | | pub enum StoreSpec { |
47 | | /// Memory store will store all data in a hashmap in memory. |
48 | | /// |
49 | | /// **Example JSON Config:** |
50 | | /// ```json |
51 | | /// "memory": { |
52 | | /// "eviction_policy": { |
53 | | /// "max_bytes": "10mb", |
54 | | /// } |
55 | | /// } |
56 | | /// ``` |
57 | | /// |
58 | | Memory(MemorySpec), |
59 | | |
60 | | /// A generic blob store that will store files on the cloud |
61 | | /// provider. This configuration will never delete files, so you are |
62 | | /// responsible for purging old files in other ways. |
63 | | /// It supports the following backends: |
64 | | /// |
65 | | /// 1. **Amazon S3:** |
66 | | /// S3 store will use Amazon's S3 service as a backend to store |
67 | | /// the files. This configuration can be used to share files |
68 | | /// across multiple instances. Uses system certificates for TLS |
69 | | /// verification via `rustls-platform-verifier`. |
70 | | /// |
71 | | /// **Example JSON Config:** |
72 | | /// ```json |
73 | | /// "experimental_cloud_object_store": { |
74 | | /// "provider": "aws", |
75 | | /// "region": "eu-north-1", |
76 | | /// "bucket": "crossplane-bucket-af79aeca9", |
77 | | /// "key_prefix": "test-prefix-index/", |
78 | | /// "retry": { |
79 | | /// "max_retries": 6, |
80 | | /// "delay": 0.3, |
81 | | /// "jitter": 0.5 |
82 | | /// }, |
83 | | /// "multipart_max_concurrent_uploads": 10 |
84 | | /// } |
85 | | /// ``` |
86 | | /// |
87 | | /// 2. **Google Cloud Storage:** |
88 | | /// GCS store uses Google's GCS service as a backend to store |
89 | | /// the files. This configuration can be used to share files |
90 | | /// across multiple instances. |
91 | | /// |
92 | | /// **Example JSON Config:** |
93 | | /// ```json |
94 | | /// "experimental_cloud_object_store": { |
95 | | /// "provider": "gcs", |
96 | | /// "bucket": "test-bucket", |
97 | | /// "key_prefix": "test-prefix-index/", |
98 | | /// "retry": { |
99 | | /// "max_retries": 6, |
100 | | /// "delay": 0.3, |
101 | | /// "jitter": 0.5 |
102 | | /// }, |
103 | | /// "multipart_max_concurrent_uploads": 10 |
104 | | /// } |
105 | | /// ``` |
106 | | /// |
107 | | /// 3. **`NetApp` ONTAP S3:** |
108 | | /// `NetApp` ONTAP S3 store will use ONTAP's S3-compatible storage as a backend |
109 | | /// to store files. This store is specifically configured for ONTAP's S3 requirements |
110 | | /// including custom TLS configuration, credentials management, and proper vserver |
111 | | /// configuration. |
112 | | /// |
113 | | /// This store uses AWS environment variables for credentials: |
114 | | /// - `AWS_ACCESS_KEY_ID` |
115 | | /// - `AWS_SECRET_ACCESS_KEY` |
116 | | /// - `AWS_DEFAULT_REGION` |
117 | | /// |
118 | | /// **Example JSON Config:** |
119 | | /// ```json |
120 | | /// "experimental_cloud_object_store": { |
121 | | /// "provider": "ontap", |
122 | | /// "endpoint": "https://ontap-s3-endpoint:443", |
123 | | /// "vserver_name": "your-vserver", |
124 | | /// "bucket": "your-bucket", |
125 | | /// "root_certificates": "/path/to/certs.pem", // Optional |
126 | | /// "key_prefix": "test-prefix/", // Optional |
127 | | /// "retry": { |
128 | | /// "max_retries": 6, |
129 | | /// "delay": 0.3, |
130 | | /// "jitter": 0.5 |
131 | | /// }, |
132 | | /// "multipart_max_concurrent_uploads": 10 |
133 | | /// } |
134 | | /// ``` |
135 | | ExperimentalCloudObjectStore(ExperimentalCloudObjectSpec), |
136 | | |
137 | | /// ONTAP S3 Existence Cache provides a caching layer on top of the ONTAP S3 store |
138 | | /// to optimize repeated existence checks. It maintains an in-memory cache of object |
139 | | /// digests and periodically syncs this cache to disk for persistence. |
140 | | /// |
141 | | /// The cache helps reduce latency for repeated calls to check object existence, |
142 | | /// while still ensuring eventual consistency with the underlying ONTAP S3 store. |
143 | | /// |
144 | | /// Example JSON Config: |
145 | | /// ```json |
146 | | /// "ontap_s3_existence_cache": { |
147 | | /// "index_path": "/path/to/cache/index.json", |
148 | | /// "sync_interval_seconds": 300, |
149 | | /// "backend": { |
150 | | /// "endpoint": "https://ontap-s3-endpoint:443", |
151 | | /// "vserver_name": "your-vserver", |
152 | | /// "bucket": "your-bucket", |
153 | | /// "key_prefix": "test-prefix/" |
154 | | /// } |
155 | | /// } |
156 | | /// ``` |
157 | | /// |
158 | | OntapS3ExistenceCache(Box<OntapS3ExistenceCacheSpec>), |
159 | | |
160 | | /// Verify store is used to apply verifications to an underlying |
161 | | /// store implementation. It is strongly encouraged to validate |
162 | | /// as much data as you can before accepting data from a client; |
163 | | /// failing to do so may cause the data in the store to be |
164 | | /// populated with invalid data, causing all kinds of problems. |
165 | | /// |
166 | | /// The suggested configuration is to have the CAS validate the |
167 | | /// hash and size and the AC validate nothing. |
168 | | /// |
169 | | /// **Example JSON Config:** |
170 | | /// ```json |
171 | | /// "verify": { |
172 | | /// "backend": { |
173 | | /// "memory": { |
174 | | /// "eviction_policy": { |
175 | | /// "max_bytes": "500mb" |
176 | | /// } |
177 | | /// }, |
178 | | /// }, |
179 | | /// "verify_size": true, |
180 | | /// "verify_hash": true |
181 | | /// } |
182 | | /// ``` |
183 | | /// |
184 | | Verify(Box<VerifySpec>), |
185 | | |
186 | | /// Completeness checking store verifies if the |
187 | | /// output files & folders exist in the CAS before forwarding |
188 | | /// the request to the underlying store. |
189 | | /// Note: This store should only be used on AC stores. |
190 | | /// |
191 | | /// **Example JSON Config:** |
192 | | /// ```json |
193 | | /// "completeness_checking": { |
194 | | /// "backend": { |
195 | | /// "filesystem": { |
196 | | /// "content_path": "~/.cache/nativelink/content_path-ac", |
197 | | /// "temp_path": "~/.cache/nativelink/tmp_path-ac", |
198 | | /// "eviction_policy": { |
199 | | /// "max_bytes": "500mb", |
200 | | /// } |
201 | | /// } |
202 | | /// }, |
203 | | /// "cas_store": { |
204 | | /// "ref_store": { |
205 | | /// "name": "CAS_MAIN_STORE" |
206 | | /// } |
207 | | /// } |
208 | | /// } |
209 | | /// ``` |
210 | | /// |
211 | | CompletenessChecking(Box<CompletenessCheckingSpec>), |
212 | | |
213 | | /// A compression store that will compress the data inbound and |
214 | | /// outbound. There will be a non-trivial cost to compress and |
215 | | /// decompress the data, but in many cases, if the final store |
216 | | /// requires network transport and/or storage space is a concern, |
217 | | /// it is often faster and more efficient to put this store in |
218 | | /// front of those stores. |
219 | | /// |
220 | | /// **Example JSON Config:** |
221 | | /// ```json |
222 | | /// "compression": { |
223 | | /// "compression_algorithm": { |
224 | | /// "lz4": {} |
225 | | /// }, |
226 | | /// "backend": { |
227 | | /// "filesystem": { |
228 | | /// "content_path": "/tmp/nativelink/data/content_path-cas", |
229 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-cas", |
230 | | /// "eviction_policy": { |
231 | | /// "max_bytes": "2gb", |
232 | | /// } |
233 | | /// } |
234 | | /// } |
235 | | /// } |
236 | | /// ``` |
237 | | /// |
238 | | Compression(Box<CompressionSpec>), |
239 | | |
240 | | /// A dedup store will take the inputs and run a rolling hash |
241 | | /// algorithm on them to slice the input into smaller parts then |
242 | | /// run a sha256 algorithm on the slice and if the object doesn't |
243 | | /// already exist, upload the slice to the `content_store` using |
244 | | /// a new digest of just the slice. Once all parts exist, an |
245 | | /// Action-Cache-like digest will be built and uploaded to the |
246 | | /// `index_store` which will contain a reference to each |
247 | | /// chunk/digest of the uploaded file. A download request will |
248 | | /// first grab the index from the `index_store`, then stream the |
249 | | /// content of each chunk back as if it were one file. |
250 | | /// |
251 | | /// This store is exceptionally good when the following conditions |
252 | | /// are met: |
253 | | /// * Content is mostly the same (inserts, updates, deletes are ok) |
254 | | /// * Content is not compressed or encrypted |
255 | | /// * Uploading or downloading from `content_store` is the bottleneck. |
256 | | /// |
257 | | /// Note: This store pairs well when used with `CompressionSpec` as |
258 | | /// the `content_store`, but never put `DedupSpec` as the backend of |
259 | | /// `CompressionSpec` as it will negate all the gains. |
260 | | /// |
261 | | /// Note: When running `.has()` on this store, it will only check |
262 | | /// to see if the entry exists in the `index_store` and not check |
263 | | /// if the individual chunks exist in the `content_store`. |
264 | | /// |
265 | | /// **Example JSON Config:** |
266 | | /// ```json |
267 | | /// "dedup": { |
268 | | /// "index_store": { |
269 | | /// "memory": { |
270 | | /// "eviction_policy": { |
271 | | /// "max_bytes": "1GB", |
272 | | /// } |
273 | | /// } |
274 | | /// }, |
275 | | /// "content_store": { |
276 | | /// "compression": { |
277 | | /// "compression_algorithm": { |
278 | | /// "lz4": {} |
279 | | /// }, |
280 | | /// "backend": { |
281 | | /// "fast_slow": { |
282 | | /// "fast": { |
283 | | /// "memory": { |
284 | | /// "eviction_policy": { |
285 | | /// "max_bytes": "500MB", |
286 | | /// } |
287 | | /// } |
288 | | /// }, |
289 | | /// "slow": { |
290 | | /// "filesystem": { |
291 | | /// "content_path": "/tmp/nativelink/data/content_path-content", |
292 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-content", |
293 | | /// "eviction_policy": { |
294 | | /// "max_bytes": "2gb" |
295 | | /// } |
296 | | /// } |
297 | | /// } |
298 | | /// } |
299 | | /// } |
300 | | /// } |
301 | | /// } |
302 | | /// } |
303 | | /// ``` |
304 | | /// |
305 | | Dedup(Box<DedupSpec>), |
306 | | |
307 | | /// Existence store will wrap around another store and cache calls |
308 | | /// to `.has()` so that subsequent `has_with_results` calls will be |
309 | | /// faster. This is useful for cases when you have a store that |
310 | | /// is slow to respond to has calls. |
311 | | /// Note: This store should only be used on CAS stores. |
312 | | /// |
313 | | /// **Example JSON Config:** |
314 | | /// ```json |
315 | | /// "existence_cache": { |
316 | | /// "backend": { |
317 | | /// "memory": { |
318 | | /// "eviction_policy": { |
319 | | /// "max_bytes": "500mb", |
320 | | /// } |
321 | | /// } |
322 | | /// }, |
323 | | /// // Note this is the existence store policy, not the backend policy |
324 | | /// "eviction_policy": { |
325 | | /// "max_seconds": 100, |
326 | | /// } |
327 | | /// } |
328 | | /// ``` |
329 | | /// |
330 | | ExistenceCache(Box<ExistenceCacheSpec>), |
331 | | |
332 | | /// `FastSlow` store will first try to fetch the data from the `fast` |
333 | | /// store and then if it does not exist try the `slow` store. |
334 | | /// When the object does exist in the `slow` store, it will copy |
335 | | /// the data to the `fast` store while returning the data. |
336 | | /// This store should be thought of as a store that "buffers" |
337 | | /// the data to the `fast` store. |
338 | | /// On uploads it will mirror data to both `fast` and `slow` stores. |
339 | | /// |
340 | | /// WARNING: If you need data to always exist in the `slow` store |
341 | | /// for something like remote execution, be careful because this |
342 | | /// store will never check to see if the objects exist in the |
343 | | /// `slow` store if they exist in the `fast` store (ie: it assumes |
344 | | /// that if an object exists in the `fast` store it will exist in |
345 | | /// the `slow` store). |
346 | | /// |
347 | | /// **Example JSON Config:** |
348 | | /// ```json |
349 | | /// "fast_slow": { |
350 | | /// "fast": { |
351 | | /// "filesystem": { |
352 | | /// "content_path": "/tmp/nativelink/data/content_path-index", |
353 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-index", |
354 | | /// "eviction_policy": { |
355 | | /// "max_bytes": "500mb", |
356 | | /// } |
357 | | /// } |
358 | | /// }, |
359 | | /// "slow": { |
360 | | /// "filesystem": { |
361 | | /// "content_path": "/tmp/nativelink/data/content_path-index", |
362 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-index", |
363 | | /// "eviction_policy": { |
364 | | /// "max_bytes": "500mb", |
365 | | /// } |
366 | | /// } |
367 | | /// } |
368 | | /// } |
369 | | /// ``` |
370 | | /// |
371 | | FastSlow(Box<FastSlowSpec>), |
372 | | |
373 | | /// Shards the data to multiple stores. This is useful for cases |
374 | | /// when you want to distribute the load across multiple stores. |
375 | | /// The digest hash is used to determine which store to send the |
376 | | /// data to. |
377 | | /// |
378 | | /// **Example JSON Config:** |
379 | | /// ```json |
380 | | /// "shard": { |
381 | | /// "stores": [ |
382 | | /// { |
383 | | /// "store": { |
384 | | /// "memory": { |
385 | | /// "eviction_policy": { |
386 | | /// "max_bytes": "10mb" |
387 | | /// }, |
388 | | /// }, |
389 | | /// }, |
390 | | /// "weight": 1 |
391 | | /// }] |
392 | | /// } |
393 | | /// ``` |
394 | | /// |
395 | | Shard(ShardSpec), |
396 | | |
397 | | /// Stores the data on the filesystem. This store is designed for |
398 | | /// local persistent storage. Restarts of this program should restore |
399 | | /// the previous state, meaning anything uploaded will be persistent |
400 | | /// as long as the filesystem integrity holds. |
401 | | /// |
402 | | /// **Example JSON Config:** |
403 | | /// ```json |
404 | | /// "filesystem": { |
405 | | /// "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", |
406 | | /// "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", |
407 | | /// "eviction_policy": { |
408 | | /// "max_bytes": "10gb", |
409 | | /// } |
410 | | /// } |
411 | | /// ``` |
412 | | /// |
413 | | Filesystem(FilesystemSpec), |
414 | | |
415 | | /// Store used to reference a store in the root store manager. |
416 | | /// This is useful for cases when you want to share a store in different |
417 | | /// nested stores. For example, you may want to share the same memory store |
418 | | /// used for the action cache, but use a `FastSlowSpec` and have the fast |
419 | | /// store also share the memory store for efficiency. |
420 | | /// |
421 | | /// **Example JSON Config:** |
422 | | /// ```json |
423 | | /// "ref_store": { |
424 | | /// "name": "FS_CONTENT_STORE" |
425 | | /// } |
426 | | /// ``` |
427 | | /// |
428 | | RefStore(RefSpec), |
429 | | |
430 | | /// Uses the size field of the digest to decide which store to send the |
431 | | /// data to. This is useful for cases when you'd like to put small objects |
432 | | /// in one store and large objects in another store. This should only be |
433 | | /// used if the size field is the real size of the content; in other |
434 | | /// words, don't use it on AC (Action Cache) stores. Any store where you |
435 | | /// can safely use `VerifySpec.verify_size = true` is safe to use with |
436 | | /// this store (ie: CAS stores). |
437 | | /// |
438 | | /// **Example JSON Config:** |
439 | | /// ```json |
440 | | /// "size_partitioning": { |
441 | | /// "size": "128mib", |
442 | | /// "lower_store": { |
443 | | /// "memory": { |
444 | | /// "eviction_policy": { |
445 | | /// "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100mb}" |
446 | | /// } |
447 | | /// } |
448 | | /// }, |
449 | | /// "upper_store": { |
450 | | /// /// This store discards data larger than 128mib. |
451 | | /// "noop": {} |
452 | | /// } |
453 | | /// } |
454 | | /// ``` |
455 | | /// |
456 | | SizePartitioning(Box<SizePartitioningSpec>), |
457 | | |
458 | | /// This store will pass through calls to another GRPC store. This store |
459 | | /// is not designed to be used as a sub-store of another store, but it |
460 | | /// does satisfy the interface and will likely work. |
461 | | /// |
462 | | /// One major GOTCHA is that some stores use a special function on this |
463 | | /// store to get the size of the underlying object, which is only reliable |
464 | | /// when this store is serving a CAS store, not an AC store. Using |
465 | | /// this store directly, without it being a child of any other store, has |
466 | | /// no side effects and is the most efficient way to use it. |
467 | | /// |
468 | | /// **Example JSON Config:** |
469 | | /// ```json |
470 | | /// "grpc": { |
471 | | /// "instance_name": "main", |
472 | | /// "endpoints": [ |
473 | | /// {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"} |
474 | | /// ], |
475 | | /// "store_type": "ac" |
476 | | /// } |
477 | | /// ``` |
478 | | /// |
479 | | Grpc(GrpcSpec), |
480 | | |
481 | | /// Stores data in any store compatible with Redis APIs. |
482 | | /// |
483 | | /// Pairs well with `SizePartitioning` and/or `FastSlow` stores. |
484 | | /// Ideal for small objects, as most hosted Redis services limit |
485 | | /// uploads to roughly 256MB-512MB per object. |
486 | | /// |
487 | | /// **Example JSON Config:** |
488 | | /// ```json |
489 | | /// "redis_store": { |
490 | | /// "addresses": [ |
491 | | /// "redis://127.0.0.1:6379/", |
492 | | /// ] |
493 | | /// } |
494 | | /// ``` |
495 | | /// |
496 | | RedisStore(RedisSpec), |
497 | | |
498 | | /// Noop store is a store that sends streams into the void and all data |
499 | | /// retrieval will return 404 (`NotFound`). This can be useful for cases |
500 | | /// where you may need to partition your data and part of your data needs |
501 | | /// to be discarded. |
502 | | /// |
503 | | /// **Example JSON Config:** |
504 | | /// ```json |
505 | | /// "noop": {} |
506 | | /// ``` |
507 | | /// |
508 | | Noop(NoopSpec), |
509 | | |
510 | | /// Experimental `MongoDB` store implementation. |
511 | | /// |
512 | | /// This store uses `MongoDB` as a backend for storing data. It supports |
513 | | /// both CAS (Content Addressable Storage) and scheduler data with |
514 | | /// optional change streams for real-time updates. |
515 | | /// |
516 | | /// **Example JSON Config:** |
517 | | /// ```json |
518 | | /// "experimental_mongo": { |
519 | | /// "connection_string": "mongodb://localhost:27017", |
520 | | /// "database": "nativelink", |
521 | | /// "cas_collection": "cas", |
522 | | /// "key_prefix": "cas:", |
523 | | /// "read_chunk_size": 65536, |
524 | | /// "max_concurrent_uploads": 10, |
525 | | /// "enable_change_streams": false |
526 | | /// } |
527 | | /// ``` |
528 | | /// |
529 | | ExperimentalMongo(ExperimentalMongoSpec), |
530 | | } |
531 | | |
532 | | /// Configuration for an individual shard of the store. |
533 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
534 | | #[serde(deny_unknown_fields)] |
535 | | pub struct ShardConfig { |
536 | | /// Store to shard the data to. |
537 | | pub store: StoreSpec, |
538 | | |
539 | | /// The weight of the store. This is used to determine how much data |
540 | | /// should be sent to the store. The fraction of data sent to a store |
541 | | /// is its weight divided by the sum of all stores' weights |
 | | /// (e.g. weights of 1 and 3 receive ~25% and ~75% respectively). |
542 | | /// |
543 | | /// Default: 1 |
544 | | pub weight: Option<u32>, |
545 | | } |
546 | | |
547 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
548 | | #[serde(deny_unknown_fields)] |
549 | | pub struct ShardSpec { |
550 | | /// Stores to shard the data to. |
551 | | pub stores: Vec<ShardConfig>, |
552 | | } |
553 | | |
554 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
555 | | #[serde(deny_unknown_fields)] |
556 | | pub struct SizePartitioningSpec { |
557 | | /// Size to partition the data on. |
558 | | #[serde(deserialize_with = "convert_data_size_with_shellexpand")] |
559 | | pub size: u64, |
560 | | |
561 | | /// Store to send data when object is < (less than) size. |
562 | | pub lower_store: StoreSpec, |
563 | | |
564 | | /// Store to send data when object is >= (greater than or equal to) size. |
565 | | pub upper_store: StoreSpec, |
566 | | } |
567 | | |
568 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
569 | | #[serde(deny_unknown_fields)] |
570 | | pub struct RefSpec { |
571 | | /// Name of the store under the root "stores" config object. |
572 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
573 | | pub name: String, |
574 | | } |
575 | | |
576 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
577 | | #[serde(deny_unknown_fields)] |
578 | | pub struct FilesystemSpec { |
579 | | /// Path on the system where to store the actual content. This is where |
580 | | /// the bulk of the data will be placed. |
581 | | /// On service bootup this folder will be scanned and all files will be |
582 | | /// added to the cache. In the event one of the files doesn't match the |
583 | | /// criteria, the file will be deleted. |
584 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
585 | | pub content_path: String, |
586 | | |
587 | | /// A temporary location of where files that are being uploaded or |
588 | | /// deleted will be placed while the content cannot be guaranteed to be |
589 | | /// accurate. This location must be on the same block device as |
590 | | /// `content_path` so atomic moves can happen (ie: move without copy). |
591 | | /// All files in this folder will be deleted on every startup. |
592 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
593 | | pub temp_path: String, |
594 | | |
595 | | /// Buffer size to use when reading files. Generally this should be left |
596 | | /// to the default value except for testing. |
597 | | /// Default: 32k. |
598 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
599 | | pub read_buffer_size: u32, |
600 | | |
601 | | /// Policy used to evict items out of the store. Failure to set this |
602 | | /// value will cause items to never be removed from the store causing |
603 | | /// infinite memory usage. |
604 | | pub eviction_policy: Option<EvictionPolicy>, |
605 | | |
606 | | /// The block size of the filesystem on the running machine. This |
607 | | /// value is used to determine an entry's actual size consumed on disk. |
608 | | /// For a filesystem with a 4KB block size, a 1B file actually consumes 4KB. |
609 | | /// Default: 4096 |
610 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
611 | | pub block_size: u64, |
612 | | } |
613 | | |
614 | | // NetApp ONTAP S3 Spec |
615 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
616 | | #[serde(deny_unknown_fields)] |
617 | | pub struct ExperimentalOntapS3Spec { |
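 | | /// The ONTAP S3 endpoint URL, e.g. "https://ontap-s3-endpoint:443". |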
618 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
619 | | pub endpoint: String, |
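 | | /// Name of the ONTAP vserver that owns the bucket. |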
620 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
621 | | pub vserver_name: String, |
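 | | /// Bucket name to use as the backend. |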
622 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
623 | | pub bucket: String, |
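 | | /// Optional path to a file containing root certificates to use |
 | | /// for TLS verification. |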
624 | | #[serde(default)] |
625 | | pub root_certificates: Option<String>, |
626 | | |
627 | | /// Common retry and upload configuration |
628 | | #[serde(flatten)] |
629 | | pub common: CommonObjectSpec, |
630 | | } |
631 | | |
632 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
633 | | #[serde(deny_unknown_fields)] |
634 | | pub struct OntapS3ExistenceCacheSpec { |
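 | | /// Path on disk where the cached digest index is persisted, e.g. |
 | | /// "/path/to/cache/index.json". |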
635 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
636 | | pub index_path: String, |
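 | | /// How often, in seconds, the in-memory cache is synced to disk. |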
637 | | #[serde(deserialize_with = "convert_numeric_with_shellexpand")] |
638 | | pub sync_interval_seconds: u32, |
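 | | /// The underlying ONTAP S3 store to wrap. |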
639 | | pub backend: Box<ExperimentalOntapS3Spec>, |
640 | | } |
641 | | |
642 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
643 | | #[serde(deny_unknown_fields)] |
644 | | pub struct FastSlowSpec { |
645 | | /// Fast store that will be contacted first, before reaching |
646 | | /// out to the `slow` store. |
647 | | pub fast: StoreSpec, |
648 | | |
649 | | /// If the object does not exist in the `fast` store it will try to |
650 | | /// get it from this store. |
651 | | pub slow: StoreSpec, |
652 | | } |
653 | | |
654 | | #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)] |
655 | | #[serde(deny_unknown_fields)] |
656 | | pub struct MemorySpec { |
657 | | /// Policy used to evict items out of the store. Failure to set this |
658 | | /// value will cause items to never be removed from the store causing |
659 | | /// infinite memory usage. |
660 | | pub eviction_policy: Option<EvictionPolicy>, |
661 | | } |
662 | | |
663 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
664 | | #[serde(deny_unknown_fields)] |
665 | | pub struct DedupSpec { |
666 | | /// Store used to store the index of each dedup slice. This store |
667 | | /// should generally be fast and small. |
668 | | pub index_store: StoreSpec, |
669 | | |
670 | | /// The store where the individual chunks will be uploaded. This |
671 | | /// store should generally be the slower & larger store. |
672 | | pub content_store: StoreSpec, |
673 | | |
674 | | /// Minimum size that a chunk will be when slicing up the content. |
675 | | /// Note: Increasing this setting can improve performance, because |
676 | | /// the first `min_size` bytes of each chunk are not examined when |
677 | | /// deciding where to partition the data. |
678 | | /// |
679 | | /// Default: 65536 (64k) |
680 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
681 | | pub min_size: u32, |
682 | | |
683 | | /// A best-effort attempt will be made to keep the average size |
684 | | /// of the chunks close to this number. It is a target, not a |
685 | | /// guarantee. |
686 | | /// |
687 | | /// This value is also roughly the threshold used to determine |
688 | | /// if we should even attempt to dedup the entry or just forward |
689 | | /// it directly to the `content_store` without an index. The actual |
690 | | /// value will be about `normal_size * 1.3` due to implementation |
691 | | /// details. |
692 | | /// |
693 | | /// Default: 262144 (256k) |
694 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
695 | | pub normal_size: u32, |
696 | | |
697 | | /// Maximum size a chunk is allowed to be. |
698 | | /// |
699 | | /// Default: 524288 (512k) |
700 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
701 | | pub max_size: u32, |
702 | | |
703 | | /// Due to an implementation detail, we prefer to download the |
704 | | /// first chunks of the file so we can stream the content |
705 | | /// out and free up some of our buffers. This configuration |
706 | | /// is used to restrict the number of concurrent chunk |
707 | | /// downloads at a time per `get()` request. |
708 | | /// |
709 | | /// This setting will also affect how much memory might be used |
710 | | /// per `get()` request. Estimated worst case memory per `get()` |
711 | | /// request is: `max_concurrent_fetch_per_get * max_size` |
 | | /// (with the defaults: 10 * 512KiB = 5MiB). |
712 | | /// |
713 | | /// Default: 10 |
714 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
715 | | pub max_concurrent_fetch_per_get: u32, |
716 | | } |
717 | | |
718 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
719 | | #[serde(deny_unknown_fields)] |
720 | | pub struct ExistenceCacheSpec { |
721 | | /// The underlying store to wrap around. All content will first flow |
722 | | /// through self before forwarding to backend. In the event there |
723 | | /// is an error detected in self, the connection to the backend |
724 | | /// will be terminated, and early termination should always cause |
725 | | /// updates to fail on the backend. |
726 | | pub backend: StoreSpec, |
727 | | |
728 | | /// Policy used to evict items out of the store. Failure to set this |
729 | | /// value will cause items to never be removed from the store causing |
730 | | /// infinite memory usage. |
731 | | pub eviction_policy: Option<EvictionPolicy>, |
732 | | } |
733 | | |
734 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
735 | | #[serde(deny_unknown_fields)] |
736 | | pub struct VerifySpec { |
737 | | /// The underlying store to wrap around. All content will first flow |
738 | | /// through self before forwarding to backend. In the event there |
739 | | /// is an error detected in self, the connection to the backend |
740 | | /// will be terminated, and early termination should always cause |
741 | | /// updates to fail on the backend. |
742 | | pub backend: StoreSpec, |
743 | | |
744 | | /// If set the store will verify the size of the data before accepting |
745 | | /// an upload of data. |
746 | | /// |
747 | | /// This should be set to false for AC, but true for CAS stores. |
748 | | #[serde(default)] |
749 | | pub verify_size: bool, |
750 | | |
751 | | /// If set, the data will be hashed to verify that the key matches the |
752 | | /// computed hash. The hash function is automatically determined based |
753 | | /// on the request and if not set will use the global default. |
754 | | /// |
755 | | /// This should be set to false for AC, but true for CAS stores. |
756 | | #[serde(default)] |
757 | | pub verify_hash: bool, |
758 | | } |
759 | | |
760 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
761 | | #[serde(deny_unknown_fields)] |
762 | | pub struct CompletenessCheckingSpec { |
763 | | /// The underlying store that will have its results validated before sending to the client. |
764 | | pub backend: StoreSpec, |
765 | | |
766 | | /// When a request is made, the results are decoded and all output digests/files are verified |
767 | | /// to exist in this CAS store before returning success. |
768 | | pub cas_store: StoreSpec, |
769 | | } |
770 | | |
771 | | #[derive(Serialize, Deserialize, Debug, Default, PartialEq, Eq, Clone, Copy)] |
772 | | #[serde(deny_unknown_fields)] |
773 | | pub struct Lz4Config { |
774 | | /// Size of the blocks to compress. |
775 | | /// Higher values require more RAM, but might yield slightly better |
776 | | /// compression ratios. |
777 | | /// |
778 | | /// Default: 65536 (64k). |
779 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
780 | | pub block_size: u32, |
781 | | |
782 | | /// Maximum size allowed to attempt to deserialize data into. |
783 | | /// This is needed because the `block_size` is embedded into the data |
784 | | /// so if there was a bad actor, they could upload an extremely large |
785 | | /// `block_size`'ed entry and we'd allocate a large amount of memory |
786 | | /// when retrieving the data. To prevent this from happening, we |
787 | | /// allow you to specify the maximum that we'll attempt to deserialize. |
788 | | /// |
789 | | /// Default: value in `block_size`. |
790 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
791 | | pub max_decode_block_size: u32, |
792 | | } |
793 | | |
794 | | #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Copy)] |
795 | | #[serde(rename_all = "snake_case")] |
796 | | pub enum CompressionAlgorithm { |
797 | | /// The LZ4 compression algorithm is extremely fast for compression and |
798 | | /// decompression, but achieves a relatively low compression ratio. |
799 | | /// In most cases build artifacts are highly compressible, and |
800 | | /// lz4 is quite good at aborting early if the data is not deemed very |
801 | | /// compressible. |
802 | | /// |
803 | | /// see: <https://lz4.github.io/lz4/> |
804 | | Lz4(Lz4Config), |
805 | | } |
806 | | |
807 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
808 | | #[serde(deny_unknown_fields)] |
809 | | pub struct CompressionSpec { |
810 | | /// The underlying store to wrap around. All content will first flow |
811 | | /// through self before forwarding to backend. In the event there |
812 | | /// is an error detected in self, the connection to the backend |
813 | | /// will be terminated, and early termination should always cause |
814 | | /// updates to fail on the backend. |
815 | | pub backend: StoreSpec, |
816 | | |
817 | | /// The compression algorithm to use. |
818 | | pub compression_algorithm: CompressionAlgorithm, |
819 | | } |
820 | | |
821 | | /// Eviction policy always works on LRU (Least Recently Used). Any time an entry |
822 | | /// is touched it updates the timestamp. Inserts and updates will execute the |
823 | | /// eviction policy removing any expired entries and/or the oldest entries |
824 | | /// until the store size becomes smaller than `max_bytes`. |
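 | | /// |
 | | /// A sketch combining the fields below (values are illustrative, not |
 | | /// recommendations): |
 | | /// ```json |
 | | /// "eviction_policy": { |
 | | ///   "max_bytes": "2gb", |
 | | ///   "evict_bytes": "100mb", |
 | | ///   "max_seconds": 86400, |
 | | ///   "max_count": 1000000 |
 | | /// } |
 | | /// ``` |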
825 | | #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)] |
826 | | #[serde(deny_unknown_fields)] |
827 | | pub struct EvictionPolicy { |
828 | | /// Maximum number of bytes before eviction takes place. |
829 | | /// Default: 0. Zero means never evict based on size. |
830 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
831 | | pub max_bytes: usize, |
832 | | |
833 | | /// When eviction starts based on hitting `max_bytes`, continue until |
834 | | /// `max_bytes - evict_bytes` is met to create a low watermark. This stops |
835 | | /// operations from thrashing when the store is close to the limit. |
836 | | /// Default: 0 |
837 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
838 | | pub evict_bytes: usize, |
839 | | |
840 | | /// Maximum number of seconds for an entry to live since it was last |
841 | | /// accessed before it is evicted. |
842 | | /// Default: 0. Zero means never evict based on time. |
843 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
844 | | pub max_seconds: u32, |
845 | | |
846 | | /// Maximum number of entries in the store before eviction takes place. |
847 | | /// Default: 0. Zero means never evict based on count. |
848 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
849 | | pub max_count: u64, |
850 | | } |
851 | | |
852 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
853 | | #[serde(tag = "provider", rename_all = "snake_case")] |
854 | | pub enum ExperimentalCloudObjectSpec { |
855 | | Aws(ExperimentalAwsSpec), |
856 | | Gcs(ExperimentalGcsSpec), |
857 | | Ontap(ExperimentalOntapS3Spec), |
858 | | } |
859 | | |
860 | | impl Default for ExperimentalCloudObjectSpec { |
861 | 0 | fn default() -> Self { |
862 | 0 | Self::Aws(ExperimentalAwsSpec::default()) |
863 | 0 | } |
864 | | } |
865 | | |
866 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
867 | | #[serde(deny_unknown_fields)] |
868 | | pub struct ExperimentalAwsSpec { |
869 | | /// S3 region, e.g. us-east-1, us-west-2, af-south-1, etc. |
870 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
871 | | pub region: String, |
872 | | |
873 | | /// Bucket name to use as the backend. |
874 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
875 | | pub bucket: String, |
876 | | |
877 | | /// Common retry and upload configuration |
878 | | #[serde(flatten)] |
879 | | pub common: CommonObjectSpec, |
880 | | } |
881 | | |
882 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
883 | | #[serde(deny_unknown_fields)] |
884 | | pub struct ExperimentalGcsSpec { |
885 | | /// Bucket name to use as the backend. |
886 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
887 | | pub bucket: String, |
888 | | |
889 | | /// Chunk size for resumable uploads. |
890 | | /// |
891 | | /// Default: 2MB |
892 | | pub resumable_chunk_size: Option<usize>, |
893 | | |
894 | | /// Common retry and upload configuration |
895 | | #[serde(flatten)] |
896 | | pub common: CommonObjectSpec, |
897 | | |
898 | | /// If set, raise an error when no authentication credentials are found. |
899 | | #[serde(default)] |
900 | | pub authentication_required: bool, |
901 | | |
902 | | /// Connection timeout in milliseconds. |
903 | | /// Default: 3000 |
904 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
905 | | pub connection_timeout_s: u64, |
906 | | |
907 | | /// Read timeout in milliseconds. |
908 | | /// Default: 3000 |
909 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
910 | | pub read_timeout_s: u64, |
911 | | } |
912 | | |
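 | | /// Configuration shared by the cloud object store providers. Because |
 | | /// these fields are flattened into each provider's spec, they appear at |
 | | /// the top level of the provider's JSON object. A sketch with |
 | | /// illustrative values: |
 | | /// ```json |
 | | /// { |
 | | ///   "key_prefix": "cas/", |
 | | ///   "retry": { "max_retries": 6, "delay": 0.3, "jitter": 0.5 }, |
 | | ///   "consider_expired_after_s": 259200, |
 | | ///   "multipart_max_concurrent_uploads": 10 |
 | | /// } |
 | | /// ``` |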
913 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
914 | | pub struct CommonObjectSpec { |
915 | | /// An optional prefix for the object location in the bucket. If None, no prefix will be used. |
916 | | #[serde(default)] |
917 | | pub key_prefix: Option<String>, |
918 | | |
919 | | /// Retry configuration to use when a network request fails. |
920 | | #[serde(default)] |
921 | | pub retry: Retry, |
922 | | |
923 | | /// If the number of seconds since the `last_modified` time of the object |
924 | | /// is greater than this value, the object will not be considered |
925 | | /// "existing". This allows for external tools to delete objects that |
926 | | /// have not been uploaded in a long time. If a client receives a `NotFound` |
927 | | /// the client should re-upload the object. |
928 | | /// |
929 | | /// Leave a sufficient buffer between the external tool's expiration |
930 | | /// configuration and this value. Keeping items |
931 | | /// around for a few days is generally a good idea. |
932 | | /// |
933 | | /// Default: 0. Zero means never consider an object expired. |
934 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
935 | | pub consider_expired_after_s: u32, |
936 | | |
937 | | /// The maximum buffer size to retain in case of a retryable error |
938 | | /// during upload. Setting this to zero will disable upload buffering; |
939 | | /// this means that in the event of a failure during upload, the entire |
940 | | /// upload will be aborted and the client will likely receive an error. |
941 | | /// |
942 | | /// Default: 5MB. |
943 | | pub max_retry_buffer_per_request: Option<usize>, |
944 | | |
945 | | /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`. |
946 | | /// |
947 | | /// Default: 10. |
948 | | pub multipart_max_concurrent_uploads: Option<usize>, |
949 | | |
950 | | /// Allow unencrypted HTTP connections. Only use this for local testing. |
951 | | /// |
952 | | /// Default: false |
953 | | #[serde(default)] |
954 | | pub insecure_allow_http: bool, |
955 | | |
956 | | /// Disable http/2 connections and only use http/1.1. The default client |
957 | | /// configuration enables both http/1.1 and http/2. |
958 | | /// Http/2 should be disabled in environments with poor http/2 support |
959 | | /// or performance. Safe to keep the default unless the |
960 | | /// underlying network environment, S3, or GCS API servers specify otherwise. |
961 | | /// |
962 | | /// Default: false |
963 | | #[serde(default)] |
964 | | pub disable_http2: bool, |
965 | | } |
966 | | |
967 | | #[derive(Serialize, Deserialize, Debug, Clone, Copy)] |
968 | | #[serde(rename_all = "snake_case")] |
969 | | pub enum StoreType { |
970 | | /// The store is content addressable storage. |
971 | | Cas, |
972 | | /// The store is an action cache. |
973 | | Ac, |
974 | | } |
975 | | |
976 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
977 | | pub struct ClientTlsConfig { |
978 | | /// Path to the certificate authority to use to validate the remote. |
979 | | /// |
980 | | /// Default: None |
981 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
982 | | pub ca_file: Option<String>, |
983 | | |
984 | | /// Path to the certificate file for client authentication. |
985 | | /// |
986 | | /// Default: None |
987 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
988 | | pub cert_file: Option<String>, |
989 | | |
990 | | /// Path to the private key file for client authentication. |
991 | | /// |
992 | | /// Default: None |
993 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
994 | | pub key_file: Option<String>, |
995 | | |
996 | | /// If set the client will use the native roots for TLS connections. |
997 | | /// |
998 | | /// Default: false |
999 | | #[serde(default)] |
1000 | | pub use_native_roots: Option<bool>, |
1001 | | } |
1002 | | |
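 | | /// A single upstream GRPC endpoint. A sketch of an endpoint using TLS |
 | | /// (address and CA path are illustrative): |
 | | /// ```json |
 | | /// { |
 | | ///   "address": "grpcs://cas.example.com:443", |
 | | ///   "tls_config": { |
 | | ///     "ca_file": "/path/to/ca.pem" |
 | | ///   }, |
 | | ///   "concurrency_limit": 64 |
 | | /// } |
 | | /// ``` |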
1003 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1004 | | #[serde(deny_unknown_fields)] |
1005 | | pub struct GrpcEndpoint { |
1006 | | /// The endpoint address (i.e. grpc(s)://example.com:443). |
1007 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
1008 | | pub address: String, |
1009 | | /// The TLS configuration to use to connect to the endpoint (if grpcs). |
1010 | | pub tls_config: Option<ClientTlsConfig>, |
1011 | | /// The maximum concurrency to allow on this endpoint. |
1012 | | pub concurrency_limit: Option<usize>, |
1013 | | } |
1014 | | |
1015 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1016 | | #[serde(deny_unknown_fields)] |
1017 | | pub struct GrpcSpec { |
1018 | | /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this. |
1019 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1020 | | pub instance_name: String, |
1021 | | |
1022 | | /// The endpoints of the grpc connection. |
1023 | | pub endpoints: Vec<GrpcEndpoint>, |
1024 | | |
1025 | | /// The type of the upstream store, this ensures that the correct server calls are made. |
1026 | | pub store_type: StoreType, |
1027 | | |
1028 | | /// Retry configuration to use when a network request fails. |
1029 | | #[serde(default)] |
1030 | | pub retry: Retry, |
1031 | | |
1032 | | /// Limit the number of simultaneous upstream requests to this many. A |
1033 | | /// value of zero is treated as unlimited. If the limit is reached the |
1034 | | /// request is queued. |
1035 | | #[serde(default)] |
1036 | | pub max_concurrent_requests: usize, |
1037 | | |
1038 | | /// The number of connections to make to each specified endpoint to balance |
1039 | | /// the load over multiple TCP connections. Default 1. |
1040 | | #[serde(default)] |
1041 | | pub connections_per_endpoint: usize, |
1042 | | } |
1043 | | |
1044 | | /// The possible error codes that might occur on an upstream request. |
1045 | | #[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] |
1046 | | pub enum ErrorCode { |
1047 | | Cancelled = 1, |
1048 | | Unknown = 2, |
1049 | | InvalidArgument = 3, |
1050 | | DeadlineExceeded = 4, |
1051 | | NotFound = 5, |
1052 | | AlreadyExists = 6, |
1053 | | PermissionDenied = 7, |
1054 | | ResourceExhausted = 8, |
1055 | | FailedPrecondition = 9, |
1056 | | Aborted = 10, |
1057 | | OutOfRange = 11, |
1058 | | Unimplemented = 12, |
1059 | | Internal = 13, |
1060 | | Unavailable = 14, |
1061 | | DataLoss = 15, |
1062 | | Unauthenticated = 16, |
1063 | | // Note: This list is duplicated from nativelink-error/lib.rs. |
1064 | | } |
1065 | | |
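 | | /// A sketch setting a few of the optional fields below (values are |
 | | /// illustrative): |
 | | /// ```json |
 | | /// "redis_store": { |
 | | ///   "addresses": ["redis://127.0.0.1:6379/"], |
 | | ///   "mode": "standard", |
 | | ///   "key_prefix": "cas:", |
 | | ///   "read_chunk_size": 65536, |
 | | ///   "connection_pool_size": 3 |
 | | /// } |
 | | /// ``` |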
1066 | | #[derive(Serialize, Deserialize, Debug, Clone, Default)] |
1067 | | pub struct RedisSpec { |
1068 | | /// The hostname or IP address of the Redis server. |
1069 | | /// Ex: `["redis://username:password@redis-server-url:6380/99"]` |
1070 | | /// Here 99 is the database ID and 6380 is the port. |
1071 | | #[serde(deserialize_with = "convert_vec_string_with_shellexpand")] |
1072 | | pub addresses: Vec<String>, |
1073 | | |
1074 | | /// The response timeout for the Redis connection in seconds. |
1075 | | /// |
1076 | | /// Default: 10 |
1077 | | #[serde(default)] |
1078 | | pub response_timeout_s: u64, |
1079 | | |
1080 | | /// The connection timeout for the Redis connection in seconds. |
1081 | | /// |
1082 | | /// Default: 10 |
1083 | | #[serde(default)] |
1084 | | pub connection_timeout_s: u64, |
1085 | | |
1086 | | /// An optional and experimental Redis channel to publish write events to. |
1087 | | /// |
1088 | | /// If set, every time a write operation is made to a Redis node |
1089 | | /// then an event will be published to a Redis channel with the given name. |
1090 | | /// If unset, the writes will still be made, |
1091 | | /// but the write events will not be published. |
1092 | | /// |
1093 | | /// Default: (Empty String / No Channel) |
1094 | | #[serde(default)] |
1095 | | pub experimental_pub_sub_channel: Option<String>, |
1096 | | |
1097 | | /// An optional prefix to prepend to all keys in this store. |
1098 | | /// |
1099 | | /// Setting this value can make it convenient to query or |
1100 | | /// organize your data according to the shared prefix. |
1101 | | /// |
1102 | | /// Default: (Empty String / No Prefix) |
1103 | | #[serde(default)] |
1104 | | pub key_prefix: String, |
1105 | | |
1106 | | /// Set the mode Redis is operating in. |
1107 | | /// |
1108 | | /// Available options are "cluster" for |
1109 | | /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/), |
1110 | | /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/), |
1111 | | /// or "standard" if Redis is operating in neither cluster nor sentinel mode. |
1112 | | /// |
1113 | | /// Default: standard |
1114 | | #[serde(default)] |
1115 | | pub mode: RedisMode, |
1116 | | |
1117 | | /// When using the pubsub interface, this is the maximum number of items to keep |
1118 | | /// queued up before dropping old items. |
1119 | | /// |
1120 | | /// Default: 4096 |
1121 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1122 | | pub broadcast_channel_capacity: usize, |
1123 | | |
1124 | | /// The amount of time in milliseconds until the redis store considers the |
1125 | | /// command to be timed out. This will trigger a retry of the command and |
1126 | | /// potentially a reconnection to the redis server. |
1127 | | /// |
1128 | | /// Default: 10000 (10 seconds) |
1129 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1130 | | pub command_timeout_ms: u64, |
1131 | | |
1132 | | /// The amount of time in milliseconds until the redis store considers the |
1133 | | /// connection to be unresponsive. This will trigger a reconnection to the |
1134 | | /// redis server. |
1135 | | /// |
1136 | | /// Default: 3000 (3 seconds) |
1137 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1138 | | pub connection_timeout_ms: u64, |
1139 | | |
1140 | | /// The amount of data to read from the redis server at a time. |
1141 | | /// This is used to limit the amount of memory used when reading |
1142 | | /// large objects from the redis server as well as limiting the |
1143 | | /// amount of time a single read operation can take. |
1144 | | /// |
1145 | | /// IMPORTANT: If this value is too high, the `command_timeout_ms` |
1146 | | /// might be triggered if the latency to the redis server is too |
1147 | | /// high or the throughput is too low. |
1148 | | /// |
1149 | | /// Default: 64KiB |
1150 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1151 | | pub read_chunk_size: usize, |
1152 | | |
1153 | | /// The number of connections to keep open to the redis server(s). |
1154 | | /// |
1155 | | /// Default: 3 |
1156 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1157 | | pub connection_pool_size: usize, |
1158 | | |
1159 | | /// The maximum number of upload chunks to allow per update. |
1160 | | /// This is used to limit the amount of memory used when uploading |
1161 | | /// large objects to the redis server. A good rule of thumb is to |
1162 | | /// think of the data as: |
1163 | | /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THEORETICAL_MAX_CONCURRENT_UPLOADS` |
1164 | | /// (note: it is a good idea to divide `AVAIL_MEMORY` by ~10 to account for other memory usage) |
1165 | | /// |
1166 | | /// Default: 10 |
1167 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1168 | | pub max_chunk_uploads_per_update: usize, |
1169 | | |
1170 | | /// The COUNT value passed when scanning keys in Redis. |
1171 | | /// This is used to hint the amount of work that should be done per response. |
1172 | | /// |
1173 | | /// Default: 10000 |
1174 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1175 | | pub scan_count: u32, |
1176 | | |
1177 | | /// Retry configuration to use when a network request fails. |
1178 | | /// See the `Retry` struct for more information. |
1179 | | /// |
1180 | | /// ```txt |
1181 | | /// Default: Retry { |
1182 | | /// max_retries: 0, /* unlimited */ |
1183 | | /// delay: 0.1, /* 100ms */ |
1184 | | /// jitter: 0.5, /* 50% */ |
1185 | | /// retry_on_errors: None, /* not used in redis store */ |
1186 | | /// } |
1187 | | /// ``` |
1188 | | #[serde(default)] |
1189 | | pub retry: Retry, |
1190 | | } |
1191 | | |
1192 | | #[derive(Debug, Default, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)] |
1193 | | #[serde(rename_all = "snake_case")] |
1194 | | pub enum RedisMode { |
1195 | | Cluster, |
1196 | | Sentinel, |
1197 | | #[default] |
1198 | | Standard, |
1199 | | } |
1200 | | |
1201 | | #[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)] |
1202 | | pub struct NoopSpec {} |
1203 | | |
1204 | | /// Retry configuration. The backoff is exponential, and on each iteration |
1205 | | /// a jitter, as a percentage, is applied to the calculated delay. For example: |
1206 | | /// ```haskell |
1207 | | /// Retry{ |
1208 | | /// max_retries: 7, |
1209 | | /// delay: 0.1, |
1210 | | /// jitter: 0.5, |
1211 | | /// } |
1212 | | /// ``` |
1213 | | /// will result in: |
1214 | | /// Attempt - Delay |
1215 | | /// 1 0ms |
1216 | | /// 2 75ms - 125ms |
1217 | | /// 3 150ms - 250ms |
1218 | | /// 4 300ms - 500ms |
1219 | | /// 5 600ms - 1s |
1220 | | /// 6 1.2s - 2s |
1221 | | /// 7 2.4s - 4s |
1222 | | /// 8 4.8s - 8s |
1223 | | /// Remember that the delays are additive, so a single request exhausting |
1224 | | /// all retries above would have a total delay of 9.525s - 15.875s. |
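 | | /// |
 | | /// The same configuration expressed in JSON, with an optional error-code |
 | | /// list added for illustration: |
 | | /// ```json |
 | | /// "retry": { |
 | | ///   "max_retries": 7, |
 | | ///   "delay": 0.1, |
 | | ///   "jitter": 0.5, |
 | | ///   "retry_on_errors": ["Unavailable", "Internal"] |
 | | /// } |
 | | /// ``` |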
1225 | | #[derive(Serialize, Deserialize, Clone, Debug, Default)] |
1226 | | #[serde(deny_unknown_fields)] |
1227 | | pub struct Retry { |
1228 | | /// Maximum number of retries until retrying stops. |
1229 | | /// Setting this to zero will always attempt 1 time, but not retry. |
1230 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1231 | | pub max_retries: usize, |
1232 | | |
1233 | | /// Delay in seconds for exponential back off. |
1234 | | #[serde(default)] |
1235 | | pub delay: f32, |
1236 | | |
1237 | | /// Amount of jitter to add as a percentage in decimal form. This will |
1238 | | /// change the formula like: |
1239 | | /// ```haskell |
1240 | | /// random( |
1241 | | /// (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), |
1242 | | /// (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), |
1243 | | /// ) |
1244 | | /// ``` |
1245 | | #[serde(default)] |
1246 | | pub jitter: f32, |
1247 | | |
1248 | | /// A list of error codes to retry on, if this is not set then the default |
1249 | | /// error codes to retry on are used. These default codes are the most |
1250 | | /// likely to be non-permanent. |
1251 | | /// - `Unknown` |
1252 | | /// - `Cancelled` |
1253 | | /// - `DeadlineExceeded` |
1254 | | /// - `ResourceExhausted` |
1255 | | /// - `Aborted` |
1256 | | /// - `Internal` |
1257 | | /// - `Unavailable` |
1258 | | /// - `DataLoss` |
1259 | | #[serde(default)] |
1260 | | pub retry_on_errors: Option<Vec<ErrorCode>>, |
1261 | | } |
1262 | | |
1263 | | /// Configuration for `ExperimentalMongoDB` store. |
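 | | /// |
 | | /// In addition to the example on the `ExperimentalMongo` variant, the |
 | | /// write-concern fields below can be set; a sketch with illustrative |
 | | /// values: |
 | | /// ```json |
 | | /// "experimental_mongo": { |
 | | ///   "connection_string": "mongodb://localhost:27017", |
 | | ///   "write_concern_w": "majority", |
 | | ///   "write_concern_j": true, |
 | | ///   "write_concern_timeout_ms": 5000 |
 | | /// } |
 | | /// ``` |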
1264 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1265 | | #[serde(deny_unknown_fields)] |
1266 | | pub struct ExperimentalMongoSpec { |
1267 | | /// `ExperimentalMongoDB` connection string. |
1268 | | /// Example: <mongodb://localhost:27017> or <mongodb+srv://cluster.mongodb.net> |
1269 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
1270 | | pub connection_string: String, |
1271 | | |
1272 | | /// The database name to use. |
1273 | | /// Default: "nativelink" |
1274 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1275 | | pub database: String, |
1276 | | |
1277 | | /// The collection name for CAS data. |
1278 | | /// Default: "cas" |
1279 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1280 | | pub cas_collection: String, |
1281 | | |
1282 | | /// The collection name for scheduler data. |
1283 | | /// Default: "scheduler" |
1284 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1285 | | pub scheduler_collection: String, |
1286 | | |
1287 | | /// Prefix to prepend to all keys stored in `MongoDB`. |
1288 | | /// Default: "" |
1289 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1290 | | pub key_prefix: Option<String>, |
1291 | | |
1292 | | /// The maximum amount of data to read from `MongoDB` in a single chunk (in bytes). |
1293 | | /// Default: 65536 (64KB) |
1294 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
1295 | | pub read_chunk_size: usize, |
1296 | | |
1297 | | /// Maximum number of concurrent uploads allowed. |
1298 | | /// Default: 10 |
1299 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1300 | | pub max_concurrent_uploads: usize, |
1301 | | |
1302 | | /// Connection timeout in milliseconds. |
1303 | | /// Default: 3000 |
1304 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1305 | | pub connection_timeout_ms: u64, |
1306 | | |
1307 | | /// Command timeout in milliseconds. |
1308 | | /// Default: 10000 |
1309 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1310 | | pub command_timeout_ms: u64, |
1311 | | |
1312 | | /// Enable `MongoDB` change streams for real-time updates. |
1313 | | /// Required for scheduler subscriptions. |
1314 | | /// Default: false |
1315 | | #[serde(default)] |
1316 | | pub enable_change_streams: bool, |
1317 | | |
1318 | | /// Write concern 'w' parameter. |
1319 | | /// Can be a number (e.g., 1) or string (e.g., "majority"). |
1320 | | /// Default: None (uses `MongoDB` default) |
1321 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1322 | | pub write_concern_w: Option<String>, |
1323 | | |
1324 | | /// Write concern 'j' parameter (journal acknowledgment). |
1325 | | /// Default: None (uses `MongoDB` default) |
1326 | | #[serde(default)] |
1327 | | pub write_concern_j: Option<bool>, |
1328 | | |
1329 | | /// Write concern timeout in milliseconds. |
1330 | | /// Default: None (uses `MongoDB` default) |
1331 | | #[serde( |
1332 | | default, |
1333 | | deserialize_with = "convert_optional_numeric_with_shellexpand" |
1334 | | )] |
1335 | | pub write_concern_timeout_ms: Option<u32>, |
1336 | | } |
1337 | | |
1338 | | impl Retry { |
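 | | /// Returns a closure that scales a delay by a random factor in |
 | | /// `[1 - jitter/2, 1 + jitter/2]`; when `jitter` is zero the delay |
 | | /// is returned unchanged. |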
1339 | 9 | pub fn make_jitter_fn(&self) -> Arc<dyn Fn(Duration) -> Duration + Send + Sync> { |
1340 | 9 | if self.jitter == 0f32 { |
1341 | 9 | Arc::new(move |delay: Duration| delay) |
1342 | | } else { |
1343 | 0 | let local_jitter = self.jitter; |
1344 | 0 | Arc::new(move |delay: Duration| { |
1345 | 0 | delay.mul_f32(local_jitter.mul_add(rand::rng().random::<f32>() - 0.5, 1.)) |
1346 | 0 | }) |
1347 | | } |
1348 | 9 | } |
1349 | | } |