/build/source/nativelink-config/src/stores.rs
Line | Count | Source |
1 | | // Copyright 2024 The NativeLink Authors. All rights reserved. |
2 | | // |
3 | | // Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // See LICENSE file for details |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | use core::time::Duration; |
16 | | use std::sync::Arc; |
17 | | |
18 | | use rand::Rng; |
19 | | #[cfg(feature = "dev-schema")] |
20 | | use schemars::JsonSchema; |
21 | | use serde::{Deserialize, Serialize}; |
22 | | |
23 | | use crate::serde_utils::{ |
24 | | convert_boolean_with_shellexpand, convert_data_size_with_shellexpand, |
25 | | convert_duration_with_shellexpand, convert_numeric_with_shellexpand, |
26 | | convert_optional_data_size_with_shellexpand, convert_optional_numeric_with_shellexpand, |
27 | | convert_optional_string_with_shellexpand, convert_string_with_shellexpand, |
28 | | convert_vec_string_with_shellexpand, |
29 | | }; |
30 | | |
31 | | /// Name of a store. This type is used when referencing a store by |
32 | | /// its key in the `CasConfig::stores` map. |
33 | | pub type StoreRefName = String; |
34 | | |
35 | | #[derive(Serialize, Deserialize, Debug, Clone, Copy)] |
36 | | #[serde(rename_all = "snake_case")] |
37 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
38 | | pub enum ConfigDigestHashFunction { |
39 | | /// Use the sha256 hash function. Serialized as `sha256`. |
40 | | /// <https://en.wikipedia.org/wiki/SHA-2> |
41 | | Sha256, |
42 | | |
43 | | /// Use the blake3 hash function. Serialized as `blake3`. |
44 | | /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)> |
45 | | Blake3, |
46 | | } |
47 | | |
48 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
49 | | #[serde(rename_all = "snake_case")] |
50 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
51 | | pub enum StoreSpec { |
52 | | /// Memory store will store all data in a hashmap in memory. |
53 | | /// |
54 | | /// **Example JSON Config:** |
55 | | /// ```json |
56 | | /// "memory": { |
57 | | /// "eviction_policy": { |
58 | | /// "max_bytes": "10mb", |
59 | | /// } |
60 | | /// } |
61 | | /// ``` |
62 | | /// |
63 | | Memory(MemorySpec), |
64 | | |
65 | | /// A generic blob store that will store files on the cloud |
66 | | /// provider. This configuration will never delete files, so you are |
67 | | /// responsible for purging old files in other ways. |
68 | | /// It supports the following backends: |
69 | | /// |
70 | | /// 1. **Amazon S3:** |
71 | | /// S3 store will use Amazon's S3 service as a backend to store |
72 | | /// the files. This configuration can be used to share files |
73 | | /// across multiple instances. Uses system certificates for TLS |
74 | | /// verification via `rustls-platform-verifier`. |
75 | | /// |
76 | | /// **Example JSON Config:** |
77 | | /// ```json |
78 | | /// "experimental_cloud_object_store": { |
79 | | /// "provider": "aws", |
80 | | /// "region": "eu-north-1", |
81 | | /// "bucket": "crossplane-bucket-af79aeca9", |
82 | | /// "key_prefix": "test-prefix-index/", |
83 | | /// "retry": { |
84 | | /// "max_retries": 6, |
85 | | /// "delay": 0.3, |
86 | | /// "jitter": 0.5 |
87 | | /// }, |
88 | | /// "multipart_max_concurrent_uploads": 10 |
89 | | /// } |
90 | | /// ``` |
91 | | /// |
92 | | /// 2. **Google Cloud Storage:** |
93 | | /// GCS store uses Google's GCS service as a backend to store |
94 | | /// the files. This configuration can be used to share files |
95 | | /// across multiple instances. |
96 | | /// |
97 | | /// **Example JSON Config:** |
98 | | /// ```json |
99 | | /// "experimental_cloud_object_store": { |
100 | | /// "provider": "gcs", |
101 | | /// "bucket": "test-bucket", |
102 | | /// "key_prefix": "test-prefix-index/", |
103 | | /// "retry": { |
104 | | /// "max_retries": 6, |
105 | | /// "delay": 0.3, |
106 | | /// "jitter": 0.5 |
107 | | /// }, |
108 | | /// "multipart_max_concurrent_uploads": 10 |
109 | | /// } |
110 | | /// ``` |
111 | | /// |
112 | | /// 3. **`NetApp` ONTAP S3** |
113 | | /// `NetApp` ONTAP S3 store will use ONTAP's S3-compatible storage as a backend |
114 | | /// to store files. This store is specifically configured for ONTAP's S3 requirements |
115 | | /// including custom TLS configuration, credentials management, and proper vserver |
116 | | /// configuration. |
117 | | /// |
118 | | /// This store uses AWS environment variables for credentials: |
119 | | /// - `AWS_ACCESS_KEY_ID` |
120 | | /// - `AWS_SECRET_ACCESS_KEY` |
121 | | /// - `AWS_DEFAULT_REGION` |
122 | | /// |
123 | | /// **Example JSON Config:** |
124 | | /// ```json |
125 | | /// "experimental_cloud_object_store": { |
126 | | /// "provider": "ontap", |
127 | | /// "endpoint": "https://ontap-s3-endpoint:443", |
128 | | /// "vserver_name": "your-vserver", |
129 | | /// "bucket": "your-bucket", |
130 | | /// "root_certificates": "/path/to/certs.pem", // Optional |
131 | | /// "key_prefix": "test-prefix/", // Optional |
132 | | /// "retry": { |
133 | | /// "max_retries": 6, |
134 | | /// "delay": 0.3, |
135 | | /// "jitter": 0.5 |
136 | | /// }, |
137 | | /// "multipart_max_concurrent_uploads": 10 |
138 | | /// } |
139 | | /// ``` |
140 | | ExperimentalCloudObjectStore(ExperimentalCloudObjectSpec), |
141 | | |
142 | | /// ONTAP S3 Existence Cache provides a caching layer on top of the ONTAP S3 store |
143 | | /// to optimize repeated existence checks. It maintains an in-memory cache of object |
144 | | /// digests and periodically syncs this cache to disk for persistence. |
145 | | /// |
146 | | /// The cache helps reduce latency for repeated calls to check object existence, |
147 | | /// while still ensuring eventual consistency with the underlying ONTAP S3 store. |
148 | | /// |
149 | | /// **Example JSON Config:** |
150 | | /// ```json |
151 | | /// "ontap_s3_existence_cache": { |
152 | | /// "index_path": "/path/to/cache/index.json", |
153 | | /// "sync_interval_seconds": 300, |
154 | | /// "backend": { |
155 | | /// "endpoint": "https://ontap-s3-endpoint:443", |
156 | | /// "vserver_name": "your-vserver", |
157 | | /// "bucket": "your-bucket", |
158 | | /// "key_prefix": "test-prefix/" |
159 | | /// } |
160 | | /// } |
161 | | /// ``` |
162 | | /// |
163 | | OntapS3ExistenceCache(Box<OntapS3ExistenceCacheSpec>), |
164 | | |
165 | | /// Verify store is used to apply verifications to an underlying |
166 | | /// store implementation. It is strongly encouraged to validate |
167 | | /// as much data as you can before accepting data from a client, |
168 | | /// failing to do so may cause the data in the store to be |
169 | | /// populated with invalid data causing all kinds of problems. |
170 | | /// |
171 | | /// The suggested configuration is to have the CAS validate the |
172 | | /// hash and size and the AC validate nothing. |
173 | | /// |
174 | | /// **Example JSON Config:** |
175 | | /// ```json |
176 | | /// "verify": { |
177 | | /// "backend": { |
178 | | /// "memory": { |
179 | | /// "eviction_policy": { |
180 | | /// "max_bytes": "500mb" |
181 | | /// } |
182 | | /// }, |
183 | | /// }, |
184 | | /// "verify_size": true, |
185 | | /// "verify_hash": true |
186 | | /// } |
187 | | /// ``` |
188 | | /// |
189 | | Verify(Box<VerifySpec>), |
190 | | |
191 | | /// Completeness checking store verifies if the |
192 | | /// output files & folders exist in the CAS before forwarding |
193 | | /// the request to the underlying store. |
194 | | /// Note: This store should only be used on AC stores. |
195 | | /// |
196 | | /// **Example JSON Config:** |
197 | | /// ```json |
198 | | /// "completeness_checking": { |
199 | | /// "backend": { |
200 | | /// "filesystem": { |
201 | | /// "content_path": "~/.cache/nativelink/content_path-ac", |
202 | | /// "temp_path": "~/.cache/nativelink/tmp_path-ac", |
203 | | /// "eviction_policy": { |
204 | | /// "max_bytes": "500mb", |
205 | | /// } |
206 | | /// } |
207 | | /// }, |
208 | | /// "cas_store": { |
209 | | /// "ref_store": { |
210 | | /// "name": "CAS_MAIN_STORE" |
211 | | /// } |
212 | | /// } |
213 | | /// } |
214 | | /// ``` |
215 | | /// |
216 | | CompletenessChecking(Box<CompletenessCheckingSpec>), |
217 | | |
218 | | /// A compression store that will compress the data inbound and |
219 | | /// outbound. There will be a non-trivial cost to compress and |
220 | | /// decompress the data, but in many cases if the final store is |
221 | | /// a store that requires network transport and/or storage space |
222 | | /// is a concern it is often faster and more efficient to use this |
223 | | /// store before those stores. |
224 | | /// |
225 | | /// **Example JSON Config:** |
226 | | /// ```json |
227 | | /// "compression": { |
228 | | /// "compression_algorithm": { |
229 | | /// "lz4": {} |
230 | | /// }, |
231 | | /// "backend": { |
232 | | /// "filesystem": { |
233 | | /// "content_path": "/tmp/nativelink/data/content_path-cas", |
234 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-cas", |
235 | | /// "eviction_policy": { |
236 | | /// "max_bytes": "2gb", |
237 | | /// } |
238 | | /// } |
239 | | /// } |
240 | | /// } |
241 | | /// ``` |
242 | | /// |
243 | | Compression(Box<CompressionSpec>), |
244 | | |
245 | | /// A dedup store will take the inputs and run a rolling hash |
246 | | /// algorithm on them to slice the input into smaller parts then |
247 | | /// run a sha256 algorithm on the slice and if the object doesn't |
248 | | /// already exist, upload the slice to the `content_store` using |
249 | | /// a new digest of just the slice. Once all parts exist, an |
250 | | /// Action-Cache-like digest will be built and uploaded to the |
251 | | /// `index_store` which will contain a reference to each |
252 | | /// chunk/digest of the uploaded file. Downloading a request will |
253 | | /// first grab the index from the `index_store`, and forward the |
254 | | /// download content of each chunk as if it were one file. |
255 | | /// |
256 | | /// This store is exceptionally good when the following conditions |
257 | | /// are met: |
258 | | /// * Content is mostly the same (inserts, updates, deletes are ok) |
259 | | /// * Content is not compressed or encrypted |
260 | | /// * Uploading or downloading from `content_store` is the bottleneck. |
261 | | /// |
262 | | /// Note: This store pairs well when used with `CompressionSpec` as |
263 | | /// the `content_store`, but never put `DedupSpec` as the backend of |
264 | | /// `CompressionSpec` as it will negate all the gains. |
265 | | /// |
266 | | /// Note: When running `.has()` on this store, it will only check |
267 | | /// to see if the entry exists in the `index_store` and not check |
268 | | /// if the individual chunks exist in the `content_store`. |
269 | | /// |
270 | | /// **Example JSON Config:** |
271 | | /// ```json |
272 | | /// "dedup": { |
273 | | /// "index_store": { |
274 | | /// "memory": { |
275 | | /// "eviction_policy": { |
276 | | /// "max_bytes": "1GB", |
277 | | /// } |
278 | | /// } |
279 | | /// }, |
280 | | /// "content_store": { |
281 | | /// "compression": { |
282 | | /// "compression_algorithm": { |
283 | | /// "lz4": {} |
284 | | /// }, |
285 | | /// "backend": { |
286 | | /// "fast_slow": { |
287 | | /// "fast": { |
288 | | /// "memory": { |
289 | | /// "eviction_policy": { |
290 | | /// "max_bytes": "500MB", |
291 | | /// } |
292 | | /// } |
293 | | /// }, |
294 | | /// "slow": { |
295 | | /// "filesystem": { |
296 | | /// "content_path": "/tmp/nativelink/data/content_path-content", |
297 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-content", |
298 | | /// "eviction_policy": { |
299 | | /// "max_bytes": "2gb" |
300 | | /// } |
301 | | /// } |
302 | | /// } |
303 | | /// } |
304 | | /// } |
305 | | /// } |
306 | | /// } |
307 | | /// } |
308 | | /// ``` |
309 | | /// |
310 | | Dedup(Box<DedupSpec>), |
311 | | |
312 | | /// Existence store will wrap around another store and cache calls |
313 | | /// to `has` so that subsequent `has_with_results` calls will be |
314 | | /// faster. This is useful for cases when you have a store that |
315 | | /// is slow to respond to `has` calls. |
316 | | /// Note: This store should only be used on CAS stores. |
317 | | /// |
318 | | /// **Example JSON Config:** |
319 | | /// ```json |
320 | | /// "existence_cache": { |
321 | | /// "backend": { |
322 | | /// "memory": { |
323 | | /// "eviction_policy": { |
324 | | /// "max_bytes": "500mb", |
325 | | /// } |
326 | | /// } |
327 | | /// }, |
328 | | /// // Note this is the existence store policy, not the backend policy |
329 | | /// "eviction_policy": { |
330 | | /// "max_seconds": 100, |
331 | | /// } |
332 | | /// } |
333 | | /// ``` |
334 | | /// |
335 | | ExistenceCache(Box<ExistenceCacheSpec>), |
336 | | |
337 | | /// `FastSlow` store will first try to fetch the data from the `fast` |
338 | | /// store and then if it does not exist try the `slow` store. |
339 | | /// When the object does exist in the `slow` store, it will copy |
340 | | /// the data to the `fast` store while returning the data. |
341 | | /// This store should be thought of as a store that "buffers" |
342 | | /// the data to the `fast` store. |
343 | | /// On uploads it will mirror data to both `fast` and `slow` stores. |
344 | | /// |
345 | | /// WARNING: If you need data to always exist in the `slow` store |
346 | | /// for something like remote execution, be careful because this |
347 | | /// store will never check to see if the objects exist in the |
348 | | /// `slow` store if it exists in the `fast` store (ie: it assumes |
349 | | /// that if an object exists in the `fast` store it will exist in |
350 | | /// the `slow` store). |
351 | | /// |
352 | | /// **Example JSON Config:** |
353 | | /// ```json |
354 | | /// "fast_slow": { |
355 | | /// "fast": { |
356 | | /// "filesystem": { |
357 | | /// "content_path": "/tmp/nativelink/data/content_path-index", |
358 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-index", |
359 | | /// "eviction_policy": { |
360 | | /// "max_bytes": "500mb", |
361 | | /// } |
362 | | /// } |
363 | | /// }, |
364 | | /// "slow": { |
365 | | /// "filesystem": { |
366 | | /// "content_path": "/tmp/nativelink/data/content_path-index", |
367 | | /// "temp_path": "/tmp/nativelink/data/tmp_path-index", |
368 | | /// "eviction_policy": { |
369 | | /// "max_bytes": "500mb", |
370 | | /// } |
371 | | /// } |
372 | | /// } |
373 | | /// } |
374 | | /// ``` |
375 | | /// |
376 | | FastSlow(Box<FastSlowSpec>), |
377 | | |
378 | | /// Shards the data to multiple stores. This is useful for cases |
379 | | /// when you want to distribute the load across multiple stores. |
380 | | /// The digest hash is used to determine which store to send the |
381 | | /// data to. |
382 | | /// |
383 | | /// **Example JSON Config:** |
384 | | /// ```json |
385 | | /// "shard": { |
386 | | /// "stores": [ |
387 | | /// { |
388 | | /// "store": { |
389 | | /// "memory": { |
390 | | /// "eviction_policy": { |
391 | | /// "max_bytes": "10mb" |
392 | | /// }, |
393 | | /// }, |
394 | | /// }, |
395 | | /// "weight": 1 |
396 | | /// }] |
397 | | /// } |
398 | | /// ``` |
399 | | /// |
400 | | Shard(ShardSpec), |
401 | | |
402 | | /// Stores the data on the filesystem. This store is designed for |
403 | | /// local persistent storage. Restarts of this program should restore |
404 | | /// the previous state, meaning anything uploaded will be persistent |
405 | | /// as long as the filesystem integrity holds. |
406 | | /// |
407 | | /// **Example JSON Config:** |
408 | | /// ```json |
409 | | /// "filesystem": { |
410 | | /// "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", |
411 | | /// "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", |
412 | | /// "eviction_policy": { |
413 | | /// "max_bytes": "10gb", |
414 | | /// } |
415 | | /// } |
416 | | /// ``` |
417 | | /// |
418 | | Filesystem(FilesystemSpec), |
419 | | |
420 | | /// Store used to reference a store in the root store manager. |
421 | | /// This is useful for cases when you want to share a store in different |
422 | | /// nested stores. Example, you may want to share the same memory store |
423 | | /// used for the action cache, but use a `FastSlowSpec` and have the fast |
424 | | /// store also share the memory store for efficiency. |
425 | | /// |
426 | | /// **Example JSON Config:** |
427 | | /// ```json |
428 | | /// "ref_store": { |
429 | | /// "name": "FS_CONTENT_STORE" |
430 | | /// } |
431 | | /// ``` |
432 | | /// |
433 | | RefStore(RefSpec), |
434 | | |
435 | | /// Uses the size field of the digest to separate which store to send the |
436 | | /// data. This is useful for cases when you'd like to put small objects |
437 | | /// in one store and large objects in another store. This should only be |
438 | | /// used if the size field is the real size of the content, in other |
439 | | /// words, don't use on AC (Action Cache) stores. Any store where you can |
440 | | /// safely use `VerifySpec.verify_size = true`, this store should be safe |
441 | | /// to use (ie: CAS stores). |
442 | | /// |
443 | | /// **Example JSON Config:** |
444 | | /// ```json |
445 | | /// "size_partitioning": { |
446 | | /// "size": "128mib", |
447 | | /// "lower_store": { |
448 | | /// "memory": { |
449 | | /// "eviction_policy": { |
450 | | /// "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100mb}" |
451 | | /// } |
452 | | /// } |
453 | | /// }, |
454 | | /// "upper_store": { |
455 | | /// // This store discards data of 128mib or larger. |
456 | | /// "noop": {} |
457 | | /// } |
458 | | /// } |
459 | | /// ``` |
460 | | /// |
461 | | SizePartitioning(Box<SizePartitioningSpec>), |
462 | | |
463 | | /// This store will pass-through calls to another GRPC store. This store |
464 | | /// is not designed to be used as a sub-store of another store, but it |
465 | | /// does satisfy the interface and will likely work. |
466 | | /// |
467 | | /// One major GOTCHA is that some stores use a special function on this |
468 | | /// store to get the size of the underlying object, which is only reliable |
469 | | /// when this store is serving a CAS store, not an AC store. If using |
470 | | /// this store directly without being a child of any store there are no |
471 | | /// side effects and is the most efficient way to use it. |
472 | | /// |
473 | | /// **Example JSON Config:** |
474 | | /// ```json |
475 | | /// "grpc": { |
476 | | /// "instance_name": "main", |
477 | | /// "endpoints": [ |
478 | | /// {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"} |
479 | | /// ], |
480 | | /// "connections_per_endpoint": "5", |
481 | | /// "rpc_timeout_s": "5m", |
482 | | /// "store_type": "ac" |
483 | | /// } |
484 | | /// ``` |
485 | | /// |
486 | | Grpc(GrpcSpec), |
487 | | |
488 | | /// Stores data in any stores compatible with Redis APIs. |
489 | | /// |
490 | | /// Pairs well with `SizePartitioning` and/or `FastSlow` stores. |
491 | | /// Ideal for accepting small object sizes as most redis store |
492 | | /// services have a max file upload of between 256MB-512MB. |
493 | | /// |
494 | | /// **Example JSON Config:** |
495 | | /// ```json |
496 | | /// "redis_store": { |
497 | | /// "addresses": [ |
498 | | /// "redis://127.0.0.1:6379/", |
499 | | /// ], |
500 | | /// "max_client_permits": 1000, |
501 | | /// } |
502 | | /// ``` |
503 | | /// |
504 | | RedisStore(RedisSpec), |
505 | | |
506 | | /// Noop store is a store that sends streams into the void and all data |
507 | | /// retrieval will return 404 (`NotFound`). This can be useful for cases |
508 | | /// where you may need to partition your data and part of your data needs |
509 | | /// to be discarded. |
510 | | /// |
511 | | /// **Example JSON Config:** |
512 | | /// ```json |
513 | | /// "noop": {} |
514 | | /// ``` |
515 | | /// |
516 | | Noop(NoopSpec), |
517 | | |
518 | | /// Experimental `MongoDB` store implementation. |
519 | | /// |
520 | | /// This store uses `MongoDB` as a backend for storing data. It supports |
521 | | /// both CAS (Content Addressable Storage) and scheduler data with |
522 | | /// optional change streams for real-time updates. |
523 | | /// |
524 | | /// **Example JSON Config:** |
525 | | /// ```json |
526 | | /// "experimental_mongo": { |
527 | | /// "connection_string": "mongodb://localhost:27017", |
528 | | /// "database": "nativelink", |
529 | | /// "cas_collection": "cas", |
530 | | /// "key_prefix": "cas:", |
531 | | /// "read_chunk_size": 65536, |
532 | | /// "max_concurrent_uploads": 10, |
533 | | /// "enable_change_streams": false |
534 | | /// } |
535 | | /// ``` |
536 | | /// |
537 | | ExperimentalMongo(ExperimentalMongoSpec), |
538 | | } |
539 | | |
540 | | /// Configuration for an individual shard of the store. |
541 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
542 | | #[serde(deny_unknown_fields)] |
543 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
544 | | pub struct ShardConfig { |
545 | | /// Store to shard the data to. |
546 | | pub store: StoreSpec, |
547 | | |
548 | | /// The weight of the store. This is used to determine how much data |
549 | | /// should be sent to the store. The actual percentage is the individual |
550 | | /// store's weight divided by the sum of all the stores' weights. |
551 | | /// |
552 | | /// Default: 1 |
553 | | #[serde(default, deserialize_with = "convert_optional_numeric_with_shellexpand")] |
554 | | pub weight: Option<u32>, |
555 | | } |
556 | | |
557 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
558 | | #[serde(deny_unknown_fields)] |
559 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
560 | | pub struct ShardSpec { |
561 | | /// Stores to shard the data to, each paired with its relative weight. |
562 | | pub stores: Vec<ShardConfig>, |
563 | | } |
564 | | |
565 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
566 | | #[serde(deny_unknown_fields)] |
567 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
568 | | pub struct SizePartitioningSpec { |
569 | | /// Size to partition the data on. |
570 | | #[serde(deserialize_with = "convert_data_size_with_shellexpand")] |
571 | | pub size: u64, |
572 | | |
573 | | /// Store to send data to when the object is < (less than) size. |
574 | | pub lower_store: StoreSpec, |
575 | | |
576 | | /// Store to send data to when the object is >= (greater than or equal to) size. |
577 | | pub upper_store: StoreSpec, |
578 | | } |
579 | | |
580 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
581 | | #[serde(deny_unknown_fields)] |
582 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
583 | | pub struct RefSpec { |
584 | | /// Name of the referenced store: its key under the root "stores" config object. |
585 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
586 | | pub name: String, |
587 | | } |
588 | | |
589 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
590 | | #[serde(deny_unknown_fields)] |
591 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
592 | | pub struct FilesystemSpec { |
593 | | /// Path on the system where to store the actual content. This is where |
594 | | /// the bulk of the data will be placed. |
595 | | /// On service bootup this folder will be scanned and all files will be |
596 | | /// added to the cache. In the event one of the files doesn't match the |
597 | | /// criteria, the file will be deleted. |
598 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
599 | | pub content_path: String, |
600 | | |
601 | | /// A temporary location where files that are being uploaded or |
602 | | /// deleted will be placed while the content cannot be guaranteed to be |
603 | | /// accurate. This location must be on the same block device as |
604 | | /// `content_path` so atomic moves can happen (ie: move without copy). |
605 | | /// All files in this folder will be deleted on every startup. |
606 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
607 | | pub temp_path: String, |
608 | | |
609 | | /// Buffer size to use when reading files. Generally this should be left |
610 | | /// to the default value except for testing. |
611 | | /// Default: 32k. |
612 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
613 | | pub read_buffer_size: u32, |
614 | | |
615 | | /// Policy used to evict items out of the store. Failure to set this |
616 | | /// value will cause items to never be removed from the store, causing |
617 | | /// unbounded storage usage. |
618 | | pub eviction_policy: Option<EvictionPolicy>, |
619 | | |
620 | | /// The block size of the filesystem for the running machine. This |
621 | | /// value is used to determine an entry's actual size consumed on disk. |
622 | | /// For a 4KB block size filesystem, a 1B file actually consumes 4KB. |
623 | | /// Default: 4096 |
624 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
625 | | pub block_size: u64, |
626 | | |
627 | | /// Maximum number of concurrent write operations allowed. |
628 | | /// Each write involves streaming data to a temp file and calling `sync_all()`, |
629 | | /// which can saturate disk I/O when many writes happen simultaneously. |
630 | | /// Limiting concurrency prevents disk saturation from blocking the async |
631 | | /// runtime. |
632 | | /// A value of 0 means unlimited (no concurrency limit). |
633 | | /// Default: 0 |
634 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
635 | | pub max_concurrent_writes: usize, |
636 | | } |
637 | | |
638 | | // NetApp ONTAP S3 specs. NOTE(review): serde documents `deny_unknown_fields` as unsupported together with `flatten` (used on `common`) - confirm unknown keys are still rejected. |
639 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
640 | | #[serde(deny_unknown_fields)] |
641 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
642 | | pub struct ExperimentalOntapS3Spec { |
643 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
644 | | pub endpoint: String, |
645 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
646 | | pub vserver_name: String, |
647 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
648 | | pub bucket: String, |
649 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
650 | | pub root_certificates: Option<String>, |
651 | | |
652 | | /// Common retry and upload configuration, flattened into this object's keys. |
653 | | #[serde(flatten)] |
654 | | pub common: CommonObjectSpec, |
655 | | } |
656 | | |
657 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
658 | | #[serde(deny_unknown_fields)] |
659 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
660 | | pub struct OntapS3ExistenceCacheSpec { |
661 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
662 | | pub index_path: String, |
663 | | #[serde(deserialize_with = "convert_numeric_with_shellexpand")] |
664 | | pub sync_interval_seconds: u32, |
665 | | pub backend: Box<ExperimentalOntapS3Spec>, |
666 | | } |
667 | | |
668 | | #[derive(Serialize, Deserialize, Default, Debug, Clone, Copy, PartialEq, Eq)] |
669 | | #[serde(rename_all = "snake_case")] |
670 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
671 | | pub enum StoreDirection { |
672 | | /// The store operates normally and all get and put operations are |
673 | | /// handled by it. This is the default. |
674 | | #[default] |
675 | | Both, |
676 | | /// Update operations will cause persistence to this store, but Get |
677 | | /// operations will be ignored. |
678 | | /// This only makes sense on the fast store as the slow store will |
679 | | /// never get written to on Get anyway. |
680 | | Update, |
681 | | /// Get operations will cause persistence to this store, but Update |
682 | | /// operations will be ignored. |
683 | | Get, |
684 | | /// Operate as a read-only store; this only really makes sense if there's |
685 | | /// another way to write to it. Serialized as `read_only`. |
686 | | ReadOnly, |
687 | | } |
688 | | |
689 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
690 | | #[serde(deny_unknown_fields)] |
691 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
692 | | pub struct FastSlowSpec { |
693 | | /// Fast store that will be attempted to be contacted before reaching |
694 | | /// out to the `slow` store. |
695 | | pub fast: StoreSpec, |
696 | | |
697 | | /// How to handle the fast store. This can be useful to set to `get` for |
698 | | /// worker nodes such that results are persisted to the slow store only. Default: `both`. |
699 | | #[serde(default)] |
700 | | pub fast_direction: StoreDirection, |
701 | | |
702 | | /// If the object does not exist in the `fast` store it will try to |
703 | | /// get it from this store. |
704 | | pub slow: StoreSpec, |
705 | | |
706 | | /// How to handle the slow store. This can be useful if creating a diode |
707 | | /// and you wish to have an upstream read only store. Default: `both`. |
708 | | #[serde(default)] |
709 | | pub slow_direction: StoreDirection, |
710 | | } |
711 | | |
712 | | #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)] |
713 | | #[serde(deny_unknown_fields)] |
714 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
715 | | pub struct MemorySpec { |
716 | | /// Policy used to evict items out of the store. Failure to set this |
717 | | /// value will cause items to never be removed from the store, causing |
718 | | /// unbounded memory usage. |
719 | | pub eviction_policy: Option<EvictionPolicy>, |
720 | | } |
721 | | |
722 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
723 | | #[serde(deny_unknown_fields)] |
724 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
725 | | pub struct DedupSpec { |
726 | | /// Store used to store the index of each dedup slice. This store |
727 | | /// should generally be fast and small. |
728 | | pub index_store: StoreSpec, |
729 | | |
730 | | /// The store where the individual chunks will be uploaded. This |
731 | | /// store should generally be the slower & larger store. |
732 | | pub content_store: StoreSpec, |
733 | | |
734 | | /// Minimum size that a chunk will be when slicing up the content. |
735 | | /// Note: This setting can be increased to improve performance |
736 | | /// because it will actually not check this number of bytes when |
737 | | /// deciding where to partition the data. |
738 | | /// |
739 | | /// Default: 65536 (64k) |
740 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
741 | | pub min_size: u32, |
742 | | |
743 | | /// A best-effort attempt will be made to keep the average size |
744 | | /// of the chunks to this number. It is not a guarantee, but a |
745 | | /// slight attempt will be made. |
746 | | /// |
747 | | /// This value will also be about the threshold used to determine |
748 | | /// if we should even attempt to dedup the entry or just forward |
749 | | /// it directly to the `content_store` without an index. The actual |
750 | | /// value will be about `normal_size * 1.3` due to implementation |
751 | | /// details. |
752 | | /// |
753 | | /// Default: 262144 (256k) |
754 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
755 | | pub normal_size: u32, |
756 | | |
757 | | /// Maximum size a chunk is allowed to be. |
758 | | /// |
759 | | /// Default: 524288 (512k) |
760 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
761 | | pub max_size: u32, |
762 | | |
763 | | /// Due to implementation detail, we want to prefer to download |
764 | | /// the first chunks of the file so we can stream the content |
765 | | /// out and free up some of our buffers. This configuration |
766 | | /// will be used to restrict the number of concurrent chunk |
767 | | /// downloads at a time per `get()` request. |
768 | | /// |
769 | | /// This setting will also affect how much memory might be used |
770 | | /// per `get()` request. Estimated worst case memory per `get()` |
771 | | /// request is: `max_concurrent_fetch_per_get * max_size`. |
772 | | /// |
773 | | /// Default: 10 |
774 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
775 | | pub max_concurrent_fetch_per_get: u32, |
776 | | } |
777 | | |
778 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
779 | | #[serde(deny_unknown_fields)] |
780 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
781 | | pub struct ExistenceCacheSpec { |
782 | | /// The underlying store to wrap around. All content will first flow |
783 | | /// through self before forwarding to backend. In the event there |
784 | | /// is an error detected in self, the connection to the backend |
785 | | /// will be terminated, and early termination should always cause |
786 | | /// updates to fail on the backend. |
787 | | pub backend: StoreSpec, |
788 | | |
789 | | /// Policy used to evict items out of the store. Failure to set this |
790 | | /// value will cause items to never be removed from the store causing |
791 | | /// infinite memory usage. |
792 | | pub eviction_policy: Option<EvictionPolicy>, |
793 | | } |
794 | | |
795 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
796 | | #[serde(deny_unknown_fields)] |
797 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
798 | | pub struct VerifySpec { |
799 | | /// The underlying store to wrap around. All content will first flow |
800 | | /// through self before forwarding to backend. In the event there |
801 | | /// is an error detected in self, the connection to the backend |
802 | | /// will be terminated, and early termination should always cause |
803 | | /// updates to fail on the backend. |
804 | | pub backend: StoreSpec, |
805 | | |
806 | | /// If set the store will verify the size of the data before accepting |
807 | | /// an upload of data. |
808 | | /// |
809 | | /// This should be set to false for AC, but true for CAS stores. |
810 | | #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")] |
811 | | pub verify_size: bool, |
812 | | |
813 | | /// If set, the data will be hashed and the key verified to match the |
814 | | /// computed hash. The hash function is automatically determined based |
815 | | /// on the request and, if not set, will use the global default. |
816 | | /// |
817 | | /// This should be set to false for AC, but true for CAS stores. |
818 | | #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")] |
819 | | pub verify_hash: bool, |
820 | | } |
821 | | |
822 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
823 | | #[serde(deny_unknown_fields)] |
824 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
825 | | pub struct CompletenessCheckingSpec { |
826 | | /// The underlying store that will have its results validated before sending to client. |
827 | | pub backend: StoreSpec, |
828 | | |
829 | | /// When a request is made, the results are decoded and all output digests/files are verified |
830 | | /// to exist in this CAS store before returning success. |
831 | | pub cas_store: StoreSpec, |
832 | | } |
833 | | |
834 | | #[derive(Serialize, Deserialize, Debug, Default, PartialEq, Eq, Clone, Copy)] |
835 | | #[serde(deny_unknown_fields)] |
836 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
837 | | pub struct Lz4Config { |
838 | | /// Size of the blocks to compress. |
839 | | /// Higher values require more ram, but might yield slightly better |
840 | | /// compression ratios. |
841 | | /// |
842 | | /// Default: 65536 (64k). |
843 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
844 | | pub block_size: u32, |
845 | | |
846 | | /// Maximum size allowed to attempt to deserialize data into. |
847 | | /// This is needed because the `block_size` is embedded into the data |
848 | | /// so if there was a bad actor, they could upload an extremely large |
849 | | /// `block_size`'ed entry and we'd allocate a large amount of memory |
850 | | /// when retrieving the data. To prevent this from happening, we |
851 | | /// allow you to specify the maximum that we'll attempt to deserialize. |
852 | | /// |
853 | | /// Default: value in `block_size`. |
854 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
855 | | pub max_decode_block_size: u32, |
856 | | } |
857 | | |
858 | | #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Copy)] |
859 | | #[serde(rename_all = "snake_case")] |
860 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
861 | | pub enum CompressionAlgorithm { |
862 | | /// LZ4 compression algorithm is extremely fast for compression and |
863 | | /// decompression, however does not perform very well in compression |
864 | | /// ratio. In most cases build artifacts are highly compressible, however |
865 | | /// lz4 is quite good at aborting early if the data is not deemed very |
866 | | /// compressible. |
867 | | /// |
868 | | /// see: <https://lz4.github.io/lz4/> |
869 | | Lz4(Lz4Config), |
870 | | } |
871 | | |
872 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
873 | | #[serde(deny_unknown_fields)] |
874 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
875 | | pub struct CompressionSpec { |
876 | | /// The underlying store to wrap around. All content will first flow |
877 | | /// through self before forwarding to backend. In the event there |
878 | | /// is an error detected in self, the connection to the backend |
879 | | /// will be terminated, and early termination should always cause |
880 | | /// updates to fail on the backend. |
881 | | pub backend: StoreSpec, |
882 | | |
883 | | /// The compression algorithm to use. |
884 | | pub compression_algorithm: CompressionAlgorithm, |
885 | | } |
886 | | |
887 | | /// Eviction policy always works on LRU (Least Recently Used). Any time an entry |
888 | | /// is touched it updates the timestamp. Inserts and updates will execute the |
889 | | /// eviction policy removing any expired entries and/or the oldest entries |
890 | | /// until the store size becomes smaller than `max_bytes`. |
891 | | #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)] |
892 | | #[serde(deny_unknown_fields)] |
893 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
894 | | pub struct EvictionPolicy { |
895 | | /// Maximum number of bytes before eviction takes place. |
896 | | /// Default: 0. Zero means never evict based on size. |
897 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
898 | | pub max_bytes: usize, |
899 | | |
900 | | /// When eviction starts based on hitting `max_bytes`, continue until |
901 | | /// `max_bytes - evict_bytes` is met to create a low watermark. This stops |
902 | | /// operations from thrashing when the store is close to the limit. |
903 | | /// Default: 0 |
904 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
905 | | pub evict_bytes: usize, |
906 | | |
907 | | /// Maximum number of seconds for an entry to live since it was last |
908 | | /// accessed before it is evicted. |
909 | | /// Default: 0. Zero means never evict based on time. |
910 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
911 | | pub max_seconds: u32, |
912 | | |
913 | | /// Maximum number of items in the store before an eviction takes place. |
914 | | /// Default: 0. Zero means never evict based on count. |
915 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
916 | | pub max_count: u64, |
917 | | } |
918 | | |
919 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
920 | | #[serde(tag = "provider", rename_all = "snake_case")] |
921 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
922 | | pub enum ExperimentalCloudObjectSpec { |
923 | | Aws(ExperimentalAwsSpec), |
924 | | Gcs(ExperimentalGcsSpec), |
925 | | Ontap(ExperimentalOntapS3Spec), |
926 | | } |
927 | | |
928 | | impl Default for ExperimentalCloudObjectSpec { |
929 | 0 | fn default() -> Self { |
930 | 0 | Self::Aws(ExperimentalAwsSpec::default()) |
931 | 0 | } |
932 | | } |
933 | | |
934 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
935 | | #[serde(deny_unknown_fields)] |
936 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
937 | | pub struct ExperimentalAwsSpec { |
938 | | /// S3 region. Usually us-east-1, us-west-2, af-south-1, etc. |
939 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
940 | | pub region: String, |
941 | | |
942 | | /// Bucket name to use as the backend. |
943 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
944 | | pub bucket: String, |
945 | | |
946 | | /// Common retry and upload configuration |
947 | | #[serde(flatten)] |
948 | | pub common: CommonObjectSpec, |
949 | | } |
950 | | |
951 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
952 | | #[serde(deny_unknown_fields)] |
953 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
954 | | pub struct ExperimentalGcsSpec { |
955 | | /// Bucket name to use as the backend. |
956 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
957 | | pub bucket: String, |
958 | | |
959 | | /// Chunk size for resumable uploads. |
960 | | /// |
961 | | /// Default: 2MB |
962 | | #[serde( |
963 | | default, |
964 | | deserialize_with = "convert_optional_data_size_with_shellexpand" |
965 | | )] |
966 | | pub resumable_chunk_size: Option<usize>, |
967 | | |
968 | | /// Common retry and upload configuration |
969 | | #[serde(flatten)] |
970 | | pub common: CommonObjectSpec, |
971 | | |
972 | | /// Error if authentication was not found. |
973 | | #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")] |
974 | | pub authentication_required: bool, |
975 | | |
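976 | | /// Connection timeout in seconds (the field is `connection_timeout_s`). |
977 | | /// Default: 3000 (NOTE(review): this figure looks like milliseconds; confirm the unit). |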
978 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
979 | | pub connection_timeout_s: u64, |
980 | | |
981 | | /// Read timeout in seconds (the field is `read_timeout_s`). |
982 | | /// Default: 3000 (NOTE(review): this figure looks like milliseconds; confirm the unit). |
983 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
984 | | pub read_timeout_s: u64, |
985 | | } |
986 | | |
987 | | #[derive(Serialize, Deserialize, Debug, Default, Clone)] |
988 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
989 | | pub struct CommonObjectSpec { |
990 | | /// If you wish to prefix the location in the bucket. If None, no prefix will be used. |
991 | | #[serde(default)] |
992 | | pub key_prefix: Option<String>, |
993 | | |
994 | | /// Retry configuration to use when a network request fails. |
995 | | #[serde(default)] |
996 | | pub retry: Retry, |
997 | | |
998 | | /// If the number of seconds since the `last_modified` time of the object |
999 | | /// is greater than this value, the object will not be considered |
1000 | | /// "existing". This allows for external tools to delete objects that |
1001 | | /// have not been uploaded in a long time. If a client receives a `NotFound` |
1002 | | /// the client should re-upload the object. |
1003 | | /// |
1004 | | /// There should be sufficient buffer time between how long the expiration |
1005 | | /// configuration of the external tool is and this value. Keeping items |
1006 | | /// around for a few days is generally a good idea. |
1007 | | /// |
1008 | | /// Default: 0. Zero means never consider an object expired. |
1009 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
1010 | | pub consider_expired_after_s: u32, |
1011 | | |
1012 | | /// The maximum buffer size to retain in case of a retryable error |
1013 | | /// during upload. Setting this to zero will disable upload buffering; |
1014 | | /// this means that in the event of a failure during upload, the entire |
1015 | | /// upload will be aborted and the client will likely receive an error. |
1016 | | /// |
1017 | | /// Default: 5MB. |
1018 | | #[serde( |
1019 | | default, |
1020 | | deserialize_with = "convert_optional_data_size_with_shellexpand" |
1021 | | )] |
1022 | | pub max_retry_buffer_per_request: Option<usize>, |
1023 | | |
1024 | | /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`. |
1025 | | /// |
1026 | | /// Default: 10. |
1027 | | /// |
1028 | | #[serde( |
1029 | | default, |
1030 | | deserialize_with = "convert_optional_numeric_with_shellexpand" |
1031 | | )] |
1032 | | pub multipart_max_concurrent_uploads: Option<usize>, |
1033 | | |
1034 | | /// Allow unencrypted HTTP connections. Only use this for local testing. |
1035 | | /// |
1036 | | /// Default: false |
1037 | | #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")] |
1038 | | pub insecure_allow_http: bool, |
1039 | | |
1040 | | /// Disable http/2 connections and only use http/1.1. Default client |
1041 | | /// configuration will have http/1.1 and http/2 enabled for connection |
1042 | | /// schemes. Http/2 should be disabled if environments have poor support |
1043 | | /// or performance related to http/2. Safe to keep default unless |
1044 | | /// underlying network environment, S3, or GCS API servers specify otherwise. |
1045 | | /// |
1046 | | /// Default: false |
1047 | | #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")] |
1048 | | pub disable_http2: bool, |
1049 | | } |
1050 | | |
1051 | | #[derive(Serialize, Deserialize, Debug, Clone, Copy)] |
1052 | | #[serde(rename_all = "snake_case")] |
1053 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1054 | | pub enum StoreType { |
1055 | | /// The store is content addressable storage. |
1056 | | Cas, |
1057 | | /// The store is an action cache. |
1058 | | Ac, |
1059 | | } |
1060 | | |
1061 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1062 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1063 | | pub struct ClientTlsConfig { |
1064 | | /// Path to the certificate authority to use to validate the remote. |
1065 | | /// |
1066 | | /// Default: None |
1067 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1068 | | pub ca_file: Option<String>, |
1069 | | |
1070 | | /// Path to the certificate file for client authentication. |
1071 | | /// |
1072 | | /// Default: None |
1073 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1074 | | pub cert_file: Option<String>, |
1075 | | |
1076 | | /// Path to the private key file for client authentication. |
1077 | | /// |
1078 | | /// Default: None |
1079 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1080 | | pub key_file: Option<String>, |
1081 | | |
1082 | | /// If set the client will use the native roots for TLS connections. |
1083 | | /// |
1084 | | /// Default: false |
1085 | | #[serde(default)] |
1086 | | pub use_native_roots: Option<bool>, |
1087 | | } |
1088 | | |
1089 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1090 | | #[serde(deny_unknown_fields)] |
1091 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1092 | | pub struct GrpcEndpoint { |
1093 | | /// The endpoint address (e.g. grpc(s)://example.com:443). |
1094 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
1095 | | pub address: String, |
1096 | | /// The TLS configuration to use to connect to the endpoint (if grpcs). |
1097 | | pub tls_config: Option<ClientTlsConfig>, |
1098 | | /// The maximum concurrency to allow on this endpoint. |
1099 | | #[serde( |
1100 | | default, |
1101 | | deserialize_with = "convert_optional_numeric_with_shellexpand" |
1102 | | )] |
1103 | | pub concurrency_limit: Option<usize>, |
1104 | | |
1105 | | /// Timeout for establishing a TCP connection to the endpoint (seconds). |
1106 | | /// If not set or 0, defaults to 30 seconds. |
1107 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
1108 | | pub connect_timeout_s: u64, |
1109 | | |
1110 | | /// TCP keepalive interval (seconds). Sends TCP keepalive probes at this |
1111 | | /// interval to detect dead connections at the OS level. |
1112 | | /// If not set or 0, defaults to 30 seconds. |
1113 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
1114 | | pub tcp_keepalive_s: u64, |
1115 | | |
1116 | | /// HTTP/2 keepalive interval (seconds). Sends HTTP/2 PING frames at this |
1117 | | /// interval to detect dead connections at the application level. |
1118 | | /// If not set or 0, defaults to 30 seconds. |
1119 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
1120 | | pub http2_keepalive_interval_s: u64, |
1121 | | |
1122 | | /// HTTP/2 keepalive timeout (seconds). If a PING response is not received |
1123 | | /// within this duration, the connection is considered dead. |
1124 | | /// If not set or 0, defaults to 20 seconds. |
1125 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
1126 | | pub http2_keepalive_timeout_s: u64, |
1127 | | } |
1128 | | |
1129 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1130 | | #[serde(deny_unknown_fields)] |
1131 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1132 | | pub struct GrpcSpec { |
1133 | | /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this. |
1134 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1135 | | pub instance_name: String, |
1136 | | |
1137 | | /// The endpoint of the grpc connection. |
1138 | | pub endpoints: Vec<GrpcEndpoint>, |
1139 | | |
1140 | | /// The type of the upstream store, this ensures that the correct server calls are made. |
1141 | | pub store_type: StoreType, |
1142 | | |
1143 | | /// Retry configuration to use when a network request fails. |
1144 | | #[serde(default)] |
1145 | | pub retry: Retry, |
1146 | | |
1147 | | /// Limit the number of simultaneous upstream requests to this many. A |
1148 | | /// value of zero is treated as unlimited. If the limit is reached the |
1149 | | /// request is queued. |
1150 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1151 | | pub max_concurrent_requests: usize, |
1152 | | |
1153 | | /// The number of connections to make to each specified endpoint to balance |
1154 | | /// the load over multiple TCP connections. Default 1. |
1155 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1156 | | pub connections_per_endpoint: usize, |
1157 | | |
1158 | | /// Maximum time (seconds) allowed for a single RPC request (e.g. a |
1159 | | /// ByteStream.Write call) before it is cancelled. |
1160 | | /// |
1161 | | /// A value of 0 (the default) disables the per-RPC timeout. Dead |
1162 | | /// connections are still detected by the HTTP/2 and TCP keepalive |
1163 | | /// mechanisms configured on each endpoint. |
1164 | | /// |
1165 | | /// For large uploads (multi-GB), either leave this at 0 or set it |
1166 | | /// large enough to accommodate the full transfer time. |
1167 | | /// |
1168 | | /// Default: 0 (disabled) |
1169 | | #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] |
1170 | | pub rpc_timeout_s: u64, |
1171 | | } |
1172 | | |
1173 | | /// The possible error codes that might occur on an upstream request. |
1174 | | #[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] |
1175 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1176 | | pub enum ErrorCode { |
1177 | | Cancelled = 1, |
1178 | | Unknown = 2, |
1179 | | InvalidArgument = 3, |
1180 | | DeadlineExceeded = 4, |
1181 | | NotFound = 5, |
1182 | | AlreadyExists = 6, |
1183 | | PermissionDenied = 7, |
1184 | | ResourceExhausted = 8, |
1185 | | FailedPrecondition = 9, |
1186 | | Aborted = 10, |
1187 | | OutOfRange = 11, |
1188 | | Unimplemented = 12, |
1189 | | Internal = 13, |
1190 | | Unavailable = 14, |
1191 | | DataLoss = 15, |
1192 | | Unauthenticated = 16, |
1193 | | // Note: This list is duplicated from nativelink-error/lib.rs. |
1194 | | } |
1195 | | |
1196 | | #[derive(Serialize, Deserialize, Debug, Clone, Default)] |
1197 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1198 | | pub struct RedisSpec { |
1199 | | /// The hostname or IP address of the Redis server. |
1200 | | /// Ex: `["redis://username:password@redis-server-url:6380/99"]` |
1201 | | /// 99 Represents database ID, 6380 represents the port. |
1202 | | #[serde(deserialize_with = "convert_vec_string_with_shellexpand")] |
1203 | | pub addresses: Vec<String>, |
1204 | | |
1205 | | /// DEPRECATED: use `command_timeout_ms` |
1206 | | /// The response timeout for the Redis connection in seconds. |
1207 | | /// |
1208 | | /// Default: 10 |
1209 | | #[serde(default)] |
1210 | | pub response_timeout_s: u64, |
1211 | | |
1212 | | /// DEPRECATED: use `connection_timeout_ms` |
1213 | | /// |
1214 | | /// The connection timeout for the Redis connection in seconds. |
1215 | | /// |
1216 | | /// Default: 10 |
1217 | | #[serde(default)] |
1218 | | pub connection_timeout_s: u64, |
1219 | | |
1220 | | /// An optional and experimental Redis channel to publish write events to. |
1221 | | /// |
1222 | | /// If set, every time a write operation is made to a Redis node |
1223 | | /// then an event will be published to a Redis channel with the given name. |
1224 | | /// If unset, the writes will still be made, |
1225 | | /// but the write events will not be published. |
1226 | | /// |
1227 | | /// Default: (Empty String / No Channel) |
1228 | | #[serde(default)] |
1229 | | pub experimental_pub_sub_channel: Option<String>, |
1230 | | |
1231 | | /// An optional prefix to prepend to all keys in this store. |
1232 | | /// |
1233 | | /// Setting this value can make it convenient to query or |
1234 | | /// organize your data according to the shared prefix. |
1235 | | /// |
1236 | | /// Default: (Empty String / No Prefix) |
1237 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1238 | | pub key_prefix: String, |
1239 | | |
1240 | | /// Set the mode Redis is operating in. |
1241 | | /// |
1242 | | /// Available options are "cluster" for |
1243 | | /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/), |
1244 | | /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/), |
1245 | | /// or "standard" if Redis is operating in neither cluster nor sentinel mode. |
1246 | | /// |
1247 | | /// Default: standard |
1248 | | #[serde(default)] |
1249 | | pub mode: RedisMode, |
1250 | | |
1251 | | /// Deprecated as redis-rs doesn't use it |
1252 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1253 | | pub broadcast_channel_capacity: usize, |
1254 | | |
1255 | | /// The amount of time in milliseconds until the redis store considers the |
1256 | | /// command to be timed out. This will trigger a retry of the command and |
1257 | | /// potentially a reconnection to the redis server. |
1258 | | /// |
1259 | | /// Default: 10000 (10 seconds) |
1260 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1261 | | pub command_timeout_ms: u64, |
1262 | | |
1263 | | /// The amount of time in milliseconds until the redis store considers the |
1264 | | /// connection to be unresponsive. This will trigger a reconnection to the |
1265 | | /// redis server. |
1266 | | /// |
1267 | | /// Default: 3000 (3 seconds) |
1268 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1269 | | pub connection_timeout_ms: u64, |
1270 | | |
1271 | | /// The amount of data to read from the redis server at a time. |
1272 | | /// This is used to limit the amount of memory used when reading |
1273 | | /// large objects from the redis server as well as limiting the |
1274 | | /// amount of time a single read operation can take. |
1275 | | /// |
1276 | | /// IMPORTANT: If this value is too high, the `command_timeout_ms` |
1277 | | /// might be triggered if the latency or throughput to the redis |
1278 | | /// server is too low. |
1279 | | /// |
1280 | | /// Default: 64KiB |
1281 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1282 | | pub read_chunk_size: usize, |
1283 | | |
1284 | | /// The number of connections to keep open to the redis server(s). |
1285 | | /// |
1286 | | /// Default: 3 |
1287 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1288 | | pub connection_pool_size: usize, |
1289 | | |
1290 | | /// The maximum number of upload chunks to allow per update. |
1291 | | /// This is used to limit the amount of memory used when uploading |
1292 | | /// large objects to the redis server. A good rule of thumb is to |
1293 | | /// think of the data as: |
1294 | | /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THEORETICAL_MAX_CONCURRENT_UPLOADS` |
1295 | | /// (note: it is a good idea to divide `AVAIL_MEMORY` by ~10 to account for other memory usage) |
1296 | | /// |
1297 | | /// Default: 10 |
1298 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1299 | | pub max_chunk_uploads_per_update: usize, |
1300 | | |
1301 | | /// The COUNT value passed when scanning keys in Redis. |
1302 | | /// This is used to hint the amount of work that should be done per response. |
1303 | | /// |
1304 | | /// Default: 10000 |
1305 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1306 | | pub scan_count: usize, |
1307 | | |
1308 | | /// Retry configuration to use when a network request fails. |
1309 | | /// See the `Retry` struct for more information. |
1310 | | /// |
1311 | | /// ```txt |
1312 | | /// Default: Retry { |
1313 | | /// max_retries: 0, /* unlimited */ |
1314 | | /// delay: 0.1, /* 100ms */ |
1315 | | /// jitter: 0.5, /* 50% */ |
1316 | | /// retry_on_errors: None, /* not used in redis store */ |
1317 | | /// } |
1318 | | /// ``` |
1319 | | #[serde(default)] |
1320 | | pub retry: Retry, |
1321 | | |
1322 | | /// Maximum number of permitted actions to the Redis store at any one time. |
1323 | | /// This stops problems with timeouts due to many, many inflight actions. |
1324 | | /// Default: 500 |
1325 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1326 | | pub max_client_permits: usize, |
1327 | | |
1328 | | /// Maximum number of items returned per cursor for the search indexes. |
1329 | | /// May reduce thundering herd issues with worker provisioner at higher node counts. |
1330 | | /// Default: 1500 |
1331 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1332 | | pub max_count_per_cursor: u64, |
1333 | | } |
1334 | | |
1335 | | #[derive(Debug, Default, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)] |
1336 | | #[serde(rename_all = "snake_case")] |
1337 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1338 | | pub enum RedisMode { |
1339 | | Cluster, |
1340 | | Sentinel, |
1341 | | #[default] |
1342 | | Standard, |
1343 | | } |
1344 | | |
1345 | | #[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)] |
1346 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1347 | | pub struct NoopSpec {} |
1348 | | |
1349 | | /// Retry configuration. The back off is exponential and, on each iteration, |
1350 | | /// a jitter is applied as a percentage of the calculated delay. For example: |
1351 | | /// ```haskell |
1352 | | /// Retry{ |
1353 | | /// max_retries: 7, |
1354 | | /// delay: 0.1, |
1355 | | /// jitter: 0.5, |
1356 | | /// } |
1357 | | /// ``` |
1358 | | /// will result in: |
1359 | | /// Attempt - Delay |
1360 | | /// 1 0ms |
1361 | | /// 2 75ms - 125ms |
1362 | | /// 3 150ms - 250ms |
1363 | | /// 4 300ms - 500ms |
1364 | | /// 5 600ms - 1s |
1365 | | /// 6 1.2s - 2s |
1366 | | /// 7 2.4s - 4s |
1367 | | /// 8 4.8s - 8s |
1368 | | /// Remember that the total delay is additive, meaning the above schedule gives |
1369 | | /// a single request that uses every attempt a total delay of 9.525s - 15.875s. |
1370 | | #[derive(Serialize, Deserialize, Clone, Debug, Default)] |
1371 | | #[serde(deny_unknown_fields)] |
1372 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1373 | | pub struct Retry { |
1374 | | /// Maximum number of retries until retrying stops. |
1375 | | /// Setting this to zero will always attempt 1 time, but not retry. |
1376 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1377 | | pub max_retries: usize, |
1378 | | |
1379 | | /// Delay in seconds for exponential back off. |
1380 | | #[serde(default)] |
1381 | | pub delay: f32, |
1382 | | |
1383 | | /// Amount of jitter to add as a percentage in decimal form. This will |
1384 | | /// change the formula like: |
1385 | | /// ```haskell |
1386 | | /// random( |
1387 | | /// (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)), |
1388 | | /// (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)), |
1389 | | /// ) |
1390 | | /// ``` |
1391 | | #[serde(default)] |
1392 | | pub jitter: f32, |
1393 | | |
1394 | | /// A list of error codes to retry on, if this is not set then the default |
1395 | | /// error codes to retry on are used. These default codes are the most |
1396 | | /// likely to be non-permanent. |
1397 | | /// - `Unknown` |
1398 | | /// - `Cancelled` |
1399 | | /// - `DeadlineExceeded` |
1400 | | /// - `ResourceExhausted` |
1401 | | /// - `Aborted` |
1402 | | /// - `Internal` |
1403 | | /// - `Unavailable` |
1404 | | /// - `DataLoss` |
1405 | | #[serde(default)] |
1406 | | pub retry_on_errors: Option<Vec<ErrorCode>>, |
1407 | | } |
1408 | | |
1409 | | /// Configuration for `ExperimentalMongoDB` store. |
1410 | | #[derive(Serialize, Deserialize, Debug, Clone)] |
1411 | | #[serde(deny_unknown_fields)] |
1412 | | #[cfg_attr(feature = "dev-schema", derive(JsonSchema))] |
1413 | | pub struct ExperimentalMongoSpec { |
1414 | | /// `MongoDB` connection string. |
1415 | | /// Example: <mongodb://localhost:27017> or <mongodb+srv://cluster.mongodb.net> |
1416 | | #[serde(deserialize_with = "convert_string_with_shellexpand")] |
1417 | | pub connection_string: String, |
1418 | | |
1419 | | /// The database name to use. |
1420 | | /// Default: "nativelink" |
1421 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1422 | | pub database: String, |
1423 | | |
1424 | | /// The collection name for CAS data. |
1425 | | /// Default: "cas" |
1426 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1427 | | pub cas_collection: String, |
1428 | | |
1429 | | /// The collection name for scheduler data. |
1430 | | /// Default: "scheduler" |
1431 | | #[serde(default, deserialize_with = "convert_string_with_shellexpand")] |
1432 | | pub scheduler_collection: String, |
1433 | | |
1434 | | /// Prefix to prepend to all keys stored in `MongoDB`. |
1435 | | /// Default: "" |
1436 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1437 | | pub key_prefix: Option<String>, |
1438 | | |
1439 | | /// The maximum amount of data to read from `MongoDB` in a single chunk (in bytes). |
1440 | | /// Default: 65536 (64KB) |
1441 | | #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] |
1442 | | pub read_chunk_size: usize, |
1443 | | |
1444 | | /// Maximum number of concurrent uploads allowed. |
1445 | | /// Default: 10 |
1446 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1447 | | pub max_concurrent_uploads: usize, |
1448 | | |
1449 | | /// Connection timeout in milliseconds. |
1450 | | /// Default: 3000 |
1451 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1452 | | pub connection_timeout_ms: u64, |
1453 | | |
1454 | | /// Command timeout in milliseconds. |
1455 | | /// Default: 10000 |
1456 | | #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] |
1457 | | pub command_timeout_ms: u64, |
1458 | | |
1459 | | /// Enable `MongoDB` change streams for real-time updates. |
1460 | | /// Required for scheduler subscriptions. |
1461 | | /// Default: false |
1462 | | #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")] |
1463 | | pub enable_change_streams: bool, |
1464 | | |
1465 | | /// Write concern 'w' parameter. |
1466 | | /// Can be a number (e.g., 1) or string (e.g., "majority"). |
1467 | | /// Default: None (uses `MongoDB` default) |
1468 | | #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")] |
1469 | | pub write_concern_w: Option<String>, |
1470 | | |
1471 | | /// Write concern 'j' parameter (journal acknowledgment). |
1472 | | /// Default: None (uses `MongoDB` default) |
1473 | | #[serde(default)] |
1474 | | pub write_concern_j: Option<bool>, |
1475 | | |
1476 | | /// Write concern timeout in milliseconds. |
1477 | | /// Default: None (uses `MongoDB` default) |
1478 | | #[serde( |
1479 | | default, |
1480 | | deserialize_with = "convert_optional_numeric_with_shellexpand" |
1481 | | )] |
1482 | | pub write_concern_timeout_ms: Option<u32>, |
1483 | | } |
1484 | | |
1485 | | impl Retry { |
1486 | 9 | pub fn make_jitter_fn(&self) -> Arc<dyn Fn(Duration) -> Duration + Send + Sync> { |
1487 | 9 | if self.jitter == 0f32 { Branch (1487:12): [True: 9, False: 0]
Branch (1487:12): [Folded - Ignored]
|
1488 | 9 | Arc::new(move |delay: Duration| delay) |
1489 | | } else { |
1490 | 0 | let local_jitter = self.jitter; |
1491 | 0 | Arc::new(move |delay: Duration| { |
1492 | 0 | delay.mul_f32(local_jitter.mul_add(rand::rng().random::<f32>() - 0.5, 1.)) |
1493 | 0 | }) |
1494 | | } |
1495 | 9 | } |
1496 | | } |