Coverage Report

Created: 2026-06-18 15:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-config/src/stores.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::time::Duration;
16
use std::collections::HashMap;
17
use std::sync::Arc;
18
19
use rand::Rng;
20
#[cfg(feature = "dev-schema")]
21
use schemars::JsonSchema;
22
use serde::{Deserialize, Serialize};
23
24
use crate::serde_utils::{
25
    convert_boolean_with_shellexpand, convert_data_size_with_shellexpand,
26
    convert_duration_with_shellexpand, convert_numeric_with_shellexpand,
27
    convert_optional_data_size_with_shellexpand, convert_optional_numeric_with_shellexpand,
28
    convert_optional_string_with_shellexpand, convert_string_with_shellexpand,
29
    convert_vec_string_with_shellexpand,
30
};
31
32
/// Name of the store. This type will be used when referencing a store
33
/// in the `CasConfig::stores`'s map key.
34
pub type StoreRefName = String;
35
36
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
37
#[serde(rename_all = "snake_case")]
38
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
39
pub enum ConfigDigestHashFunction {
40
    /// Use the sha256 hash function.
41
    /// <https://en.wikipedia.org/wiki/SHA-2>
42
    Sha256,
43
44
    /// Use the blake3 hash function.
45
    /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)>
46
    Blake3,
47
}
48
49
#[derive(Serialize, Deserialize, Debug, Clone)]
50
#[serde(rename_all = "snake_case")]
51
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
52
pub enum StoreSpec {
53
    /// Cache metrics store wraps another store and emits low-cardinality
54
    /// OpenTelemetry cache operation metrics for the wrapped store.
55
    ///
56
    /// This wrapper is opt-in. Stores that are not explicitly wrapped by
57
    /// `cache_metrics` are constructed exactly as they are without this
58
    /// wrapper and do not pay its hot-path timing or recording cost.
59
    ///
60
    /// **Example JSON Config:**
61
    /// ```json
62
    /// "cache_metrics": {
63
    ///   "cache_type": "cas",
64
    ///   "backend": {
65
    ///     "filesystem": {
66
    ///       "content_path": "~/.cache/nativelink/content_path-cas",
67
    ///       "temp_path": "~/.cache/nativelink/tmp_path-cas"
68
    ///     }
69
    ///   }
70
    /// }
71
    /// ```
72
    ///
73
    CacheMetrics(Box<CacheMetricsSpec>),
74
75
    /// Memory store will store all data in a hashmap in memory.
76
    ///
77
    /// **Example JSON Config:**
78
    /// ```json
79
    /// "memory": {
80
    ///   "eviction_policy": {
81
    ///     "max_bytes": "10mb",
82
    ///   }
83
    /// }
84
    /// ```
85
    ///
86
    Memory(MemorySpec),
87
88
    /// A generic blob store that will store files on the cloud
89
    /// provider. This configuration will never delete files, so you are
90
    /// responsible for purging old files in other ways.
91
    /// It supports the following backends:
92
    ///
93
    /// 1. **Amazon S3:**
94
    ///    S3 store will use Amazon's S3 service as a backend to store
95
    ///    the files. This configuration can be used to share files
96
    ///    across multiple instances. Uses system certificates for TLS
97
    ///    verification via `rustls-platform-verifier`.
98
    ///
99
    ///   **Example JSON Config:**
100
    ///   ```json
101
    ///   "experimental_cloud_object_store": {
102
    ///     "provider": "aws",
103
    ///     "region": "eu-north-1",
104
    ///     "bucket": "crossplane-bucket-af79aeca9",
105
    ///     "key_prefix": "test-prefix-index/",
106
    ///     "retry": {
107
    ///       "max_retries": 6,
108
    ///       "delay": 0.3,
109
    ///       "jitter": 0.5
110
    ///     },
111
    ///     "multipart_max_concurrent_uploads": 10
112
    ///   }
113
    ///   ```
114
    ///
115
    /// 2. **Google Cloud Storage:**
116
    ///    GCS store uses Google's GCS service as a backend to store
117
    ///    the files. This configuration can be used to share files
118
    ///    across multiple instances.
119
    ///
120
    ///   **Example JSON Config:**
121
    ///   ```json
122
    ///   "experimental_cloud_object_store": {
123
    ///     "provider": "gcs",
124
    ///     "bucket": "test-bucket",
125
    ///     "key_prefix": "test-prefix-index/",
126
    ///     "retry": {
127
    ///       "max_retries": 6,
128
    ///       "delay": 0.3,
129
    ///       "jitter": 0.5
130
    ///     },
131
    ///     "multipart_max_concurrent_uploads": 10
132
    ///   }
133
    ///   ```
134
    ///
135
    /// 3. **Azure Blob Store:**
136
    ///    Azure Blob store will use Microsoft's Azure Blob service as a
137
    ///    backend to store the files. This configuration can be used to
138
    ///    share files across multiple instances.
139
    ///
140
    ///   **Example JSON Config:**
141
    ///   ```json
142
    ///   "experimental_cloud_object_store": {
143
    ///     "provider": "azure",
144
    ///     "account_name": "cloudshell1393657559",
145
    ///     "container": "simple-test-container",
146
    ///     "key_prefix": "folder/",
147
    ///     "retry": {
148
    ///         "max_retries": 6,
149
    ///         "delay": 0.3,
150
    ///         "jitter": 0.5
151
    ///     },
152
    ///     "multipart_max_concurrent_uploads": 10
153
    ///   }
154
    ///   ```
155
    ///
156
    /// 4. **`NetApp` ONTAP S3**
157
    ///    `NetApp` ONTAP S3 store will use ONTAP's S3-compatible storage as a backend
158
    ///    to store files. This store is specifically configured for ONTAP's S3 requirements
159
    ///    including custom TLS configuration, credentials management, and proper vserver
160
    ///    configuration.
161
    ///
162
    ///    This store uses AWS environment variables for credentials:
163
    ///    - `AWS_ACCESS_KEY_ID`
164
    ///    - `AWS_SECRET_ACCESS_KEY`
165
    ///    - `AWS_DEFAULT_REGION`
166
    ///
167
    ///    **Example JSON Config:**
168
    ///    ```json
169
    ///    "experimental_cloud_object_store": {
170
    ///      "provider": "ontap",
171
    ///      "endpoint": "https://ontap-s3-endpoint:443",
172
    ///      "vserver_name": "your-vserver",
173
    ///      "bucket": "your-bucket",
174
    ///      "root_certificates": "/path/to/certs.pem",  // Optional
175
    ///      "key_prefix": "test-prefix/",               // Optional
176
    ///      "retry": {
177
    ///        "max_retries": 6,
178
    ///        "delay": 0.3,
179
    ///        "jitter": 0.5
180
    ///      },
181
    ///      "multipart_max_concurrent_uploads": 10
182
    ///    }
183
    ///    ```
184
    ExperimentalCloudObjectStore(ExperimentalCloudObjectSpec),
185
186
    /// ONTAP S3 Existence Cache provides a caching layer on top of the ONTAP S3 store
187
    /// to optimize repeated existence checks. It maintains an in-memory cache of object
188
    /// digests and periodically syncs this cache to disk for persistence.
189
    ///
190
    /// The cache helps reduce latency for repeated calls to check object existence,
191
    /// while still ensuring eventual consistency with the underlying ONTAP S3 store.
192
    ///
193
    /// **Example JSON Config:**
194
    /// ```json
195
    /// "ontap_s3_existence_cache": {
196
    ///   "index_path": "/path/to/cache/index.json",
197
    ///   "sync_interval_seconds": 300,
198
    ///   "backend": {
199
    ///     "endpoint": "https://ontap-s3-endpoint:443",
200
    ///     "vserver_name": "your-vserver",
201
    ///     "bucket": "your-bucket",
202
    ///     "key_prefix": "test-prefix/"
203
    ///   }
204
    /// }
205
    /// ```
206
    ///
207
    OntapS3ExistenceCache(Box<OntapS3ExistenceCacheSpec>),
208
209
    /// Verify store is used to apply verifications to an underlying
210
    /// store implementation. It is strongly encouraged to validate
211
    /// as much data as you can before accepting data from a client,
212
    /// failing to do so may cause the data in the store to be
213
    /// populated with invalid data causing all kinds of problems.
214
    ///
215
    /// The suggested configuration is to have the CAS validate the
216
    /// hash and size and the AC validate nothing.
217
    ///
218
    /// **Example JSON Config:**
219
    /// ```json
220
    /// "verify": {
221
    ///   "backend": {
222
    ///     "memory": {
223
    ///       "eviction_policy": {
224
    ///         "max_bytes": "500mb"
225
    ///       }
226
    ///     },
227
    ///   },
228
    ///   "verify_size": true,
229
    ///   "verify_hash": true
230
    /// }
231
    /// ```
232
    ///
233
    Verify(Box<VerifySpec>),
234
235
    /// Completeness checking store verifies if the
236
    /// output files & folders exist in the CAS before forwarding
237
    /// the request to the underlying store.
238
    /// Note: This store should only be used on AC stores.
239
    ///
240
    /// **Example JSON Config:**
241
    /// ```json
242
    /// "completeness_checking": {
243
    ///   "backend": {
244
    ///     "filesystem": {
245
    ///       "content_path": "~/.cache/nativelink/content_path-ac",
246
    ///       "temp_path": "~/.cache/nativelink/tmp_path-ac",
247
    ///       "eviction_policy": {
248
    ///         "max_bytes": "500mb",
249
    ///       }
250
    ///     }
251
    ///   },
252
    ///   "cas_store": {
253
    ///     "ref_store": {
254
    ///       "name": "CAS_MAIN_STORE"
255
    ///     }
256
    ///   }
257
    /// }
258
    /// ```
259
    ///
260
    CompletenessChecking(Box<CompletenessCheckingSpec>),
261
262
    /// A compression store that will compress the data inbound and
263
    /// outbound. There will be a non-trivial cost to compress and
264
    /// decompress the data, but in many cases if the final store is
265
    /// a store that requires network transport and/or storage space
266
    /// is a concern it is often faster and more efficient to use this
267
    /// store before those stores.
268
    ///
269
    /// **Example JSON Config:**
270
    /// ```json
271
    /// "compression": {
272
    ///   "compression_algorithm": {
273
    ///     "lz4": {}
274
    ///   },
275
    ///   "backend": {
276
    ///     "filesystem": {
277
    ///       "content_path": "/tmp/nativelink/data/content_path-cas",
278
    ///       "temp_path": "/tmp/nativelink/data/tmp_path-cas",
279
    ///       "eviction_policy": {
280
    ///         "max_bytes": "2gb",
281
    ///       }
282
    ///     }
283
    ///   }
284
    /// }
285
    /// ```
286
    ///
287
    Compression(Box<CompressionSpec>),
288
289
    /// A dedup store will take the inputs and run a rolling hash
290
    /// algorithm on them to slice the input into smaller parts then
291
    /// run a sha256 algorithm on the slice and if the object doesn't
292
    /// already exist, upload the slice to the `content_store` using
293
    /// a new digest of just the slice. Once all parts exist, an
294
    /// Action-Cache-like digest will be built and uploaded to the
295
    /// `index_store` which will contain a reference to each
296
    /// chunk/digest of the uploaded file. Downloading a request will
297
    /// first grab the index from the `index_store`, and forward the
298
    /// download content of each chunk as if it were one file.
299
    ///
300
    /// This store is exceptionally good when the following conditions
301
    /// are met:
302
    /// * Content is mostly the same (inserts, updates, deletes are ok)
303
    /// * Content is not compressed or encrypted
304
    /// * Uploading or downloading from `content_store` is the bottleneck.
305
    ///
306
    /// Note: This store pairs well when used with `CompressionSpec` as
307
    /// the `content_store`, but never put `DedupSpec` as the backend of
308
    /// `CompressionSpec` as it will negate all the gains.
309
    ///
310
    /// Note: When running `.has()` on this store, it will only check
311
    /// to see if the entry exists in the `index_store` and not check
312
    /// if the individual chunks exist in the `content_store`.
313
    ///
314
    /// **Example JSON Config:**
315
    /// ```json
316
    /// "dedup": {
317
    ///   "index_store": {
318
    ///     "memory": {
319
    ///       "eviction_policy": {
320
    ///          "max_bytes": "1GB",
321
    ///       }
322
    ///     }
323
    ///   },
324
    ///   "content_store": {
325
    ///     "compression": {
326
    ///       "compression_algorithm": {
327
    ///         "lz4": {}
328
    ///       },
329
    ///       "backend": {
330
    ///         "fast_slow": {
331
    ///           "fast": {
332
    ///             "memory": {
333
    ///               "eviction_policy": {
334
    ///                 "max_bytes": "500MB",
335
    ///               }
336
    ///             }
337
    ///           },
338
    ///           "slow": {
339
    ///             "filesystem": {
340
    ///               "content_path": "/tmp/nativelink/data/content_path-content",
341
    ///               "temp_path": "/tmp/nativelink/data/tmp_path-content",
342
    ///               "eviction_policy": {
343
    ///                 "max_bytes": "2gb"
344
    ///               }
345
    ///             }
346
    ///           }
347
    ///         }
348
    ///       }
349
    ///     }
350
    ///   }
351
    /// }
352
    /// ```
353
    ///
354
    Dedup(Box<DedupSpec>),
355
356
    /// Existence store will wrap around another store and cache calls
357
    /// to has so that subsequent `has_with_results` calls will be
358
    /// faster. This is useful for cases when you have a store that
359
    /// is slow to respond to has calls.
360
    /// Note: This store should only be used on CAS stores.
361
    ///
362
    /// **Example JSON Config:**
363
    /// ```json
364
    /// "existence_cache": {
365
    ///   "backend": {
366
    ///     "memory": {
367
    ///       "eviction_policy": {
368
    ///         "max_bytes": "500mb",
369
    ///       }
370
    ///     }
371
    ///   },
372
    ///   // Note this is the existence store policy, not the backend policy
373
    ///   "eviction_policy": {
374
    ///     "max_seconds": 100,
375
    ///   }
376
    /// }
377
    /// ```
378
    ///
379
    ExistenceCache(Box<ExistenceCacheSpec>),
380
381
    /// `FastSlow` store will first try to fetch the data from the `fast`
382
    /// store and then if it does not exist try the `slow` store.
383
    /// When the object does exist in the `slow` store, it will copy
384
    /// the data to the `fast` store while returning the data.
385
    /// This store should be thought of as a store that "buffers"
386
    /// the data to the `fast` store.
387
    /// On uploads it will mirror data to both `fast` and `slow` stores.
388
    ///
389
    /// WARNING: If you need data to always exist in the `slow` store
390
    /// for something like remote execution, be careful because this
391
    /// store will never check to see if the objects exist in the
392
    /// `slow` store if it exists in the `fast` store (ie: it assumes
393
    /// that if an object exists in the `fast` store it will exist in
394
    /// the `slow` store).
395
    ///
396
    /// **Example JSON Config:**
397
    /// ```json
398
    /// "fast_slow": {
399
    ///   "fast": {
400
    ///     "filesystem": {
401
    ///       "content_path": "/tmp/nativelink/data/content_path-index",
402
    ///       "temp_path": "/tmp/nativelink/data/tmp_path-index",
403
    ///       "eviction_policy": {
404
    ///         "max_bytes": "500mb",
405
    ///       }
406
    ///     }
407
    ///   },
408
    ///   "slow": {
409
    ///     "filesystem": {
410
    ///       "content_path": "/tmp/nativelink/data/content_path-index",
411
    ///       "temp_path": "/tmp/nativelink/data/tmp_path-index",
412
    ///       "eviction_policy": {
413
    ///         "max_bytes": "500mb",
414
    ///       }
415
    ///     }
416
    ///   }
417
    /// }
418
    /// ```
419
    ///
420
    FastSlow(Box<FastSlowSpec>),
421
422
    /// Shards the data to multiple stores. This is useful for cases
423
    /// when you want to distribute the load across multiple stores.
424
    /// The digest hash is used to determine which store to send the
425
    /// data to.
426
    ///
427
    /// **Example JSON Config:**
428
    /// ```json
429
    /// "shard": {
430
    ///   "stores": [
431
    ///    {
432
    ///     "store": {
433
    ///       "memory": {
434
    ///         "eviction_policy": {
435
    ///             "max_bytes": "10mb"
436
    ///         },
437
    ///       },
438
    ///     },
439
    ///     "weight": 1
440
    ///   }]
441
    /// }
442
    /// ```
443
    ///
444
    Shard(ShardSpec),
445
446
    /// Stores the data on the filesystem. This store is designed for
447
    /// local persistent storage. Restarts of this program should restore
448
    /// the previous state, meaning anything uploaded will be persistent
449
    /// as long as the filesystem integrity holds.
450
    ///
451
    /// **Example JSON Config:**
452
    /// ```json
453
    /// "filesystem": {
454
    ///   "content_path": "/tmp/nativelink/data-worker-test/content_path-cas",
455
    ///   "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas",
456
    ///   "eviction_policy": {
457
    ///     "max_bytes": "10gb",
458
    ///   }
459
    /// }
460
    /// ```
461
    ///
462
    Filesystem(FilesystemSpec),
463
464
    /// Store used to reference a store in the root store manager.
465
    /// This is useful for cases when you want to share a store in different
466
    /// nested stores. Example, you may want to share the same memory store
467
    /// used for the action cache, but use a `FastSlowSpec` and have the fast
468
    /// store also share the memory store for efficiency.
469
    ///
470
    /// **Example JSON Config:**
471
    /// ```json
472
    /// "ref_store": {
473
    ///   "name": "FS_CONTENT_STORE"
474
    /// }
475
    /// ```
476
    ///
477
    RefStore(RefSpec),
478
479
    /// Uses the size field of the digest to separate which store to send the
480
    /// data. This is useful for cases when you'd like to put small objects
481
    /// in one store and large objects in another store. This should only be
482
    /// used if the size field is the real size of the content, in other
483
    /// words, don't use on AC (Action Cache) stores. Any store where you can
484
    /// safely use `VerifySpec.verify_size = true`, this store should be safe
485
    /// to use (ie: CAS stores).
486
    ///
487
    /// **Example JSON Config:**
488
    /// ```json
489
    /// "size_partitioning": {
490
    ///   "size": "128mib",
491
    ///   "lower_store": {
492
    ///     "memory": {
493
    ///       "eviction_policy": {
494
    ///         "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100mb}"
495
    ///       }
496
    ///     }
497
    ///   },
498
    ///   "upper_store": {
499
    ///     // This store discards data that is 128mib or larger.
500
    ///     "noop": {}
501
    ///   }
502
    /// }
503
    /// ```
504
    ///
505
    SizePartitioning(Box<SizePartitioningSpec>),
506
507
    /// This store will pass-through calls to another GRPC store. This store
508
    /// is not designed to be used as a sub-store of another store, but it
509
    /// does satisfy the interface and will likely work.
510
    ///
511
    /// One major GOTCHA is that some stores use a special function on this
512
    /// store to get the size of the underlying object, which is only reliable
513
    /// when this store is serving a CAS store, not an AC store. If using
514
    /// this store directly without being a child of any store there are no
515
    /// side effects and is the most efficient way to use it.
516
    ///
517
    /// **Example JSON Config:**
518
    /// ```json
519
    /// "grpc": {
520
    ///   "instance_name": "main",
521
    ///   "endpoints": [
522
    ///     {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"}
523
    ///   ],
524
    ///   "connections_per_endpoint": "5",
525
    ///   "rpc_timeout_s": "5m",
526
    ///   "store_type": "ac",
527
    ///   // Static headers attached to every outgoing request to the upstream
528
    ///   // remote cache. Useful for fixed service-account credentials.
529
    ///   "headers": {
530
    ///     "authorization": "Bearer my-static-token"
531
    ///   },
532
    ///   // Header names to copy from the inbound client request and forward to
533
    ///   // the upstream remote cache. Use this to pass through dynamic
534
    ///   // credentials such as a JWT sent by the build client.
535
    ///   "forward_headers": ["authorization", "x-custom-token"]
536
    /// }
537
    /// ```
538
    ///
539
    Grpc(GrpcSpec),
540
541
    /// Stores data in any stores compatible with Redis APIs.
542
    ///
543
    /// Pairs well with `SizePartitioning` and/or `FastSlow` stores.
544
    /// Ideal for accepting small object sizes as most redis store
545
    /// services have a max file upload of between 256Mb-512Mb.
546
    ///
547
    /// **Example JSON Config:**
548
    /// ```json
549
    /// "redis_store": {
550
    ///   "addresses": [
551
    ///     "redis://127.0.0.1:6379/",
552
    ///   ],
553
    ///   "max_client_permits": 1000,
554
    /// }
555
    /// ```
556
    ///
557
    RedisStore(RedisSpec),
558
559
    /// Noop store is a store that sends streams into the void and all data
560
    /// retrieval will return 404 (`NotFound`). This can be useful for cases
561
    /// where you may need to partition your data and part of your data needs
562
    /// to be discarded.
563
    ///
564
    /// **Example JSON Config:**
565
    /// ```json
566
    /// "noop": {}
567
    /// ```
568
    ///
569
    Noop(NoopSpec),
570
571
    /// Experimental `MongoDB` store implementation.
572
    ///
573
    /// This store uses `MongoDB` as a backend for storing data. It supports
574
    /// both CAS (Content Addressable Storage) and scheduler data with
575
    /// optional change streams for real-time updates.
576
    ///
577
    /// **Example JSON Config:**
578
    /// ```json
579
    /// "experimental_mongo": {
580
    ///     "connection_string": "mongodb://localhost:27017",
581
    ///     "database": "nativelink",
582
    ///     "cas_collection": "cas",
583
    ///     "key_prefix": "cas:",
584
    ///     "read_chunk_size": 65536,
585
    ///     "max_concurrent_uploads": 10,
586
    ///     "enable_change_streams": false,
587
    ///     "max_requests": "100"
588
    /// }
589
    /// ```
590
    ///
591
    ExperimentalMongo(ExperimentalMongoSpec),
592
}
593
594
/// Configuration for an individual shard of the store.
595
#[derive(Serialize, Deserialize, Debug, Clone)]
596
#[serde(deny_unknown_fields)]
597
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
598
pub struct ShardConfig {
599
    /// Store to shard the data to.
600
    pub store: StoreSpec,
601
602
    /// The weight of the store. This is used to determine how much data
603
    /// should be sent to the store. The actual percentage is the
604
    /// individual store's weight divided by the sum of all the stores' weights.
605
    ///
606
    /// Default: 1
607
    #[serde(deserialize_with = "convert_optional_numeric_with_shellexpand")]
608
    pub weight: Option<u32>,
609
}
610
611
#[derive(Serialize, Deserialize, Debug, Clone)]
612
#[serde(deny_unknown_fields)]
613
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
614
pub struct ShardSpec {
615
    /// Stores to shard the data to.
616
    pub stores: Vec<ShardConfig>,
617
}
618
619
#[derive(Serialize, Deserialize, Debug, Clone)]
620
#[serde(deny_unknown_fields)]
621
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
622
pub struct CacheMetricsSpec {
623
    /// Low-cardinality cache type label for metrics, for example `cas` or `ac`.
624
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
625
    pub cache_type: String,
626
627
    /// Store to wrap with cache operation metrics.
628
    pub backend: StoreSpec,
629
}
630
631
#[derive(Serialize, Deserialize, Debug, Clone)]
632
#[serde(deny_unknown_fields)]
633
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
634
pub struct SizePartitioningSpec {
635
    /// Size to partition the data on.
636
    #[serde(deserialize_with = "convert_data_size_with_shellexpand")]
637
    pub size: u64,
638
639
    /// Store to send data when object is < (less than) size.
640
    pub lower_store: StoreSpec,
641
642
    /// Store to send data when object is >= (greater than eq) size.
643
    pub upper_store: StoreSpec,
644
}
645
646
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
647
#[serde(deny_unknown_fields)]
648
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
649
pub struct RefSpec {
650
    /// Name of the store under the root "stores" config object.
651
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
652
    pub name: String,
653
}
654
655
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
656
#[serde(deny_unknown_fields)]
657
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
658
pub struct FilesystemSpec {
659
    /// Path on the system where to store the actual content. This is where
660
    /// the bulk of the data will be placed.
661
    /// On service bootup this folder will be scanned and all files will be
662
    /// added to the cache. In the event one of the files doesn't match the
663
    /// criteria, the file will be deleted.
664
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
665
    pub content_path: String,
666
667
    /// A temporary location of where files that are being uploaded or
668
    /// deleted will be placed while the content cannot be guaranteed to be
669
    /// accurate. This location must be on the same block device as
670
    /// `content_path` so atomic moves can happen (ie: move without copy).
671
    /// All files in this folder will be deleted on every startup.
672
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
673
    pub temp_path: String,
674
675
    /// Buffer size to use when reading files. Generally this should be left
676
    /// to the default value except for testing.
677
    /// Default: 32k.
678
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
679
    pub read_buffer_size: u32,
680
681
    /// Policy used to evict items out of the store. Failure to set this
682
    /// value will cause items to never be removed from the store causing
683
    /// infinite memory usage.
684
    pub eviction_policy: Option<EvictionPolicy>,
685
686
    /// The block size of the filesystem for the running machine. This
687
    /// value is used to determine an entry's actual size consumed on disk.
688
    /// For a 4KB block size filesystem, a 1B file actually consumes 4KB.
689
    /// Default: 4kb
690
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
691
    pub block_size: u64,
692
693
    /// Maximum number of concurrent write operations allowed.
694
    /// Each write involves streaming data to a temp file and calling `sync_all()`,
695
    /// which can saturate disk I/O when many writes happen simultaneously.
696
    /// Limiting concurrency prevents disk saturation from blocking the async
697
    /// runtime.
698
    /// A value of 0 means unlimited (no concurrency limit).
699
    /// Default: unlimited
700
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
701
    pub max_concurrent_writes: usize,
702
}
703
704
// NetApp ONTAP S3 Spec
705
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
706
#[serde(deny_unknown_fields)]
707
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
708
pub struct ExperimentalOntapS3Spec {
709
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
710
    pub endpoint: String,
711
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
712
    pub vserver_name: String,
713
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
714
    pub bucket: String,
715
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
716
    pub root_certificates: Option<String>,
717
718
    /// Common retry and upload configuration
719
    #[serde(flatten)]
720
    pub common: CommonObjectSpec,
721
}
722
723
// Cloudflare R2 Spec
724
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
725
#[serde(deny_unknown_fields)]
726
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
727
pub struct ExperimentalR2Spec {
728
    /// Cloudflare account ID. Endpoint is derived as
729
    /// `https://{account_id}.r2.cloudflarestorage.com`.
730
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
731
    pub account_id: String,
732
733
    /// Bucket name to use as the backend.
734
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
735
    pub bucket: String,
736
737
    /// Explicit R2 access key.
738
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
739
    pub access_key_id: Option<String>,
740
741
    /// Explicit R2 secret key.
742
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
743
    pub secret_access_key: Option<String>,
744
745
    /// Retry and upload settings.
746
    #[serde(flatten)]
747
    pub common: CommonObjectSpec,
748
}
749
750
#[derive(Serialize, Deserialize, Debug, Clone)]
751
#[serde(deny_unknown_fields)]
752
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
753
pub struct OntapS3ExistenceCacheSpec {
754
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
755
    pub index_path: String,
756
    #[serde(deserialize_with = "convert_numeric_with_shellexpand")]
757
    pub sync_interval_seconds: u32,
758
    pub backend: Box<ExperimentalOntapS3Spec>,
759
}
760
761
#[derive(Serialize, Deserialize, Default, Debug, Clone, Copy, PartialEq, Eq)]
762
#[serde(rename_all = "snake_case")]
763
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
764
pub enum StoreDirection {
765
    /// The store operates normally and all get and put operations are
766
    /// handled by it.
767
    #[default]
768
    Both,
769
    /// Update operations will cause persistence to this store, but Get
770
    /// operations will be ignored.
771
    /// This only makes sense on the fast store as the slow store will
772
    /// never get written to on Get anyway.
773
    Update,
774
    /// Get operations will cause persistence to this store, but Update
775
    /// operations will be ignored.
776
    Get,
777
    /// Operate as a read only store, only really makes sense if there's
778
    /// another way to write to it.
779
    ReadOnly,
780
}
781
782
#[derive(Serialize, Deserialize, Debug, Clone)]
783
#[serde(deny_unknown_fields)]
784
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
785
pub struct FastSlowSpec {
786
    /// Fast store that will be attempted to be contacted before reaching
787
    /// out to the `slow` store.
788
    pub fast: StoreSpec,
789
790
    /// How to handle the fast store.  This can be useful to set to Get for
791
    /// worker nodes such that results are persisted to the slow store only.
792
    #[serde(default)]
793
    pub fast_direction: StoreDirection,
794
795
    /// If the object does not exist in the `fast` store it will try to
796
    /// get it from this store.
797
    pub slow: StoreSpec,
798
799
    /// How to handle the slow store.  This can be useful if creating a diode
800
    /// and you wish to have an upstream read only store.
801
    #[serde(default)]
802
    pub slow_direction: StoreDirection,
803
804
    /// Reads of blobs at or above this size skip the leader/follower dedup
805
    /// map and stream straight from the slow store without populating the
806
    /// fast tier. `0` (the default) disables the bypass: every read goes
807
    /// through dedup, matching the prior behaviour. Enable it by setting a
808
    /// threshold — 256 MiB is a reasonable starting point for backends where
809
    /// large-blob dedup is a net loss (followers tend to time out anyway),
810
    /// but the right value is workload-dependent.
811
    /// Default: disabled (0)
812
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
813
    pub bypass_dedup_threshold_bytes: u64,
814
}
815
816
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)]
817
#[serde(deny_unknown_fields)]
818
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
819
pub struct MemorySpec {
820
    /// Policy used to evict items out of the store. Failure to set this
821
    /// value will cause items to never be removed from the store causing
822
    /// infinite memory usage.
823
    pub eviction_policy: Option<EvictionPolicy>,
824
}
825
826
#[derive(Serialize, Deserialize, Debug, Clone)]
827
#[serde(deny_unknown_fields)]
828
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
829
pub struct DedupSpec {
830
    /// Store used to store the index of each dedup slice. This store
831
    /// should generally be fast and small.
832
    pub index_store: StoreSpec,
833
834
    /// The store where the individual chunks will be uploaded. This
835
    /// store should generally be the slower & larger store.
836
    pub content_store: StoreSpec,
837
838
    /// Minimum size that a chunk will be when slicing up the content.
839
    /// Note: This setting can be increased to improve performance
840
    /// because it will actually not check this number of bytes when
841
    /// deciding where to partition the data.
842
    ///
843
    /// Default: 64k
844
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
845
    pub min_size: u32,
846
847
    /// A best-effort attempt will be made to keep the average size
848
    /// of the chunks to this number. It is not a guarantee, but a
849
    /// slight attempt will be made.
850
    ///
851
    /// This value will also be about the threshold used to determine
852
    /// if we should even attempt to dedup the entry or just forward
853
    /// it directly to the `content_store` without an index. The actual
854
    /// value will be about `normal_size * 1.3` due to implementation
855
    /// details.
856
    ///
857
    /// Default: 256k
858
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
859
    pub normal_size: u32,
860
861
    /// Maximum size a chunk is allowed to be.
862
    ///
863
    /// Default: 512k
864
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
865
    pub max_size: u32,
866
867
    /// Due to implementation detail, we want to prefer to download
868
    /// the first chunks of the file so we can stream the content
869
    /// out and free up some of our buffers. This configuration
870
    /// will be used to to restrict the number of concurrent chunk
871
    /// downloads at a time per `get()` request.
872
    ///
873
    /// This setting will also affect how much memory might be used
874
    /// per `get()` request. Estimated worst case memory per `get()`
875
    /// request is: `max_concurrent_fetch_per_get * max_size`.
876
    ///
877
    /// Default: 10
878
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
879
    pub max_concurrent_fetch_per_get: u32,
880
}
881
882
#[derive(Serialize, Deserialize, Debug, Clone)]
883
#[serde(deny_unknown_fields)]
884
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
885
pub struct ExistenceCacheSpec {
886
    /// The underlying store wrap around. All content will first flow
887
    /// through self before forwarding to backend. In the event there
888
    /// is an error detected in self, the connection to the backend
889
    /// will be terminated, and early termination should always cause
890
    /// updates to fail on the backend.
891
    pub backend: StoreSpec,
892
893
    /// Policy used to evict items out of the store. Failure to set this
894
    /// value will cause items to never be removed from the store causing
895
    /// infinite memory usage.
896
    pub eviction_policy: Option<EvictionPolicy>,
897
}
898
899
#[derive(Serialize, Deserialize, Debug, Clone)]
900
#[serde(deny_unknown_fields)]
901
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
902
pub struct VerifySpec {
903
    /// The underlying store wrap around. All content will first flow
904
    /// through self before forwarding to backend. In the event there
905
    /// is an error detected in self, the connection to the backend
906
    /// will be terminated, and early termination should always cause
907
    /// updates to fail on the backend.
908
    pub backend: StoreSpec,
909
910
    /// If set the store will verify the size of the data before accepting
911
    /// an upload of data.
912
    ///
913
    /// This should be set to false for AC, but true for CAS stores.
914
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
915
    pub verify_size: bool,
916
917
    /// If the data should be hashed and verify that the key matches the
918
    /// computed hash. The hash function is automatically determined based
919
    /// request and if not set will use the global default.
920
    ///
921
    /// This should be set to false for AC, but true for CAS stores.
922
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
923
    pub verify_hash: bool,
924
}
925
926
#[derive(Serialize, Deserialize, Debug, Clone)]
927
#[serde(deny_unknown_fields)]
928
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
929
pub struct CompletenessCheckingSpec {
930
    /// The underlying store that will have it's results validated before sending to client.
931
    pub backend: StoreSpec,
932
933
    /// When a request is made, the results are decoded and all output digests/files are verified
934
    /// to exist in this CAS store before returning success.
935
    pub cas_store: StoreSpec,
936
}
937
938
#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Eq, Clone, Copy)]
939
#[serde(deny_unknown_fields)]
940
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
941
pub struct Lz4Config {
942
    /// Size of the blocks to compress.
943
    /// Higher values require more ram, but might yield slightly better
944
    /// compression ratios.
945
    ///
946
    /// Default: 65536 (64k).
947
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
948
    pub block_size: u32,
949
950
    /// Maximum size allowed to attempt to deserialize data into.
951
    /// This is needed because the `block_size` is embedded into the data
952
    /// so if there was a bad actor, they could upload an extremely large
953
    /// `block_size`'ed entry and we'd allocate a large amount of memory
954
    /// when retrieving the data. To prevent this from happening, we
955
    /// allow you to specify the maximum that we'll attempt to deserialize.
956
    ///
957
    /// Default: value in `block_size`.
958
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
959
    pub max_decode_block_size: u32,
960
}
961
962
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Copy)]
963
#[serde(rename_all = "snake_case")]
964
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
965
pub enum CompressionAlgorithm {
966
    /// LZ4 compression algorithm is extremely fast for compression and
967
    /// decompression, however does not perform very well in compression
968
    /// ratio. In most cases build artifacts are highly compressible, however
969
    /// lz4 is quite good at aborting early if the data is not deemed very
970
    /// compressible.
971
    ///
972
    /// see: <https://lz4.github.io/lz4/>
973
    Lz4(Lz4Config),
974
}
975
976
#[derive(Serialize, Deserialize, Debug, Clone)]
977
#[serde(deny_unknown_fields)]
978
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
979
pub struct CompressionSpec {
980
    /// The underlying store wrap around. All content will first flow
981
    /// through self before forwarding to backend. In the event there
982
    /// is an error detected in self, the connection to the backend
983
    /// will be terminated, and early termination should always cause
984
    /// updates to fail on the backend.
985
    pub backend: StoreSpec,
986
987
    /// The compression algorithm to use.
988
    pub compression_algorithm: CompressionAlgorithm,
989
}
990
991
/// Eviction policy always works on LRU (Least Recently Used). Any time an entry
992
/// is touched it updates the timestamp. Inserts and updates will execute the
993
/// eviction policy removing any expired entries and/or the oldest entries
994
/// until the store size becomes smaller than `max_bytes`.
995
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)]
996
#[serde(deny_unknown_fields)]
997
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
998
pub struct EvictionPolicy {
999
    /// Maximum number of bytes before eviction takes place.
1000
    /// Default: 0. Zero means never evict based on size.
1001
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
1002
    pub max_bytes: usize,
1003
1004
    /// When eviction starts based on hitting `max_bytes`, continue until
1005
    /// `max_bytes - evict_bytes` is met to create a low watermark.  This stops
1006
    /// operations from thrashing when the store is close to the limit.
1007
    /// Default: 0
1008
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
1009
    pub evict_bytes: usize,
1010
1011
    /// Maximum number of seconds for an entry to live since it was last
1012
    /// accessed before it is evicted.
1013
    /// Default: 0. Zero means never evict based on time.
1014
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1015
    pub max_seconds: u32,
1016
1017
    /// Maximum size of the store before an eviction takes place.
1018
    /// Default: 0. Zero means never evict based on count.
1019
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1020
    pub max_count: u64,
1021
}
1022
1023
#[derive(Serialize, Deserialize, Debug, Clone)]
1024
#[serde(tag = "provider", rename_all = "snake_case")]
1025
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1026
pub enum ExperimentalCloudObjectSpec {
1027
    Aws(ExperimentalAwsSpec),
1028
    Gcs(ExperimentalGcsSpec),
1029
    Azure(ExperimentalAzureSpec),
1030
    Ontap(ExperimentalOntapS3Spec),
1031
    R2(ExperimentalR2Spec),
1032
}
1033
1034
impl Default for ExperimentalCloudObjectSpec {
1035
0
    fn default() -> Self {
1036
0
        Self::Aws(ExperimentalAwsSpec::default())
1037
0
    }
1038
}
1039
1040
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1041
#[serde(deny_unknown_fields)]
1042
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1043
pub struct ExperimentalAwsSpec {
1044
    /// S3 region. Usually us-east-1, us-west-2, af-south-1, exc...
1045
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1046
    pub region: String,
1047
1048
    /// Bucket name to use as the backend.
1049
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1050
    pub bucket: String,
1051
1052
    /// Common retry and upload configuration
1053
    #[serde(flatten)]
1054
    pub common: CommonObjectSpec,
1055
}
1056
1057
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1058
#[serde(deny_unknown_fields)]
1059
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1060
pub struct ExperimentalGcsSpec {
1061
    /// Bucket name to use as the backend.
1062
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1063
    pub bucket: String,
1064
1065
    /// Chunk size for resumable uploads.
1066
    ///
1067
    /// Default: 2MB
1068
    #[serde(
1069
        default,
1070
        deserialize_with = "convert_optional_data_size_with_shellexpand"
1071
    )]
1072
    pub resumable_chunk_size: Option<usize>,
1073
1074
    /// Common retry and upload configuration
1075
    #[serde(flatten)]
1076
    pub common: CommonObjectSpec,
1077
1078
    /// Error if authentication was not found.
1079
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1080
    pub authentication_required: bool,
1081
1082
    /// Connection timeout in milliseconds.
1083
    /// Default: 3000
1084
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1085
    pub connection_timeout_s: u64,
1086
1087
    /// Read timeout in milliseconds.
1088
    /// Default: 3000
1089
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1090
    pub read_timeout_s: u64,
1091
}
1092
1093
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1094
#[serde(deny_unknown_fields)]
1095
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1096
pub struct ExperimentalAzureSpec {
1097
    /// The Azure Storage account name.
1098
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1099
    pub account_name: String,
1100
1101
    /// The container name to use as the backend.
1102
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1103
    pub container: String,
1104
1105
    /// Common retry and upload configuration.
1106
    #[serde(flatten)]
1107
    pub common: CommonObjectSpec,
1108
1109
    /// Connection timeout in milliseconds.
1110
    /// Default: 3000
1111
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1112
    pub connection_timeout_s: u64,
1113
1114
    /// Read timeout in milliseconds.
1115
    /// Default: 3000
1116
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1117
    pub read_timeout_s: u64,
1118
}
1119
1120
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1121
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1122
pub struct CommonObjectSpec {
1123
    /// If you wish to prefix the location in the bucket. If None, no prefix will be used.
1124
    #[serde(default)]
1125
    pub key_prefix: Option<String>,
1126
1127
    /// Retry configuration to use when a network request fails.
1128
    #[serde(default)]
1129
    pub retry: Retry,
1130
1131
    /// If the number of seconds since the `last_modified` time of the object
1132
    /// is greater than this value, the object will not be considered
1133
    /// "existing". This allows for external tools to delete objects that
1134
    /// have not been uploaded in a long time. If a client receives a `NotFound`
1135
    /// the client should re-upload the object.
1136
    ///
1137
    /// There should be sufficient buffer time between how long the expiration
1138
    /// configuration of the external tool is and this value. Keeping items
1139
    /// around for a few days is generally a good idea.
1140
    ///
1141
    /// Default: 0. Zero means never consider an object expired.
1142
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1143
    pub consider_expired_after_s: u32,
1144
1145
    /// The maximum buffer size to retain in case of a retryable error
1146
    /// during upload. Setting this to zero will disable upload buffering;
1147
    /// this means that in the event of a failure during upload, the entire
1148
    /// upload will be aborted and the client will likely receive an error.
1149
    ///
1150
    /// Default: 5MB.
1151
    #[serde(
1152
        default,
1153
        deserialize_with = "convert_optional_data_size_with_shellexpand"
1154
    )]
1155
    pub max_retry_buffer_per_request: Option<usize>,
1156
1157
    /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`.
1158
    ///
1159
    /// Default: 10.
1160
    ///
1161
    #[serde(
1162
        default,
1163
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1164
    )]
1165
    pub multipart_max_concurrent_uploads: Option<usize>,
1166
1167
    /// Allow unencrypted HTTP connections. Only use this for local testing.
1168
    ///
1169
    /// Default: false
1170
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1171
    pub insecure_allow_http: bool,
1172
1173
    /// Disable http/2 connections and only use http/1.1. Default client
1174
    /// configuration will have http/1.1 and http/2 enabled for connection
1175
    /// schemes. Http/2 should be disabled if environments have poor support
1176
    /// or performance related to http/2. Safe to keep default unless
1177
    /// underlying network environment, S3, or GCS API servers specify otherwise.
1178
    ///
1179
    /// Default: false
1180
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1181
    pub disable_http2: bool,
1182
}
1183
1184
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
1185
#[serde(rename_all = "snake_case")]
1186
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1187
pub enum StoreType {
1188
    /// The store is content addressable storage.
1189
    Cas,
1190
    /// The store is an action cache.
1191
    Ac,
1192
}
1193
1194
#[derive(Serialize, Deserialize, Debug, Clone)]
1195
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1196
pub struct ClientTlsConfig {
1197
    /// Path to the certificate authority to use to validate the remote.
1198
    ///
1199
    /// Default: None
1200
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1201
    pub ca_file: Option<String>,
1202
1203
    /// Path to the certificate file for client authentication.
1204
    ///
1205
    /// Default: None
1206
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1207
    pub cert_file: Option<String>,
1208
1209
    /// Path to the private key file for client authentication.
1210
    ///
1211
    /// Default: None
1212
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1213
    pub key_file: Option<String>,
1214
1215
    /// If set the client will use the native roots for TLS connections.
1216
    ///
1217
    /// Default: false
1218
    #[serde(default)]
1219
    pub use_native_roots: Option<bool>,
1220
}
1221
1222
#[derive(Serialize, Deserialize, Debug, Clone)]
1223
#[serde(deny_unknown_fields)]
1224
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1225
pub struct GrpcEndpoint {
1226
    /// The endpoint address (i.e. grpc(s)://example.com:443).
1227
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
1228
    pub address: String,
1229
    /// The TLS configuration to use to connect to the endpoint (if grpcs).
1230
    pub tls_config: Option<ClientTlsConfig>,
1231
    /// The maximum concurrency to allow on this endpoint.
1232
    #[serde(
1233
        default,
1234
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1235
    )]
1236
    pub concurrency_limit: Option<usize>,
1237
1238
    /// Timeout for establishing a TCP connection to the endpoint (seconds).
1239
    /// If not set or 0, defaults to 30 seconds.
1240
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1241
    pub connect_timeout_s: u64,
1242
1243
    /// TCP keepalive interval (seconds). Sends TCP keepalive probes at this
1244
    /// interval to detect dead connections at the OS level.
1245
    /// If not set or 0, defaults to 30 seconds.
1246
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1247
    pub tcp_keepalive_s: u64,
1248
1249
    /// HTTP/2 keepalive interval (seconds). Sends HTTP/2 PING frames at this
1250
    /// interval to detect dead connections at the application level.
1251
    /// If not set or 0, defaults to 30 seconds.
1252
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1253
    pub http2_keepalive_interval_s: u64,
1254
1255
    /// HTTP/2 keepalive timeout (seconds). If a PING response is not received
1256
    /// within this duration, the connection is considered dead.
1257
    /// If not set or 0, defaults to 20 seconds.
1258
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1259
    pub http2_keepalive_timeout_s: u64,
1260
}
1261
1262
#[derive(Serialize, Deserialize, Debug, Clone)]
1263
#[serde(deny_unknown_fields)]
1264
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1265
pub struct GrpcSpec {
1266
    /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this.
1267
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1268
    pub instance_name: String,
1269
1270
    /// The endpoint of the grpc connection.
1271
    pub endpoints: Vec<GrpcEndpoint>,
1272
1273
    /// The type of the upstream store, this ensures that the correct server calls are made.
1274
    pub store_type: StoreType,
1275
1276
    /// Retry configuration to use when a network request fails.
1277
    #[serde(default)]
1278
    pub retry: Retry,
1279
1280
    /// Limit the number of simultaneous upstream requests to this many.  A
1281
    /// value of zero is treated as unlimited.  If the limit is reached the
1282
    /// request is queued.
1283
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1284
    pub max_concurrent_requests: usize,
1285
1286
    /// The number of connections to make to each specified endpoint to balance
1287
    /// the load over multiple TCP connections.  Default 1.
1288
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1289
    pub connections_per_endpoint: usize,
1290
1291
    /// Maximum time (seconds) allowed for a single RPC request (e.g. a
1292
    /// ByteStream.Write call) before it is cancelled.
1293
    ///
1294
    /// A value of 0 (the default) disables the per-RPC timeout. Dead
1295
    /// connections are still detected by the HTTP/2 and TCP keepalive
1296
    /// mechanisms configured on each endpoint.
1297
    ///
1298
    /// For large uploads (multi-GB), either leave this at 0 or set it
1299
    /// large enough to accommodate the full transfer time.
1300
    ///
1301
    /// Default: 0 (disabled)
1302
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1303
    pub rpc_timeout_s: u64,
1304
1305
    /// Use legacy `ByteStream` resource name format, omitting the digest
1306
    /// function component from the path.
1307
    ///
1308
    /// Modern `NativeLink` generates resource names like:
1309
    ///   `{instance}/blobs/{digest_function}/{hash}/{size}`
1310
    ///
1311
    /// Older backends (e.g. Buildbarn pre-v0.3) expect the original format:
1312
    ///   `{instance}/blobs/{hash}/{size}`
1313
    ///
1314
    /// Set this to `true` when connecting to such backends to avoid
1315
    /// `InvalidArgument: Unsupported digest function` errors.
1316
    ///
1317
    /// Default: false
1318
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1319
    pub use_legacy_resource_names: bool,
1320
1321
    /// Static headers to attach to every outgoing gRPC request sent to this
1322
    /// store's upstream endpoints. Useful for fixed authentication tokens
1323
    /// (e.g. `{"authorization": "Bearer <token>"}`) and other static metadata.
1324
    #[serde(default)]
1325
    pub headers: HashMap<String, String>,
1326
1327
    /// Header names to forward from the incoming client request to every
1328
    /// outgoing upstream request. The header value is taken from the client
1329
    /// request that triggered this store operation. Use this to pass through
1330
    /// dynamic credentials such as JWT tokens sent by build clients.
1331
    ///
1332
    /// Example: `["authorization", "x-custom-token"]`
1333
    ///
1334
    /// `NativeLink` also automatically injects the current OpenTelemetry trace
1335
    /// context (`traceparent` / `tracestate`) into every outgoing request.
1336
    #[serde(default)]
1337
    pub forward_headers: Vec<String>,
1338
}
1339
1340
/// The possible error codes that might occur on an upstream request.
1341
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
1342
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1343
pub enum ErrorCode {
1344
    Cancelled = 1,
1345
    Unknown = 2,
1346
    InvalidArgument = 3,
1347
    DeadlineExceeded = 4,
1348
    NotFound = 5,
1349
    AlreadyExists = 6,
1350
    PermissionDenied = 7,
1351
    ResourceExhausted = 8,
1352
    FailedPrecondition = 9,
1353
    Aborted = 10,
1354
    OutOfRange = 11,
1355
    Unimplemented = 12,
1356
    Internal = 13,
1357
    Unavailable = 14,
1358
    DataLoss = 15,
1359
    Unauthenticated = 16,
1360
    // Note: This list is duplicated from nativelink-error/lib.rs.
1361
}
1362
1363
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
1364
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1365
pub struct RedisSpec {
1366
    /// The hostname or IP address of the Redis server.
1367
    /// Ex: `["redis://username:password@redis-server-url:6380/99"]`
1368
    /// 99 Represents database ID, 6380 represents the port.
1369
    #[serde(deserialize_with = "convert_vec_string_with_shellexpand")]
1370
    pub addresses: Vec<String>,
1371
1372
    /// DEPRECATED: use `command_timeout_ms`
1373
    /// The response timeout for the Redis connection in seconds.
1374
    ///
1375
    /// Default: 10
1376
    #[serde(default)]
1377
    pub response_timeout_s: u64,
1378
1379
    /// DEPRECATED: use `connection_timeout_ms`
1380
    ///
1381
    /// The connection timeout for the Redis connection in seconds.
1382
    ///
1383
    /// Default: 10
1384
    #[serde(default)]
1385
    pub connection_timeout_s: u64,
1386
1387
    /// An optional and experimental Redis channel to publish write events to.
1388
    ///
1389
    /// If set, every time a write operation is made to a Redis node
1390
    /// then an event will be published to a Redis channel with the given name.
1391
    /// If unset, the writes will still be made,
1392
    /// but the write events will not be published.
1393
    ///
1394
    /// Default: (Empty String / No Channel)
1395
    #[serde(default)]
1396
    pub experimental_pub_sub_channel: Option<String>,
1397
1398
    /// An optional prefix to prepend to all keys in this store.
1399
    ///
1400
    /// Setting this value can make it convenient to query or
1401
    /// organize your data according to the shared prefix.
1402
    ///
1403
    /// Default: (Empty String / No Prefix)
1404
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1405
    pub key_prefix: String,
1406
1407
    /// Set the mode Redis is operating in.
1408
    ///
1409
    /// Available options are "cluster" for
1410
    /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/),
1411
    /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/),
1412
    /// or "standard" if Redis is operating in neither cluster nor sentinel mode.
1413
    ///
1414
    /// Default: standard,
1415
    #[serde(default)]
1416
    pub mode: RedisMode,
1417
1418
    /// Deprecated as redis-rs doesn't use it
1419
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1420
    pub broadcast_channel_capacity: usize,
1421
1422
    /// The amount of time in milliseconds until the redis store considers the
1423
    /// command to be timed out. This will trigger a retry of the command and
1424
    /// potentially a reconnection to the redis server.
1425
    ///
1426
    /// Default: 10000 (10 seconds)
1427
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1428
    pub command_timeout_ms: u64,
1429
1430
    /// The amount of time in milliseconds until the redis store considers the
1431
    /// connection to unresponsive. This will trigger a reconnection to the
1432
    /// redis server.
1433
    ///
1434
    /// Default: 3000 (3 seconds)
1435
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1436
    pub connection_timeout_ms: u64,
1437
1438
    /// Per-call ceiling for the `check_health` PING in milliseconds.
1439
    ///
1440
    /// Default: 4000 (4 seconds)
1441
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1442
    pub health_check_timeout_ms: u64,
1443
1444
    /// The amount of data to read from the redis server at a time.
1445
    /// This is used to limit the amount of memory used when reading
1446
    /// large objects from the redis server as well as limiting the
1447
    /// amount of time a single read operation can take.
1448
    ///
1449
    /// IMPORTANT: If this value is too high, the `command_timeout_ms`
1450
    /// might be triggered if the latency or throughput to the redis
1451
    /// server is too low.
1452
    ///
1453
    /// Default: 64KiB
1454
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1455
    pub read_chunk_size: usize,
1456
1457
    /// The number of connections to keep open to the redis server(s).
1458
    ///
1459
    /// Default: 3
1460
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1461
    pub connection_pool_size: usize,
1462
1463
    /// The maximum number of upload chunks to allow per update.
1464
    /// This is used to limit the amount of memory used when uploading
1465
    /// large objects to the redis server. A good rule of thumb is to
1466
    /// think of the data as:
1467
    /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THORETICAL_MAX_CONCURRENT_UPLOADS`
1468
    /// (note: it is a good idea to divide `AVAIL_MAX_MEMORY` by ~10 to account for other memory usage)
1469
    ///
1470
    /// Default: 10
1471
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1472
    pub max_chunk_uploads_per_update: usize,
1473
1474
    /// The COUNT value passed when scanning keys in Redis.
1475
    /// This is used to hint the amount of work that should be done per response.
1476
    ///
1477
    /// Default: 10000
1478
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1479
    pub scan_count: usize,
1480
1481
    /// Retry configuration to use when a network request fails.
1482
    #[serde(default)]
1483
    pub retry: Retry,
1484
1485
    /// Maximum number of permitted actions to the Redis store at any one time
1486
    /// This stops problems with timeouts due to many, many inflight actions
1487
    /// Default: 500
1488
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1489
    pub max_client_permits: usize,
1490
1491
    /// Maximum number of items returned per cursor for the search indexes
1492
    /// May reduce thundering herd issues with worker provisioner at higher node counts,
1493
    /// Default: 1500
1494
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1495
    pub max_count_per_cursor: u64,
1496
}
1497
1498
#[derive(Debug, Default, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
1499
#[serde(rename_all = "snake_case")]
1500
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1501
pub enum RedisMode {
1502
    /// Use Redis Cluster.
1503
    Cluster,
1504
1505
    /// Use Redis Sentinel.
1506
    Sentinel,
1507
1508
    /// Use a standalone Redis server.
1509
    #[default]
1510
    Standard,
1511
}
1512
1513
#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)]
1514
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1515
pub struct NoopSpec {}
1516
1517
/// Retry configuration. The delay grows exponentially, and on each iteration
1518
/// a jitter, given as a percentage of the calculated delay, is applied. For example:
1519
/// ```haskell
1520
/// Retry{
1521
///   max_retries: 7,
1522
///   delay: 0.1,
1523
///   jitter: 0.5,
1524
/// }
1525
/// ```
1526
/// will result in:
1527
/// Attempt - Delay
1528
/// 1         0ms
1529
/// 2         75ms - 125ms
1530
/// 3         150ms - 250ms
1531
/// 4         300ms - 500ms
1532
/// 5         600ms - 1s
1533
/// 6         1.2s - 2s
1534
/// 7         2.4s - 4s
1535
/// 8         4.8s - 8s
1536
/// Remember that the total delay is additive, meaning that with the above
1537
/// results a single request would have a total delay of 9.525s - 15.875s.
1538
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
1539
#[serde(deny_unknown_fields)]
1540
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1541
pub struct Retry {
1542
    /// Maximum number of retries until retrying stops.
1543
    /// Setting this to zero will always attempt 1 time, but not retry.
1544
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1545
    pub max_retries: usize,
1546
1547
    /// Delay in seconds for exponential back off.
1548
    #[serde(default)]
1549
    pub delay: f32,
1550
1551
    /// Amount of jitter to add as a percentage in decimal form. This
1552
    /// changes the delay formula to:
1553
    /// ```haskell
1554
    /// random(
1555
    ///    (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)),
1556
    ///    (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)),
1557
    /// )
1558
    /// ```
1559
    #[serde(default)]
1560
    pub jitter: f32,
1561
1562
    /// A list of error codes to retry on, if this is not set then the default
1563
    /// error codes to retry on are used.  These default codes are the most
1564
    /// likely to be non-permanent.
1565
    ///  - `Unknown`
1566
    ///  - `Cancelled`
1567
    ///  - `DeadlineExceeded`
1568
    ///  - `ResourceExhausted`
1569
    ///  - `Aborted`
1570
    ///  - `Internal`
1571
    ///  - `Unavailable`
1572
    ///  - `DataLoss`
1573
    #[serde(default)]
1574
    pub retry_on_errors: Option<Vec<ErrorCode>>,
1575
}
1576
1577
/// Configuration for `ExperimentalMongoDB` store.
1578
#[derive(Serialize, Deserialize, Debug, Clone)]
1579
#[serde(deny_unknown_fields)]
1580
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1581
pub struct ExperimentalMongoSpec {
1582
    /// `ExperimentalMongoDB` connection string.
1583
    /// Example: <mongodb://localhost:27017> or <mongodb+srv://cluster.mongodb.net>
1584
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
1585
    pub connection_string: String,
1586
1587
    /// The database name to use.
1588
    /// Default: "nativelink"
1589
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1590
    pub database: String,
1591
1592
    /// The collection name for CAS data.
1593
    /// Default: "cas"
1594
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1595
    pub cas_collection: String,
1596
1597
    /// The collection name for scheduler data.
1598
    /// Default: "scheduler"
1599
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1600
    pub scheduler_collection: String,
1601
1602
    /// Prefix to prepend to all keys stored in `MongoDB`.
1603
    /// Default: ""
1604
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1605
    pub key_prefix: Option<String>,
1606
1607
    /// The maximum amount of data to read from `MongoDB` in a single chunk (in bytes).
1608
    /// Default: 65536 (64KB)
1609
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
1610
    pub read_chunk_size: usize,
1611
1612
    /// Deprecated and unused.
1613
    /// Maximum number of concurrent uploads allowed.
1614
    /// Default: 10
1615
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1616
    pub max_concurrent_uploads: usize,
1617
1618
    /// Connection timeout in milliseconds.
1619
    /// Default: 3000
1620
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1621
    pub connection_timeout_ms: u64,
1622
1623
    /// Command timeout in milliseconds.
1624
    /// Default: 10000
1625
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1626
    pub command_timeout_ms: u64,
1627
1628
    /// Enable `MongoDB` change streams for real-time updates.
1629
    /// Required for scheduler subscriptions.
1630
    /// Default: false
1631
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1632
    pub enable_change_streams: bool,
1633
1634
    /// Write concern 'w' parameter.
1635
    /// Can be a number (e.g., 1) or string (e.g., "majority").
1636
    /// Default: None (uses `MongoDB` default)
1637
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1638
    pub write_concern_w: Option<String>,
1639
1640
    /// Write concern 'j' parameter (journal acknowledgment).
1641
    /// Default: None (uses `MongoDB` default)
1642
    #[serde(default)]
1643
    pub write_concern_j: Option<bool>,
1644
1645
    /// Write concern timeout in milliseconds.
1646
    /// Default: None (uses `MongoDB` default)
1647
    #[serde(
1648
        default,
1649
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1650
    )]
1651
    pub write_concern_timeout_ms: Option<u32>,
1652
1653
    /// Limits the number of requests at any one time
1654
    /// Default: Unlimited
1655
    #[serde(
1656
        default,
1657
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1658
    )]
1659
    pub max_requests: Option<usize>,
1660
}
1661
1662
impl Retry {
1663
15
    pub fn make_jitter_fn(&self) -> Arc<dyn Fn(Duration) -> Duration + Send + Sync> {
1664
15
        if self.jitter == 0f32 {
1665
15
            Arc::new(move |delay: Duration| delay)
1666
        } else {
1667
0
            let local_jitter = self.jitter;
1668
0
            Arc::new(move |delay: Duration| {
1669
0
                delay.mul_f32(local_jitter.mul_add(rand::rng().random::<f32>() - 0.5, 1.))
1670
0
            })
1671
        }
1672
15
    }
1673
}