Coverage Report

Created: 2026-05-23 21:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-config/src/stores.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::time::Duration;
16
use std::collections::HashMap;
17
use std::sync::Arc;
18
19
use rand::Rng;
20
#[cfg(feature = "dev-schema")]
21
use schemars::JsonSchema;
22
use serde::{Deserialize, Serialize};
23
24
use crate::serde_utils::{
25
    convert_boolean_with_shellexpand, convert_data_size_with_shellexpand,
26
    convert_duration_with_shellexpand, convert_numeric_with_shellexpand,
27
    convert_optional_data_size_with_shellexpand, convert_optional_numeric_with_shellexpand,
28
    convert_optional_string_with_shellexpand, convert_string_with_shellexpand,
29
    convert_vec_string_with_shellexpand,
30
};
31
32
/// Name of the store. This type will be used when referencing a store
33
/// in the `CasConfig::stores`'s map key.
34
pub type StoreRefName = String;
35
36
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
37
#[serde(rename_all = "snake_case")]
38
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
39
pub enum ConfigDigestHashFunction {
40
    /// Use the sha256 hash function.
41
    /// <https://en.wikipedia.org/wiki/SHA-2>
42
    Sha256,
43
44
    /// Use the blake3 hash function.
45
    /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)>
46
    Blake3,
47
}
48
49
#[derive(Serialize, Deserialize, Debug, Clone)]
50
#[serde(rename_all = "snake_case")]
51
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
52
pub enum StoreSpec {
53
    /// Memory store will store all data in a hashmap in memory.
54
    ///
55
    /// **Example JSON Config:**
56
    /// ```json
57
    /// "memory": {
58
    ///   "eviction_policy": {
59
    ///     "max_bytes": "10mb",
60
    ///   }
61
    /// }
62
    /// ```
63
    ///
64
    Memory(MemorySpec),
65
66
    /// A generic blob store that will store files on the cloud
67
    /// provider. This configuration will never delete files, so you are
68
    /// responsible for purging old files in other ways.
69
    /// It supports the following backends:
70
    ///
71
    /// 1. **Amazon S3:**
72
    ///    S3 store will use Amazon's S3 service as a backend to store
73
    ///    the files. This configuration can be used to share files
74
    ///    across multiple instances. Uses system certificates for TLS
75
    ///    verification via `rustls-platform-verifier`.
76
    ///
77
    ///   **Example JSON Config:**
78
    ///   ```json
79
    ///   "experimental_cloud_object_store": {
80
    ///     "provider": "aws",
81
    ///     "region": "eu-north-1",
82
    ///     "bucket": "crossplane-bucket-af79aeca9",
83
    ///     "key_prefix": "test-prefix-index/",
84
    ///     "retry": {
85
    ///       "max_retries": 6,
86
    ///       "delay": 0.3,
87
    ///       "jitter": 0.5
88
    ///     },
89
    ///     "multipart_max_concurrent_uploads": 10
90
    ///   }
91
    ///   ```
92
    ///
93
    /// 2. **Google Cloud Storage:**
94
    ///    GCS store uses Google's GCS service as a backend to store
95
    ///    the files. This configuration can be used to share files
96
    ///    across multiple instances.
97
    ///
98
    ///   **Example JSON Config:**
99
    ///   ```json
100
    ///   "experimental_cloud_object_store": {
101
    ///     "provider": "gcs",
102
    ///     "bucket": "test-bucket",
103
    ///     "key_prefix": "test-prefix-index/",
104
    ///     "retry": {
105
    ///       "max_retries": 6,
106
    ///       "delay": 0.3,
107
    ///       "jitter": 0.5
108
    ///     },
109
    ///     "multipart_max_concurrent_uploads": 10
110
    ///   }
111
    ///   ```
112
    ///
113
    /// 3. **Azure Blob Store:**
114
    ///    Azure Blob store will use Microsoft's Azure Blob service as a
115
    ///    backend to store the files. This configuration can be used to
116
    ///    share files across multiple instances.
117
    ///
118
    ///   **Example JSON Config:**
119
    ///   ```json
120
    ///   "experimental_cloud_object_store": {
121
    ///     "provider": "azure",
122
    ///     "account_name": "cloudshell1393657559",
123
    ///     "container": "simple-test-container",
124
    ///     "key_prefix": "folder/",
125
    ///     "retry": {
126
    ///         "max_retries": 6,
127
    ///         "delay": 0.3,
128
    ///         "jitter": 0.5
129
    ///     },
130
    ///     "multipart_max_concurrent_uploads": 10
131
    ///   }
132
    ///   ```
133
    ///
134
    /// 4. **`NetApp` ONTAP S3**
135
    ///    `NetApp` ONTAP S3 store will use ONTAP's S3-compatible storage as a backend
136
    ///    to store files. This store is specifically configured for ONTAP's S3 requirements
137
    ///    including custom TLS configuration, credentials management, and proper vserver
138
    ///    configuration.
139
    ///
140
    ///    This store uses AWS environment variables for credentials:
141
    ///    - `AWS_ACCESS_KEY_ID`
142
    ///    - `AWS_SECRET_ACCESS_KEY`
143
    ///    - `AWS_DEFAULT_REGION`
144
    ///
145
    ///    **Example JSON Config:**
146
    ///    ```json
147
    ///    "experimental_cloud_object_store": {
148
    ///      "provider": "ontap",
149
    ///      "endpoint": "https://ontap-s3-endpoint:443",
150
    ///      "vserver_name": "your-vserver",
151
    ///      "bucket": "your-bucket",
152
    ///      "root_certificates": "/path/to/certs.pem",  // Optional
153
    ///      "key_prefix": "test-prefix/",               // Optional
154
    ///      "retry": {
155
    ///        "max_retries": 6,
156
    ///        "delay": 0.3,
157
    ///        "jitter": 0.5
158
    ///      },
159
    ///      "multipart_max_concurrent_uploads": 10
160
    ///    }
161
    ///    ```
162
    ExperimentalCloudObjectStore(ExperimentalCloudObjectSpec),
163
164
    /// ONTAP S3 Existence Cache provides a caching layer on top of the ONTAP S3 store
165
    /// to optimize repeated existence checks. It maintains an in-memory cache of object
166
    /// digests and periodically syncs this cache to disk for persistence.
167
    ///
168
    /// The cache helps reduce latency for repeated calls to check object existence,
169
    /// while still ensuring eventual consistency with the underlying ONTAP S3 store.
170
    ///
171
    /// **Example JSON Config:**
172
    /// ```json
173
    /// "ontap_s3_existence_cache": {
174
    ///   "index_path": "/path/to/cache/index.json",
175
    ///   "sync_interval_seconds": 300,
176
    ///   "backend": {
177
    ///     "endpoint": "https://ontap-s3-endpoint:443",
178
    ///     "vserver_name": "your-vserver",
179
    ///     "bucket": "your-bucket",
180
    ///     "key_prefix": "test-prefix/"
181
    ///   }
182
    /// }
183
    /// ```
184
    ///
185
    OntapS3ExistenceCache(Box<OntapS3ExistenceCacheSpec>),
186
187
    /// Verify store is used to apply verifications to an underlying
188
    /// store implementation. It is strongly encouraged to validate
189
    /// as much data as you can before accepting data from a client,
190
    /// failing to do so may cause the data in the store to be
191
    /// populated with invalid data causing all kinds of problems.
192
    ///
193
    /// The suggested configuration is to have the CAS validate the
194
    /// hash and size and the AC validate nothing.
195
    ///
196
    /// **Example JSON Config:**
197
    /// ```json
198
    /// "verify": {
199
    ///   "backend": {
200
    ///     "memory": {
201
    ///       "eviction_policy": {
202
    ///         "max_bytes": "500mb"
203
    ///       }
204
    ///     },
205
    ///   },
206
    ///   "verify_size": true,
207
    ///   "verify_hash": true
208
    /// }
209
    /// ```
210
    ///
211
    Verify(Box<VerifySpec>),
212
213
    /// Completeness checking store verifies if the
214
    /// output files & folders exist in the CAS before forwarding
215
    /// the request to the underlying store.
216
    /// Note: This store should only be used on AC stores.
217
    ///
218
    /// **Example JSON Config:**
219
    /// ```json
220
    /// "completeness_checking": {
221
    ///   "backend": {
222
    ///     "filesystem": {
223
    ///       "content_path": "~/.cache/nativelink/content_path-ac",
224
    ///       "temp_path": "~/.cache/nativelink/tmp_path-ac",
225
    ///       "eviction_policy": {
226
    ///         "max_bytes": "500mb",
227
    ///       }
228
    ///     }
229
    ///   },
230
    ///   "cas_store": {
231
    ///     "ref_store": {
232
    ///       "name": "CAS_MAIN_STORE"
233
    ///     }
234
    ///   }
235
    /// }
236
    /// ```
237
    ///
238
    CompletenessChecking(Box<CompletenessCheckingSpec>),
239
240
    /// A compression store that will compress the data inbound and
241
    /// outbound. There will be a non-trivial cost to compress and
242
    /// decompress the data, but in many cases if the final store is
243
    /// a store that requires network transport and/or storage space
244
    /// is a concern it is often faster and more efficient to use this
245
    /// store before those stores.
246
    ///
247
    /// **Example JSON Config:**
248
    /// ```json
249
    /// "compression": {
250
    ///   "compression_algorithm": {
251
    ///     "lz4": {}
252
    ///   },
253
    ///   "backend": {
254
    ///     "filesystem": {
255
    ///       "content_path": "/tmp/nativelink/data/content_path-cas",
256
    ///       "temp_path": "/tmp/nativelink/data/tmp_path-cas",
257
    ///       "eviction_policy": {
258
    ///         "max_bytes": "2gb",
259
    ///       }
260
    ///     }
261
    ///   }
262
    /// }
263
    /// ```
264
    ///
265
    Compression(Box<CompressionSpec>),
266
267
    /// A dedup store will take the inputs and run a rolling hash
268
    /// algorithm on them to slice the input into smaller parts then
269
    /// run a sha256 algorithm on the slice and if the object doesn't
270
    /// already exist, upload the slice to the `content_store` using
271
    /// a new digest of just the slice. Once all parts exist, an
272
    /// Action-Cache-like digest will be built and uploaded to the
273
    /// `index_store` which will contain a reference to each
274
    /// chunk/digest of the uploaded file. Downloading a request will
275
    /// first grab the index from the `index_store`, and forward the
276
    /// download content of each chunk as if it were one file.
277
    ///
278
    /// This store is exceptionally good when the following conditions
279
    /// are met:
280
    /// * Content is mostly the same (inserts, updates, deletes are ok)
281
    /// * Content is not compressed or encrypted
282
    /// * Uploading or downloading from `content_store` is the bottleneck.
283
    ///
284
    /// Note: This store pairs well when used with `CompressionSpec` as
285
    /// the `content_store`, but never put `DedupSpec` as the backend of
286
    /// `CompressionSpec` as it will negate all the gains.
287
    ///
288
    /// Note: When running `.has()` on this store, it will only check
289
    /// to see if the entry exists in the `index_store` and not check
290
    /// if the individual chunks exist in the `content_store`.
291
    ///
292
    /// **Example JSON Config:**
293
    /// ```json
294
    /// "dedup": {
295
    ///   "index_store": {
296
    ///     "memory": {
297
    ///       "eviction_policy": {
298
    ///          "max_bytes": "1GB",
299
    ///       }
300
    ///     }
301
    ///   },
302
    ///   "content_store": {
303
    ///     "compression": {
304
    ///       "compression_algorithm": {
305
    ///         "lz4": {}
306
    ///       },
307
    ///       "backend": {
308
    ///         "fast_slow": {
309
    ///           "fast": {
310
    ///             "memory": {
311
    ///               "eviction_policy": {
312
    ///                 "max_bytes": "500MB",
313
    ///               }
314
    ///             }
315
    ///           },
316
    ///           "slow": {
317
    ///             "filesystem": {
318
    ///               "content_path": "/tmp/nativelink/data/content_path-content",
319
    ///               "temp_path": "/tmp/nativelink/data/tmp_path-content",
320
    ///               "eviction_policy": {
321
    ///                 "max_bytes": "2gb"
322
    ///               }
323
    ///             }
324
    ///           }
325
    ///         }
326
    ///       }
327
    ///     }
328
    ///   }
329
    /// }
330
    /// ```
331
    ///
332
    Dedup(Box<DedupSpec>),
333
334
    /// Existence store will wrap around another store and cache calls
335
    /// to has so that subsequent `has_with_results` calls will be
336
    /// faster. This is useful for cases when you have a store that
337
    /// is slow to respond to has calls.
338
    /// Note: This store should only be used on CAS stores.
339
    ///
340
    /// **Example JSON Config:**
341
    /// ```json
342
    /// "existence_cache": {
343
    ///   "backend": {
344
    ///     "memory": {
345
    ///       "eviction_policy": {
346
    ///         "max_bytes": "500mb",
347
    ///       }
348
    ///     }
349
    ///   },
350
    ///   // Note this is the existence store policy, not the backend policy
351
    ///   "eviction_policy": {
352
    ///     "max_seconds": 100,
353
    ///   }
354
    /// }
355
    /// ```
356
    ///
357
    ExistenceCache(Box<ExistenceCacheSpec>),
358
359
    /// `FastSlow` store will first try to fetch the data from the `fast`
360
    /// store and then if it does not exist try the `slow` store.
361
    /// When the object does exist in the `slow` store, it will copy
362
    /// the data to the `fast` store while returning the data.
363
    /// This store should be thought of as a store that "buffers"
364
    /// the data to the `fast` store.
365
    /// On uploads it will mirror data to both `fast` and `slow` stores.
366
    ///
367
    /// WARNING: If you need data to always exist in the `slow` store
368
    /// for something like remote execution, be careful because this
369
    /// store will never check to see if the objects exist in the
370
    /// `slow` store if it exists in the `fast` store (ie: it assumes
371
    /// that if an object exists in the `fast` store it will exist in
372
    /// the `slow` store).
373
    ///
374
    /// **Example JSON Config:**
375
    /// ```json
376
    /// "fast_slow": {
377
    ///   "fast": {
378
    ///     "filesystem": {
379
    ///       "content_path": "/tmp/nativelink/data/content_path-index",
380
    ///       "temp_path": "/tmp/nativelink/data/tmp_path-index",
381
    ///       "eviction_policy": {
382
    ///         "max_bytes": "500mb",
383
    ///       }
384
    ///     }
385
    ///   },
386
    ///   "slow": {
387
    ///     "filesystem": {
388
    ///       "content_path": "/tmp/nativelink/data/content_path-index",
389
    ///       "temp_path": "/tmp/nativelink/data/tmp_path-index",
390
    ///       "eviction_policy": {
391
    ///         "max_bytes": "500mb",
392
    ///       }
393
    ///     }
394
    ///   }
395
    /// }
396
    /// ```
397
    ///
398
    FastSlow(Box<FastSlowSpec>),
399
400
    /// Shards the data to multiple stores. This is useful for cases
401
    /// when you want to distribute the load across multiple stores.
402
    /// The digest hash is used to determine which store to send the
403
    /// data to.
404
    ///
405
    /// **Example JSON Config:**
406
    /// ```json
407
    /// "shard": {
408
    ///   "stores": [
409
    ///    {
410
    ///     "store": {
411
    ///       "memory": {
412
    ///         "eviction_policy": {
413
    ///             "max_bytes": "10mb"
414
    ///         },
415
    ///       },
416
    ///     },
417
    ///     "weight": 1
418
    ///   }]
419
    /// }
420
    /// ```
421
    ///
422
    Shard(ShardSpec),
423
424
    /// Stores the data on the filesystem. This store is designed for
425
    /// local persistent storage. Restarts of this program should restore
426
    /// the previous state, meaning anything uploaded will be persistent
427
    /// as long as the filesystem integrity holds.
428
    ///
429
    /// **Example JSON Config:**
430
    /// ```json
431
    /// "filesystem": {
432
    ///   "content_path": "/tmp/nativelink/data-worker-test/content_path-cas",
433
    ///   "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas",
434
    ///   "eviction_policy": {
435
    ///     "max_bytes": "10gb",
436
    ///   }
437
    /// }
438
    /// ```
439
    ///
440
    Filesystem(FilesystemSpec),
441
442
    /// Store used to reference a store in the root store manager.
443
    /// This is useful for cases when you want to share a store in different
444
    /// nested stores. Example, you may want to share the same memory store
445
    /// used for the action cache, but use a `FastSlowSpec` and have the fast
446
    /// store also share the memory store for efficiency.
447
    ///
448
    /// **Example JSON Config:**
449
    /// ```json
450
    /// "ref_store": {
451
    ///   "name": "FS_CONTENT_STORE"
452
    /// }
453
    /// ```
454
    ///
455
    RefStore(RefSpec),
456
457
    /// Uses the size field of the digest to separate which store to send the
458
    /// data. This is useful for cases when you'd like to put small objects
459
    /// in one store and large objects in another store. This should only be
460
    /// used if the size field is the real size of the content, in other
461
    /// words, don't use on AC (Action Cache) stores. Any store where you can
462
    /// safely use `VerifySpec.verify_size = true`, this store should be safe
463
    /// to use (ie: CAS stores).
464
    ///
465
    /// **Example JSON Config:**
466
    /// ```json
467
    /// "size_partitioning": {
468
    ///   "size": "128mib",
469
    ///   "lower_store": {
470
    ///     "memory": {
471
    ///       "eviction_policy": {
472
    ///         "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100mb}"
473
    ///       }
474
    ///     }
475
    ///   },
476
    ///   "upper_store": {
477
    ///     // This store discards data of 128mib or larger.
478
    ///     "noop": {}
479
    ///   }
480
    /// }
481
    /// ```
482
    ///
483
    SizePartitioning(Box<SizePartitioningSpec>),
484
485
    /// This store will pass-through calls to another GRPC store. This store
486
    /// is not designed to be used as a sub-store of another store, but it
487
    /// does satisfy the interface and will likely work.
488
    ///
489
    /// One major GOTCHA is that some stores use a special function on this
490
    /// store to get the size of the underlying object, which is only reliable
491
    /// when this store is serving a CAS store, not an AC store. If using
492
    /// this store directly without being a child of any store there are no
493
    /// side effects and is the most efficient way to use it.
494
    ///
495
    /// **Example JSON Config:**
496
    /// ```json
497
    /// "grpc": {
498
    ///   "instance_name": "main",
499
    ///   "endpoints": [
500
    ///     {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"}
501
    ///   ],
502
    ///   "connections_per_endpoint": "5",
503
    ///   "rpc_timeout_s": "5m",
504
    ///   "store_type": "ac",
505
    ///   // Static headers attached to every outgoing request to the upstream
506
    ///   // remote cache. Useful for fixed service-account credentials.
507
    ///   "headers": {
508
    ///     "authorization": "Bearer my-static-token"
509
    ///   },
510
    ///   // Header names to copy from the inbound client request and forward to
511
    ///   // the upstream remote cache. Use this to pass through dynamic
512
    ///   // credentials such as a JWT sent by the build client.
513
    ///   "forward_headers": ["authorization", "x-custom-token"]
514
    /// }
515
    /// ```
516
    ///
517
    Grpc(GrpcSpec),
518
519
    /// Stores data in any stores compatible with Redis APIs.
520
    ///
521
    /// Pairs well with `SizePartitioning` and/or `FastSlow` stores.
522
    /// Ideal for accepting small object sizes as most redis store
523
    /// services have a max file upload of between 256MB-512MB.
524
    ///
525
    /// **Example JSON Config:**
526
    /// ```json
527
    /// "redis_store": {
528
    ///   "addresses": [
529
    ///     "redis://127.0.0.1:6379/",
530
    ///   ],
531
    ///   "max_client_permits": 1000,
532
    /// }
533
    /// ```
534
    ///
535
    RedisStore(RedisSpec),
536
537
    /// Noop store is a store that sends streams into the void and all data
538
    /// retrieval will return 404 (`NotFound`). This can be useful for cases
539
    /// where you may need to partition your data and part of your data needs
540
    /// to be discarded.
541
    ///
542
    /// **Example JSON Config:**
543
    /// ```json
544
    /// "noop": {}
545
    /// ```
546
    ///
547
    Noop(NoopSpec),
548
549
    /// Experimental `MongoDB` store implementation.
550
    ///
551
    /// This store uses `MongoDB` as a backend for storing data. It supports
552
    /// both CAS (Content Addressable Storage) and scheduler data with
553
    /// optional change streams for real-time updates.
554
    ///
555
    /// **Example JSON Config:**
556
    /// ```json
557
    /// "experimental_mongo": {
558
    ///     "connection_string": "mongodb://localhost:27017",
559
    ///     "database": "nativelink",
560
    ///     "cas_collection": "cas",
561
    ///     "key_prefix": "cas:",
562
    ///     "read_chunk_size": 65536,
563
    ///     "max_concurrent_uploads": 10,
564
    ///     "enable_change_streams": false,
565
    ///     "max_requests": "100"
566
    /// }
567
    /// ```
568
    ///
569
    ExperimentalMongo(ExperimentalMongoSpec),
570
}
571
572
/// Configuration for an individual shard of the store.
573
#[derive(Serialize, Deserialize, Debug, Clone)]
574
#[serde(deny_unknown_fields)]
575
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
576
pub struct ShardConfig {
577
    /// Store to shard the data to.
578
    pub store: StoreSpec,
579
580
    /// The weight of the store. This is used to determine how much data
581
    /// should be sent to the store. The actual percentage is the individual
581
    /// store's weight divided by the sum of all the stores' weights.
583
    ///
584
    /// Default: 1
585
    #[serde(deserialize_with = "convert_optional_numeric_with_shellexpand")]
586
    pub weight: Option<u32>,
587
}
588
589
#[derive(Serialize, Deserialize, Debug, Clone)]
590
#[serde(deny_unknown_fields)]
591
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
592
pub struct ShardSpec {
593
    /// Stores to shard the data to.
594
    pub stores: Vec<ShardConfig>,
595
}
596
597
#[derive(Serialize, Deserialize, Debug, Clone)]
598
#[serde(deny_unknown_fields)]
599
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
600
pub struct SizePartitioningSpec {
601
    /// Size to partition the data on.
602
    #[serde(deserialize_with = "convert_data_size_with_shellexpand")]
603
    pub size: u64,
604
605
    /// Store to send data when object is < (less than) size.
606
    pub lower_store: StoreSpec,
607
608
    /// Store to send data when object is >= (greater than eq) size.
609
    pub upper_store: StoreSpec,
610
}
611
612
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
613
#[serde(deny_unknown_fields)]
614
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
615
pub struct RefSpec {
616
    /// Name of the store under the root "stores" config object.
617
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
618
    pub name: String,
619
}
620
621
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
622
#[serde(deny_unknown_fields)]
623
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
624
pub struct FilesystemSpec {
625
    /// Path on the system where to store the actual content. This is where
626
    /// the bulk of the data will be placed.
627
    /// On service bootup this folder will be scanned and all files will be
628
    /// added to the cache. In the event one of the files doesn't match the
629
    /// criteria, the file will be deleted.
630
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
631
    pub content_path: String,
632
633
    /// A temporary location of where files that are being uploaded or
634
    /// deleted will be placed while the content cannot be guaranteed to be
635
    /// accurate. This location must be on the same block device as
636
    /// `content_path` so atomic moves can happen (ie: move without copy).
637
    /// All files in this folder will be deleted on every startup.
638
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
639
    pub temp_path: String,
640
641
    /// Buffer size to use when reading files. Generally this should be left
642
    /// to the default value except for testing.
643
    /// Default: 32k.
644
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
645
    pub read_buffer_size: u32,
646
647
    /// Policy used to evict items out of the store. Failure to set this
648
    /// value will cause items to never be removed from the store causing
649
    /// infinite memory usage.
650
    pub eviction_policy: Option<EvictionPolicy>,
651
652
    /// The block size of the filesystem on the running machine. This
653
    /// value is used to determine the actual disk space an entry consumes.
654
    /// For a 4KB block size filesystem, a 1B file actually consumes 4KB.
655
    /// Default: 4096
656
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
657
    pub block_size: u64,
658
659
    /// Maximum number of concurrent write operations allowed.
660
    /// Each write involves streaming data to a temp file and calling `sync_all()`,
661
    /// which can saturate disk I/O when many writes happen simultaneously.
662
    /// Limiting concurrency prevents disk saturation from blocking the async
663
    /// runtime.
664
    /// A value of 0 means unlimited (no concurrency limit).
665
    /// Default: 0
666
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
667
    pub max_concurrent_writes: usize,
668
}
669
670
// NetApp ONTAP S3 Spec
671
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
672
#[serde(deny_unknown_fields)]
673
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
674
pub struct ExperimentalOntapS3Spec {
675
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
676
    pub endpoint: String,
677
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
678
    pub vserver_name: String,
679
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
680
    pub bucket: String,
681
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
682
    pub root_certificates: Option<String>,
683
684
    /// Common retry and upload configuration
685
    #[serde(flatten)]
686
    pub common: CommonObjectSpec,
687
}
688
689
/// Configuration for an object store backed by Cloudflare R2.
///
/// `account_id` and `bucket` carry no `serde(default)`, so they must be
/// present in the configuration.
///
/// NOTE(review): this struct combines `deny_unknown_fields` with a
/// `#[serde(flatten)]` field; the serde documentation lists that combination
/// as unsupported — confirm unknown keys are rejected as intended.
690
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
691
#[serde(deny_unknown_fields)]
692
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
693
pub struct ExperimentalR2Spec {
694
    /// Cloudflare account ID. Endpoint is derived as
695
    /// `https://{account_id}.r2.cloudflarestorage.com`.
696
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
697
    pub account_id: String,
698
699
    /// Bucket name to use as the backend.
700
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
701
    pub bucket: String,
702
703
    /// Explicit R2 access key.
    /// NOTE(review): the credential source used when this is unset is not
    /// visible here — confirm.
    ///
    /// Default: None
704
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
705
    pub access_key_id: Option<String>,
706
707
    /// Explicit R2 secret key.
    ///
    /// Default: None
708
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
709
    pub secret_access_key: Option<String>,
710
711
    /// Retry and upload settings. Flattened, so its keys appear at the same
    /// level as the fields above.
712
    #[serde(flatten)]
713
    pub common: CommonObjectSpec,
714
}
715
716
/// Configuration for an existence cache that wraps an ONTAP S3 backend.
///
/// All three fields are required: none of them has a `serde(default)`.
#[derive(Serialize, Deserialize, Debug, Clone)]
717
#[serde(deny_unknown_fields)]
718
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
719
pub struct OntapS3ExistenceCacheSpec {
720
    /// Path of the cache index.
    /// NOTE(review): presumably a file on local disk — confirm.
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
721
    pub index_path: String,
722
    /// Interval between syncs, in seconds (per the field name).
    /// NOTE(review): what is synced (index to disk, or index against the
    /// backend) is not visible here — confirm.
    #[serde(deserialize_with = "convert_numeric_with_shellexpand")]
723
    pub sync_interval_seconds: u32,
724
    /// The ONTAP S3 store specification this cache sits in front of.
    pub backend: Box<ExperimentalOntapS3Spec>,
725
}
726
727
/// Selects which operations (get and/or update) a store participates in.
///
/// Used by `FastSlowSpec::fast_direction` and `FastSlowSpec::slow_direction`.
/// Serialized in snake_case: `both`, `update`, `get`, `read_only`.
#[derive(Serialize, Deserialize, Default, Debug, Clone, Copy, PartialEq, Eq)]
728
#[serde(rename_all = "snake_case")]
729
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
730
pub enum StoreDirection {
731
    /// The store operates normally and all get and put operations are
732
    /// handled by it. This is the default.
733
    #[default]
734
    Both,
735
    /// Update operations will cause persistence to this store, but Get
736
    /// operations will be ignored.
737
    /// This only makes sense on the fast store as the slow store will
738
    /// never get written to on Get anyway.
739
    Update,
740
    /// Get operations will cause persistence to this store, but Update
741
    /// operations will be ignored.
742
    Get,
743
    /// Operate as a read-only store. This only really makes sense if there
744
    /// is another way to write to it.
745
    ReadOnly,
746
}
747
748
/// Configuration for a two-tier store made of a `fast` store that is tried
/// first and a `slow` store that is used as the fallback.
#[derive(Serialize, Deserialize, Debug, Clone)]
749
#[serde(deny_unknown_fields)]
750
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
751
pub struct FastSlowSpec {
752
    /// Fast store that will be attempted to be contacted before reaching
753
    /// out to the `slow` store.
754
    pub fast: StoreSpec,
755
756
    /// How to handle the fast store. This can be useful to set to `get` for
757
    /// worker nodes such that results are persisted to the slow store only.
    ///
    /// Default: `both`
758
    #[serde(default)]
759
    pub fast_direction: StoreDirection,
760
761
    /// If the object does not exist in the `fast` store it will try to
762
    /// get it from this store.
763
    pub slow: StoreSpec,
764
765
    /// How to handle the slow store. This can be useful if creating a diode
766
    /// and you wish to have an upstream read-only store.
    ///
    /// Default: `both`
767
    #[serde(default)]
768
    pub slow_direction: StoreDirection,
769
}
770
771
/// Configuration for the memory store; its only setting is the eviction
/// policy.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)]
772
#[serde(deny_unknown_fields)]
773
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
774
pub struct MemorySpec {
775
    /// Policy used to evict items out of the store. Failure to set this
776
    /// value will cause items to never be removed from the store causing
777
    /// infinite memory usage.
    ///
    /// May be omitted, in which case it deserializes to `None`.
778
    pub eviction_policy: Option<EvictionPolicy>,
779
}
780
781
/// Configuration for a deduplicating store that slices content into chunks,
/// keeps the chunks in `content_store` and an index of them in `index_store`.
///
/// The numeric fields use `serde(default)` and therefore deserialize to 0
/// when omitted. The `Default:` values quoted on each field are presumably
/// substituted for 0 by the consuming store — confirm.
#[derive(Serialize, Deserialize, Debug, Clone)]
782
#[serde(deny_unknown_fields)]
783
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
784
pub struct DedupSpec {
785
    /// Store used to store the index of each dedup slice. This store
786
    /// should generally be fast and small.
787
    pub index_store: StoreSpec,
788
789
    /// The store where the individual chunks will be uploaded. This
790
    /// store should generally be the slower & larger store.
791
    pub content_store: StoreSpec,
792
793
    /// Minimum size that a chunk will be when slicing up the content.
794
    /// Note: This setting can be increased to improve performance
795
    /// because it will actually not check this number of bytes when
796
    /// deciding where to partition the data.
797
    ///
798
    /// Default: 65536 (64k)
799
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
800
    pub min_size: u32,
801
802
    /// A best-effort attempt will be made to keep the average size
803
    /// of the chunks to this number. It is not a guarantee, but a
804
    /// slight attempt will be made.
805
    ///
806
    /// This value will also be about the threshold used to determine
807
    /// if we should even attempt to dedup the entry or just forward
808
    /// it directly to the `content_store` without an index. The actual
809
    /// value will be about `normal_size * 1.3` due to implementation
810
    /// details.
811
    ///
812
    /// Default: 262144 (256k)
813
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
814
    pub normal_size: u32,
815
816
    /// Maximum size a chunk is allowed to be.
817
    ///
818
    /// Default: 524288 (512k)
819
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
820
    pub max_size: u32,
821
822
    /// Due to implementation detail, we want to prefer to download
823
    /// the first chunks of the file so we can stream the content
824
    /// out and free up some of our buffers. This configuration
825
    /// will be used to restrict the number of concurrent chunk
826
    /// downloads at a time per `get()` request.
827
    ///
828
    /// This setting will also affect how much memory might be used
829
    /// per `get()` request. Estimated worst case memory per `get()`
830
    /// request is: `max_concurrent_fetch_per_get * max_size`.
831
    ///
832
    /// Default: 10
833
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
834
    pub max_concurrent_fetch_per_get: u32,
835
}
836
837
/// Configuration for an existence cache placed in front of a `backend`
/// store.
#[derive(Serialize, Deserialize, Debug, Clone)]
838
#[serde(deny_unknown_fields)]
839
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
840
pub struct ExistenceCacheSpec {
841
    /// The underlying store to wrap around. All content will first flow
842
    /// through self before forwarding to backend. In the event there
843
    /// is an error detected in self, the connection to the backend
844
    /// will be terminated, and early termination should always cause
845
    /// updates to fail on the backend.
846
    pub backend: StoreSpec,
847
848
    /// Policy used to evict items out of the store. Failure to set this
849
    /// value will cause items to never be removed from the store causing
850
    /// infinite memory usage.
    ///
    /// May be omitted, in which case it deserializes to `None`.
851
    pub eviction_policy: Option<EvictionPolicy>,
852
}
853
854
/// Configuration for a store wrapper that can verify the size and/or hash of
/// uploaded data before it reaches `backend`.
#[derive(Serialize, Deserialize, Debug, Clone)]
855
#[serde(deny_unknown_fields)]
856
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
857
pub struct VerifySpec {
858
    /// The underlying store to wrap around. All content will first flow
859
    /// through self before forwarding to backend. In the event there
860
    /// is an error detected in self, the connection to the backend
861
    /// will be terminated, and early termination should always cause
862
    /// updates to fail on the backend.
863
    pub backend: StoreSpec,
864
865
    /// If set the store will verify the size of the data before accepting
866
    /// an upload of data.
867
    ///
868
    /// This should be set to false for AC, but true for CAS stores.
    ///
    /// Default: false
869
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
870
    pub verify_size: bool,
871
872
    /// If the data should be hashed and verify that the key matches the
873
    /// computed hash. The hash function is automatically determined based
874
    /// on the request and if not set will use the global default.
875
    ///
876
    /// This should be set to false for AC, but true for CAS stores.
    ///
    /// Default: false
877
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
878
    pub verify_hash: bool,
879
}
880
881
/// Configuration for a store wrapper that checks results from `backend`
/// against `cas_store` before returning them. Both stores are required.
#[derive(Serialize, Deserialize, Debug, Clone)]
882
#[serde(deny_unknown_fields)]
883
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
884
pub struct CompletenessCheckingSpec {
885
    /// The underlying store that will have its results validated before sending to client.
886
    pub backend: StoreSpec,
887
888
    /// When a request is made, the results are decoded and all output digests/files are verified
889
    /// to exist in this CAS store before returning success.
890
    pub cas_store: StoreSpec,
891
}
892
893
/// Settings for the LZ4 compression algorithm.
///
/// Both fields use `serde(default)` and deserialize to 0 when omitted. The
/// `Default:` values quoted below are presumably substituted for 0 by the
/// consuming store — confirm.
#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Eq, Clone, Copy)]
894
#[serde(deny_unknown_fields)]
895
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
896
pub struct Lz4Config {
897
    /// Size of the blocks to compress.
898
    /// Higher values require more ram, but might yield slightly better
899
    /// compression ratios.
900
    ///
901
    /// Default: 65536 (64k).
902
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
903
    pub block_size: u32,
904
905
    /// Maximum size allowed to attempt to deserialize data into.
906
    /// This is needed because the `block_size` is embedded into the data
907
    /// so if there was a bad actor, they could upload an extremely large
908
    /// `block_size`'ed entry and we'd allocate a large amount of memory
909
    /// when retrieving the data. To prevent this from happening, we
910
    /// allow you to specify the maximum that we'll attempt to deserialize.
911
    ///
912
    /// Default: value in `block_size`.
913
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
914
    pub max_decode_block_size: u32,
915
}
916
917
/// The compression algorithm used by `CompressionSpec`, together with its
/// settings. Variant names are serialized in snake_case (`lz4`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Copy)]
918
#[serde(rename_all = "snake_case")]
919
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
920
pub enum CompressionAlgorithm {
921
    /// LZ4 compression algorithm is extremely fast for compression and
922
    /// decompression, however does not perform very well in compression
923
    /// ratio. In most cases build artifacts are highly compressible, however
924
    /// lz4 is quite good at aborting early if the data is not deemed very
925
    /// compressible.
926
    ///
927
    /// see: <https://lz4.github.io/lz4/>
928
    Lz4(Lz4Config),
929
}
930
931
/// Configuration for a store wrapper that applies `compression_algorithm` in
/// front of `backend`. Both fields are required.
#[derive(Serialize, Deserialize, Debug, Clone)]
932
#[serde(deny_unknown_fields)]
933
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
934
pub struct CompressionSpec {
935
    /// The underlying store to wrap around. All content will first flow
936
    /// through self before forwarding to backend. In the event there
937
    /// is an error detected in self, the connection to the backend
938
    /// will be terminated, and early termination should always cause
939
    /// updates to fail on the backend.
940
    pub backend: StoreSpec,
941
942
    /// The compression algorithm to use.
943
    pub compression_algorithm: CompressionAlgorithm,
944
}
945
946
/// Eviction policy always works on LRU (Least Recently Used). Any time an entry
947
/// is touched it updates the timestamp. Inserts and updates will execute the
948
/// eviction policy removing any expired entries and/or the oldest entries
949
/// until the store size becomes smaller than `max_bytes`.
///
/// Every field is optional and falls back to 0, which each field documents as
/// "never evict on this criterion".
950
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)]
951
#[serde(deny_unknown_fields)]
952
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
953
pub struct EvictionPolicy {
954
    /// Maximum number of bytes before eviction takes place.
955
    /// Default: 0. Zero means never evict based on size.
956
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
957
    pub max_bytes: usize,
958
959
    /// When eviction starts based on hitting `max_bytes`, continue until
960
    /// `max_bytes - evict_bytes` is met to create a low watermark. This stops
961
    /// operations from thrashing when the store is close to the limit.
962
    /// Default: 0
963
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
964
    pub evict_bytes: usize,
965
966
    /// Maximum number of seconds for an entry to live since it was last
967
    /// accessed before it is evicted.
968
    /// Default: 0. Zero means never evict based on time.
969
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
970
    pub max_seconds: u32,
971
972
    /// Maximum number of entries in the store before an eviction takes place.
973
    /// Default: 0. Zero means never evict based on count.
974
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
975
    pub max_count: u64,
976
}
977
978
/// Cloud object store configuration, selected by provider.
///
/// This is an internally tagged enum: the `provider` key picks the variant
/// and the remaining keys are that variant's spec. Variant names are
/// snake_case, so the accepted values are `aws`, `gcs`, `azure`, `ontap`
/// and `r2`.
#[derive(Serialize, Deserialize, Debug, Clone)]
979
#[serde(tag = "provider", rename_all = "snake_case")]
980
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
981
pub enum ExperimentalCloudObjectSpec {
982
    /// `provider: "aws"`
    Aws(ExperimentalAwsSpec),
983
    /// `provider: "gcs"`
    Gcs(ExperimentalGcsSpec),
984
    /// `provider: "azure"`
    Azure(ExperimentalAzureSpec),
985
    /// `provider: "ontap"`
    Ontap(ExperimentalOntapS3Spec),
986
    /// `provider: "r2"`
    R2(ExperimentalR2Spec),
987
}
988
989
// The default cloud object spec is the AWS provider with default settings.
impl Default for ExperimentalCloudObjectSpec {
990
0
    /// Returns `Self::Aws` wrapping `ExperimentalAwsSpec::default()`.
    fn default() -> Self {
991
0
        Self::Aws(ExperimentalAwsSpec::default())
992
0
    }
993
}
994
995
/// Configuration for an object store backed by AWS S3.
///
/// NOTE(review): this struct combines `deny_unknown_fields` with a
/// `#[serde(flatten)]` field; the serde documentation lists that combination
/// as unsupported — confirm unknown keys are rejected as intended.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
996
#[serde(deny_unknown_fields)]
997
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
998
pub struct ExperimentalAwsSpec {
999
    /// S3 region. Usually us-east-1, us-west-2, af-south-1, etc.
    ///
    /// Default: (Empty String)
1000
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1001
    pub region: String,
1002
1003
    /// Bucket name to use as the backend.
    ///
    /// Default: (Empty String)
1004
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1005
    pub bucket: String,
1006
1007
    /// Common retry and upload configuration. Flattened, so its keys appear
    /// at the same level as the fields above.
1008
    #[serde(flatten)]
1009
    pub common: CommonObjectSpec,
1010
}
1011
1012
/// Configuration for an object store backed by Google Cloud Storage.
///
/// NOTE(review): this struct combines `deny_unknown_fields` with a
/// `#[serde(flatten)]` field; the serde documentation lists that combination
/// as unsupported — confirm unknown keys are rejected as intended.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1013
#[serde(deny_unknown_fields)]
1014
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1015
pub struct ExperimentalGcsSpec {
1016
    /// Bucket name to use as the backend.
1017
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1018
    pub bucket: String,
1019
1020
    /// Chunk size for resumable uploads.
1021
    ///
1022
    /// Default: 2MB
1023
    #[serde(
1024
        default,
1025
        deserialize_with = "convert_optional_data_size_with_shellexpand"
1026
    )]
1027
    pub resumable_chunk_size: Option<usize>,
1028
1029
    /// Common retry and upload configuration. Flattened, so its keys appear
    /// at the same level as the other fields.
1030
    #[serde(flatten)]
1031
    pub common: CommonObjectSpec,
1032
1033
    /// Error if authentication was not found.
    ///
    /// Default: false
1034
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1035
    pub authentication_required: bool,
1036
1037
    /// Connection timeout in milliseconds.
1038
    /// Default: 3000
    ///
    /// NOTE(review): the `_s` suffix and the duration converter suggest this
    /// value is in seconds, which conflicts with "milliseconds" and a default
    /// of 3000 — confirm the unit against the consuming store.
1039
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1040
    pub connection_timeout_s: u64,
1041
1042
    /// Read timeout in milliseconds.
1043
    /// Default: 3000
    ///
    /// NOTE(review): same unit question as `connection_timeout_s` — the field
    /// name suggests seconds; confirm.
1044
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1045
    pub read_timeout_s: u64,
1046
}
1047
1048
/// Configuration for an object store backed by Azure Storage.
///
/// NOTE(review): this struct combines `deny_unknown_fields` with a
/// `#[serde(flatten)]` field; the serde documentation lists that combination
/// as unsupported — confirm unknown keys are rejected as intended.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1049
#[serde(deny_unknown_fields)]
1050
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1051
pub struct ExperimentalAzureSpec {
1052
    /// The Azure Storage account name.
1053
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1054
    pub account_name: String,
1055
1056
    /// The container name to use as the backend.
1057
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1058
    pub container: String,
1059
1060
    /// Common retry and upload configuration. Flattened, so its keys appear
    /// at the same level as the other fields.
1061
    #[serde(flatten)]
1062
    pub common: CommonObjectSpec,
1063
1064
    /// Connection timeout in milliseconds.
1065
    /// Default: 3000
    ///
    /// NOTE(review): the `_s` suffix and the duration converter suggest this
    /// value is in seconds, which conflicts with "milliseconds" and a default
    /// of 3000 — confirm the unit against the consuming store.
1066
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1067
    pub connection_timeout_s: u64,
1068
1069
    /// Read timeout in milliseconds.
1070
    /// Default: 3000
    ///
    /// NOTE(review): same unit question as `connection_timeout_s` — the field
    /// name suggests seconds; confirm.
1071
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1072
    pub read_timeout_s: u64,
1073
}
1074
1075
/// Settings shared by the cloud object store specs, which embed this struct
/// through `#[serde(flatten)]`.
///
/// Every field has `serde(default)`, so all of them may be omitted. This
/// struct does not set `deny_unknown_fields` itself.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
1076
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1077
pub struct CommonObjectSpec {
1078
    /// If you wish to prefix the location in the bucket. If None, no prefix will be used.
    /// Unlike most string fields here, this one has no shell-expanding
    /// `deserialize_with` converter attached.
1079
    #[serde(default)]
1080
    pub key_prefix: Option<String>,
1081
1082
    /// Retry configuration to use when a network request fails.
1083
    #[serde(default)]
1084
    pub retry: Retry,
1085
1086
    /// If the number of seconds since the `last_modified` time of the object
1087
    /// is greater than this value, the object will not be considered
1088
    /// "existing". This allows for external tools to delete objects that
1089
    /// have not been uploaded in a long time. If a client receives a `NotFound`
1090
    /// the client should re-upload the object.
1091
    ///
1092
    /// There should be sufficient buffer time between how long the expiration
1093
    /// configuration of the external tool is and this value. Keeping items
1094
    /// around for a few days is generally a good idea.
1095
    ///
1096
    /// Default: 0. Zero means never consider an object expired.
1097
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1098
    pub consider_expired_after_s: u32,
1099
1100
    /// The maximum buffer size to retain in case of a retryable error
1101
    /// during upload. Setting this to zero will disable upload buffering;
1102
    /// this means that in the event of a failure during upload, the entire
1103
    /// upload will be aborted and the client will likely receive an error.
1104
    ///
1105
    /// Default: 5MB.
1106
    #[serde(
1107
        default,
1108
        deserialize_with = "convert_optional_data_size_with_shellexpand"
1109
    )]
1110
    pub max_retry_buffer_per_request: Option<usize>,
1111
1112
    /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`.
1113
    ///
1114
    /// Default: 10.
1115
    ///
1116
    #[serde(
1117
        default,
1118
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1119
    )]
1120
    pub multipart_max_concurrent_uploads: Option<usize>,
1121
1122
    /// Allow unencrypted HTTP connections. Only use this for local testing.
1123
    ///
1124
    /// Default: false
1125
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1126
    pub insecure_allow_http: bool,
1127
1128
    /// Disable http/2 connections and only use http/1.1. Default client
1129
    /// configuration will have http/1.1 and http/2 enabled for connection
1130
    /// schemes. Http/2 should be disabled if environments have poor support
1131
    /// or performance related to http/2. Safe to keep default unless
1132
    /// underlying network environment, S3, or GCS API servers specify otherwise.
1133
    ///
1134
    /// Default: false
1135
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1136
    pub disable_http2: bool,
1137
}
1138
1139
/// The kind of upstream store a `GrpcSpec` talks to.
/// Serialized in snake_case: `cas` or `ac`.
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
1140
#[serde(rename_all = "snake_case")]
1141
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1142
pub enum StoreType {
1143
    /// The store is content addressable storage.
1144
    Cas,
1145
    /// The store is an action cache.
1146
    Ac,
1147
}
1148
1149
/// TLS settings used by a client when connecting to a remote endpoint.
///
/// Every field is optional. This struct does not set `deny_unknown_fields`,
/// so unrecognized keys are not rejected here.
#[derive(Serialize, Deserialize, Debug, Clone)]
1150
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1151
pub struct ClientTlsConfig {
1152
    /// Path to the certificate authority to use to validate the remote.
1153
    ///
1154
    /// Default: None
1155
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1156
    pub ca_file: Option<String>,
1157
1158
    /// Path to the certificate file for client authentication.
1159
    ///
1160
    /// Default: None
1161
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1162
    pub cert_file: Option<String>,
1163
1164
    /// Path to the private key file for client authentication.
1165
    ///
1166
    /// Default: None
1167
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1168
    pub key_file: Option<String>,
1169
1170
    /// If set the client will use the native roots for TLS connections.
1171
    ///
1172
    /// Default: false
    /// (the field deserializes to `None` when omitted; `None` is presumably
    /// treated as false by the consumer — confirm)
1173
    #[serde(default)]
1174
    pub use_native_roots: Option<bool>,
1175
}
1176
1177
/// A single upstream gRPC endpoint and its connection settings.
///
/// Only `address` is required. The timeout and keepalive fields deserialize
/// to 0 when omitted, which each field documents as "use the built-in
/// default".
#[derive(Serialize, Deserialize, Debug, Clone)]
1178
#[serde(deny_unknown_fields)]
1179
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1180
pub struct GrpcEndpoint {
1181
    /// The endpoint address (i.e. grpc(s)://example.com:443).
1182
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
1183
    pub address: String,
1184
    /// The TLS configuration to use to connect to the endpoint (if grpcs).
    /// May be omitted, in which case it deserializes to `None`.
1185
    pub tls_config: Option<ClientTlsConfig>,
1186
    /// The maximum concurrency to allow on this endpoint.
    ///
    /// Default: None
1187
    #[serde(
1188
        default,
1189
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1190
    )]
1191
    pub concurrency_limit: Option<usize>,
1192
1193
    /// Timeout for establishing a TCP connection to the endpoint (seconds).
1194
    /// If not set or 0, defaults to 30 seconds.
1195
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1196
    pub connect_timeout_s: u64,
1197
1198
    /// TCP keepalive interval (seconds). Sends TCP keepalive probes at this
1199
    /// interval to detect dead connections at the OS level.
1200
    /// If not set or 0, defaults to 30 seconds.
1201
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1202
    pub tcp_keepalive_s: u64,
1203
1204
    /// HTTP/2 keepalive interval (seconds). Sends HTTP/2 PING frames at this
1205
    /// interval to detect dead connections at the application level.
1206
    /// If not set or 0, defaults to 30 seconds.
1207
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1208
    pub http2_keepalive_interval_s: u64,
1209
1210
    /// HTTP/2 keepalive timeout (seconds). If a PING response is not received
1211
    /// within this duration, the connection is considered dead.
1212
    /// If not set or 0, defaults to 20 seconds.
1213
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1214
    pub http2_keepalive_timeout_s: u64,
1215
}
1216
1217
/// Configuration for a store that proxies to upstream gRPC endpoints.
///
/// `endpoints` and `store_type` are required; everything else has
/// `serde(default)` and may be omitted.
#[derive(Serialize, Deserialize, Debug, Clone)]
1218
#[serde(deny_unknown_fields)]
1219
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1220
pub struct GrpcSpec {
1221
    /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this.
1222
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1223
    pub instance_name: String,
1224
1225
    /// The endpoints of the grpc connection.
1226
    pub endpoints: Vec<GrpcEndpoint>,
1227
1228
    /// The type of the upstream store, this ensures that the correct server calls are made.
1229
    pub store_type: StoreType,
1230
1231
    /// Retry configuration to use when a network request fails.
1232
    #[serde(default)]
1233
    pub retry: Retry,
1234
1235
    /// Limit the number of simultaneous upstream requests to this many. A
1236
    /// value of zero is treated as unlimited. If the limit is reached the
1237
    /// request is queued.
1238
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1239
    pub max_concurrent_requests: usize,
1240
1241
    /// The number of connections to make to each specified endpoint to balance
1242
    /// the load over multiple TCP connections. Default 1.
    /// (`serde(default)` yields 0 when omitted; the documented default of 1
    /// is presumably applied by the consuming store — confirm.)
1243
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1244
    pub connections_per_endpoint: usize,
1245
1246
    /// Maximum time (seconds) allowed for a single RPC request (e.g. a
1247
    /// ByteStream.Write call) before it is cancelled.
1248
    ///
1249
    /// A value of 0 (the default) disables the per-RPC timeout. Dead
1250
    /// connections are still detected by the HTTP/2 and TCP keepalive
1251
    /// mechanisms configured on each endpoint.
1252
    ///
1253
    /// For large uploads (multi-GB), either leave this at 0 or set it
1254
    /// large enough to accommodate the full transfer time.
1255
    ///
1256
    /// Default: 0 (disabled)
1257
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
1258
    pub rpc_timeout_s: u64,
1259
1260
    /// Use legacy `ByteStream` resource name format, omitting the digest
1261
    /// function component from the path.
1262
    ///
1263
    /// Modern `NativeLink` generates resource names like:
1264
    ///   `{instance}/blobs/{digest_function}/{hash}/{size}`
1265
    ///
1266
    /// Older backends (e.g. Buildbarn pre-v0.3) expect the original format:
1267
    ///   `{instance}/blobs/{hash}/{size}`
1268
    ///
1269
    /// Set this to `true` when connecting to such backends to avoid
1270
    /// `InvalidArgument: Unsupported digest function` errors.
1271
    ///
1272
    /// Default: false
1273
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1274
    pub use_legacy_resource_names: bool,
1275
1276
    /// Static headers to attach to every outgoing gRPC request sent to this
1277
    /// store's upstream endpoints. Useful for fixed authentication tokens
1278
    /// (e.g. `{"authorization": "Bearer <token>"}`) and other static metadata.
    ///
    /// Default: (Empty Map)
1279
    #[serde(default)]
1280
    pub headers: HashMap<String, String>,
1281
1282
    /// Header names to forward from the incoming client request to every
1283
    /// outgoing upstream request. The header value is taken from the client
1284
    /// request that triggered this store operation. Use this to pass through
1285
    /// dynamic credentials such as JWT tokens sent by build clients.
1286
    ///
1287
    /// Example: `["authorization", "x-custom-token"]`
1288
    ///
1289
    /// `NativeLink` also automatically injects the current OpenTelemetry trace
1290
    /// context (`traceparent` / `tracestate`) into every outgoing request.
    ///
    /// Default: (Empty List)
1291
    #[serde(default)]
1292
    pub forward_headers: Vec<String>,
1293
}
1294
1295
/// The possible error codes that might occur on an upstream request.
///
/// The explicit discriminants follow the gRPC status code numbering; code 0
/// (`OK`) is not an error and has no variant here. No `rename_all` is set, so
/// values are written in configs using the variant name as-is (for example
/// `DeadlineExceeded`).
1296
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
1297
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1298
pub enum ErrorCode {
1299
    Cancelled = 1,
1300
    Unknown = 2,
1301
    InvalidArgument = 3,
1302
    DeadlineExceeded = 4,
1303
    NotFound = 5,
1304
    AlreadyExists = 6,
1305
    PermissionDenied = 7,
1306
    ResourceExhausted = 8,
1307
    FailedPrecondition = 9,
1308
    Aborted = 10,
1309
    OutOfRange = 11,
1310
    Unimplemented = 12,
1311
    Internal = 13,
1312
    Unavailable = 14,
1313
    DataLoss = 15,
1314
    Unauthenticated = 16,
1315
    // Note: This list is duplicated from nativelink-error/lib.rs; keep the
    // two lists in sync when adding or renumbering a code.
1316
}
1317
1318
/// Configuration for a store backed by Redis.
///
/// Only `addresses` is required. Numeric fields with `serde(default)`
/// deserialize to 0 when omitted; the `Default:` values quoted on each field
/// are presumably substituted for 0 by the consuming store — confirm.
/// This struct does not set `deny_unknown_fields`.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
1319
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1320
pub struct RedisSpec {
1321
    /// The hostname or IP address of the Redis server.
1322
    /// Ex: `["redis://username:password@redis-server-url:6380/99"]`
1323
    /// 99 Represents database ID, 6380 represents the port.
1324
    #[serde(deserialize_with = "convert_vec_string_with_shellexpand")]
1325
    pub addresses: Vec<String>,
1326
1327
    /// DEPRECATED: use `command_timeout_ms`
    ///
1328
    /// The response timeout for the Redis connection in seconds.
1329
    ///
1330
    /// Default: 10
1331
    #[serde(default)]
1332
    pub response_timeout_s: u64,
1333
1334
    /// DEPRECATED: use `connection_timeout_ms`
1335
    ///
1336
    /// The connection timeout for the Redis connection in seconds.
1337
    ///
1338
    /// Default: 10
1339
    #[serde(default)]
1340
    pub connection_timeout_s: u64,
1341
1342
    /// An optional and experimental Redis channel to publish write events to.
1343
    ///
1344
    /// If set, every time a write operation is made to a Redis node
1345
    /// then an event will be published to a Redis channel with the given name.
1346
    /// If unset, the writes will still be made,
1347
    /// but the write events will not be published.
1348
    ///
1349
    /// Default: None (No Channel)
1350
    #[serde(default)]
1351
    pub experimental_pub_sub_channel: Option<String>,
1352
1353
    /// An optional prefix to prepend to all keys in this store.
1354
    ///
1355
    /// Setting this value can make it convenient to query or
1356
    /// organize your data according to the shared prefix.
1357
    ///
1358
    /// Default: (Empty String / No Prefix)
1359
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1360
    pub key_prefix: String,
1361
1362
    /// Set the mode Redis is operating in.
1363
    ///
1364
    /// Available options are "cluster" for
1365
    /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/),
1366
    /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/),
1367
    /// or "standard" if Redis is operating in neither cluster nor sentinel mode.
1368
    ///
1369
    /// Default: standard
1370
    #[serde(default)]
1371
    pub mode: RedisMode,
1372
1373
    /// DEPRECATED: redis-rs does not use this value.
1374
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1375
    pub broadcast_channel_capacity: usize,
1376
1377
    /// The amount of time in milliseconds until the redis store considers the
1378
    /// command to be timed out. This will trigger a retry of the command and
1379
    /// potentially a reconnection to the redis server.
1380
    ///
1381
    /// Default: 10000 (10 seconds)
1382
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1383
    pub command_timeout_ms: u64,
1384
1385
    /// The amount of time in milliseconds until the redis store considers the
1386
    /// connection to be unresponsive. This will trigger a reconnection to the
1387
    /// redis server.
1388
    ///
1389
    /// Default: 3000 (3 seconds)
1390
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1391
    pub connection_timeout_ms: u64,
1392
1393
    /// Per-call ceiling for the `check_health` PING in milliseconds.
1394
    ///
1395
    /// Default: 4000 (4 seconds)
1396
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1397
    pub health_check_timeout_ms: u64,
1398
1399
    /// The amount of data to read from the redis server at a time.
1400
    /// This is used to limit the amount of memory used when reading
1401
    /// large objects from the redis server as well as limiting the
1402
    /// amount of time a single read operation can take.
1403
    ///
1404
    /// IMPORTANT: If this value is too high, the `command_timeout_ms`
1405
    /// might be triggered if the latency or throughput to the redis
1406
    /// server is too low.
1407
    ///
1408
    /// Default: 64KiB
1409
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1410
    pub read_chunk_size: usize,
1411
1412
    /// The number of connections to keep open to the redis server(s).
1413
    ///
1414
    /// Default: 3
1415
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1416
    pub connection_pool_size: usize,
1417
1418
    /// The maximum number of upload chunks to allow per update.
1419
    /// This is used to limit the amount of memory used when uploading
1420
    /// large objects to the redis server. A good rule of thumb is to
1421
    /// think of the data as:
1422
    /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THEORETICAL_MAX_CONCURRENT_UPLOADS`
1423
    /// (note: it is a good idea to divide `AVAIL_MEMORY` by ~10 to account for other memory usage)
1424
    ///
1425
    /// Default: 10
1426
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1427
    pub max_chunk_uploads_per_update: usize,
1428
1429
    /// The COUNT value passed when scanning keys in Redis.
1430
    /// This is used to hint the amount of work that should be done per response.
1431
    ///
1432
    /// Default: 10000
1433
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1434
    pub scan_count: usize,
1435
1436
    /// Retry configuration to use when a network request fails.
1437
    /// See the `Retry` struct for more information.
1438
    ///
1439
    /// ```txt
1440
    /// Default: Retry {
1441
    ///   max_retries: 0, /* unlimited */
1442
    ///   delay: 0.1, /* 100ms */
1443
    ///   jitter: 0.5, /* 50% */
1444
    ///   retry_on_errors: None, /* not used in redis store */
1445
    /// }
1446
    /// ```
    ///
    /// NOTE(review): `Retry::max_retries` documents zero as "attempt 1 time,
    /// but not retry", which conflicts with "unlimited" above — confirm which
    /// meaning the redis store applies.
1447
    #[serde(default)]
1448
    pub retry: Retry,
1449
1450
    /// Maximum number of permitted actions to the Redis store at any one time.
1451
    /// This stops problems with timeouts due to many, many inflight actions.
1452
    /// Default: 500
1453
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1454
    pub max_client_permits: usize,
1455
1456
    /// Maximum number of items returned per cursor for the search indexes.
1457
    /// May reduce thundering herd issues with worker provisioner at higher node counts.
1458
    /// Default: 1500
1459
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1460
    pub max_count_per_cursor: u64,
1461
}
1462
1463
/// The mode the Redis deployment is operating in.
/// Serialized in snake_case: `cluster`, `sentinel` or `standard`.
#[derive(Debug, Default, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
1464
#[serde(rename_all = "snake_case")]
1465
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1466
pub enum RedisMode {
1467
    /// Redis is running in cluster mode.
    Cluster,
1468
    /// Redis is running in sentinel mode.
    Sentinel,
1469
    /// Redis is running in neither cluster nor sentinel mode. This is the
    /// default.
    #[default]
1470
    Standard,
1471
}
1472
1473
/// Configuration for the noop store. It has no settings, and because
/// `deny_unknown_fields` is not set, any keys supplied are ignored rather
/// than rejected.
#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)]
1474
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1475
pub struct NoopSpec {}
1476
1477
/// Retry configuration. This configuration is exponential and each iteration
1478
/// a jitter as a percentage is applied of the calculated delay. For example:
1479
/// ```haskell
1480
/// Retry{
1481
///   max_retries: 7,
1482
///   delay: 0.1,
1483
///   jitter: 0.5,
1484
/// }
1485
/// ```
1486
/// will result in:
1487
/// Attempt - Delay
1488
/// 1         0ms
1489
/// 2         75ms - 125ms
1490
/// 3         150ms - 250ms
1491
/// 4         300ms - 500ms
1492
/// 5         600ms - 1s
1493
/// 6         1.2s - 2s
1494
/// 7         2.4s - 4s
1495
/// 8         4.8s - 8s
1496
/// Remember that to get total results is additive, meaning the above results
1497
/// would mean a single request would have a total delay of 9.525s - 15.875s.
1498
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
1499
#[serde(deny_unknown_fields)]
1500
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1501
pub struct Retry {
1502
    /// Maximum number of retries until retrying stops.
1503
    /// Setting this to zero will always attempt 1 time, but not retry.
1504
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1505
    pub max_retries: usize,
1506
1507
    /// Delay in seconds for exponential back off.
1508
    #[serde(default)]
1509
    pub delay: f32,
1510
1511
    /// Amount of jitter to add as a percentage in decimal form. This will
1512
    /// change the formula like:
1513
    /// ```haskell
1514
    /// random(
1515
    ///    (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)),
1516
    ///    (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)),
1517
    /// )
1518
    /// ```
1519
    #[serde(default)]
1520
    pub jitter: f32,
1521
1522
    /// A list of error codes to retry on, if this is not set then the default
1523
    /// error codes to retry on are used.  These default codes are the most
1524
    /// likely to be non-permanent.
1525
    ///  - `Unknown`
1526
    ///  - `Cancelled`
1527
    ///  - `DeadlineExceeded`
1528
    ///  - `ResourceExhausted`
1529
    ///  - `Aborted`
1530
    ///  - `Internal`
1531
    ///  - `Unavailable`
1532
    ///  - `DataLoss`
1533
    #[serde(default)]
1534
    pub retry_on_errors: Option<Vec<ErrorCode>>,
1535
}
1536
1537
/// Configuration for `ExperimentalMongoDB` store.
1538
#[derive(Serialize, Deserialize, Debug, Clone)]
1539
#[serde(deny_unknown_fields)]
1540
#[cfg_attr(feature = "dev-schema", derive(JsonSchema))]
1541
pub struct ExperimentalMongoSpec {
1542
    /// `ExperimentalMongoDB` connection string.
1543
    /// Example: <mongodb://localhost:27017> or <mongodb+srv://cluster.mongodb.net>
1544
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
1545
    pub connection_string: String,
1546
1547
    /// The database name to use.
1548
    /// Default: "nativelink"
1549
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1550
    pub database: String,
1551
1552
    /// The collection name for CAS data.
1553
    /// Default: "cas"
1554
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1555
    pub cas_collection: String,
1556
1557
    /// The collection name for scheduler data.
1558
    /// Default: "scheduler"
1559
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
1560
    pub scheduler_collection: String,
1561
1562
    /// Prefix to prepend to all keys stored in `MongoDB`.
1563
    /// Default: ""
1564
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1565
    pub key_prefix: Option<String>,
1566
1567
    /// The maximum amount of data to read from `MongoDB` in a single chunk (in bytes).
1568
    /// Default: 65536 (64KB)
1569
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
1570
    pub read_chunk_size: usize,
1571
1572
    /// Deprecated, unused
1573
    /// Maximum number of concurrent uploads allowed.
1574
    /// Default: 10
1575
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1576
    pub max_concurrent_uploads: usize,
1577
1578
    /// Connection timeout in milliseconds.
1579
    /// Default: 3000
1580
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1581
    pub connection_timeout_ms: u64,
1582
1583
    /// Command timeout in milliseconds.
1584
    /// Default: 10000
1585
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1586
    pub command_timeout_ms: u64,
1587
1588
    /// Enable `MongoDB` change streams for real-time updates.
1589
    /// Required for scheduler subscriptions.
1590
    /// Default: false
1591
    #[serde(default, deserialize_with = "convert_boolean_with_shellexpand")]
1592
    pub enable_change_streams: bool,
1593
1594
    /// Write concern 'w' parameter.
1595
    /// Can be a number (e.g., 1) or string (e.g., "majority").
1596
    /// Default: None (uses `MongoDB` default)
1597
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
1598
    pub write_concern_w: Option<String>,
1599
1600
    /// Write concern 'j' parameter (journal acknowledgment).
1601
    /// Default: None (uses `MongoDB` default)
1602
    #[serde(default)]
1603
    pub write_concern_j: Option<bool>,
1604
1605
    /// Write concern timeout in milliseconds.
1606
    /// Default: None (uses `MongoDB` default)
1607
    #[serde(
1608
        default,
1609
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1610
    )]
1611
    pub write_concern_timeout_ms: Option<u32>,
1612
1613
    /// Limits the number of requests at any one time
1614
    /// Default: Unlimited
1615
    #[serde(
1616
        default,
1617
        deserialize_with = "convert_optional_numeric_with_shellexpand"
1618
    )]
1619
    pub max_requests: Option<usize>,
1620
}
1621
1622
impl Retry {
1623
15
    pub fn make_jitter_fn(&self) -> Arc<dyn Fn(Duration) -> Duration + Send + Sync> {
1624
15
        if self.jitter == 0f32 {
1625
15
            Arc::new(move |delay: Duration| delay)
1626
        } else {
1627
0
            let local_jitter = self.jitter;
1628
0
            Arc::new(move |delay: Duration| {
1629
0
                delay.mul_f32(local_jitter.mul_add(rand::rng().random::<f32>() - 0.5, 1.))
1630
0
            })
1631
        }
1632
15
    }
1633
}