Coverage Report

Created: 2025-05-30 16:37

/build/source/nativelink-config/src/stores.rs
Line | Count | Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use serde::{Deserialize, Serialize};
16
17
use crate::serde_utils::{
18
    convert_data_size_with_shellexpand, convert_duration_with_shellexpand,
19
    convert_numeric_with_shellexpand, convert_optional_string_with_shellexpand,
20
    convert_string_with_shellexpand, convert_vec_string_with_shellexpand,
21
};
22
23
/// Name of the store. This type will be used when referencing a store
24
    /// by its key in the `CasConfig::stores` map.
25
pub type StoreRefName = String;
26
27
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
28
#[serde(rename_all = "snake_case")]
29
pub enum ConfigDigestHashFunction {
30
    /// Use the sha256 hash function.
31
    /// <https://en.wikipedia.org/wiki/SHA-2>
32
    Sha256,
33
34
    /// Use the blake3 hash function.
35
    /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)>
36
    Blake3,
37
}
38
39
#[derive(Serialize, Deserialize, Debug, Clone)]
40
#[serde(rename_all = "snake_case")]
41
pub enum StoreSpec {
42
    /// Memory store will store all data in a hashmap in memory.
43
    ///
44
    /// **Example JSON Config:**
45
    /// ```json
46
    /// "memory": {
47
    ///     "eviction_policy": {
48
    ///       // 10mb.
49
    ///       "max_bytes": 10000000,
50
    ///     }
51
    ///   }
53
    /// ```
54
    ///
55
    Memory(MemorySpec),
56
57
    /// A generic blob store that will store files on the cloud
58
    /// provider. This configuration will never delete files, so you are
59
    /// responsible for purging old files in other ways.
60
    /// It supports the following backends:
61
    ///
62
    /// 1. **Amazon S3:**
63
    ///    S3 store will use Amazon's S3 service as a backend to store
64
    ///    the files. This configuration can be used to share files
65
    ///    across multiple instances. Uses system certificates for TLS
66
    ///    verification via `rustls-platform-verifier`.
67
    ///
68
    ///   **Example JSON Config:**
69
    ///   ```json
70
    ///   "experimental_cloud_object_store": {
71
    ///     "provider": "aws",
72
    ///     "region": "eu-north-1",
73
    ///     "bucket": "crossplane-bucket-af79aeca9",
74
    ///     "key_prefix": "test-prefix-index/",
75
    ///     "retry": {
76
    ///       "max_retries": 6,
77
    ///       "delay": 0.3,
78
    ///       "jitter": 0.5
79
    ///     },
80
    ///     "multipart_max_concurrent_uploads": 10
81
    ///   }
82
    ///   ```
83
    ///
84
    /// 2. **Google Cloud Storage:**
85
    ///    GCS store uses Google's GCS service as a backend to store
86
    ///    the files. This configuration can be used to share files
87
    ///    across multiple instances.
88
    ///
89
    ///   **Example JSON Config:**
90
    ///   ```json
91
    ///   "experimental_cloud_object_store": {
92
    ///     "provider": "gcs",
93
    ///     "bucket": "test-bucket",
94
    ///     "key_prefix": "test-prefix-index/",
95
    ///     "retry": {
96
    ///       "max_retries": 6,
97
    ///       "delay": 0.3,
98
    ///       "jitter": 0.5
99
    ///     },
100
    ///     "multipart_max_concurrent_uploads": 10
101
    ///   }
102
    ///   ```
103
    ///
104
    ExperimentalCloudObjectStore(ExperimentalCloudObjectSpec),
105
106
    /// Verify store is used to apply verifications to an underlying
107
    /// store implementation. It is strongly encouraged to validate
108
    /// as much data as you can before accepting data from a client,
109
    /// failing to do so may cause the data in the store to be
110
    /// populated with invalid data causing all kinds of problems.
111
    ///
112
    /// The suggested configuration is to have the CAS validate the
113
    /// hash and size and the AC validate nothing.
114
    ///
115
    /// **Example JSON Config:**
116
    /// ```json
117
    /// "verify": {
118
    ///   "memory": {
119
    ///     "eviction_policy": {
120
    ///       "max_bytes": 500000000 // 500mb.
121
    ///     }
122
    ///   },
123
    ///   "verify_size": true,
124
    ///   "hash_verification_function": "sha256"
125
    /// }
126
    /// ```
127
    ///
128
    Verify(Box<VerifySpec>),
129
130
    /// Completeness checking store verifies if the
131
    /// output files & folders exist in the CAS before forwarding
132
    /// the request to the underlying store.
133
    /// Note: This store should only be used on AC stores.
134
    ///
135
    /// **Example JSON Config:**
136
    /// ```json
137
    /// "completeness_checking": {
138
    ///     "backend": {
139
    ///       "filesystem": {
140
    ///         "content_path": "~/.cache/nativelink/content_path-ac",
141
    ///         "temp_path": "~/.cache/nativelink/tmp_path-ac",
142
    ///         "eviction_policy": {
143
    ///           // 500mb.
144
    ///           "max_bytes": 500000000,
145
    ///         }
146
    ///       }
147
    ///     },
148
    ///     "cas_store": {
149
    ///       "ref_store": {
150
    ///         "name": "CAS_MAIN_STORE"
151
    ///       }
152
    ///     }
153
    ///   }
154
    /// ```
155
    ///
156
    CompletenessChecking(Box<CompletenessCheckingSpec>),
157
158
    /// A compression store that will compress the data inbound and
159
    /// outbound. There will be a non-trivial cost to compress and
160
    /// decompress the data, but in many cases if the final store is
161
    /// a store that requires network transport and/or storage space
162
    /// is a concern it is often faster and more efficient to use this
163
    /// store before those stores.
164
    ///
165
    /// **Example JSON Config:**
166
    /// ```json
167
    /// "compression": {
168
    ///     "compression_algorithm": {
169
    ///       "lz4": {}
170
    ///     },
171
    ///     "backend": {
172
    ///       "filesystem": {
173
    ///         "content_path": "/tmp/nativelink/data/content_path-cas",
174
    ///         "temp_path": "/tmp/nativelink/data/tmp_path-cas",
175
    ///         "eviction_policy": {
176
    ///           // 2gb.
177
    ///           "max_bytes": 2000000000,
178
    ///         }
179
    ///       }
180
    ///     }
181
    ///   }
182
    /// ```
183
    ///
184
    Compression(Box<CompressionSpec>),
185
186
    /// A dedup store will take the inputs and run a rolling hash
187
    /// algorithm on them to slice the input into smaller parts then
188
    /// run a sha256 algorithm on the slice and if the object doesn't
189
    /// already exist, upload the slice to the `content_store` using
190
    /// a new digest of just the slice. Once all parts exist, an
191
    /// Action-Cache-like digest will be built and uploaded to the
192
    /// `index_store` which will contain a reference to each
193
    /// chunk/digest of the uploaded file. Downloading a request will
194
    /// first grab the index from the `index_store`, and forward the
195
    /// download content of each chunk as if it were one file.
196
    ///
197
    /// This store is exceptionally good when the following conditions
198
    /// are met:
199
    /// * Content is mostly the same (inserts, updates, deletes are ok)
200
    /// * Content is not compressed or encrypted
201
    /// * Uploading or downloading from `content_store` is the bottleneck.
202
    ///
203
    /// Note: This store pairs well when used with `CompressionSpec` as
204
    /// the `content_store`, but never put `DedupSpec` as the backend of
205
    /// `CompressionSpec` as it will negate all the gains.
206
    ///
207
    /// Note: When running `.has()` on this store, it will only check
208
    /// to see if the entry exists in the `index_store` and not check
209
    /// if the individual chunks exist in the `content_store`.
210
    ///
211
    /// **Example JSON Config:**
212
    /// ```json
213
    /// "dedup": {
214
    ///     "index_store": {
215
    ///       "memory_store": {
216
    ///         "max_size": 1000000000, // 1GB
217
    ///         "eviction_policy": "LeastRecentlyUsed"
218
    ///       }
219
    ///     },
220
    ///     "content_store": {
221
    ///       "compression": {
222
    ///         "compression_algorithm": {
223
    ///           "lz4": {}
224
    ///         },
225
    ///         "backend": {
226
    ///           "fast_slow": {
227
    ///             "fast": {
228
    ///               "memory_store": {
229
    ///                 "max_size": 500000000, // 500MB
230
    ///                 "eviction_policy": "LeastRecentlyUsed"
231
    ///               }
232
    ///             },
233
    ///             "slow": {
234
    ///               "filesystem": {
235
    ///                 "content_path": "/tmp/nativelink/data/content_path-content",
236
    ///                 "temp_path": "/tmp/nativelink/data/tmp_path-content",
237
    ///                 "eviction_policy": {
238
    ///                   "max_bytes": 2000000000 // 2gb.
239
    ///                 }
240
    ///               }
241
    ///             }
242
    ///           }
243
    ///         }
244
    ///       }
245
    ///     }
246
    ///   }
247
    /// ```
248
    ///
249
    Dedup(Box<DedupSpec>),
250
251
    /// Existence store will wrap around another store and cache calls
252
    /// to `.has()` so that subsequent `has_with_results` calls will be
253
    /// faster. This is useful for cases when you have a store that
254
    /// is slow to respond to `.has()` calls.
255
    /// Note: This store should only be used on CAS stores.
256
    ///
257
    /// **Example JSON Config:**
258
    /// ```json
259
    /// "existence_cache": {
260
    ///     "backend": {
261
    ///       "memory": {
262
    ///         "eviction_policy": {
263
    ///           // 500mb.
264
    ///           "max_bytes": 500000000,
265
    ///         }
266
    ///       }
267
    ///     },
268
    ///     "cas_store": {
269
    ///       "ref_store": {
270
    ///         "name": "CAS_MAIN_STORE"
271
    ///       }
272
    ///     }
273
    ///   }
274
    /// ```
275
    ///
276
    ExistenceCache(Box<ExistenceCacheSpec>),
277
278
    /// `FastSlow` store will first try to fetch the data from the `fast`
279
    /// store and then if it does not exist try the `slow` store.
280
    /// When the object does exist in the `slow` store, it will copy
281
    /// the data to the `fast` store while returning the data.
282
    /// This store should be thought of as a store that "buffers"
283
    /// the data to the `fast` store.
284
    /// On uploads it will mirror data to both `fast` and `slow` stores.
285
    ///
286
    /// WARNING: If you need data to always exist in the `slow` store
287
    /// for something like remote execution, be careful because this
288
    /// store will never check to see if the objects exist in the
289
    /// `slow` store if it exists in the `fast` store (ie: it assumes
290
    /// that if an object exists in the `fast` store it will exist in the `slow`
291
    /// store).
292
    ///
293
    /// **Example JSON Config:**
294
    /// ```json
295
    /// "fast_slow": {
296
    ///     "fast": {
297
    ///       "filesystem": {
298
    ///         "content_path": "/tmp/nativelink/data/content_path-index",
299
    ///         "temp_path": "/tmp/nativelink/data/tmp_path-index",
300
    ///         "eviction_policy": {
301
    ///           // 500mb.
302
    ///           "max_bytes": 500000000,
303
    ///         }
304
    ///       }
305
    ///     },
306
    ///     "slow": {
307
    ///       "filesystem": {
308
    ///         "content_path": "/tmp/nativelink/data/content_path-index",
309
    ///         "temp_path": "/tmp/nativelink/data/tmp_path-index",
310
    ///         "eviction_policy": {
311
    ///           // 500mb.
312
    ///           "max_bytes": 500000000,
313
    ///         }
314
    ///       }
315
    ///     }
316
    ///   }
317
    /// ```
318
    ///
319
    FastSlow(Box<FastSlowSpec>),
320
321
    /// Shards the data to multiple stores. This is useful for cases
322
    /// when you want to distribute the load across multiple stores.
323
    /// The digest hash is used to determine which store to send the
324
    /// data to.
325
    ///
326
    /// **Example JSON Config:**
327
    /// ```json
328
    /// "shard": {
329
    ///     "stores": [
330
    ///         "memory": {
331
    ///             "eviction_policy": {
332
    ///                 // 10mb.
333
    ///                 "max_bytes": 10000000
334
    ///             },
335
    ///             "weight": 1
336
    ///         }
337
    ///     ]
338
    /// }
339
    /// ```
340
    ///
341
    Shard(ShardSpec),
342
343
    /// Stores the data on the filesystem. This store is designed for
344
    /// local persistent storage. Restarts of this program should restore
345
    /// the previous state, meaning anything uploaded will be persistent
346
    /// as long as the filesystem integrity holds.
347
    ///
348
    /// **Example JSON Config:**
349
    /// ```json
350
    /// "filesystem": {
351
    ///     "content_path": "/tmp/nativelink/data-worker-test/content_path-cas",
352
    ///     "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas",
353
    ///     "eviction_policy": {
354
    ///       // 10gb.
355
    ///       "max_bytes": 10000000000,
356
    ///     }
357
    /// }
358
    /// ```
359
    ///
360
    Filesystem(FilesystemSpec),
361
362
    /// Store used to reference a store in the root store manager.
363
    /// This is useful for cases when you want to share a store in different
364
    /// nested stores. For example, you may want to share the same memory store
365
    /// used for the action cache, but use a `FastSlowSpec` and have the fast
366
    /// store also share the memory store for efficiency.
367
    ///
368
    /// **Example JSON Config:**
369
    /// ```json
370
    /// "ref_store": {
371
    ///     "name": "FS_CONTENT_STORE"
372
    /// }
373
    /// ```
374
    ///
375
    RefStore(RefSpec),
376
377
    /// Uses the size field of the digest to decide which store to send the
378
    /// data to. This is useful for cases when you'd like to put small objects
379
    /// in one store and large objects in another store. This should only be
380
    /// used if the size field is the real size of the content, in other
381
    /// words, don't use on AC (Action Cache) stores. Any store where you can
382
    /// safely use `VerifySpec.verify_size = true`, this store should be safe
383
    /// to use (ie: CAS stores).
384
    ///
385
    /// **Example JSON Config:**
386
    /// ```json
387
    /// "size_partitioning": {
388
    ///     "size": 134217728, // 128mib.
389
    ///     "lower_store": {
390
    ///       "memory": {
391
    ///         "eviction_policy": {
392
    ///           "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100000000}"
393
    ///         }
394
    ///       }
395
    ///     },
396
    ///     "upper_store": {
397
    ///       // This store discards data larger than 128mib.
398
    ///       "noop": {}
399
    ///     }
400
    ///   }
401
    /// ```
402
    ///
403
    SizePartitioning(Box<SizePartitioningSpec>),
404
405
    /// This store will pass through calls to another GRPC store. This store
406
    /// is not designed to be used as a sub-store of another store, but it
407
    /// does satisfy the interface and will likely work.
408
    ///
409
    /// One major GOTCHA is that some stores use a special function on this
410
    /// store to get the size of the underlying object, which is only reliable
411
    /// when this store is serving a CAS store, not an AC store. If using
412
    /// this store directly, without it being a child of another store, there are no
413
    /// side effects and it is the most efficient way to use it.
414
    ///
415
    /// **Example JSON Config:**
416
    /// ```json
417
    /// "grpc": {
418
    ///     "instance_name": "main",
419
    ///     "endpoints": [
420
    ///       {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"}
421
    ///     ],
422
    ///     "store_type": "ac"
423
    ///   }
424
    /// ```
425
    ///
426
    Grpc(GrpcSpec),
427
428
    /// Stores data in any store compatible with the Redis API.
429
    ///
430
    /// Pairs well with `SizePartitioning` and/or `FastSlow` stores.
431
    /// Ideal for small objects, as most hosted Redis services have a
432
    /// maximum upload size of between 256MB and 512MB.
433
    ///
434
    /// **Example JSON Config:**
435
    /// ```json
436
    /// "redis_store": {
437
    ///     "addresses": [
438
    ///         "redis://127.0.0.1:6379/",
439
    ///     ]
440
    /// }
441
    /// ```
442
    ///
443
    RedisStore(RedisSpec),
444
445
    /// Noop store is a store that sends streams into the void and all data
446
    /// retrieval will return 404 (`NotFound`). This can be useful for cases
447
    /// where you may need to partition your data and part of your data needs
448
    /// to be discarded.
449
    ///
450
    /// **Example JSON Config:**
451
    /// ```json
452
    /// "noop": {}
453
    /// ```
454
    ///
455
    Noop(NoopSpec),
456
}
457
458
/// Configuration for an individual shard of the store.
459
#[derive(Serialize, Deserialize, Debug, Clone)]
460
#[serde(deny_unknown_fields)]
461
pub struct ShardConfig {
462
    /// Store to shard the data to.
463
    pub store: StoreSpec,
464
465
    /// The weight of the store. This is used to determine how much data
466
    /// should be sent to the store. The actual percentage is the individual
467
    /// store's weight divided by the sum of all the stores' weights
    /// (see the example on `ShardSpec` below).
468
    ///
469
    /// Default: 1
470
    pub weight: Option<u32>,
471
}
472
473
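/// Configuration for sharding data across multiple stores.
///
/// An illustrative sketch derived from the `ShardSpec`/`ShardConfig` fields
/// (the memory stores and byte values are placeholders, not recommendations).
/// With weights 1 and 3, the first shard would receive roughly
/// `1 / (1 + 3) = 25%` of objects and the second roughly `75%`:
/// ```json
/// "shard": {
///     "stores": [
///         {
///             "store": {
///                 "memory": {
///                     "eviction_policy": {
///                         "max_bytes": 10000000
///                     }
///                 }
///             },
///             "weight": 1
///         },
///         {
///             "store": {
///                 "memory": {
///                     "eviction_policy": {
///                         "max_bytes": 10000000
///                     }
///                 }
///             },
///             "weight": 3
///         }
///     ]
/// }
/// ```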
#[derive(Serialize, Deserialize, Debug, Clone)]
474
#[serde(deny_unknown_fields)]
475
pub struct ShardSpec {
476
    /// Stores to shard the data to.
477
    pub stores: Vec<ShardConfig>,
478
}
479
480
#[derive(Serialize, Deserialize, Debug, Clone)]
481
#[serde(deny_unknown_fields)]
482
pub struct SizePartitioningSpec {
483
    /// Size to partition the data on.
484
    #[serde(deserialize_with = "convert_data_size_with_shellexpand")]
485
    pub size: u64,
486
487
    /// Store to send data when object is < (less than) size.
488
    pub lower_store: StoreSpec,
489
490
    /// Store to send data when object is >= (greater than or equal to) size.
491
    pub upper_store: StoreSpec,
492
}
493
494
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
495
#[serde(deny_unknown_fields)]
496
pub struct RefSpec {
497
    /// Name of the store under the root "stores" config object.
498
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
499
    pub name: String,
500
}
501
502
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
503
#[serde(deny_unknown_fields)]
504
pub struct FilesystemSpec {
505
    /// Path on the system where to store the actual content. This is where
506
    /// the bulk of the data will be placed.
507
    /// On service bootup this folder will be scanned and all files will be
508
    /// added to the cache. In the event one of the files doesn't match the
509
    /// criteria, the file will be deleted.
510
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
511
    pub content_path: String,
512
513
    /// A temporary location of where files that are being uploaded or
514
    /// deleted will be placed while the content cannot be guaranteed to be
515
    /// accurate. This location must be on the same block device as
516
    /// `content_path` so atomic moves can happen (ie: move without copy).
517
    /// All files in this folder will be deleted on every startup.
518
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
519
    pub temp_path: String,
520
521
    /// Buffer size to use when reading files. Generally this should be left
522
    /// to the default value except for testing.
523
    /// Default: 32k.
524
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
525
    pub read_buffer_size: u32,
526
527
    /// Policy used to evict items out of the store. Failure to set this
528
    /// value will cause items to never be removed from the store causing
529
    /// infinite memory usage.
530
    pub eviction_policy: Option<EvictionPolicy>,
531
532
    /// The block size of the filesystem on the running machine. This value
533
    /// is used to determine an entry's actual size on disk: on a filesystem
534
    /// with a 4KB block size, a 1B file actually consumes 4KB of disk space.
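    /// (For example, a 10,000-byte file on a 4KB-block filesystem would be
    /// counted as 12,288 bytes, i.e. three 4KB blocks.)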
535
    /// Default: 4096
536
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
537
    pub block_size: u64,
538
}
539
540
#[derive(Serialize, Deserialize, Debug, Clone)]
541
#[serde(deny_unknown_fields)]
542
pub struct FastSlowSpec {
543
    /// Fast store that will be tried first before reaching
544
    /// out to the `slow` store.
545
    pub fast: StoreSpec,
546
547
    /// If the object does not exist in the `fast` store it will try to
548
    /// get it from this store.
549
    pub slow: StoreSpec,
550
}
551
552
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)]
553
#[serde(deny_unknown_fields)]
554
pub struct MemorySpec {
555
    /// Policy used to evict items out of the store. Failure to set this
556
    /// value will cause items to never be removed from the store causing
557
    /// infinite memory usage.
558
    pub eviction_policy: Option<EvictionPolicy>,
559
}
560
561
#[derive(Serialize, Deserialize, Debug, Clone)]
562
#[serde(deny_unknown_fields)]
563
pub struct DedupSpec {
564
    /// Store used to store the index of each dedup slice. This store
565
    /// should generally be fast and small.
566
    pub index_store: StoreSpec,
567
568
    /// The store where the individual chunks will be uploaded. This
569
    /// store should generally be the slower & larger store.
570
    pub content_store: StoreSpec,
571
572
    /// Minimum size that a chunk will be when slicing up the content.
573
    /// Note: This setting can be increased to improve performance
574
    /// because this number of bytes is skipped (not examined) when
575
    /// deciding where to partition the data.
576
    ///
577
    /// Default: 65536 (64k)
578
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
579
    pub min_size: u32,
580
581
    /// A best-effort attempt will be made to keep the average size
582
    /// of the chunks to this number. It is not a guarantee, only a
583
    /// best effort.
584
    ///
585
    /// This value is also roughly the threshold used to determine
586
    /// if we should even attempt to dedup the entry or just forward
587
    /// it directly to the `content_store` without an index. The actual
588
    /// value will be about `normal_size * 1.3` due to implementation
589
    /// details.
590
    ///
591
    /// Default: 262144 (256k)
592
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
593
    pub normal_size: u32,
594
595
    /// Maximum size a chunk is allowed to be.
596
    ///
597
    /// Default: 524288 (512k)
598
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
599
    pub max_size: u32,
600
601
    /// Due to implementation details, we prefer to download
602
    /// the first chunks of the file so we can stream the content
603
    /// out and free up some of our buffers. This configuration
604
    /// will be used to restrict the number of concurrent chunk
605
    /// downloads at a time per `get()` request.
606
    ///
607
    /// This setting will also affect how much memory might be used
608
    /// per `get()` request. Estimated worst case memory per `get()`
609
    /// request is: `max_concurrent_fetch_per_get * max_size`.
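    /// As a rough illustration, with the defaults documented below that is
    /// `10 * 524288 bytes`, or about 5MB, per `get()` request.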
610
    ///
611
    /// Default: 10
612
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
613
    pub max_concurrent_fetch_per_get: u32,
614
}
615
616
#[derive(Serialize, Deserialize, Debug, Clone)]
617
#[serde(deny_unknown_fields)]
618
pub struct ExistenceCacheSpec {
619
    /// The underlying store to wrap around. All content will first flow
620
    /// through self before forwarding to backend. In the event there
621
    /// is an error detected in self, the connection to the backend
622
    /// will be terminated, and early termination should always cause
623
    /// updates to fail on the backend.
624
    pub backend: StoreSpec,
625
626
    /// Policy used to evict items out of the store. Failure to set this
627
    /// value will cause items to never be removed from the store causing
628
    /// infinite memory usage.
629
    pub eviction_policy: Option<EvictionPolicy>,
630
}
631
632
#[derive(Serialize, Deserialize, Debug, Clone)]
633
#[serde(deny_unknown_fields)]
634
pub struct VerifySpec {
635
    /// The underlying store to wrap around. All content will first flow
636
    /// through self before forwarding to backend. In the event there
637
    /// is an error detected in self, the connection to the backend
638
    /// will be terminated, and early termination should always cause
639
    /// updates to fail on the backend.
640
    pub backend: StoreSpec,
641
642
    /// If set, the store will verify the size of the data before accepting
643
    /// an upload of data.
644
    ///
645
    /// This should be set to false for AC, but true for CAS stores.
646
    #[serde(default)]
647
    pub verify_size: bool,
648
649
    /// If set, the data will be hashed and the key verified against the
650
    /// computed hash. The hash function is automatically determined based on
651
    /// the request and, if not specified there, the global default is used.
652
    ///
653
    /// This should be set to false for AC, but true for CAS stores.
654
    #[serde(default)]
655
    pub verify_hash: bool,
656
}
657
658
#[derive(Serialize, Deserialize, Debug, Clone)]
659
#[serde(deny_unknown_fields)]
660
pub struct CompletenessCheckingSpec {
661
    /// The underlying store that will have its results validated before sending to the client.
662
    pub backend: StoreSpec,
663
664
    /// When a request is made, the results are decoded and all output digests/files are verified
665
    /// to exist in this CAS store before returning success.
666
    pub cas_store: StoreSpec,
667
}
668
669
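/// Configuration for the LZ4 compression algorithm.
///
/// An illustrative sketch: the values below spell out the documented
/// defaults explicitly, and the `ref_store` backend is only a placeholder.
/// ```json
/// "compression": {
///     "compression_algorithm": {
///       "lz4": {
///         "block_size": 65536,
///         "max_decode_block_size": 65536
///       }
///     },
///     "backend": {
///       "ref_store": {
///         "name": "FS_CONTENT_STORE"
///       }
///     }
/// }
/// ```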
#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Eq, Clone, Copy)]
670
#[serde(deny_unknown_fields)]
671
pub struct Lz4Config {
672
    /// Size of the blocks to compress.
673
    /// Higher values require more RAM, but might yield slightly better
674
    /// compression ratios.
675
    ///
676
    /// Default: 65536 (64k).
677
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
678
    pub block_size: u32,
679
680
    /// Maximum size allowed to attempt to deserialize data into.
681
    /// This is needed because the `block_size` is embedded into the data
682
    /// so if there was a bad actor, they could upload an extremely large
683
    /// `block_size`'ed entry and we'd allocate a large amount of memory
684
    /// when retrieving the data. To prevent this from happening, we
685
    /// allow you to specify the maximum size that we'll attempt to deserialize.
686
    ///
687
    /// Default: value in `block_size`.
688
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
689
    pub max_decode_block_size: u32,
690
}
691
692
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Copy)]
693
#[serde(rename_all = "snake_case")]
694
pub enum CompressionAlgorithm {
695
    /// The LZ4 compression algorithm is extremely fast at compression and
696
    /// decompression; however, it does not achieve a very high compression
697
    /// ratio. In most cases build artifacts are highly compressible; however,
698
    /// lz4 is quite good at aborting early if the data is not deemed very
699
    /// compressible.
700
    ///
701
    /// see: <https://lz4.github.io/lz4/>
702
    Lz4(Lz4Config),
703
}
704
705
#[derive(Serialize, Deserialize, Debug, Clone)]
706
#[serde(deny_unknown_fields)]
707
pub struct CompressionSpec {
708
    /// The underlying store to wrap around. All content will first flow
709
    /// through self before forwarding to backend. In the event there
710
    /// is an error detected in self, the connection to the backend
711
    /// will be terminated, and early termination should always cause
712
    /// updates to fail on the backend.
713
    pub backend: StoreSpec,
714
715
    /// The compression algorithm to use.
716
    pub compression_algorithm: CompressionAlgorithm,
717
}
718
719
/// Eviction policy always works on LRU (Least Recently Used). Any time an entry
720
/// is touched it updates the timestamp. Inserts and updates will execute the
721
/// eviction policy removing any expired entries and/or the oldest entries
722
/// until the store size becomes smaller than `max_bytes`.
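///
/// An illustrative sketch using the fields below (the byte and time values
/// are placeholders, not recommendations):
/// ```json
/// "eviction_policy": {
///   // Start evicting at 10gb...
///   "max_bytes": 10000000000,
///   // ...and keep evicting until the store is down to 9gb.
///   "evict_bytes": 1000000000,
///   // Also evict anything not accessed for 30 days.
///   "max_seconds": 2592000,
///   // Never evict based on entry count.
///   "max_count": 0
/// }
/// ```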
723
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)]
724
#[serde(deny_unknown_fields)]
725
pub struct EvictionPolicy {
726
    /// Maximum number of bytes before eviction takes place.
727
    /// Default: 0. Zero means never evict based on size.
728
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
729
    pub max_bytes: usize,
730
731
    /// When eviction starts based on hitting `max_bytes`, continue until
732
    /// `max_bytes - evict_bytes` is met to create a low watermark.  This stops
733
    /// operations from thrashing when the store is close to the limit.
734
    /// Default: 0
735
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
736
    pub evict_bytes: usize,
737
738
    /// Maximum number of seconds for an entry to live since it was last
739
    /// accessed before it is evicted.
740
    /// Default: 0. Zero means never evict based on time.
741
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
742
    pub max_seconds: u32,
743
744
    /// Maximum number of entries in the store before an eviction takes place.
745
    /// Default: 0. Zero means never evict based on count.
746
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
747
    pub max_count: u64,
748
}
749
750
#[derive(Serialize, Deserialize, Debug, Clone)]
751
#[serde(tag = "provider", rename_all = "snake_case")]
752
pub enum ExperimentalCloudObjectSpec {
753
    Aws(ExperimentalAwsSpec),
754
    Gcs(ExperimentalGcsSpec),
755
}
756
757
impl Default for ExperimentalCloudObjectSpec {
758
0
    fn default() -> Self {
759
0
        Self::Aws(ExperimentalAwsSpec::default())
760
0
    }
761
}
762
763
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
764
#[serde(deny_unknown_fields)]
765
pub struct ExperimentalAwsSpec {
766
    /// S3 region. Usually us-east-1, us-west-2, af-south-1, etc.
767
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
768
    pub region: String,
769
770
    /// Bucket name to use as the backend.
771
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
772
    pub bucket: String,
773
774
    /// Common retry and upload configuration
775
    #[serde(flatten)]
776
    pub common: CommonObjectSpec,
777
}
778
779
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
780
#[serde(deny_unknown_fields)]
781
pub struct ExperimentalGcsSpec {
782
    /// Bucket name to use as the backend.
783
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
784
    pub bucket: String,
785
786
    /// Chunk size for resumable uploads.
787
    ///
788
    /// Default: 2MB
789
    pub resumable_chunk_size: Option<usize>,
790
791
    /// Common retry and upload configuration
792
    #[serde(flatten)]
793
    pub common: CommonObjectSpec,
794
}
795
796
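/// Common retry and upload configuration shared by the cloud object store
/// providers. These fields are flattened into the `aws`/`gcs` configs.
///
/// An illustrative sketch (the bucket, region, and numeric values are
/// placeholders, not recommendations):
/// ```json
/// "experimental_cloud_object_store": {
///   "provider": "aws",
///   "region": "us-east-1",
///   "bucket": "example-bucket",
///   "key_prefix": "cas/",
///   "retry": {
///     "max_retries": 6,
///     "delay": 0.3,
///     "jitter": 0.5
///   },
///   "consider_expired_after_s": 2592000,
///   "max_retry_buffer_per_request": 5242880,
///   "multipart_max_concurrent_uploads": 10,
///   "insecure_allow_http": false,
///   "disable_http2": false
/// }
/// ```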
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
797
pub struct CommonObjectSpec {
798
    /// An optional prefix to prepend to the object location in the bucket. If None, no prefix will be used.
799
    #[serde(default)]
800
    pub key_prefix: Option<String>,
801
802
    /// Retry configuration to use when a network request fails.
803
    #[serde(default)]
804
    pub retry: Retry,
805
806
    /// If the number of seconds since the `last_modified` time of the object
807
    /// is greater than this value, the object will not be considered
808
    /// "existing". This allows for external tools to delete objects that
809
    /// have not been uploaded in a long time. If a client receives a `NotFound`
810
    /// the client should re-upload the object.
811
    ///
812
    /// There should be a sufficient buffer between the expiration configured
813
    /// in the external tool and this value. Keeping items
814
    /// around for a few days is generally a good idea.
815
    ///
816
    /// Default: 0. Zero means never consider an object expired.
817
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
818
    pub consider_expired_after_s: u32,
819
820
    /// The maximum buffer size to retain in case of a retryable error
821
    /// during upload. Setting this to zero will disable upload buffering;
822
    /// this means that in the event of a failure during upload, the entire
823
    /// upload will be aborted and the client will likely receive an error.
824
    ///
825
    /// Default: 5MB.
826
    pub max_retry_buffer_per_request: Option<usize>,
827
828
    /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`.
829
    ///
830
    /// Default: 10.
831
    pub multipart_max_concurrent_uploads: Option<usize>,
832
833
    /// Allow unencrypted HTTP connections. Only use this for local testing.
834
    ///
835
    /// Default: false
836
    #[serde(default)]
837
    pub insecure_allow_http: bool,
838
839
    /// Disable HTTP/2 connections and only use HTTP/1.1. The default client
840
    /// configuration has both HTTP/1.1 and HTTP/2 enabled for connection
841
    /// schemes. HTTP/2 should be disabled in environments with poor HTTP/2
842
    /// support or performance. Safe to keep the default unless the
843
    /// underlying network environment, S3, or GCS API servers specify otherwise.
844
    ///
845
    /// Default: false
846
    #[serde(default)]
847
    pub disable_http2: bool,
848
}
849
850
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
851
#[serde(rename_all = "snake_case")]
852
pub enum StoreType {
853
    /// The store is content addressable storage.
854
    Cas,
855
    /// The store is an action cache.
856
    Ac,
857
}
858
859
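/// TLS configuration used when connecting to an endpoint (e.g. a `grpcs`
/// gRPC endpoint).
///
/// An illustrative sketch of a `GrpcEndpoint` using these fields (the
/// address and file paths are placeholders):
/// ```json
/// {
///   "address": "grpcs://cas.example.com:443",
///   "tls_config": {
///     "ca_file": "/etc/ssl/certs/ca.pem",
///     "cert_file": "/etc/nativelink/client.pem",
///     "key_file": "/etc/nativelink/client.key"
///   }
/// }
/// ```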
#[derive(Serialize, Deserialize, Debug, Clone)]
860
pub struct ClientTlsConfig {
861
    /// Path to the certificate authority to use to validate the remote.
862
    ///
863
    /// Default: None
864
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
865
    pub ca_file: Option<String>,
866
867
    /// Path to the certificate file for client authentication.
868
    ///
869
    /// Default: None
870
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
871
    pub cert_file: Option<String>,
872
873
    /// Path to the private key file for client authentication.
874
    ///
875
    /// Default: None
876
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
877
    pub key_file: Option<String>,
878
879
    /// If set, the client will use the native roots for TLS connections.
880
    ///
881
    /// Default: false
882
    #[serde(default)]
883
    pub use_native_roots: Option<bool>,
884
}
885
886
#[derive(Serialize, Deserialize, Debug, Clone)]
887
#[serde(deny_unknown_fields)]
888
pub struct GrpcEndpoint {
889
    /// The endpoint address (e.g. grpc(s)://example.com:443).
890
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
891
    pub address: String,
892
    /// The TLS configuration to use to connect to the endpoint (if grpcs).
893
    pub tls_config: Option<ClientTlsConfig>,
894
    /// The maximum concurrency to allow on this endpoint.
895
    pub concurrency_limit: Option<usize>,
896
}
897
898
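/// Configuration for a gRPC pass-through store.
///
/// An illustrative sketch using the fields below (the address and numeric
/// values are placeholders, not recommendations):
/// ```json
/// "grpc": {
///   "instance_name": "main",
///   "endpoints": [
///     {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051", "concurrency_limit": 64}
///   ],
///   "store_type": "cas",
///   "retry": {
///     "max_retries": 3,
///     "delay": 0.2,
///     "jitter": 0.5
///   },
///   "max_concurrent_requests": 256,
///   "connections_per_endpoint": 2
/// }
/// ```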
#[derive(Serialize, Deserialize, Debug, Clone)]
899
#[serde(deny_unknown_fields)]
900
pub struct GrpcSpec {
901
    /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this.
902
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
903
    pub instance_name: String,
904
905
    /// The endpoints of the grpc connection.
906
    pub endpoints: Vec<GrpcEndpoint>,
907
908
    /// The type of the upstream store; this ensures that the correct server calls are made.
909
    pub store_type: StoreType,
910
911
    /// Retry configuration to use when a network request fails.
912
    #[serde(default)]
913
    pub retry: Retry,
914
915
    /// Limit the number of simultaneous upstream requests to this many.  A
916
    /// value of zero is treated as unlimited.  If the limit is reached the
917
    /// request is queued.
918
    #[serde(default)]
919
    pub max_concurrent_requests: usize,
920
921
    /// The number of connections to make to each specified endpoint to balance
922
    /// the load over multiple TCP connections.  Default 1.
923
    #[serde(default)]
924
    pub connections_per_endpoint: usize,
925
}
926
927
/// The possible error codes that might occur on an upstream request.
928
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
929
pub enum ErrorCode {
930
    Cancelled = 1,
931
    Unknown = 2,
932
    InvalidArgument = 3,
933
    DeadlineExceeded = 4,
934
    NotFound = 5,
935
    AlreadyExists = 6,
936
    PermissionDenied = 7,
937
    ResourceExhausted = 8,
938
    FailedPrecondition = 9,
939
    Aborted = 10,
940
    OutOfRange = 11,
941
    Unimplemented = 12,
942
    Internal = 13,
943
    Unavailable = 14,
944
    DataLoss = 15,
945
    Unauthenticated = 16,
946
    // Note: This list is duplicated from nativelink-error/lib.rs.
947
}
948
949
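/// Configuration for a Redis-compatible store.
///
/// An illustrative sketch using a subset of the fields below (the address,
/// prefix, and sizes are placeholders, not recommendations):
/// ```json
/// "redis_store": {
///   "addresses": ["redis://127.0.0.1:6379/"],
///   "mode": "standard",
///   "key_prefix": "nativelink:",
///   "connection_pool_size": 3,
///   "read_chunk_size": 65536,
///   "retry": {
///     "max_retries": 6,
///     "delay": 0.3,
///     "jitter": 0.5
///   }
/// }
/// ```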
#[derive(Serialize, Deserialize, Debug, Clone)]
950
pub struct RedisSpec {
951
    /// The connection address(es) of the Redis server(s).
952
    /// Ex: `["redis://username:password@redis-server-url:6380/99"]`
953
    /// 99 represents the database ID and 6380 represents the port.
954
    #[serde(deserialize_with = "convert_vec_string_with_shellexpand")]
955
    pub addresses: Vec<String>,
956
957
    /// The response timeout for the Redis connection in seconds.
958
    ///
959
    /// Default: 10
960
    #[serde(default)]
961
    pub response_timeout_s: u64,
962
963
    /// The connection timeout for the Redis connection in seconds.
964
    ///
965
    /// Default: 10
966
    #[serde(default)]
967
    pub connection_timeout_s: u64,
968
969
    /// An optional and experimental Redis channel to publish write events to.
970
    ///
971
    /// If set, every time a write operation is made to a Redis node
972
    /// then an event will be published to a Redis channel with the given name.
973
    /// If unset, the writes will still be made,
974
    /// but the write events will not be published.
975
    ///
976
    /// Default: (Empty String / No Channel)
977
    #[serde(default)]
978
    pub experimental_pub_sub_channel: Option<String>,
979
980
    /// An optional prefix to prepend to all keys in this store.
981
    ///
982
    /// Setting this value can make it convenient to query or
983
    /// organize your data according to the shared prefix.
984
    ///
985
    /// Default: (Empty String / No Prefix)
986
    #[serde(default)]
987
    pub key_prefix: String,
988
989
    /// Set the mode Redis is operating in.
990
    ///
991
    /// Available options are "cluster" for
992
    /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/),
993
    /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/),
994
    /// or "standard" if Redis is operating in neither cluster nor sentinel mode.
995
    ///
996
    /// Default: standard
997
    #[serde(default)]
998
    pub mode: RedisMode,
999
1000
    /// When using pubsub interface, this is the maximum number of items to keep
1001
    /// queued up before dropping old items.
1002
    ///
1003
    /// Default: 4096
1004
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1005
    pub broadcast_channel_capacity: usize,
1006
1007
    /// The amount of time in milliseconds until the redis store considers the
1008
    /// command to be timed out. This will trigger a retry of the command and
1009
    /// potentially a reconnection to the redis server.
1010
    ///
1011
    /// Default: 10000 (10 seconds)
1012
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1013
    pub command_timeout_ms: u64,
1014
1015
    /// The amount of time in milliseconds until the redis store considers the
1016
    /// connection to be unresponsive. This will trigger a reconnection to the
1017
    /// redis server.
1018
    ///
1019
    /// Default: 3000 (3 seconds)
1020
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1021
    pub connection_timeout_ms: u64,
1022
1023
    /// The amount of data to read from the redis server at a time.
1024
    /// This is used to limit the amount of memory used when reading
1025
    /// large objects from the redis server as well as limiting the
1026
    /// amount of time a single read operation can take.
1027
    ///
1028
    /// IMPORTANT: If this value is too high, the `command_timeout_ms`
1029
    /// might be triggered if the latency or throughput to the redis
1030
    /// server is too low.
1031
    ///
1032
    /// Default: 64KiB
1033
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1034
    pub read_chunk_size: usize,
1035
1036
    /// The number of connections to keep open to the redis server(s).
1037
    ///
1038
    /// Default: 3
1039
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1040
    pub connection_pool_size: usize,
1041
1042
    /// The maximum number of upload chunks to allow per update.
1043
    /// This is used to limit the amount of memory used when uploading
1044
    /// large objects to the redis server. A good rule of thumb is to
1045
    /// think of the data as:
1046
    /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THEORETICAL_MAX_CONCURRENT_UPLOADS`
1047
    /// (note: it is a good idea to divide `AVAIL_MEMORY` by ~10 to account for other memory usage)
1048
    ///
1049
    /// Default: 10
1050
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1051
    pub max_chunk_uploads_per_update: usize,
1052
1053
    /// The COUNT value passed when scanning keys in Redis.
1054
    /// This is used to hint the amount of work that should be done per response.
1055
    ///
1056
    /// Default: 10000
1057
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1058
    pub scan_count: u32,
1059
1060
    /// Retry configuration to use when a network request fails.
1061
    /// See the `Retry` struct for more information.
1062
    ///
1063
    /// ```txt
1064
    /// Default: Retry {
1065
    ///   max_retries: 0, /* unlimited */
1066
    ///   delay: 0.1, /* 100ms */
1067
    ///   jitter: 0.5, /* 50% */
1068
    ///   retry_on_errors: None, /* not used in redis store */
1069
    /// }
1070
    /// ```
1071
    #[serde(default)]
1072
    pub retry: Retry,
1073
}
1074
1075
#[derive(Debug, Default, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
1076
#[serde(rename_all = "snake_case")]
1077
pub enum RedisMode {
1078
    Cluster,
1079
    Sentinel,
1080
    #[default]
1081
    Standard,
1082
}
1083
1084
#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)]
1085
pub struct NoopSpec {}
1086
1087
/// Retry configuration. The backoff is exponential and on each iteration
1088
/// a jitter, given as a percentage, is applied to the calculated delay. For example:
1089
/// ```haskell
1090
/// Retry{
1091
///   max_retries: 7,
1092
///   delay: 0.1,
1093
///   jitter: 0.5,
1094
/// }
1095
/// ```
1096
/// will result in:
1097
/// Attempt - Delay
1098
/// 1         0ms
1099
/// 2         75ms - 125ms
1100
/// 3         150ms - 250ms
1101
/// 4         300ms - 500ms
1102
/// 5         600ms - 1s
1103
/// 6         1.2s - 2s
1104
/// 7         2.4s - 4s
1105
/// 8         4.8s - 8s
1106
/// Remember that the delays are additive, meaning the above schedule
1107
/// would give a single request a total delay of 9.525s - 15.875s.
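///
/// An illustrative sketch of the same configuration as it would appear in a
/// store's JSON config (the error-code list is only an example selection):
/// ```json
/// "retry": {
///   "max_retries": 7,
///   "delay": 0.1,
///   "jitter": 0.5,
///   "retry_on_errors": ["Unavailable", "DeadlineExceeded"]
/// }
/// ```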
1108
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
1109
#[serde(deny_unknown_fields)]
1110
pub struct Retry {
1111
    /// Maximum number of retries until retrying stops.
1112
    /// Setting this to zero will always attempt the request once, but never retry.
1113
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1114
    pub max_retries: usize,
1115
1116
    /// Delay in seconds for exponential back off.
1117
    #[serde(default)]
1118
    pub delay: f32,
1119
1120
    /// Amount of jitter to add as a percentage in decimal form. This will
1121
    /// change the formula like:
1122
    /// ```haskell
1123
    /// random(
1124
    ///    (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)),
1125
    ///    (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)),
1126
    /// )
1127
    /// ```
1128
    #[serde(default)]
1129
    pub jitter: f32,
1130
1131
    /// A list of error codes to retry on, if this is not set then the default
1132
    /// error codes to retry on are used.  These default codes are the most
1133
    /// likely to be non-permanent.
1134
    ///  - `Unknown`
1135
    ///  - `Cancelled`
1136
    ///  - `DeadlineExceeded`
1137
    ///  - `ResourceExhausted`
1138
    ///  - `Aborted`
1139
    ///  - `Internal`
1140
    ///  - `Unavailable`
1141
    ///  - `DataLoss`
1142
    #[serde(default)]
1143
    pub retry_on_errors: Option<Vec<ErrorCode>>,
1144
}