// Copyright 2024 The NativeLink Authors. All rights reserved.
//
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    See LICENSE file for details
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use core::convert::AsRef;
use std::borrow::Cow;

use nativelink_error::{Error, ResultExt, error_if, make_input_err};

const ERROR_MSG: &str = concat!(
    "Expected resource_name to be of pattern ",
    "'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}' or ",
    "'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
);
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];
const DIGEST_FUNCTIONS: [&str; 9] = [
    "sha256",
    "sha1",
    "md5",
    "vso",
    "sha384",
    "sha512",
    "murmur3",
    "sha256tree",
    "blake3",
];
// Named constant to make the code easier to read when adding the slash size.
const SLASH_SIZE: usize = 1;

// Rules are as follows:
//
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
// if is_upload is false:
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//                                blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
//                                blobs/                        {digest_function/}{hash}/{size}
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}
// {instance_name}/               blobs/                                          {hash}/{size}{/optional_metadata}
// {instance_name}/               blobs/                                          {hash}/{size}
//                                compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
//                                compressed-blobs/{compressor}/                  {hash}/{size}
//                                blobs/                                          {hash}/{size}{/optional_metadata}
//                                blobs/                                          {hash}/{size}
//
// if is_upload is true:
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
//                 uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/blobs/                                          {hash}/{size}
//
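// For example (illustrative values, not taken from the spec), with
// is_upload == true the name
//   "main/uploads/{uuid}/compressed-blobs/zstd/sha256/{hash}/99"
// parses to instance_name = "main", uuid = Some("{uuid}"), compressor =
// Some("zstd"), digest_function = Some("sha256"), hash = "{hash}", and
// expected_size = 99.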

// Useful utility struct for converting Bazel's URI-like resource path into its parts.
#[derive(Debug, Default)]
pub struct ResourceInfo<'a> {
    pub instance_name: Cow<'a, str>,
    pub uuid: Option<Cow<'a, str>>,
    pub compressor: Option<Cow<'a, str>>,
    pub digest_function: Option<Cow<'a, str>>,
    pub hash: Cow<'a, str>,
    size: Cow<'a, str>,
    pub expected_size: usize,
    pub optional_metadata: Option<Cow<'a, str>>,
}

impl<'a> ResourceInfo<'a> {
    pub fn new(resource_name: &'a str, is_upload: bool) -> Result<Self, Error> {
        // At most 7 slash-separated components (counting from the end) are
        // needed to reach the "(compressed-)blobs" section.
        let mut rparts = resource_name.rsplitn(7, '/');
        let mut output = ResourceInfo::default();
        let mut end_bytes_processed = 0;
        let end_state = recursive_parse(
            &mut rparts,
            &mut output,
            State::Unknown,
            &mut end_bytes_processed,
        )
        .err_tip(|| format!("{ERROR_MSG} in {resource_name}"))?;
        error_if!(
            end_state != State::OptionalMetadata,
            "Expected the final state to be OptionalMetadata. Got: {end_state:?} for {resource_name} is_upload: {is_upload}"
        );

        // Slice off the processed parts of `resource_name`.
        let beginning_part = if end_bytes_processed == resource_name.len() {
            ""
        } else {
            &resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE]
        };
        if !is_upload {
            output.instance_name = Cow::Borrowed(beginning_part);
            return Ok(output);
        }

        // If it's an upload, at this point the remainder will be:
        // `{?instance_name}/uploads/{uuid}`.
        // Remember, `instance_name` can contain slashes and/or special names
        // like "blobs" or "uploads".
        let mut parts = beginning_part.rsplitn(3, '/');
        output.uuid = Some(Cow::Borrowed(
            parts
                .next()
                .err_tip(|| format!("{ERROR_MSG} in {resource_name}"))?,
        ));
        {
            // Sanity check that our next item is "uploads".
            let uploads = parts
                .next()
                .err_tip(|| format!("{ERROR_MSG} in {resource_name}"))?;
            error_if!(
                uploads != "uploads",
                "Expected part to be 'uploads'. Got: {uploads} for {resource_name} is_upload: {is_upload}"
            );
        }

        // `instance_name` is optional.
        if let Some(instance_name) = parts.next() {
            output.instance_name = Cow::Borrowed(instance_name);
        }
        Ok(output)
    }

    /// Returns a new `ResourceInfo` with all fields owned.
    pub fn to_owned(&self) -> ResourceInfo<'static> {
        ResourceInfo {
            instance_name: Cow::Owned(self.instance_name.to_string()),
            uuid: self.uuid.as_ref().map(|uuid| Cow::Owned(uuid.to_string())),
            compressor: self
                .compressor
                .as_ref()
                .map(|compressor| Cow::Owned(compressor.to_string())),
            digest_function: self
                .digest_function
                .as_ref()
                .map(|digest_function| Cow::Owned(digest_function.to_string())),
            hash: Cow::Owned(self.hash.to_string()),
            size: Cow::Owned(self.size.to_string()),
            expected_size: self.expected_size,
            optional_metadata: self
                .optional_metadata
                .as_ref()
                .map(|optional_metadata| Cow::Owned(optional_metadata.to_string())),
        }
    }

    pub fn to_string(&self, is_upload: bool) -> String {
        [
            Some(self.instance_name.as_ref()),
            is_upload.then_some("uploads"),
            self.uuid.as_ref().map(AsRef::as_ref),
            Some(
                self.compressor
                    .as_ref()
                    .map_or("blobs", |_| "compressed-blobs"),
            ),
            self.compressor.as_ref().map(AsRef::as_ref),
            self.digest_function.as_ref().map(AsRef::as_ref),
            Some(self.hash.as_ref()),
            Some(self.size.as_ref()),
            self.optional_metadata.as_ref().map(AsRef::as_ref),
        ]
        .into_iter()
        .flatten()
        .filter(|part| !part.is_empty())
        .collect::<Vec<&str>>()
        .join("/")
    }
}

#[derive(Debug, PartialEq)]
enum State {
    Unknown,
    Compressor,
    DigestFunction,
    Hash,
    Size,
    OptionalMetadata,
}

// Iterate backwards looking for "(compressed-)blobs"; once found, move forward
// populating the output struct. This recursive function utilises the stack to
// temporarily hold the reference to the previous item, reducing the need for
// a heap allocation.
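//
// For example (illustrative), "blobs/sha256/{hash}/123" is yielded reversed as
// "123", "{hash}", "sha256", "blobs"; the recursion descends until it finds
// "blobs", then unwinds forward through DigestFunction -> Hash -> Size.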
fn recursive_parse<'a>(
    rparts: &mut impl Iterator<Item = &'a str>,
    output: &mut ResourceInfo<'a>,
    mut state: State,
    bytes_processed: &mut usize,
) -> Result<State, Error> {
    let part = rparts.next().err_tip(|| "on rparts.next()")?;
    if state == State::Unknown {
        if part == "blobs" {
            *bytes_processed = part.len() + SLASH_SIZE;
            return Ok(State::DigestFunction);
        }
        if part == "compressed-blobs" {
            *bytes_processed = part.len() + SLASH_SIZE;
            return Ok(State::Compressor);
        }
        state = recursive_parse(rparts, output, state, bytes_processed)?;
    }

    loop {
        match state {
            State::Unknown => {
                return Err(make_input_err!(
                    "Unknown state should never be reached in ResourceInfo::new"
                ));
            }
            State::Compressor => {
                state = State::DigestFunction;
                if COMPRESSORS.contains(&part) {
                    output.compressor = Some(Cow::Borrowed(part));
                    *bytes_processed += part.len() + SLASH_SIZE;
                    return Ok(state);
                }
                return Err(make_input_err!("Expected compressor, got {part}"));
            }
            State::DigestFunction => {
                state = State::Hash;
                if DIGEST_FUNCTIONS.contains(&part) {
                    output.digest_function = Some(Cow::Borrowed(part));
                    *bytes_processed += part.len() + SLASH_SIZE;
                    return Ok(state);
                }
            }
            State::Hash => {
                output.hash = Cow::Borrowed(part);
                *bytes_processed += part.len() + SLASH_SIZE;
                // TODO(palfrey) Set the digest_function if it is not set based on the hash size.
                return Ok(State::Size);
            }
            State::Size => {
                output.size = Cow::Borrowed(part);
                output.expected_size = part.parse::<usize>().map_err(|_| {
                    make_input_err!(
                        "Digest size_bytes was not convertible to usize. Got: {}",
                        part
                    )
                })?;
                *bytes_processed += part.len(); // Special case {size}, so it does not count one slash.
                return Ok(State::OptionalMetadata);
            }
            State::OptionalMetadata => {
                output.optional_metadata = Some(Cow::Borrowed(part));
                *bytes_processed += part.len() + SLASH_SIZE;
                // If we get here, we are done parsing backwards and have successfully parsed
                // everything beyond the "(compressed-)blobs" section.
                return Ok(State::OptionalMetadata);
            }
        }
    }
}
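
// A minimal sketch of the expected round trip, kept as a test; the resource
// name, UUID, and hash below are illustrative values, not taken from real
// traffic.
#[cfg(test)]
mod example_tests {
    use super::ResourceInfo;

    const NAME: &str = concat!(
        "my-instance/uploads/11111111-2222-3333-4444-555555555555/",
        "compressed-blobs/zstd/blake3/deadbeef/1234",
    );

    #[test]
    fn parses_and_reserializes_upload_resource_name() {
        let info = ResourceInfo::new(NAME, /* is_upload= */ true).unwrap();
        assert_eq!(info.instance_name, "my-instance");
        assert_eq!(
            info.uuid.as_deref(),
            Some("11111111-2222-3333-4444-555555555555")
        );
        assert_eq!(info.compressor.as_deref(), Some("zstd"));
        assert_eq!(info.digest_function.as_deref(), Some("blake3"));
        assert_eq!(info.hash, "deadbeef");
        assert_eq!(info.expected_size, 1234);
        // Serializing back should reproduce the original resource name.
        assert_eq!(info.to_string(/* is_upload= */ true), NAME);
    }
}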