/build/source/nativelink-util/src/resource_info.rs
// Copyright 2024 The NativeLink Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::borrow::Cow;
use std::convert::AsRef;

use nativelink_error::{error_if, make_input_err, Error, ResultExt};

const ERROR_MSG: &str = concat!(
    "Expected resource_name to be of pattern ",
    "'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}' or ",
    "'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
);
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];
const DIGEST_FUNCTIONS: [&str; 9] = [
    "sha256",
    "sha1",
    "md5",
    "vso",
    "sha384",
    "sha512",
    "murmur3",
    "sha256tree",
    "blake3",
];

// Named constant to make the code easier to read when adding the slash size.
const SLASH_SIZE: usize = 1;

// Rules are as follows:
//
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
// if is_upload is false:
// {instance_name}/ compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/ compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// {instance_name}/ blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/ blobs/                        {digest_function/}{hash}/{size}
//                  compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//                  compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//                  blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
//                  blobs/                        {digest_function/}{hash}/{size}
// {instance_name}/ compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
// {instance_name}/ compressed-blobs/{compressor}/                  {hash}/{size}
// {instance_name}/ blobs/                                          {hash}/{size}{/optional_metadata}
// {instance_name}/ blobs/                                          {hash}/{size}
//                  compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
//                  compressed-blobs/{compressor}/                  {hash}/{size}
//                  blobs/                                          {hash}/{size}{/optional_metadata}
//                  blobs/                                          {hash}/{size}
//
// if is_upload is true:
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
//                 uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
//                 uploads/{uuid}/blobs/                                          {hash}/{size}
//

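// A worked example of the grammar above (a sketch; the uuid, hash, and
// metadata values below are made up for illustration):
//
//   my_instance/uploads/0193-abcd/compressed-blobs/zstd/blake3/deadbeef/142/meta
//
// with is_upload == true parses into:
//   instance_name     = "my_instance"
//   uuid              = Some("0193-abcd")
//   compressor        = Some("zstd")
//   digest_function   = Some("blake3")
//   hash              = "deadbeef"
//   expected_size     = 142
//   optional_metadata = Some("meta")
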
// Useful utility struct for converting Bazel's URI-like resource path into its parts.
#[derive(Debug, Default)]
pub struct ResourceInfo<'a> {
    pub instance_name: Cow<'a, str>,
    pub uuid: Option<Cow<'a, str>>,
    pub compressor: Option<Cow<'a, str>>,
    pub digest_function: Option<Cow<'a, str>>,
    pub hash: Cow<'a, str>,
    size: Cow<'a, str>,
    pub expected_size: usize,
    pub optional_metadata: Option<Cow<'a, str>>,
}

impl<'a> ResourceInfo<'a> {
    pub fn new(resource_name: &'a str, is_upload: bool) -> Result<ResourceInfo<'a>, Error> {
        // The "(compressed-)blobs" token is at most 6 parts from the end, so
        // splitting into at most 7 parts guarantees it lands in its own part.
        let mut rparts = resource_name.rsplitn(7, '/');
        let mut output = ResourceInfo::default();
        let mut end_bytes_processed = 0;
        let end_state = recursive_parse(
            &mut rparts,
            &mut output,
            State::Unknown,
            &mut end_bytes_processed,
        )
        .err_tip(|| format!("{ERROR_MSG} in {resource_name}"))?;
        error_if!(
            end_state != State::OptionalMetadata,
            "Expected the final state to be OptionalMetadata. Got: {end_state:?} for {resource_name} is_upload: {is_upload}"
        );

        // Slice off the processed parts of `resource_name`.
        let beginning_part = if end_bytes_processed == resource_name.len() {
            ""
        } else {
            &resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE]
        };
        if !is_upload {
            output.instance_name = Cow::Borrowed(beginning_part);
            return Ok(output);
        }

        // If it's an upload, the remaining prefix at this point will be:
        // `{?instance_name}/uploads/{uuid}`.
        // Remember, `instance_name` can contain slashes and/or special names
        // like "blobs" or "uploads".
        let mut parts = beginning_part.rsplitn(3, '/');
        output.uuid = Some(Cow::Borrowed(
            parts
                .next()
                .err_tip(|| format!("{ERROR_MSG} in {resource_name}"))?,
        ));
        {
            // Sanity check that our next item is "uploads".
            let uploads = parts
                .next()
                .err_tip(|| format!("{ERROR_MSG} in {resource_name}"))?;
            error_if!(
                uploads != "uploads",
                "Expected part to be 'uploads'. Got: {uploads} for {resource_name} is_upload: {is_upload}"
            );
        }

        // `instance_name` is optional.
        if let Some(instance_name) = parts.next() {
            output.instance_name = Cow::Borrowed(instance_name);
        }
        Ok(output)
    }

    /// Returns a new `ResourceInfo` with all fields owned.
    pub fn to_owned(&self) -> ResourceInfo<'static> {
        ResourceInfo {
            instance_name: Cow::Owned(self.instance_name.to_string()),
            uuid: self.uuid.as_ref().map(|uuid| Cow::Owned(uuid.to_string())),
            compressor: self
                .compressor
                .as_ref()
                .map(|compressor| Cow::Owned(compressor.to_string())),
            digest_function: self
                .digest_function
                .as_ref()
                .map(|digest_function| Cow::Owned(digest_function.to_string())),
            hash: Cow::Owned(self.hash.to_string()),
            size: Cow::Owned(self.size.to_string()),
            expected_size: self.expected_size,
            optional_metadata: self
                .optional_metadata
                .as_ref()
                .map(|optional_metadata| Cow::Owned(optional_metadata.to_string())),
        }
    }

    pub fn to_string(&self, is_upload: bool) -> String {
        [
            Some(self.instance_name.as_ref()),
            is_upload.then_some("uploads"),
            self.uuid.as_ref().map(AsRef::as_ref),
            Some(
                self.compressor
                    .as_ref()
                    .map_or("blobs", |_| "compressed-blobs"),
            ),
            self.compressor.as_ref().map(AsRef::as_ref),
            self.digest_function.as_ref().map(AsRef::as_ref),
            Some(self.hash.as_ref()),
            Some(self.size.as_ref()),
            self.optional_metadata.as_ref().map(AsRef::as_ref),
        ]
        .into_iter()
        .flatten()
        .filter(|part| !part.is_empty())
        .collect::<Vec<&str>>()
        .join("/")
    }
}

#[derive(Debug, PartialEq)]
enum State {
    Unknown,
    Compressor,
    DigestFunction,
    Hash,
    Size,
    OptionalMetadata,
}

// Iterate backwards looking for "(compressed-)blobs"; once found, move forward
// populating the output struct. This recursive function utilises the stack to
// temporarily hold the reference to the previous item, reducing the need for
// a heap allocation.
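// For example, for a hypothetical input "blobs/sha256/{hash}/123", the
// recursion descends through "123", "{hash}" and "sha256" until it matches
// "blobs", then fills `output` as each stack frame unwinds through the
// DigestFunction, Hash and Size states.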
fn recursive_parse<'a>(
    rparts: &mut impl Iterator<Item = &'a str>,
    output: &mut ResourceInfo<'a>,
    mut state: State,
    bytes_processed: &mut usize,
) -> Result<State, Error> {
    let part = rparts.next().err_tip(|| "on rparts.next()")?;
    if state == State::Unknown {
        if part == "blobs" {
            *bytes_processed = part.len() + SLASH_SIZE;
            return Ok(State::DigestFunction);
        }
        if part == "compressed-blobs" {
            *bytes_processed = part.len() + SLASH_SIZE;
            return Ok(State::Compressor);
        }
        state = recursive_parse(rparts, output, state, bytes_processed)?;
    }

    loop {
        match state {
            State::Unknown => {
                return Err(make_input_err!(
                    "Unknown state should never be reached in ResourceInfo::new"
                ))
            }
            State::Compressor => {
                state = State::DigestFunction;
                if COMPRESSORS.contains(&part) {
                    output.compressor = Some(Cow::Borrowed(part));
                    *bytes_processed += part.len() + SLASH_SIZE;
                    return Ok(state);
                }
                return Err(make_input_err!("Expected compressor, got {part}"));
            }
            State::DigestFunction => {
                state = State::Hash;
                if DIGEST_FUNCTIONS.contains(&part) {
                    output.digest_function = Some(Cow::Borrowed(part));
                    *bytes_processed += part.len() + SLASH_SIZE;
                    return Ok(state);
                }
                continue;
            }
            State::Hash => {
                output.hash = Cow::Borrowed(part);
                *bytes_processed += part.len() + SLASH_SIZE;
                // TODO(allada) If digest_function is not set, derive it from the hash size.
                return Ok(State::Size);
            }
            State::Size => {
                output.size = Cow::Borrowed(part);
                output.expected_size = part.parse::<usize>().map_err(|_| {
                    make_input_err!(
                        "Digest size_bytes was not convertible to usize. Got: {}",
                        part
                    )
                })?;
                *bytes_processed += part.len(); // Special case {size}, so it does not count one slash.
                return Ok(State::OptionalMetadata);
            }
            State::OptionalMetadata => {
                output.optional_metadata = Some(Cow::Borrowed(part));
                *bytes_processed += part.len() + SLASH_SIZE;
                // If we get here, we are done parsing backwards and have successfully parsed
                // everything beyond the "(compressed-)blobs" section.
                return Ok(State::OptionalMetadata);
            }
        }
    }
}
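
// A minimal usage sketch (not part of the original file): the resource name,
// uuid and hash below are hypothetical and only illustrate the expected
// field mapping of `ResourceInfo::new`.
#[cfg(test)]
mod example_usage {
    use super::*;

    #[test]
    fn parses_compressed_upload_resource_name() -> Result<(), Error> {
        let info = ResourceInfo::new(
            "my/instance/uploads/some-uuid/compressed-blobs/zstd/blake3/abcd1234/42",
            /* is_upload = */ true,
        )?;
        assert_eq!(info.instance_name, "my/instance");
        assert_eq!(info.uuid.as_deref(), Some("some-uuid"));
        assert_eq!(info.compressor.as_deref(), Some("zstd"));
        assert_eq!(info.digest_function.as_deref(), Some("blake3"));
        assert_eq!(info.hash, "abcd1234");
        assert_eq!(info.expected_size, 42);
        // Round-tripping through `to_string` should reproduce the input.
        assert_eq!(
            info.to_string(true),
            "my/instance/uploads/some-uuid/compressed-blobs/zstd/blake3/abcd1234/42"
        );
        Ok(())
    }
}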