// /build/source/nativelink-util/src/resource_info.rs
// Copyright 2024 The NativeLink Authors. All rights reserved.
//
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// See LICENSE file for details
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use core::convert::AsRef;
use std::borrow::Cow;

use nativelink_error::{Error, ResultExt, error_if, make_input_err};
use tonic::Code;
20 | | |
// Error shown to callers when a resource name does not match either of the
// two accepted Bazel ByteStream resource-name patterns.
const ERROR_MSG: &str = concat!(
    "Expected resource_name to be of pattern ",
    "'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}' or ",
    "'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
);
// Compressor identifiers accepted in the "compressed-blobs" form of a path.
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];
// Digest-function identifiers accepted in the optional digest-function path segment.
const DIGEST_FUNCTIONS: [&str; 9] = [
    "sha256",
    "sha1",
    "md5",
    "vso",
    "sha384",
    "sha512",
    "murmur3",
    "sha256tree",
    "blake3",
];

// Named constant to make the code easier to read when adding the slash size.
const SLASH_SIZE: usize = 1;
41 | | |
// Rules are as follows:
//
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
// if is_upload is false:
//   {instance_name}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//   {instance_name}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//   {instance_name}/blobs/{digest_function/}{hash}/{size}{/optional_metadata}
//   {instance_name}/blobs/{digest_function/}{hash}/{size}
//   compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//   compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//   blobs/{digest_function/}{hash}/{size}{/optional_metadata}
//   blobs/{digest_function/}{hash}/{size}
//   {instance_name}/compressed-blobs/{compressor}/{hash}/{size}{/optional_metadata}
//   {instance_name}/compressed-blobs/{compressor}/{hash}/{size}
//   {instance_name}/blobs/{hash}/{size}{/optional_metadata}
//   {instance_name}/blobs/{hash}/{size}
//   compressed-blobs/{compressor}/{hash}/{size}{/optional_metadata}
//   compressed-blobs/{compressor}/{hash}/{size}
//   blobs/{hash}/{size}{/optional_metadata}
//   blobs/{hash}/{size}
//
// if is_upload is true:
//   {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//   {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//   {instance_name}/uploads/{uuid}/blobs/{digest_function/}{hash}/{size}{/optional_metadata}
//   {instance_name}/uploads/{uuid}/blobs/{digest_function/}{hash}/{size}
//   uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
//   uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
//   uploads/{uuid}/blobs/{digest_function/}{hash}/{size}{/optional_metadata}
//   uploads/{uuid}/blobs/{digest_function/}{hash}/{size}
//   {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{hash}/{size}{/optional_metadata}
//   {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{hash}/{size}
//   {instance_name}/uploads/{uuid}/blobs/{hash}/{size}{/optional_metadata}
//   {instance_name}/uploads/{uuid}/blobs/{hash}/{size}
//   uploads/{uuid}/compressed-blobs/{compressor}/{hash}/{size}{/optional_metadata}
//   uploads/{uuid}/compressed-blobs/{compressor}/{hash}/{size}
//   uploads/{uuid}/blobs/{hash}/{size}{/optional_metadata}
//   uploads/{uuid}/blobs/{hash}/{size}
//
82 | | |
/// Useful utility struct for converting bazel's (uri-like path) into its parts.
#[derive(Debug, Default)]
pub struct ResourceInfo<'a> {
    /// Instance-name prefix of the resource path. May contain slashes;
    /// empty when the resource name had no instance name.
    pub instance_name: Cow<'a, str>,
    /// Upload UUID; only populated when parsed with `is_upload == true`.
    pub uuid: Option<Cow<'a, str>>,
    /// Compressor segment ("zstd", etc.); only present for "compressed-blobs" paths.
    pub compressor: Option<Cow<'a, str>>,
    /// Optional digest-function segment ("sha256", "blake3", ...).
    pub digest_function: Option<Cow<'a, str>>,
    /// Digest hash segment, stored verbatim.
    pub hash: Cow<'a, str>,
    // Raw (unparsed) size segment; kept private so it cannot drift from `expected_size`.
    size: Cow<'a, str>,
    /// `size` parsed as a usize.
    pub expected_size: usize,
    /// Trailing optional-metadata segment, if present.
    pub optional_metadata: Option<Cow<'a, str>>,
}
95 | | |
96 | | impl<'a> ResourceInfo<'a> { |
97 | 77 | pub fn new(resource_name: &'a str, is_upload: bool) -> Result<Self, Error> { |
98 | | // The most amount of slashes there can be to get to "(compressed-)blobs" section is 7. |
99 | 77 | let mut rparts = resource_name.rsplitn(7, '/'); |
100 | 77 | let mut output = ResourceInfo::default(); |
101 | 77 | let mut end_bytes_processed = 0; |
102 | 77 | let end_state63 = recursive_parse( |
103 | 77 | &mut rparts, |
104 | 77 | &mut output, |
105 | 77 | State::Unknown, |
106 | 77 | &mut end_bytes_processed, |
107 | | ) |
108 | 77 | .err_tip(|| format!14 ("{ERROR_MSG} in {resource_name}"))?14 ; |
109 | 1 | error_if!( |
110 | 63 | end_state != State::OptionalMetadata, |
111 | | "Expected the final state to be OptionalMetadata. Got: {end_state:?} for {resource_name} is_upload: {is_upload}" |
112 | | ); |
113 | | |
114 | | // Slice off the processed parts of `resource_name`. |
115 | 62 | let beginning_part = if end_bytes_processed == resource_name.len() { |
116 | 8 | "" |
117 | | } else { |
118 | 54 | &resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE] |
119 | | }; |
120 | 62 | if !is_upload { |
121 | 21 | output.instance_name = Cow::Borrowed(beginning_part); |
122 | 21 | return Ok(output); |
123 | 41 | } |
124 | | |
125 | | // If it's an upload, at this point it will have be: |
126 | | // `{?instance_name}/uploads/{uuid}`. |
127 | | // Remember, `instance_name` can contain slashes and/or special names |
128 | | // like "blobs" or "uploads". |
129 | 41 | let mut parts = beginning_part.rsplitn(3, '/'); |
130 | 41 | output.uuid = Some(Cow::Borrowed( |
131 | 41 | parts |
132 | 41 | .next() |
133 | 41 | .err_tip(|| format!0 ("{ERROR_MSG} in {resource_name}"))?0 , |
134 | | )); |
135 | | { |
136 | | // Sanity check that our next item is "uploads". |
137 | 41 | let uploads = parts |
138 | 41 | .next() |
139 | 41 | .err_tip(|| format!0 ("{ERROR_MSG} in {resource_name}"))?0 ; |
140 | 0 | error_if!( |
141 | 41 | uploads != "uploads", |
142 | | "Expected part to be 'uploads'. Got: {uploads} for {resource_name} is_upload: {is_upload}" |
143 | | ); |
144 | | } |
145 | | |
146 | | // `instance_name` is optional. |
147 | 41 | if let Some(instance_name33 ) = parts.next() { |
148 | 33 | output.instance_name = Cow::Borrowed(instance_name); |
149 | 33 | }8 |
150 | 41 | Ok(output) |
151 | 77 | } |
152 | | |
153 | | /// Returns a new `ResourceInfo` with all fields owned. |
154 | 16 | pub fn to_owned(&self) -> ResourceInfo<'static> { |
155 | | ResourceInfo { |
156 | 16 | instance_name: Cow::Owned(self.instance_name.to_string()), |
157 | 16 | uuid: self.uuid.as_ref().map(|uuid| Cow::Owned(uuid.to_string())), |
158 | 16 | compressor: self |
159 | 16 | .compressor |
160 | 16 | .as_ref() |
161 | 16 | .map(|compressor| Cow::Owned(compressor0 .to_string0 ())), |
162 | 16 | digest_function: self |
163 | 16 | .digest_function |
164 | 16 | .as_ref() |
165 | 16 | .map(|digest_function| Cow::Owned(digest_function0 .to_string0 ())), |
166 | 16 | hash: Cow::Owned(self.hash.to_string()), |
167 | 16 | size: Cow::Owned(self.size.to_string()), |
168 | 16 | expected_size: self.expected_size, |
169 | 16 | optional_metadata: self |
170 | 16 | .optional_metadata |
171 | 16 | .as_ref() |
172 | 16 | .map(|optional_metadata| Cow::Owned(optional_metadata0 .to_string0 ())), |
173 | | } |
174 | 16 | } |
175 | | |
176 | 35 | pub fn to_string(&self, is_upload: bool) -> String { |
177 | | [ |
178 | 35 | Some(self.instance_name.as_ref()), |
179 | 35 | is_upload.then_some("uploads"), |
180 | 35 | self.uuid.as_ref().map(AsRef::as_ref), |
181 | | Some( |
182 | 35 | self.compressor |
183 | 35 | .as_ref() |
184 | 35 | .map_or("blobs", |_| "compressed-blobs"), |
185 | | ), |
186 | 35 | self.compressor.as_ref().map(AsRef::as_ref), |
187 | 35 | self.digest_function.as_ref().map(AsRef::as_ref), |
188 | 35 | Some(self.hash.as_ref()), |
189 | 35 | Some(self.size.as_ref()), |
190 | 35 | self.optional_metadata.as_ref().map(AsRef::as_ref), |
191 | | ] |
192 | 35 | .into_iter() |
193 | 35 | .flatten() |
194 | 222 | .filter35 (|part| !part.is_empty()) |
195 | 35 | .collect::<Vec<&str>>() |
196 | 35 | .join("/") |
197 | 35 | } |
198 | | } |
199 | | |
/// States of the backward resource-name parser. `recursive_parse` scans
/// right-to-left until it finds "(compressed-)blobs", then walks forward
/// through these states (Compressor and DigestFunction are optional).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    Unknown,
    Compressor,
    DigestFunction,
    Hash,
    Size,
    OptionalMetadata,
}
209 | | |
210 | | // Iterate backwards looking for "(compressed-)blobs", once found, move forward |
211 | | // populating the output struct. This recursive function utilises the stack to |
212 | | // temporarily hold the reference to the previous item reducing the need for |
213 | | // a heap allocation. |
214 | 288 | fn recursive_parse<'a>( |
215 | 288 | rparts: &mut impl Iterator<Item = &'a str>, |
216 | 288 | output: &mut ResourceInfo<'a>, |
217 | 288 | mut state: State, |
218 | 288 | bytes_processed: &mut usize, |
219 | 288 | ) -> Result<State, Error> { |
220 | 288 | let part280 = rparts.next().err_tip(|| "on rparts.next()")?8 ; |
221 | 280 | if state == State::Unknown { |
222 | 280 | if part == "blobs" { |
223 | 50 | *bytes_processed = part.len() + SLASH_SIZE; |
224 | 50 | return Ok(State::DigestFunction); |
225 | 230 | } |
226 | 230 | if part == "compressed-blobs" { |
227 | 19 | *bytes_processed = part.len() + SLASH_SIZE; |
228 | 19 | return Ok(State::Compressor); |
229 | 211 | } |
230 | 211 | state = recursive_parse(rparts, output, state, bytes_processed)?29 ; |
231 | 0 | } |
232 | | |
233 | | loop { |
234 | 232 | match state { |
235 | | State::Unknown => { |
236 | 0 | return Err(make_input_err!( |
237 | 0 | "Unknown state should never be reached in ResourceInfo::new" |
238 | 0 | )); |
239 | | } |
240 | | State::Compressor => { |
241 | 19 | state = State::DigestFunction; |
242 | 19 | if COMPRESSORS.contains(&part) { |
243 | 16 | output.compressor = Some(Cow::Borrowed(part)); |
244 | 16 | *bytes_processed += part.len() + SLASH_SIZE; |
245 | 16 | return Ok(state); |
246 | 3 | } |
247 | 3 | return Err(make_input_err!("Expected compressor, got {part}")); |
248 | | } |
249 | | State::DigestFunction => { |
250 | 66 | state = State::Hash; |
251 | 66 | if DIGEST_FUNCTIONS.contains(&part) { |
252 | 16 | output.digest_function = Some(Cow::Borrowed(part)); |
253 | 16 | *bytes_processed += part.len() + SLASH_SIZE; |
254 | 16 | return Ok(state); |
255 | 50 | } |
256 | | } |
257 | | State::Hash => { |
258 | 66 | output.hash = Cow::Borrowed(part); |
259 | 66 | *bytes_processed += part.len() + SLASH_SIZE; |
260 | | // TODO(palfrey) Set the digest_function if it is not set based on the hash size. |
261 | 66 | return Ok(State::Size); |
262 | | } |
263 | | State::Size => { |
264 | 65 | output.size = Cow::Borrowed(part); |
265 | 65 | output.expected_size = part.parse::<usize>().map_err(|err| {3 |
266 | 3 | Error::from_std_err(Code::InvalidArgument, &err).append(format!( |
267 | | "Digest size_bytes was not convertible to usize. Got: {part}", |
268 | | )) |
269 | 3 | })?; |
270 | 62 | *bytes_processed += part.len(); // Special case {size}, so it does not count one slash. |
271 | 62 | return Ok(State::OptionalMetadata); |
272 | | } |
273 | | State::OptionalMetadata => { |
274 | 16 | output.optional_metadata = Some(Cow::Borrowed(part)); |
275 | 16 | *bytes_processed += part.len() + SLASH_SIZE; |
276 | | // If we get here, we are done parsing backwards and have successfully parsed |
277 | | // everything beyond the "(compressed-)blobs" section. |
278 | 16 | return Ok(State::OptionalMetadata); |
279 | | } |
280 | | } |
281 | | } |
282 | 288 | } |