Coverage Report

Created: 2026-06-04 10:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-util/src/resource_info.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::convert::AsRef;
16
use std::borrow::Cow;
17
18
use nativelink_error::{Error, ResultExt, error_if, make_input_err};
19
use tonic::Code;
20
21
/// Tip prepended to parse errors; spells out the two resource-name patterns.
const ERROR_MSG: &str = concat!(
    "Expected resource_name to be of pattern ",
    "'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
    " or ",
    "'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
);

/// Segment values accepted as the compressor directly after `compressed-blobs/`.
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];

/// Segment values recognised as a digest function. Any other value found in
/// the digest-function position is treated as the hash instead.
const DIGEST_FUNCTIONS: [&str; 9] = [
    "sha256",
    "sha1",
    "md5",
    "vso",
    "sha384",
    "sha512",
    "murmur3",
    "sha256tree",
    "blake3",
];

/// Byte length of the `/` separator. Named constant so the byte accounting
/// reads clearly wherever a separator is added to a segment length.
const SLASH_SIZE: usize = 1;
41
42
// Rules are as follows:
43
//
44
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
45
// if is_upload is false:
46
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
47
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
48
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
49
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}
50
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
51
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
52
//                                blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
53
//                                blobs/                        {digest_function/}{hash}/{size}
54
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
55
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}
56
// {instance_name}/               blobs/                                          {hash}/{size}{/optional_metadata}
57
// {instance_name}/               blobs/                                          {hash}/{size}
58
//                                compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
59
//                                compressed-blobs/{compressor}/                  {hash}/{size}
60
//
61
//                                blobs/                                          {hash}/{size}{/optional_metadata}
62
//                                blobs/                                          {hash}/{size}
63
//
64
// if is_upload is true:
65
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
66
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
67
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
68
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
69
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
70
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
71
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
72
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
73
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
74
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
75
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
76
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}
77
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
78
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
79
//                 uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
80
//                 uploads/{uuid}/blobs/                                          {hash}/{size}
81
//
82
83
/// Utility struct holding the parts of bazel's (uri-like) resource name.
///
/// Every string field is a [`Cow`], so a value can either borrow from the
/// resource name it was parsed from or own its data.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ResourceInfo<'a> {
    /// Text ahead of the `uploads/` or `(compressed-)blobs/` section. May be
    /// empty and may itself contain slashes.
    pub instance_name: Cow<'a, str>,
    /// The `{uuid}` segment following `uploads/`; `None` unless the name was
    /// parsed as an upload.
    pub uuid: Option<Cow<'a, str>>,
    /// The compressor segment following `compressed-blobs/`; `None` for the
    /// plain `blobs/` form.
    pub compressor: Option<Cow<'a, str>>,
    /// The digest function segment, when the name carries one.
    pub digest_function: Option<Cow<'a, str>>,
    /// The `{hash}` segment, stored as found in the name.
    pub hash: Cow<'a, str>,
    /// The `{size}` segment exactly as written, kept so the name can be
    /// rendered back without reformatting the number.
    size: Cow<'a, str>,
    /// The `{size}` segment parsed as a `usize`.
    pub expected_size: usize,
    /// The trailing segment after `{size}`, if any.
    pub optional_metadata: Option<Cow<'a, str>>,
}
95
96
impl<'a> ResourceInfo<'a> {
97
83
    pub fn new(resource_name: &'a str, is_upload: bool) -> Result<Self, Error> {
98
        // The most amount of slashes there can be to get to "(compressed-)blobs" section is 7.
99
83
        let mut rparts = resource_name.rsplitn(7, '/');
100
83
        let mut output = ResourceInfo::default();
101
83
        let mut end_bytes_processed = 0;
102
83
        let 
end_state69
= recursive_parse(
103
83
            &mut rparts,
104
83
            &mut output,
105
83
            State::Unknown,
106
83
            &mut end_bytes_processed,
107
        )
108
83
        .err_tip(|| 
format!14
("{ERROR_MSG} in {resource_name}"))
?14
;
109
1
        error_if!(
110
69
            end_state != State::OptionalMetadata,
111
            "Expected the final state to be OptionalMetadata. Got: {end_state:?} for {resource_name} is_upload: {is_upload}"
112
        );
113
114
        // Slice off the processed parts of `resource_name`.
115
68
        let beginning_part = if end_bytes_processed == resource_name.len() {
116
8
            ""
117
        } else {
118
60
            &resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE]
119
        };
120
68
        if !is_upload {
121
21
            output.instance_name = Cow::Borrowed(beginning_part);
122
21
            return Ok(output);
123
47
        }
124
125
        // If it's an upload, at this point it will have be:
126
        // `{?instance_name}/uploads/{uuid}`.
127
        // Remember, `instance_name` can contain slashes and/or special names
128
        // like "blobs" or "uploads".
129
47
        let mut parts = beginning_part.rsplitn(3, '/');
130
47
        output.uuid = Some(Cow::Borrowed(
131
47
            parts
132
47
                .next()
133
47
                .err_tip(|| 
format!0
("{ERROR_MSG} in {resource_name}"))
?0
,
134
        ));
135
        {
136
            // Sanity check that our next item is "uploads".
137
47
            let uploads = parts
138
47
                .next()
139
47
                .err_tip(|| 
format!0
("{ERROR_MSG} in {resource_name}"))
?0
;
140
0
            error_if!(
141
47
                uploads != "uploads",
142
                "Expected part to be 'uploads'. Got: {uploads} for {resource_name} is_upload: {is_upload}"
143
            );
144
        }
145
146
        // `instance_name` is optional.
147
47
        if let Some(
instance_name39
) = parts.next() {
148
39
            output.instance_name = Cow::Borrowed(instance_name);
149
39
        
}8
150
47
        Ok(output)
151
83
    }
152
153
    /// Returns a new `ResourceInfo` with all fields owned.
154
18
    pub fn to_owned(&self) -> ResourceInfo<'static> {
155
        ResourceInfo {
156
18
            instance_name: Cow::Owned(self.instance_name.to_string()),
157
18
            uuid: self.uuid.as_ref().map(|uuid| Cow::Owned(uuid.to_string())),
158
18
            compressor: self
159
18
                .compressor
160
18
                .as_ref()
161
18
                .map(|compressor| Cow::Owned(
compressor0
.
to_string0
())),
162
18
            digest_function: self
163
18
                .digest_function
164
18
                .as_ref()
165
18
                .map(|digest_function| Cow::Owned(
digest_function1
.
to_string1
())),
166
18
            hash: Cow::Owned(self.hash.to_string()),
167
18
            size: Cow::Owned(self.size.to_string()),
168
18
            expected_size: self.expected_size,
169
18
            optional_metadata: self
170
18
                .optional_metadata
171
18
                .as_ref()
172
18
                .map(|optional_metadata| Cow::Owned(
optional_metadata0
.
to_string0
())),
173
        }
174
18
    }
175
176
35
    pub fn to_string(&self, is_upload: bool) -> String {
177
        [
178
35
            Some(self.instance_name.as_ref()),
179
35
            is_upload.then_some("uploads"),
180
35
            self.uuid.as_ref().map(AsRef::as_ref),
181
            Some(
182
35
                self.compressor
183
35
                    .as_ref()
184
35
                    .map_or("blobs", |_| "compressed-blobs"),
185
            ),
186
35
            self.compressor.as_ref().map(AsRef::as_ref),
187
35
            self.digest_function.as_ref().map(AsRef::as_ref),
188
35
            Some(self.hash.as_ref()),
189
35
            Some(self.size.as_ref()),
190
35
            self.optional_metadata.as_ref().map(AsRef::as_ref),
191
        ]
192
35
        .into_iter()
193
35
        .flatten()
194
222
        .
filter35
(|part| !part.is_empty())
195
35
        .collect::<Vec<&str>>()
196
35
        .join("/")
197
35
    }
198
}
199
200
/// Which segment of the `(compressed-)blobs` section the parser expects next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// The `blobs` / `compressed-blobs` keyword has not been found yet.
    Unknown,
    /// The next segment must be a compressor name.
    Compressor,
    /// The next segment is a digest function if it names one, otherwise the hash.
    DigestFunction,
    /// The next segment is the hash.
    Hash,
    /// The next segment is the size.
    Size,
    /// The size has been consumed; one more trailing segment may follow.
    OptionalMetadata,
}
209
210
// Iterate backwards looking for "(compressed-)blobs", once found, move forward
211
// populating the output struct. This recursive function utilises the stack to
212
// temporarily hold the reference to the previous item reducing the need for
213
// a heap allocation.
214
309
fn recursive_parse<'a>(
215
309
    rparts: &mut impl Iterator<Item = &'a str>,
216
309
    output: &mut ResourceInfo<'a>,
217
309
    mut state: State,
218
309
    bytes_processed: &mut usize,
219
309
) -> Result<State, Error> {
220
309
    let 
part301
= rparts.next().err_tip(|| "on rparts.next()")
?8
;
221
301
    if state == State::Unknown {
222
301
        if part == "blobs" {
223
56
            *bytes_processed = part.len() + SLASH_SIZE;
224
56
            return Ok(State::DigestFunction);
225
245
        }
226
245
        if part == "compressed-blobs" {
227
19
            *bytes_processed = part.len() + SLASH_SIZE;
228
19
            return Ok(State::Compressor);
229
226
        }
230
226
        state = recursive_parse(rparts, output, state, bytes_processed)
?29
;
231
0
    }
232
233
    loop {
234
250
        match state {
235
            State::Unknown => {
236
0
                return Err(make_input_err!(
237
0
                    "Unknown state should never be reached in ResourceInfo::new"
238
0
                ));
239
            }
240
            State::Compressor => {
241
19
                state = State::DigestFunction;
242
19
                if COMPRESSORS.contains(&part) {
243
16
                    output.compressor = Some(Cow::Borrowed(part));
244
16
                    *bytes_processed += part.len() + SLASH_SIZE;
245
16
                    return Ok(state);
246
3
                }
247
3
                return Err(make_input_err!("Expected compressor, got {part}"));
248
            }
249
            State::DigestFunction => {
250
72
                state = State::Hash;
251
72
                if DIGEST_FUNCTIONS.contains(&part) {
252
19
                    output.digest_function = Some(Cow::Borrowed(part));
253
19
                    *bytes_processed += part.len() + SLASH_SIZE;
254
19
                    return Ok(state);
255
53
                }
256
            }
257
            State::Hash => {
258
72
                output.hash = Cow::Borrowed(part);
259
72
                *bytes_processed += part.len() + SLASH_SIZE;
260
                // TODO(palfrey) Set the digest_function if it is not set based on the hash size.
261
72
                return Ok(State::Size);
262
            }
263
            State::Size => {
264
71
                output.size = Cow::Borrowed(part);
265
71
                output.expected_size = part.parse::<usize>().map_err(|err| 
{3
266
3
                    Error::from_std_err(Code::InvalidArgument, &err).append(format!(
267
                        "Digest size_bytes was not convertible to usize. Got: {part}",
268
                    ))
269
3
                })?;
270
68
                *bytes_processed += part.len(); // Special case {size}, so it does not count one slash.
271
68
                return Ok(State::OptionalMetadata);
272
            }
273
            State::OptionalMetadata => {
274
16
                output.optional_metadata = Some(Cow::Borrowed(part));
275
16
                *bytes_processed += part.len() + SLASH_SIZE;
276
                // If we get here, we are done parsing backwards and have successfully parsed
277
                // everything beyond the "(compressed-)blobs" section.
278
16
                return Ok(State::OptionalMetadata);
279
            }
280
        }
281
    }
282
309
}