Coverage Report

Created: 2026-04-14 11:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-util/src/resource_info.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::convert::AsRef;
16
use std::borrow::Cow;
17
18
use nativelink_error::{Error, ResultExt, error_if, make_input_err};
19
use tonic::Code;
20
21
/// Tip text attached to parse failures; it spells out the two accepted
/// resource-name layouts (`blobs` and `compressed-blobs`).
const ERROR_MSG: &str = concat!(
    "Expected resource_name to be of pattern ",
    "'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}' or ",
    "'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
);

/// Segment values accepted in the `{compressor}` slot that follows `compressed-blobs`.
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];

/// Segment values recognised as a `{digest_function}`. A segment in that slot
/// that is not listed here is treated as the hash instead.
const DIGEST_FUNCTIONS: [&str; 9] = [
    "sha256",
    "sha1",
    "md5",
    "vso",
    "sha384",
    "sha512",
    "murmur3",
    "sha256tree",
    "blake3",
];

/// Byte length of one `/` separator. Named constant so the byte accounting
/// reads clearly wherever a separator is added to a segment length.
const SLASH_SIZE: usize = 1;
41
42
// Rules are as follows:
43
//
44
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
45
// if is_upload is false:
46
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
47
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
48
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
49
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}
50
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
51
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
52
//                                blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
53
//                                blobs/                        {digest_function/}{hash}/{size}
54
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
55
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}
56
// {instance_name}/               blobs/                                          {hash}/{size}{/optional_metadata}
57
// {instance_name}/               blobs/                                          {hash}/{size}
58
//                                compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
59
//                                compressed-blobs/{compressor}/                  {hash}/{size}
60
//
61
//                                blobs/                                          {hash}/{size}{/optional_metadata}
62
//                                blobs/                                          {hash}/{size}
63
//
64
// if is_upload is true:
65
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
66
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
67
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
68
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
69
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
70
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
71
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
72
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
73
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
74
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
75
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
76
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}
77
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
78
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
79
//                 uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
80
//                 uploads/{uuid}/blobs/                                          {hash}/{size}
81
//
82
83
/// Components of a Bazel-style, URI-like resource name after it has been split apart.
///
/// Every textual field is a [`Cow`], so a value can either borrow from the
/// parsed input or own its strings.
#[derive(Debug, Default)]
pub struct ResourceInfo<'a> {
    /// Text preceding the `uploads` / `blobs` / `compressed-blobs` section;
    /// empty when the name has no such prefix.
    pub instance_name: Cow<'a, str>,
    /// The `{uuid}` segment that follows `uploads`; only filled in for uploads.
    pub uuid: Option<Cow<'a, str>>,
    /// The `{compressor}` segment; present only for `compressed-blobs` names.
    pub compressor: Option<Cow<'a, str>>,
    /// The `{digest_function}` segment, when the name carries one.
    pub digest_function: Option<Cow<'a, str>>,
    /// The `{hash}` segment.
    pub hash: Cow<'a, str>,
    /// The `{size}` segment exactly as written; kept so the name can be re-rendered.
    size: Cow<'a, str>,
    /// The `{size}` segment parsed as a number.
    pub expected_size: usize,
    /// The segment following `{size}`, if any.
    pub optional_metadata: Option<Cow<'a, str>>,
}
95
96
impl<'a> ResourceInfo<'a> {
97
77
    pub fn new(resource_name: &'a str, is_upload: bool) -> Result<Self, Error> {
98
        // The most amount of slashes there can be to get to "(compressed-)blobs" section is 7.
99
77
        let mut rparts = resource_name.rsplitn(7, '/');
100
77
        let mut output = ResourceInfo::default();
101
77
        let mut end_bytes_processed = 0;
102
77
        let 
end_state63
= recursive_parse(
103
77
            &mut rparts,
104
77
            &mut output,
105
77
            State::Unknown,
106
77
            &mut end_bytes_processed,
107
        )
108
77
        .err_tip(|| 
format!14
("{ERROR_MSG} in {resource_name}"))
?14
;
109
1
        error_if!(
110
63
            end_state != State::OptionalMetadata,
111
            "Expected the final state to be OptionalMetadata. Got: {end_state:?} for {resource_name} is_upload: {is_upload}"
112
        );
113
114
        // Slice off the processed parts of `resource_name`.
115
62
        let beginning_part = if end_bytes_processed == resource_name.len() {
116
8
            ""
117
        } else {
118
54
            &resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE]
119
        };
120
62
        if !is_upload {
121
21
            output.instance_name = Cow::Borrowed(beginning_part);
122
21
            return Ok(output);
123
41
        }
124
125
        // If it's an upload, at this point it will have be:
126
        // `{?instance_name}/uploads/{uuid}`.
127
        // Remember, `instance_name` can contain slashes and/or special names
128
        // like "blobs" or "uploads".
129
41
        let mut parts = beginning_part.rsplitn(3, '/');
130
41
        output.uuid = Some(Cow::Borrowed(
131
41
            parts
132
41
                .next()
133
41
                .err_tip(|| 
format!0
("{ERROR_MSG} in {resource_name}"))
?0
,
134
        ));
135
        {
136
            // Sanity check that our next item is "uploads".
137
41
            let uploads = parts
138
41
                .next()
139
41
                .err_tip(|| 
format!0
("{ERROR_MSG} in {resource_name}"))
?0
;
140
0
            error_if!(
141
41
                uploads != "uploads",
142
                "Expected part to be 'uploads'. Got: {uploads} for {resource_name} is_upload: {is_upload}"
143
            );
144
        }
145
146
        // `instance_name` is optional.
147
41
        if let Some(
instance_name33
) = parts.next() {
148
33
            output.instance_name = Cow::Borrowed(instance_name);
149
33
        
}8
150
41
        Ok(output)
151
77
    }
152
153
    /// Returns a new `ResourceInfo` with all fields owned.
154
16
    pub fn to_owned(&self) -> ResourceInfo<'static> {
155
        ResourceInfo {
156
16
            instance_name: Cow::Owned(self.instance_name.to_string()),
157
16
            uuid: self.uuid.as_ref().map(|uuid| Cow::Owned(uuid.to_string())),
158
16
            compressor: self
159
16
                .compressor
160
16
                .as_ref()
161
16
                .map(|compressor| Cow::Owned(
compressor0
.
to_string0
())),
162
16
            digest_function: self
163
16
                .digest_function
164
16
                .as_ref()
165
16
                .map(|digest_function| Cow::Owned(
digest_function0
.
to_string0
())),
166
16
            hash: Cow::Owned(self.hash.to_string()),
167
16
            size: Cow::Owned(self.size.to_string()),
168
16
            expected_size: self.expected_size,
169
16
            optional_metadata: self
170
16
                .optional_metadata
171
16
                .as_ref()
172
16
                .map(|optional_metadata| Cow::Owned(
optional_metadata0
.
to_string0
())),
173
        }
174
16
    }
175
176
35
    pub fn to_string(&self, is_upload: bool) -> String {
177
        [
178
35
            Some(self.instance_name.as_ref()),
179
35
            is_upload.then_some("uploads"),
180
35
            self.uuid.as_ref().map(AsRef::as_ref),
181
            Some(
182
35
                self.compressor
183
35
                    .as_ref()
184
35
                    .map_or("blobs", |_| "compressed-blobs"),
185
            ),
186
35
            self.compressor.as_ref().map(AsRef::as_ref),
187
35
            self.digest_function.as_ref().map(AsRef::as_ref),
188
35
            Some(self.hash.as_ref()),
189
35
            Some(self.size.as_ref()),
190
35
            self.optional_metadata.as_ref().map(AsRef::as_ref),
191
        ]
192
35
        .into_iter()
193
35
        .flatten()
194
222
        .
filter35
(|part| !part.is_empty())
195
35
        .collect::<Vec<&str>>()
196
35
        .join("/")
197
35
    }
198
}
199
200
/// Which component of the resource name the parser expects to handle next.
#[derive(Debug, PartialEq)]
enum State {
    /// The `blobs` / `compressed-blobs` marker has not been found yet.
    Unknown,
    /// The marker was `compressed-blobs`; the next segment must be a compressor.
    Compressor,
    /// The next segment is either a digest function or, failing that, the hash.
    DigestFunction,
    /// The next segment is the hash.
    Hash,
    /// The next segment is the size.
    Size,
    /// The size has been read; any further segment is optional metadata.
    OptionalMetadata,
}
209
210
// Iterate backwards looking for "(compressed-)blobs", once found, move forward
211
// populating the output struct. This recursive function utilises the stack to
212
// temporarily hold the reference to the previous item reducing the need for
213
// a heap allocation.
214
288
fn recursive_parse<'a>(
215
288
    rparts: &mut impl Iterator<Item = &'a str>,
216
288
    output: &mut ResourceInfo<'a>,
217
288
    mut state: State,
218
288
    bytes_processed: &mut usize,
219
288
) -> Result<State, Error> {
220
288
    let 
part280
= rparts.next().err_tip(|| "on rparts.next()")
?8
;
221
280
    if state == State::Unknown {
222
280
        if part == "blobs" {
223
50
            *bytes_processed = part.len() + SLASH_SIZE;
224
50
            return Ok(State::DigestFunction);
225
230
        }
226
230
        if part == "compressed-blobs" {
227
19
            *bytes_processed = part.len() + SLASH_SIZE;
228
19
            return Ok(State::Compressor);
229
211
        }
230
211
        state = recursive_parse(rparts, output, state, bytes_processed)
?29
;
231
0
    }
232
233
    loop {
234
232
        match state {
235
            State::Unknown => {
236
0
                return Err(make_input_err!(
237
0
                    "Unknown state should never be reached in ResourceInfo::new"
238
0
                ));
239
            }
240
            State::Compressor => {
241
19
                state = State::DigestFunction;
242
19
                if COMPRESSORS.contains(&part) {
243
16
                    output.compressor = Some(Cow::Borrowed(part));
244
16
                    *bytes_processed += part.len() + SLASH_SIZE;
245
16
                    return Ok(state);
246
3
                }
247
3
                return Err(make_input_err!("Expected compressor, got {part}"));
248
            }
249
            State::DigestFunction => {
250
66
                state = State::Hash;
251
66
                if DIGEST_FUNCTIONS.contains(&part) {
252
16
                    output.digest_function = Some(Cow::Borrowed(part));
253
16
                    *bytes_processed += part.len() + SLASH_SIZE;
254
16
                    return Ok(state);
255
50
                }
256
            }
257
            State::Hash => {
258
66
                output.hash = Cow::Borrowed(part);
259
66
                *bytes_processed += part.len() + SLASH_SIZE;
260
                // TODO(palfrey) Set the digest_function if it is not set based on the hash size.
261
66
                return Ok(State::Size);
262
            }
263
            State::Size => {
264
65
                output.size = Cow::Borrowed(part);
265
65
                output.expected_size = part.parse::<usize>().map_err(|err| 
{3
266
3
                    Error::from_std_err(Code::InvalidArgument, &err).append(format!(
267
                        "Digest size_bytes was not convertible to usize. Got: {part}",
268
                    ))
269
3
                })?;
270
62
                *bytes_processed += part.len(); // Special case {size}, so it does not count one slash.
271
62
                return Ok(State::OptionalMetadata);
272
            }
273
            State::OptionalMetadata => {
274
16
                output.optional_metadata = Some(Cow::Borrowed(part));
275
16
                *bytes_processed += part.len() + SLASH_SIZE;
276
                // If we get here, we are done parsing backwards and have successfully parsed
277
                // everything beyond the "(compressed-)blobs" section.
278
16
                return Ok(State::OptionalMetadata);
279
            }
280
        }
281
    }
282
288
}