Coverage Report

Created: 2024-11-20 10:13

/build/source/nativelink-util/src/resource_info.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use std::borrow::Cow;
16
use std::convert::AsRef;
17
18
use nativelink_error::{error_if, make_input_err, Error, ResultExt};
19
20
const ERROR_MSG: &str = concat!(
21
    "Expected resource_name to be of pattern ",
22
    "'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}' or ",
23
    "'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
24
);
25
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];
26
const DIGEST_FUNCTIONS: [&str; 9] = [
27
    "sha256",
28
    "sha1",
29
    "md5",
30
    "vso",
31
    "sha384",
32
    "sha512",
33
    "murmur3",
34
    "sha256tree",
35
    "blake3",
36
];
37
38
// Named struct to make the code easier to read when adding the slash size.
39
const SLASH_SIZE: usize = 1;
40
41
// Rules are as follows:
42
//
43
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
44
// if is_upload is false:
45
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
46
// {instance_name}/               compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
47
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
48
// {instance_name}/               blobs/                        {digest_function/}{hash}/{size}
49
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
50
//                                compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
51
//                                blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
52
//                                blobs/                        {digest_function/}{hash}/{size}
53
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
54
// {instance_name}/               compressed-blobs/{compressor}/                  {hash}/{size}
55
// {instance_name}/               blobs/                                          {hash}/{size}{/optional_metadata}
56
// {instance_name}/               blobs/                                          {hash}/{size}
57
//                                compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
58
//                                compressed-blobs/{compressor}/                  {hash}/{size}
59
//
60
//                                blobs/                                          {hash}/{size}{/optional_metadata}
61
//                                blobs/                                          {hash}/{size}
62
//
63
// if is_upload is true:
64
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
65
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
66
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
67
// {instance_name}/uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
68
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
69
//                 uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
70
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}{/optional_metadata}
71
//                 uploads/{uuid}/blobs/                        {digest_function/}{hash}/{size}
72
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
73
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
74
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
75
// {instance_name}/uploads/{uuid}/blobs/                                          {hash}/{size}
76
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}{/optional_metadata}
77
//                 uploads/{uuid}/compressed-blobs/{compressor}/                  {hash}/{size}
78
//                 uploads/{uuid}/blobs/                                          {hash}/{size}{/optional_metadata}
79
//                 uploads/{uuid}/blobs/                                          {hash}/{size}
80
//
81
82
// Useful utility struct for converting bazel's (uri-like path) into its parts.
83
#[derive(Debug, Default)]
84
pub struct ResourceInfo<'a> {
85
    pub instance_name: Cow<'a, str>,
86
    pub uuid: Option<Cow<'a, str>>,
87
    pub compressor: Option<Cow<'a, str>>,
88
    pub digest_function: Option<Cow<'a, str>>,
89
    pub hash: Cow<'a, str>,
90
    size: Cow<'a, str>,
91
    pub expected_size: usize,
92
    pub optional_metadata: Option<Cow<'a, str>>,
93
}
94
95
impl<'a> ResourceInfo<'a> {
96
73
    pub fn new(resource_name: &'a str, is_upload: bool) -> Result<ResourceInfo<'a>, Error> {
97
73
        // The most amount of slashes there can be to get to "(compressed-)blobs" section is 7.
98
73
        let mut rparts = resource_name.rsplitn(7, '/');
99
73
        let mut output = ResourceInfo::default();
100
73
        let mut end_bytes_processed = 0;
101
73
        let 
end_state59
= recursive_parse(
102
73
            &mut rparts,
103
73
            &mut output,
104
73
            State::Unknown,
105
73
            &mut end_bytes_processed,
106
73
        )
107
73
        .err_tip(|| 
format!("{ERROR_MSG} in {resource_name}")14
)
?14
;
108
1
        error_if!(
109
59
            end_state != State::OptionalMetadata,
  Branch (109:13): [True: 1, False: 58]
  Branch (109:13): [Folded - Ignored]
110
            "Expected the final state to be OptionalMetadata. Got: {end_state:?} for {resource_name} is_upload: {is_upload}"
111
        );
112
113
        // Slice off the processed parts of `resource_name`.
114
58
        let beginning_part = if end_bytes_processed == resource_name.len() {
  Branch (114:33): [True: 8, False: 50]
  Branch (114:33): [Folded - Ignored]
115
8
            ""
116
        } else {
117
50
            &resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE]
118
        };
119
58
        if !is_upload {
  Branch (119:12): [True: 21, False: 37]
  Branch (119:12): [Folded - Ignored]
120
21
            output.instance_name = Cow::Borrowed(beginning_part);
121
21
            return Ok(output);
122
37
        }
123
37
124
37
        // If it's an upload, at this point it will have been:
125
37
        // `{?instance_name}/uploads/{uuid}`.
126
37
        // Remember, `instance_name` can contain slashes and/or special names
127
37
        // like "blobs" or "uploads".
128
37
        let mut parts = beginning_part.rsplitn(3, '/');
129
37
        output.uuid = Some(Cow::Borrowed(
130
37
            parts
131
37
                .next()
132
37
                .err_tip(|| 
format!("{ERROR_MSG} in {resource_name}")0
)
?0
,
133
        ));
134
        {
135
            // Sanity check that our next item is "uploads".
136
37
            let uploads = parts
137
37
                .next()
138
37
                .err_tip(|| 
format!("{ERROR_MSG} in {resource_name}")0
)
?0
;
139
0
            error_if!(
140
37
                uploads != "uploads",
  Branch (140:17): [True: 0, False: 37]
  Branch (140:17): [Folded - Ignored]
141
                "Expected part to be 'uploads'. Got: {uploads} for {resource_name} is_upload: {is_upload}"
142
            );
143
        }
144
145
        // `instance_name` is optional.
146
37
        if let Some(
instance_name29
) = parts.next() {
  Branch (146:16): [True: 29, False: 8]
  Branch (146:16): [Folded - Ignored]
147
29
            output.instance_name = Cow::Borrowed(instance_name);
148
29
        }
8
149
37
        Ok(output)
150
73
    }
151
152
    /// Returns a new ResourceInfo with all fields owned.
153
15
    pub fn to_owned(&self) -> ResourceInfo<'static> {
154
15
        ResourceInfo {
155
15
            instance_name: Cow::Owned(self.instance_name.to_string()),
156
15
            uuid: self.uuid.as_ref().map(|uuid| Cow::Owned(uuid.to_string())),
157
15
            compressor: self
158
15
                .compressor
159
15
                .as_ref()
160
15
                .map(|compressor| 
Cow::Owned(compressor.to_string())0
),
161
15
            digest_function: self
162
15
                .digest_function
163
15
                .as_ref()
164
15
                .map(|digest_function| 
Cow::Owned(digest_function.to_string())0
),
165
15
            hash: Cow::Owned(self.hash.to_string()),
166
15
            size: Cow::Owned(self.size.to_string()),
167
15
            expected_size: self.expected_size,
168
15
            optional_metadata: self
169
15
                .optional_metadata
170
15
                .as_ref()
171
15
                .map(|optional_metadata| 
Cow::Owned(optional_metadata.to_string())0
),
172
15
        }
173
15
    }
174
175
35
    pub fn to_string(&self, is_upload: bool) -> String {
176
35
        [
177
35
            Some(self.instance_name.as_ref()),
178
35
            is_upload.then_some("uploads"),
179
35
            self.uuid.as_ref().map(AsRef::as_ref),
180
35
            Some(
181
35
                self.compressor
182
35
                    .as_ref()
183
35
                    .map_or("blobs", |_| 
"compressed-blobs"15
),
184
35
            ),
185
35
            self.compressor.as_ref().map(AsRef::as_ref),
186
35
            self.digest_function.as_ref().map(AsRef::as_ref),
187
35
            Some(self.hash.as_ref()),
188
35
            Some(self.size.as_ref()),
189
35
            self.optional_metadata.as_ref().map(AsRef::as_ref),
190
35
        ]
191
35
        .into_iter()
192
35
        .flatten()
193
222
        .filter(|part| !part.is_empty())
194
35
        .collect::<Vec<&str>>()
195
35
        .join("/")
196
35
    }
197
}
198
199
#[derive(Debug, PartialEq)]
200
enum State {
201
    Unknown,
202
    Compressor,
203
    DigestFunction,
204
    Hash,
205
    Size,
206
    OptionalMetadata,
207
}
208
209
// Iterate backwards looking for "(compressed-)blobs", once found, move forward
210
// populating the output struct. This recursive function utilises the stack to
211
// temporarily hold the reference to the previous item reducing the need for
212
// a heap allocation.
213
276
fn recursive_parse<'a>(
214
276
    rparts: &mut impl Iterator<Item = &'a str>,
215
276
    output: &mut ResourceInfo<'a>,
216
276
    mut state: State,
217
276
    bytes_processed: &mut usize,
218
276
) -> Result<State, Error> {
219
276
    let 
part268
= rparts.next().err_tip(||
"on rparts.next()"8
)
?8
;
220
268
    if state == State::Unknown {
  Branch (220:8): [True: 268, False: 0]
  Branch (220:8): [Folded - Ignored]
221
268
        if part == "blobs" {
  Branch (221:12): [True: 46, False: 222]
  Branch (221:12): [Folded - Ignored]
222
46
            *bytes_processed = part.len() + SLASH_SIZE;
223
46
            return Ok(State::DigestFunction);
224
222
        }
225
222
        if part == "compressed-blobs" {
  Branch (225:12): [True: 19, False: 203]
  Branch (225:12): [Folded - Ignored]
226
19
            *bytes_processed = part.len() + SLASH_SIZE;
227
19
            return Ok(State::Compressor);
228
203
        }
229
203
        state = recursive_parse(rparts, output, state, bytes_processed)
?29
;
230
0
    }
231
232
    loop {
233
220
        match state {
234
            State::Unknown => {
235
0
                return Err(make_input_err!(
236
0
                    "Unknown state should never be reached in ResourceInfo::new"
237
0
                ))
238
            }
239
            State::Compressor => {
240
19
                state = State::DigestFunction;
241
19
                if COMPRESSORS.contains(&part) {
  Branch (241:20): [True: 16, False: 3]
  Branch (241:20): [Folded - Ignored]
242
16
                    output.compressor = Some(Cow::Borrowed(part));
243
16
                    *bytes_processed += part.len() + SLASH_SIZE;
244
16
                    return Ok(state);
245
3
                }
246
3
                return Err(make_input_err!("Expected compressor, got {part}"));
247
            }
248
            State::DigestFunction => {
249
62
                state = State::Hash;
250
62
                if DIGEST_FUNCTIONS.contains(&part) {
  Branch (250:20): [True: 16, False: 46]
  Branch (250:20): [Folded - Ignored]
251
16
                    output.digest_function = Some(Cow::Borrowed(part));
252
16
                    *bytes_processed += part.len() + SLASH_SIZE;
253
16
                    return Ok(state);
254
46
                }
255
46
                continue;
256
            }
257
            State::Hash => {
258
62
                output.hash = Cow::Borrowed(part);
259
62
                *bytes_processed += part.len() + SLASH_SIZE;
260
62
                // TODO(allada) Set the digest_function if it is not set based on the hash size.
261
62
                return Ok(State::Size);
262
            }
263
            State::Size => {
264
61
                output.size = Cow::Borrowed(part);
265
61
                output.expected_size = part.parse::<usize>().map_err(|_| {
266
3
                    make_input_err!(
267
3
                        "Digest size_bytes was not convertible to usize. Got: {}",
268
3
                        part
269
3
                    )
270
61
                })
?3
;
271
58
                *bytes_processed += part.len(); // Special case {size}, so it does not count one slash.
272
58
                return Ok(State::OptionalMetadata);
273
            }
274
            State::OptionalMetadata => {
275
16
                output.optional_metadata = Some(Cow::Borrowed(part));
276
16
                *bytes_processed += part.len() + SLASH_SIZE;
277
16
                // If we get here, we are done parsing backwards and have successfully parsed
278
16
                // everything beyond the "(compressed-)blobs" section.
279
16
                return Ok(State::OptionalMetadata);
280
            }
281
        }
282
    }
283
276
}