Coverage Report

Created: 2025-10-17 15:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-store/src/gcs_client/client.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::fmt::{Debug, Formatter};
16
use core::future::Future;
17
use core::time::Duration;
18
use std::collections::HashMap;
19
use std::sync::Arc;
20
21
use bytes::Bytes;
22
use futures::Stream;
23
use gcloud_auth::credentials::CredentialsFile;
24
use gcloud_storage::client::{Client, ClientConfig};
25
use gcloud_storage::http::Error as GcsError;
26
use gcloud_storage::http::objects::Object;
27
use gcloud_storage::http::objects::download::Range;
28
use gcloud_storage::http::objects::get::GetObjectRequest;
29
use gcloud_storage::http::objects::upload::{Media, UploadObjectRequest, UploadType};
30
use gcloud_storage::http::resumable_upload_client::{ChunkSize, UploadStatus};
31
use nativelink_config::stores::ExperimentalGcsSpec;
32
use nativelink_error::{Code, Error, ResultExt, make_err};
33
use nativelink_util::buf_channel::DropCloserReadHalf;
34
use rand::Rng;
35
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
36
use tokio::time::sleep;
37
38
use crate::gcs_client::types::{
39
    CHUNK_SIZE, DEFAULT_CONCURRENT_UPLOADS, DEFAULT_CONTENT_TYPE, GcsObject,
40
    INITIAL_UPLOAD_RETRY_DELAY_MS, MAX_UPLOAD_RETRIES, MAX_UPLOAD_RETRY_DELAY_MS, ObjectPath,
41
    SIMPLE_UPLOAD_THRESHOLD, Timestamp,
42
};
43
44
/// A trait that defines the required GCS operations.
45
/// This abstraction allows for easier testing by mocking GCS responses.
46
pub trait GcsOperations: Send + Sync + Debug {
47
    /// Read metadata for a GCS object
48
    fn read_object_metadata(
49
        &self,
50
        object: &ObjectPath,
51
    ) -> impl Future<Output = Result<Option<GcsObject>, Error>> + Send;
52
53
    /// Read the content of a GCS object, optionally with a range
54
    fn read_object_content(
55
        &self,
56
        object_path: &ObjectPath,
57
        start: u64,
58
        end: Option<u64>,
59
    ) -> impl Future<
60
        Output = Result<Box<dyn Stream<Item = Result<Bytes, Error>> + Send + Unpin>, Error>,
61
    > + Send;
62
63
    /// Write object with simple upload (for smaller objects)
64
    fn write_object(
65
        &self,
66
        object_path: &ObjectPath,
67
        content: Vec<u8>,
68
    ) -> impl Future<Output = Result<(), Error>> + Send;
69
70
    /// Start a resumable write operation and return the upload URL
71
    fn start_resumable_write(
72
        &self,
73
        object_path: &ObjectPath,
74
    ) -> impl Future<Output = Result<String, Error>> + Send;
75
76
    /// Upload a chunk of data in a resumable upload session
77
    fn upload_chunk(
78
        &self,
79
        upload_url: &str,
80
        object_path: &ObjectPath,
81
        data: Bytes,
82
        offset: u64,
83
        end_offset: u64,
84
        total_size: Option<u64>,
85
    ) -> impl Future<Output = Result<(), Error>> + Send;
86
87
    /// Complete high-level operation to upload data from a reader
88
    fn upload_from_reader(
89
        &self,
90
        object_path: &ObjectPath,
91
        reader: &mut DropCloserReadHalf,
92
        upload_id: &str,
93
        max_size: u64,
94
    ) -> impl Future<Output = Result<(), Error>>;
95
96
    /// Check if an object exists
97
    fn object_exists(
98
        &self,
99
        object_path: &ObjectPath,
100
    ) -> impl Future<Output = Result<bool, Error>> + Send;
101
}
102
103
/// Main client for interacting with Google Cloud Storage
104
pub struct GcsClient {
105
    client: Client,
106
    resumable_chunk_size: usize,
107
    semaphore: Arc<Semaphore>,
108
}
109
110
impl GcsClient {
111
    /// Create a new GCS client from the provided spec
112
0
    pub async fn new(spec: &ExperimentalGcsSpec) -> Result<Self, Error> {
113
        // Attempt to get the authentication from a file with the environment
114
        // variable GOOGLE_APPLICATION_CREDENTIALS or directly from the
115
        // environment in variable GOOGLE_APPLICATION_CREDENTIALS_JSON.  If that
116
        // fails, attempt to get authentication from the environment.
117
0
        let maybe_client_config = match CredentialsFile::new().await {
118
0
            Ok(credentials) => ClientConfig::default().with_credentials(credentials).await,
119
0
            Err(_) => ClientConfig::default().with_auth().await,
120
        }
121
0
        .map_err(|e| {
122
0
            make_err!(
123
0
                Code::Internal,
124
                "Failed to create client config with credentials: {e:?}"
125
            )
126
0
        });
127
128
        // If authentication is required then error, otherwise use anonymous.
129
0
        let client_config = if spec.authentication_required {
  Branch (129:32): [True: 0, False: 0]
  Branch (129:32): [Folded - Ignored]
130
0
            maybe_client_config.err_tip(|| "Authentication required and none found.")?
131
        } else {
132
0
            maybe_client_config.unwrap_or_else(|_| ClientConfig::default().anonymous())
133
        };
134
135
        // Creating client with the configured authentication
136
0
        let client = Client::new(client_config);
137
0
        let resumable_chunk_size = spec.resumable_chunk_size.unwrap_or(CHUNK_SIZE);
138
139
        // Get max connections from config
140
0
        let max_connections = spec
141
0
            .common
142
0
            .multipart_max_concurrent_uploads
143
0
            .unwrap_or(DEFAULT_CONCURRENT_UPLOADS);
144
145
0
        Ok(Self {
146
0
            client,
147
0
            resumable_chunk_size,
148
0
            semaphore: Arc::new(Semaphore::new(max_connections)),
149
0
        })
150
0
    }
151
152
    /// Generic method to execute operations with connection limiting
153
0
    async fn with_connection<F, Fut, T>(&self, operation: F) -> Result<T, Error>
154
0
    where
155
0
        F: FnOnce() -> Fut + Send,
156
0
        Fut: Future<Output = Result<T, Error>> + Send,
157
0
    {
158
0
        let permit =
159
0
            self.semaphore.acquire().await.map_err(|e| {
160
0
                make_err!(Code::Internal, "Failed to acquire connection permit: {}", e)
161
0
            })?;
162
163
0
        let result = operation().await;
164
0
        drop(permit);
165
0
        result
166
0
    }
167
168
    /// Convert GCS object to our internal representation
169
0
    fn convert_to_gcs_object(&self, obj: Object) -> GcsObject {
170
0
        let update_time = obj.updated.map(|dt| Timestamp {
171
0
            seconds: dt.unix_timestamp(),
172
            nanos: 0,
173
0
        });
174
175
        GcsObject {
176
0
            name: obj.name,
177
0
            bucket: obj.bucket,
178
0
            size: obj.size,
179
0
            content_type: obj
180
0
                .content_type
181
0
                .unwrap_or_else(|| DEFAULT_CONTENT_TYPE.to_string()),
182
0
            update_time,
183
        }
184
0
    }
185
186
    /// Handle error from GCS operations
187
0
    fn handle_gcs_error(err: &GcsError) -> Error {
188
0
        let code = match &err {
189
0
            GcsError::Response(resp) => match resp.code {
190
0
                404 => Code::NotFound,
191
0
                401 | 403 => Code::PermissionDenied,
192
0
                408 | 429 => Code::ResourceExhausted,
193
0
                500..=599 => Code::Unavailable,
194
0
                _ => Code::Unknown,
195
            },
196
0
            _ => Code::Internal,
197
        };
198
199
0
        make_err!(code, "GCS operation failed: {}", err)
200
0
    }
201
202
    /// Reading data from reader and upload in a single operation
203
0
    async fn read_and_upload_all(
204
0
        &self,
205
0
        object_path: &ObjectPath,
206
0
        reader: &mut DropCloserReadHalf,
207
0
        max_size: u64,
208
0
    ) -> Result<(), Error> {
209
0
        let initial_capacity = core::cmp::min(max_size as usize, 10 * 1024 * 1024);
210
0
        let mut data = Vec::with_capacity(initial_capacity);
211
0
        let max_size = max_size as usize;
212
0
        let mut total_size = 0usize;
213
214
0
        while total_size < max_size {
  Branch (214:15): [Folded - Ignored]
  Branch (214:15): [Folded - Ignored]
215
0
            let to_read = core::cmp::min(self.resumable_chunk_size, max_size - total_size);
216
0
            let chunk = reader.consume(Some(to_read)).await?;
217
218
0
            if chunk.is_empty() {
  Branch (218:16): [Folded - Ignored]
  Branch (218:16): [Folded - Ignored]
219
0
                break;
220
0
            }
221
222
0
            data.extend_from_slice(&chunk);
223
0
            total_size += chunk.len();
224
        }
225
226
0
        self.write_object(object_path, data).await
227
0
    }
228
229
    /// Implementing a resumable upload using the resumable upload API
230
0
    async fn try_resumable_upload(
231
0
        &self,
232
0
        object_path: &ObjectPath,
233
0
        reader: &mut DropCloserReadHalf,
234
0
        max_size: u64,
235
0
    ) -> Result<(), Error> {
236
0
        self.with_connection(|| async {
237
0
            let request = UploadObjectRequest {
238
0
                bucket: object_path.bucket.clone(),
239
0
                ..Default::default()
240
0
            };
241
242
0
            let mut metadata = HashMap::<String, String>::new();
243
0
            metadata.insert("name".to_string(), object_path.path.clone());
244
245
            // Use Multipart upload type with metadata
246
0
            let upload_type = UploadType::Multipart(Box::new(Object {
247
0
                name: object_path.path.clone(),
248
0
                content_type: Some(DEFAULT_CONTENT_TYPE.to_string()),
249
0
                metadata: Some(metadata),
250
0
                ..Default::default()
251
0
            }));
252
253
            // Prepare resumable upload
254
0
            let uploader = self
255
0
                .client
256
0
                .prepare_resumable_upload(&request, &upload_type)
257
0
                .await
258
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
259
260
            // Upload data in chunks
261
0
            let mut offset: u64 = 0;
262
0
            let max_size = max_size as usize;
263
0
            let mut total_uploaded = 0usize;
264
265
0
            while total_uploaded < max_size {
  Branch (265:19): [Folded - Ignored]
  Branch (265:19): [Folded - Ignored]
266
0
                let to_read = core::cmp::min(self.resumable_chunk_size, max_size - total_uploaded);
267
0
                let chunk = reader.consume(Some(to_read)).await?;
268
269
0
                if chunk.is_empty() {
  Branch (269:20): [Folded - Ignored]
  Branch (269:20): [Folded - Ignored]
270
0
                    break;
271
0
                }
272
273
0
                let chunk_size = chunk.len() as u64;
274
0
                total_uploaded += chunk.len();
275
276
0
                let is_final = total_uploaded >= max_size || chunk.len() < to_read;
  Branch (276:32): [Folded - Ignored]
  Branch (276:32): [Folded - Ignored]
277
0
                let total_size = if is_final {
  Branch (277:37): [Folded - Ignored]
  Branch (277:37): [Folded - Ignored]
278
0
                    Some(offset + chunk_size)
279
                } else {
280
0
                    Some(max_size as u64)
281
                };
282
283
0
                let chunk_def = ChunkSize::new(offset, offset + chunk_size - 1, total_size);
284
285
                // Upload chunk
286
0
                let status = uploader
287
0
                    .upload_multiple_chunk(chunk, &chunk_def)
288
0
                    .await
289
0
                    .map_err(|e| Self::handle_gcs_error(&e))?;
290
291
                // Update offset for next chunk
292
0
                offset += chunk_size;
293
294
0
                if let UploadStatus::Ok(_) = status {
  Branch (294:24): [Folded - Ignored]
  Branch (294:24): [Folded - Ignored]
295
0
                    break;
296
0
                }
297
            }
298
299
            // If nothing was uploaded, finalizing with empty content
300
0
            if offset == 0 {
  Branch (300:16): [Folded - Ignored]
  Branch (300:16): [Folded - Ignored]
301
0
                let chunk_def = ChunkSize::new(0, 0, Some(0));
302
0
                uploader
303
0
                    .upload_multiple_chunk(Vec::new(), &chunk_def)
304
0
                    .await
305
0
                    .map_err(|e| Self::handle_gcs_error(&e))?;
306
0
            }
307
308
            // Check if the object exists
309
0
            match self.read_object_metadata(object_path).await? {
310
0
                Some(_) => Ok(()),
311
0
                None => Err(make_err!(
312
0
                    Code::Internal,
313
0
                    "Upload completed but object not found"
314
0
                )),
315
            }
316
0
        })
317
0
        .await
318
0
    }
319
}
320
321
impl Debug for GcsClient {
322
0
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
323
0
        f.debug_struct("GcsClient")
324
0
            .field("resumable_chunk_size", &self.resumable_chunk_size)
325
0
            .field("max_connections", &self.semaphore.available_permits())
326
0
            .finish_non_exhaustive()
327
0
    }
328
}
329
330
impl GcsOperations for GcsClient {
331
0
    async fn read_object_metadata(
332
0
        &self,
333
0
        object_path: &ObjectPath,
334
0
    ) -> Result<Option<GcsObject>, Error> {
335
0
        self.with_connection(|| async {
336
0
            let request = GetObjectRequest {
337
0
                bucket: object_path.bucket.clone(),
338
0
                object: object_path.path.clone(),
339
0
                ..Default::default()
340
0
            };
341
342
0
            match self.client.get_object(&request).await {
343
0
                Ok(obj) => Ok(Some(self.convert_to_gcs_object(obj))),
344
0
                Err(err) => {
345
0
                    if let GcsError::Response(resp) = &err {
  Branch (345:28): [True: 0, False: 0]
  Branch (345:28): [Folded - Ignored]
346
0
                        if resp.code == 404 {
  Branch (346:28): [True: 0, False: 0]
  Branch (346:28): [Folded - Ignored]
347
0
                            return Ok(None);
348
0
                        }
349
0
                    }
350
0
                    Err(Self::handle_gcs_error(&err))
351
                }
352
            }
353
0
        })
354
0
        .await
355
0
    }
356
357
0
    async fn read_object_content(
358
0
        &self,
359
0
        object_path: &ObjectPath,
360
0
        start: u64,
361
0
        end: Option<u64>,
362
0
    ) -> Result<Box<dyn Stream<Item = Result<Bytes, Error>> + Send + Unpin>, Error> {
363
        type StreamItem = Result<Bytes, gcloud_storage::http::Error>;
364
        struct ReadStream<T: Stream<Item = StreamItem> + Send + Unpin> {
365
            stream: T,
366
            permit: Option<OwnedSemaphorePermit>,
367
        }
368
369
        impl<T: Stream<Item = StreamItem> + Send + Unpin> Stream for ReadStream<T> {
370
            type Item = Result<Bytes, Error>;
371
372
0
            fn poll_next(
373
0
                mut self: core::pin::Pin<&mut Self>,
374
0
                cx: &mut core::task::Context<'_>,
375
0
            ) -> core::task::Poll<Option<Self::Item>> {
376
0
                match std::pin::pin!(&mut self.stream).poll_next(cx) {
377
0
                    core::task::Poll::Ready(Some(Ok(bytes))) => {
378
0
                        core::task::Poll::Ready(Some(Ok(bytes)))
379
                    }
380
0
                    core::task::Poll::Ready(Some(Err(err))) => {
381
0
                        self.permit.take();
382
0
                        core::task::Poll::Ready(Some(Err(GcsClient::handle_gcs_error(&err))))
383
                    }
384
                    core::task::Poll::Ready(None) => {
385
0
                        self.permit.take();
386
0
                        core::task::Poll::Ready(None)
387
                    }
388
0
                    core::task::Poll::Pending => core::task::Poll::Pending,
389
                }
390
0
            }
391
392
0
            fn size_hint(&self) -> (usize, Option<usize>) {
393
0
                self.stream.size_hint()
394
0
            }
395
        }
396
397
0
        let permit =
398
0
            self.semaphore.clone().acquire_owned().await.map_err(|e| {
399
0
                make_err!(Code::Internal, "Failed to acquire connection permit: {}", e)
400
0
            })?;
401
0
        let request = GetObjectRequest {
402
0
            bucket: object_path.bucket.clone(),
403
0
            object: object_path.path.clone(),
404
0
            ..Default::default()
405
0
        };
406
407
0
        let start = (start > 0).then_some(start);
408
0
        let range = Range(start, end);
409
410
        // Download the object
411
0
        let stream = self
412
0
            .client
413
0
            .download_streamed_object(&request, &range)
414
0
            .await
415
0
            .map_err(|e| Self::handle_gcs_error(&e))?;
416
417
0
        Ok(Box::new(ReadStream {
418
0
            stream,
419
0
            permit: Some(permit),
420
0
        }))
421
0
    }
422
423
0
    async fn write_object(&self, object_path: &ObjectPath, content: Vec<u8>) -> Result<(), Error> {
424
0
        self.with_connection(|| async {
425
0
            let request = UploadObjectRequest {
426
0
                bucket: object_path.bucket.clone(),
427
0
                ..Default::default()
428
0
            };
429
430
0
            let media = Media::new(object_path.path.clone());
431
0
            let upload_type = UploadType::Simple(media);
432
433
0
            self.client
434
0
                .upload_object(&request, content, &upload_type)
435
0
                .await
436
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
437
438
0
            Ok(())
439
0
        })
440
0
        .await
441
0
    }
442
443
0
    async fn start_resumable_write(&self, object_path: &ObjectPath) -> Result<String, Error> {
444
0
        self.with_connection(|| async {
445
0
            let request = UploadObjectRequest {
446
0
                bucket: object_path.bucket.clone(),
447
0
                ..Default::default()
448
0
            };
449
450
0
            let upload_type = UploadType::Multipart(Box::new(Object {
451
0
                name: object_path.path.clone(),
452
0
                content_type: Some(DEFAULT_CONTENT_TYPE.to_string()),
453
0
                ..Default::default()
454
0
            }));
455
456
            // Start resumable upload session
457
0
            let uploader = self
458
0
                .client
459
0
                .prepare_resumable_upload(&request, &upload_type)
460
0
                .await
461
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
462
463
0
            Ok(uploader.url().to_string())
464
0
        })
465
0
        .await
466
0
    }
467
468
0
    async fn upload_chunk(
469
0
        &self,
470
0
        upload_url: &str,
471
0
        _object_path: &ObjectPath,
472
0
        data: Bytes,
473
0
        offset: u64,
474
0
        end_offset: u64,
475
0
        total_size: Option<u64>,
476
0
    ) -> Result<(), Error> {
477
0
        self.with_connection(|| async {
478
0
            let uploader = self.client.get_resumable_upload(upload_url.to_string());
479
480
0
            let last_byte = if end_offset == 0 { 0 } else { end_offset - 1 };
  Branch (480:32): [True: 0, False: 0]
  Branch (480:32): [Folded - Ignored]
481
0
            let chunk_def = ChunkSize::new(offset, last_byte, total_size);
482
483
            // Upload chunk
484
0
            uploader
485
0
                .upload_multiple_chunk(data, &chunk_def)
486
0
                .await
487
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
488
489
0
            Ok(())
490
0
        })
491
0
        .await
492
0
    }
493
494
0
    async fn upload_from_reader(
495
0
        &self,
496
0
        object_path: &ObjectPath,
497
0
        reader: &mut DropCloserReadHalf,
498
0
        _upload_id: &str,
499
0
        max_size: u64,
500
0
    ) -> Result<(), Error> {
501
0
        let mut retry_count = 0;
502
0
        let mut retry_delay = INITIAL_UPLOAD_RETRY_DELAY_MS;
503
504
        loop {
505
0
            let result = if max_size < SIMPLE_UPLOAD_THRESHOLD {
  Branch (505:29): [Folded - Ignored]
  Branch (505:29): [Folded - Ignored]
506
0
                self.read_and_upload_all(object_path, reader, max_size)
507
0
                    .await
508
            } else {
509
0
                self.try_resumable_upload(object_path, reader, max_size)
510
0
                    .await
511
            };
512
513
0
            match result {
514
0
                Ok(()) => return Ok(()),
515
0
                Err(e) => {
516
0
                    let is_retriable = matches!(
517
0
                        e.code,
518
                        Code::Unavailable | Code::ResourceExhausted | Code::DeadlineExceeded
519
0
                    ) || (e.code == Code::Internal
  Branch (519:27): [Folded - Ignored]
  Branch (519:27): [Folded - Ignored]
520
0
                        && e.to_string().contains("connection"));
521
522
0
                    if !is_retriable || retry_count >= MAX_UPLOAD_RETRIES {
  Branch (522:24): [Folded - Ignored]
  Branch (522:41): [Folded - Ignored]
  Branch (522:24): [Folded - Ignored]
  Branch (522:41): [Folded - Ignored]
523
0
                        return Err(e);
524
0
                    }
525
526
0
                    if let Err(reset_err) = reader.try_reset_stream() {
  Branch (526:28): [Folded - Ignored]
  Branch (526:28): [Folded - Ignored]
527
0
                        return Err(e.merge(reset_err));
528
0
                    }
529
530
0
                    sleep(Duration::from_millis(retry_delay)).await;
531
0
                    retry_delay = core::cmp::min(retry_delay * 2, MAX_UPLOAD_RETRY_DELAY_MS);
532
533
0
                    let mut rng = rand::rng();
534
0
                    let jitter_factor = rng.random::<f64>().mul_add(0.4, 0.8);
535
0
                    retry_delay = (retry_delay as f64 * jitter_factor) as u64;
536
537
0
                    retry_count += 1;
538
                }
539
            }
540
        }
541
0
    }
542
543
0
    /// Reports whether `object_path` exists, i.e. whether a metadata lookup
    /// returns an object.
    ///
    /// # Errors
    ///
    /// Propagates any error from `read_object_metadata`.
    async fn object_exists(&self, object_path: &ObjectPath) -> Result<bool, Error> {
        match self.read_object_metadata(object_path).await {
            Ok(metadata) => Ok(metadata.is_some()),
            Err(e) => Err(e),
        }
    }
547
}