Coverage Report

Created: 2026-04-14 11:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-store/src/gcs_client/client.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    See LICENSE file for details
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use core::fmt::{Debug, Formatter};
16
use core::future::Future;
17
use core::time::Duration;
18
use std::collections::HashMap;
19
use std::sync::Arc;
20
21
use bytes::Bytes;
22
use futures::Stream;
23
use gcloud_auth::credentials::CredentialsFile;
24
use gcloud_storage::client::{Client, ClientConfig};
25
use gcloud_storage::http::Error as GcsError;
26
use gcloud_storage::http::objects::Object;
27
use gcloud_storage::http::objects::download::Range;
28
use gcloud_storage::http::objects::get::GetObjectRequest;
29
use gcloud_storage::http::objects::upload::{Media, UploadObjectRequest, UploadType};
30
use gcloud_storage::http::resumable_upload_client::{ChunkSize, UploadStatus};
31
use nativelink_config::stores::ExperimentalGcsSpec;
32
use nativelink_error::{Code, Error, ResultExt, make_err};
33
use nativelink_util::buf_channel::DropCloserReadHalf;
34
use rand::Rng;
35
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
36
use tokio::time::sleep;
37
38
use crate::gcs_client::types::{
39
    CHUNK_SIZE, DEFAULT_CONCURRENT_UPLOADS, DEFAULT_CONTENT_TYPE, GcsObject,
40
    INITIAL_UPLOAD_RETRY_DELAY_MS, MAX_UPLOAD_RETRIES, MAX_UPLOAD_RETRY_DELAY_MS, ObjectPath,
41
    SIMPLE_UPLOAD_THRESHOLD, Timestamp,
42
};
43
44
/// A trait that defines the required GCS operations.
/// This abstraction allows for easier testing by mocking GCS responses.
///
/// Every method returns a future. All of them except `upload_from_reader`
/// declare that future as `Send`.
pub trait GcsOperations: Send + Sync + Debug {
    /// Read metadata for a GCS object.
    ///
    /// The `Option` in the result allows an implementation to report a missing
    /// object as `Ok(None)` rather than as an error.
    fn read_object_metadata(
        &self,
        object: &ObjectPath,
    ) -> impl Future<Output = Result<Option<GcsObject>, Error>> + Send;

    /// Read the content of a GCS object, optionally with a range.
    ///
    /// On success the content is delivered as a boxed stream of `Bytes`
    /// chunks; each item of the stream may itself be an error.
    // NOTE(review): whether `end` is inclusive or exclusive is not specified
    // here -- confirm against the implementation and its callers.
    fn read_object_content(
        &self,
        object_path: &ObjectPath,
        start: u64,
        end: Option<u64>,
    ) -> impl Future<
        Output = Result<Box<dyn Stream<Item = Result<Bytes, Error>> + Send + Unpin>, Error>,
    > + Send;

    /// Write object with simple upload (for smaller objects).
    ///
    /// `content` is the complete object body and is passed by value.
    fn write_object(
        &self,
        object_path: &ObjectPath,
        content: Vec<u8>,
    ) -> impl Future<Output = Result<(), Error>> + Send;

    /// Start a resumable write operation and return the upload URL.
    // NOTE(review): presumably the returned URL is what `upload_chunk` expects
    // as `upload_url` -- confirm against callers.
    fn start_resumable_write(
        &self,
        object_path: &ObjectPath,
    ) -> impl Future<Output = Result<String, Error>> + Send;

    /// Upload a chunk of data in a resumable upload session.
    ///
    /// `upload_url` identifies the session and `data` is the chunk payload.
    /// `offset` and `end_offset` locate the chunk within the object; the
    /// `GcsClient` implementation treats `end_offset` as exclusive (it sends
    /// `end_offset - 1` as the last byte position).
    // NOTE(review): `total_size` looks like the full object size when known,
    // `None` otherwise -- confirm against callers.
    fn upload_chunk(
        &self,
        upload_url: &str,
        object_path: &ObjectPath,
        data: Bytes,
        offset: u64,
        end_offset: u64,
        total_size: Option<u64>,
    ) -> impl Future<Output = Result<(), Error>> + Send;

    /// Complete high-level operation to upload data from a reader.
    ///
    /// Unlike the other methods, the returned future is not declared `Send`.
    /// The `GcsClient` implementation reads at most `max_size` bytes from
    /// `reader` and does not use `upload_id`.
    fn upload_from_reader(
        &self,
        object_path: &ObjectPath,
        reader: &mut DropCloserReadHalf,
        upload_id: &str,
        max_size: u64,
    ) -> impl Future<Output = Result<(), Error>>;

    /// Check if an object exists.
    fn object_exists(
        &self,
        object_path: &ObjectPath,
    ) -> impl Future<Output = Result<bool, Error>> + Send;
}
102
103
/// Main client for interacting with Google Cloud Storage
pub struct GcsClient {
    /// Underlying `gcloud_storage` client used to issue every request.
    client: Client,
    /// Largest number of bytes requested from a reader at a time while
    /// uploading.
    resumable_chunk_size: usize,
    /// Bounds the number of concurrently running GCS operations; each
    /// operation holds one permit while it is in flight.
    semaphore: Arc<Semaphore>,
}
109
110
impl GcsClient {
111
21
    fn create_client_config(spec: &ExperimentalGcsSpec) -> Result<ClientConfig, Error> {
112
21
        let mut client_config = ClientConfig::default();
113
21
        let connect_timeout = if spec.connection_timeout_s > 0 {
114
0
            Duration::from_secs(spec.connection_timeout_s)
115
        } else {
116
21
            Duration::from_secs(3)
117
        };
118
21
        let read_timeout = if spec.read_timeout_s > 0 {
119
0
            Duration::from_secs(spec.read_timeout_s)
120
        } else {
121
21
            Duration::from_secs(3)
122
        };
123
21
        let client = reqwest::ClientBuilder::new()
124
21
            .connect_timeout(connect_timeout)
125
21
            .read_timeout(read_timeout)
126
21
            .build()
127
21
            .map_err(|e| 
{0
128
0
                Error::from_std_err(Code::Internal, &e).append("Unable to create GCS client")
129
0
            })?;
130
21
        let mid_client = reqwest_middleware::ClientBuilder::new(client).build();
131
21
        client_config.http = Some(mid_client);
132
21
        Ok(client_config)
133
21
    }
134
135
    /// Create a new GCS client from the provided spec
136
0
    pub async fn new(spec: &ExperimentalGcsSpec) -> Result<Self, Error> {
137
        // Attempt to get the authentication from a file with the environment
138
        // variable GOOGLE_APPLICATION_CREDENTIALS or directly from the
139
        // environment in variable GOOGLE_APPLICATION_CREDENTIALS_JSON.  If that
140
        // fails, attempt to get authentication from the environment.
141
0
        let maybe_client_config = match CredentialsFile::new().await {
142
0
            Ok(credentials) => {
143
0
                Self::create_client_config(spec)?
144
0
                    .with_credentials(credentials)
145
0
                    .await
146
            }
147
0
            Err(_) => Self::create_client_config(spec)?.with_auth().await,
148
        }
149
0
        .map_err(|e| {
150
0
            Error::from_std_err(Code::Internal, &e)
151
0
                .append("Failed to create client config with credentials")
152
0
        });
153
154
        // If authentication is required then error, otherwise use anonymous.
155
0
        let client_config = if spec.authentication_required {
156
0
            maybe_client_config.err_tip(|| "Authentication required and none found.")?
157
        } else {
158
0
            maybe_client_config
159
0
                .or_else(|_| Self::create_client_config(spec).map(ClientConfig::anonymous))?
160
        };
161
162
        // Creating client with the configured authentication
163
0
        let client = Client::new(client_config);
164
0
        let resumable_chunk_size = spec.resumable_chunk_size.unwrap_or(CHUNK_SIZE);
165
166
        // Get max connections from config
167
0
        let max_connections = spec
168
0
            .common
169
0
            .multipart_max_concurrent_uploads
170
0
            .unwrap_or(DEFAULT_CONCURRENT_UPLOADS);
171
172
0
        Ok(Self {
173
0
            client,
174
0
            resumable_chunk_size,
175
0
            semaphore: Arc::new(Semaphore::new(max_connections)),
176
0
        })
177
0
    }
178
179
    /// Create a mock GCS client for testing
180
21
    pub fn new_mock(spec: &ExperimentalGcsSpec, endpoint: String) -> Result<Self, Error> {
181
21
        let mut client_config = ClientConfig::anonymous(Self::create_client_config(spec)
?0
);
182
21
        client_config.storage_endpoint = endpoint;
183
184
21
        let client = Client::new(client_config);
185
21
        let resumable_chunk_size = spec.resumable_chunk_size.unwrap_or(CHUNK_SIZE);
186
21
        let max_connections = spec
187
21
            .common
188
21
            .multipart_max_concurrent_uploads
189
21
            .unwrap_or(DEFAULT_CONCURRENT_UPLOADS);
190
191
21
        Ok(Self {
192
21
            client,
193
21
            resumable_chunk_size,
194
21
            semaphore: Arc::new(Semaphore::new(max_connections)),
195
21
        })
196
21
    }
197
198
    /// Generic method to execute operations with connection limiting
199
21
    async fn with_connection<F, Fut, T>(&self, operation: F) -> Result<T, Error>
200
21
    where
201
21
        F: FnOnce() -> Fut + Send,
202
21
        Fut: Future<Output = Result<T, Error>> + Send,
203
21
    {
204
21
        let permit = self.semaphore.acquire().await.map_err(|e| 
{0
205
0
            Error::from_std_err(Code::Internal, &e).append("Failed to acquire connection permit")
206
0
        })?;
207
208
21
        let result = operation().await;
209
21
        drop(permit);
210
21
        result
211
21
    }
212
213
    /// Convert GCS object to our internal representation
214
0
    fn convert_to_gcs_object(&self, obj: Object) -> GcsObject {
215
0
        let update_time = obj.updated.map(|dt| Timestamp {
216
0
            seconds: dt.unix_timestamp(),
217
            nanos: 0,
218
0
        });
219
220
        GcsObject {
221
0
            name: obj.name,
222
0
            bucket: obj.bucket,
223
0
            size: obj.size,
224
0
            content_type: obj
225
0
                .content_type
226
0
                .unwrap_or_else(|| DEFAULT_CONTENT_TYPE.to_string()),
227
0
            update_time,
228
        }
229
0
    }
230
231
    /// Handle error from GCS operations
232
21
    fn handle_gcs_error(err: &GcsError) -> Error {
233
21
        let code = match &err {
234
0
            GcsError::Response(resp) => match resp.code {
235
0
                404 => Code::NotFound,
236
0
                401 | 403 => Code::PermissionDenied,
237
0
                408 | 429 => Code::ResourceExhausted,
238
0
                500..=599 => Code::Unavailable,
239
0
                _ => Code::Unknown,
240
            },
241
21
            GcsError::HttpClient(resp) => match resp.status() {
242
20
                Some(http::StatusCode::NOT_FOUND) => 
Code::NotFound2
,
243
18
                Some(http::StatusCode::UNAUTHORIZED | 
http::StatusCode::FORBIDDEN16
) => {
244
4
                    Code::PermissionDenied
245
                }
246
14
                Some(http::StatusCode::REQUEST_TIMEOUT | 
http::StatusCode::TOO_MANY_REQUESTS12
) => {
247
4
                    Code::ResourceExhausted
248
                }
249
10
                Some(
code8
) if code.is_server_error(
)8
=>
Code::Unavailable8
,
250
3
                _ => Code::Unknown,
251
            },
252
0
            _ => Code::Internal,
253
        };
254
255
21
        Error::from_std_err(code, &err).append("GCS operation failed")
256
21
    }
257
258
    /// Reading data from reader and upload in a single operation
259
0
    async fn read_and_upload_all(
260
0
        &self,
261
0
        object_path: &ObjectPath,
262
0
        reader: &mut DropCloserReadHalf,
263
0
        max_size: u64,
264
0
    ) -> Result<(), Error> {
265
0
        let initial_capacity = core::cmp::min(
266
0
            usize::try_from(max_size).unwrap_or(usize::MAX),
267
0
            10 * 1024 * 1024,
268
        );
269
0
        let mut data = Vec::with_capacity(initial_capacity);
270
0
        let max_size = usize::try_from(max_size).unwrap_or(usize::MAX);
271
0
        let mut total_size = 0usize;
272
273
0
        while total_size < max_size {
274
0
            let to_read = core::cmp::min(self.resumable_chunk_size, max_size - total_size);
275
0
            let chunk = reader.consume(Some(to_read)).await?;
276
277
0
            if chunk.is_empty() {
278
0
                break;
279
0
            }
280
281
0
            data.extend_from_slice(&chunk);
282
0
            total_size += chunk.len();
283
        }
284
285
0
        self.write_object(object_path, data).await
286
0
    }
287
288
    /// Implementing a resumable upload using the resumable upload API
289
0
    async fn try_resumable_upload(
290
0
        &self,
291
0
        object_path: &ObjectPath,
292
0
        reader: &mut DropCloserReadHalf,
293
0
        max_size: u64,
294
0
    ) -> Result<(), Error> {
295
0
        self.with_connection(|| async {
296
0
            let request = UploadObjectRequest {
297
0
                bucket: object_path.bucket.clone(),
298
0
                ..Default::default()
299
0
            };
300
301
0
            let mut metadata = HashMap::<String, String>::new();
302
0
            metadata.insert("name".to_string(), object_path.path.clone());
303
304
            // Use Multipart upload type with metadata
305
0
            let upload_type = UploadType::Multipart(Box::new(Object {
306
0
                name: object_path.path.clone(),
307
0
                content_type: Some(DEFAULT_CONTENT_TYPE.to_string()),
308
0
                metadata: Some(metadata),
309
0
                ..Default::default()
310
0
            }));
311
312
            // Prepare resumable upload
313
0
            let uploader = self
314
0
                .client
315
0
                .prepare_resumable_upload(&request, &upload_type)
316
0
                .await
317
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
318
319
            // Upload data in chunks
320
0
            let mut offset: u64 = 0;
321
0
            let max_size = usize::try_from(max_size).unwrap_or(usize::MAX);
322
0
            let mut total_uploaded = 0usize;
323
324
0
            while total_uploaded < max_size {
325
0
                let to_read = core::cmp::min(self.resumable_chunk_size, max_size - total_uploaded);
326
0
                let chunk = reader.consume(Some(to_read)).await?;
327
328
0
                if chunk.is_empty() {
329
0
                    break;
330
0
                }
331
332
0
                let chunk_size = chunk.len() as u64;
333
0
                total_uploaded += chunk.len();
334
335
0
                let is_final = total_uploaded >= max_size || chunk.len() < to_read;
336
0
                let total_size = if is_final {
337
0
                    Some(offset + chunk_size)
338
                } else {
339
0
                    Some(max_size as u64)
340
                };
341
342
0
                let chunk_def = ChunkSize::new(offset, offset + chunk_size - 1, total_size);
343
344
                // Upload chunk
345
0
                let status = uploader
346
0
                    .upload_multiple_chunk(chunk, &chunk_def)
347
0
                    .await
348
0
                    .map_err(|e| Self::handle_gcs_error(&e))?;
349
350
                // Update offset for next chunk
351
0
                offset += chunk_size;
352
353
0
                if let UploadStatus::Ok(_) = status {
354
0
                    break;
355
0
                }
356
            }
357
358
            // If nothing was uploaded, finalizing with empty content
359
0
            if offset == 0 {
360
0
                let chunk_def = ChunkSize::new(0, 0, Some(0));
361
0
                uploader
362
0
                    .upload_multiple_chunk(Vec::new(), &chunk_def)
363
0
                    .await
364
0
                    .map_err(|e| Self::handle_gcs_error(&e))?;
365
0
            }
366
367
            // Check if the object exists
368
0
            match self.read_object_metadata(object_path).await? {
369
0
                Some(_) => Ok(()),
370
0
                None => Err(make_err!(
371
0
                    Code::Internal,
372
0
                    "Upload completed but object not found"
373
0
                )),
374
            }
375
0
        })
376
0
        .await
377
0
    }
378
}
379
380
impl Debug for GcsClient {
381
0
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
382
0
        f.debug_struct("GcsClient")
383
0
            .field("resumable_chunk_size", &self.resumable_chunk_size)
384
0
            .field("max_connections", &self.semaphore.available_permits())
385
0
            .finish_non_exhaustive()
386
0
    }
387
}
388
389
impl GcsOperations for GcsClient {
390
21
    async fn read_object_metadata(
391
21
        &self,
392
21
        object_path: &ObjectPath,
393
21
    ) -> Result<Option<GcsObject>, Error> {
394
21
        self.with_connection(|| async {
395
21
            let request = GetObjectRequest {
396
21
                bucket: object_path.bucket.clone(),
397
21
                object: object_path.path.clone(),
398
21
                ..Default::default()
399
21
            };
400
401
21
            match self.client.get_object(&request).await {
402
0
                Ok(obj) => Ok(Some(self.convert_to_gcs_object(obj))),
403
21
                Err(err) => {
404
21
                    if let GcsError::Response(
resp0
) = &err
405
0
                        && resp.code == 404
406
                    {
407
0
                        return Ok(None);
408
21
                    }
409
21
                    Err(Self::handle_gcs_error(&err))
410
                }
411
            }
412
42
        })
413
21
        .await
414
21
    }
415
416
0
    async fn read_object_content(
417
0
        &self,
418
0
        object_path: &ObjectPath,
419
0
        start: u64,
420
0
        end: Option<u64>,
421
0
    ) -> Result<Box<dyn Stream<Item = Result<Bytes, Error>> + Send + Unpin>, Error> {
422
        type StreamItem = Result<Bytes, gcloud_storage::http::Error>;
423
        struct ReadStream<T: Stream<Item = StreamItem> + Send + Unpin> {
424
            stream: T,
425
            permit: Option<OwnedSemaphorePermit>,
426
        }
427
428
        impl<T: Stream<Item = StreamItem> + Send + Unpin> Stream for ReadStream<T> {
429
            type Item = Result<Bytes, Error>;
430
431
0
            fn poll_next(
432
0
                mut self: core::pin::Pin<&mut Self>,
433
0
                cx: &mut core::task::Context<'_>,
434
0
            ) -> core::task::Poll<Option<Self::Item>> {
435
0
                match std::pin::pin!(&mut self.stream).poll_next(cx) {
436
0
                    core::task::Poll::Ready(Some(Ok(bytes))) => {
437
0
                        core::task::Poll::Ready(Some(Ok(bytes)))
438
                    }
439
0
                    core::task::Poll::Ready(Some(Err(err))) => {
440
0
                        self.permit.take();
441
0
                        core::task::Poll::Ready(Some(Err(GcsClient::handle_gcs_error(&err))))
442
                    }
443
                    core::task::Poll::Ready(None) => {
444
0
                        self.permit.take();
445
0
                        core::task::Poll::Ready(None)
446
                    }
447
0
                    core::task::Poll::Pending => core::task::Poll::Pending,
448
                }
449
0
            }
450
451
0
            fn size_hint(&self) -> (usize, Option<usize>) {
452
0
                self.stream.size_hint()
453
0
            }
454
        }
455
456
0
        let permit = self.semaphore.clone().acquire_owned().await.map_err(|e| {
457
0
            Error::from_std_err(Code::Internal, &e).append("Failed to acquire connection permit")
458
0
        })?;
459
0
        let request = GetObjectRequest {
460
0
            bucket: object_path.bucket.clone(),
461
0
            object: object_path.path.clone(),
462
0
            ..Default::default()
463
0
        };
464
465
0
        let start = (start > 0).then_some(start);
466
0
        let range = Range(start, end);
467
468
        // Download the object
469
0
        let stream = self
470
0
            .client
471
0
            .download_streamed_object(&request, &range)
472
0
            .await
473
0
            .map_err(|e| Self::handle_gcs_error(&e))?;
474
475
0
        Ok(Box::new(ReadStream {
476
0
            stream,
477
0
            permit: Some(permit),
478
0
        }))
479
0
    }
480
481
0
    async fn write_object(&self, object_path: &ObjectPath, content: Vec<u8>) -> Result<(), Error> {
482
0
        self.with_connection(|| async {
483
0
            let request = UploadObjectRequest {
484
0
                bucket: object_path.bucket.clone(),
485
0
                ..Default::default()
486
0
            };
487
488
0
            let media = Media::new(object_path.path.clone());
489
0
            let upload_type = UploadType::Simple(media);
490
491
0
            self.client
492
0
                .upload_object(&request, content, &upload_type)
493
0
                .await
494
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
495
496
0
            Ok(())
497
0
        })
498
0
        .await
499
0
    }
500
501
0
    async fn start_resumable_write(&self, object_path: &ObjectPath) -> Result<String, Error> {
502
0
        self.with_connection(|| async {
503
0
            let request = UploadObjectRequest {
504
0
                bucket: object_path.bucket.clone(),
505
0
                ..Default::default()
506
0
            };
507
508
0
            let upload_type = UploadType::Multipart(Box::new(Object {
509
0
                name: object_path.path.clone(),
510
0
                content_type: Some(DEFAULT_CONTENT_TYPE.to_string()),
511
0
                ..Default::default()
512
0
            }));
513
514
            // Start resumable upload session
515
0
            let uploader = self
516
0
                .client
517
0
                .prepare_resumable_upload(&request, &upload_type)
518
0
                .await
519
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
520
521
0
            Ok(uploader.url().to_string())
522
0
        })
523
0
        .await
524
0
    }
525
526
0
    async fn upload_chunk(
527
0
        &self,
528
0
        upload_url: &str,
529
0
        _object_path: &ObjectPath,
530
0
        data: Bytes,
531
0
        offset: u64,
532
0
        end_offset: u64,
533
0
        total_size: Option<u64>,
534
0
    ) -> Result<(), Error> {
535
0
        self.with_connection(|| async {
536
0
            let uploader = self.client.get_resumable_upload(upload_url.to_string());
537
538
0
            let last_byte = if end_offset == 0 { 0 } else { end_offset - 1 };
539
0
            let chunk_def = ChunkSize::new(offset, last_byte, total_size);
540
541
            // Upload chunk
542
0
            uploader
543
0
                .upload_multiple_chunk(data, &chunk_def)
544
0
                .await
545
0
                .map_err(|e| Self::handle_gcs_error(&e))?;
546
547
0
            Ok(())
548
0
        })
549
0
        .await
550
0
    }
551
552
0
    async fn upload_from_reader(
553
0
        &self,
554
0
        object_path: &ObjectPath,
555
0
        reader: &mut DropCloserReadHalf,
556
0
        _upload_id: &str,
557
0
        max_size: u64,
558
0
    ) -> Result<(), Error> {
559
0
        let mut retry_count = 0;
560
0
        let mut retry_delay = INITIAL_UPLOAD_RETRY_DELAY_MS;
561
562
        loop {
563
0
            let result = if max_size < SIMPLE_UPLOAD_THRESHOLD {
564
0
                self.read_and_upload_all(object_path, reader, max_size)
565
0
                    .await
566
            } else {
567
0
                self.try_resumable_upload(object_path, reader, max_size)
568
0
                    .await
569
            };
570
571
0
            match result {
572
0
                Ok(()) => return Ok(()),
573
0
                Err(e) => {
574
0
                    let is_retriable = matches!(
575
0
                        e.code,
576
                        Code::Unavailable | Code::ResourceExhausted | Code::DeadlineExceeded
577
0
                    ) || (e.code == Code::Internal
578
0
                        && e.to_string().contains("connection"));
579
580
0
                    if !is_retriable || retry_count >= MAX_UPLOAD_RETRIES {
581
0
                        return Err(e);
582
0
                    }
583
584
0
                    if let Err(reset_err) = reader.try_reset_stream() {
585
0
                        return Err(e.merge(reset_err));
586
0
                    }
587
588
0
                    sleep(Duration::from_millis(retry_delay)).await;
589
0
                    retry_delay = core::cmp::min(retry_delay * 2, MAX_UPLOAD_RETRY_DELAY_MS);
590
591
0
                    let mut rng = rand::rng();
592
0
                    let jitter_factor = rng.random::<f64>().mul_add(0.4, 0.8);
593
0
                    retry_delay = Duration::from_millis(retry_delay)
594
0
                        .mul_f64(jitter_factor)
595
0
                        .as_millis()
596
0
                        .try_into()
597
0
                        .unwrap_or(u64::MAX);
598
599
0
                    retry_count += 1;
600
                }
601
            }
602
        }
603
0
    }
604
605
0
    /// Report whether `object_path` exists, based on a metadata lookup.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `read_object_metadata`.
    async fn object_exists(&self, object_path: &ObjectPath) -> Result<bool, Error> {
        self.read_object_metadata(object_path)
            .await
            .map(|metadata| metadata.is_some())
    }
609
}