diff options
| author | Stefan Boberg <[email protected]> | 2026-03-18 11:27:07 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-03-18 11:27:07 +0100 |
| commit | e64d76ae1b6993582bf161a61049f0771414a779 (patch) | |
| tree | 083f3df42cc9e2c7ddbee225708b4848eb217d11 /src/zenutil/include | |
| parent | Compute batching (#849) (diff) | |
| download | zen-e64d76ae1b6993582bf161a61049f0771414a779.tar.xz zen-e64d76ae1b6993582bf161a61049f0771414a779.zip | |
Simple S3 client (#836)
This functionality is intended to be used to manage datasets for test cases, but may be useful elsewhere in the future.
- **Add S3 client with AWS Signature V4 (SigV4) signing** — new `S3Client` in `zenutil/cloud/` supporting `GetObject`, `PutObject`, `DeleteObject`, `HeadObject`, and `ListObjects` operations, plus multipart uploads and pre-signed URL generation
- **Add EC2 IMDS credential provider** — automatically fetches and refreshes temporary AWS credentials from the EC2 Instance Metadata Service (IMDSv2) for use by the S3 client
- **Add SigV4 signing library** — standalone implementation of AWS Signature Version 4 request signing (headers and query-string presigning)
- **Add path-style addressing support** — enables compatibility with S3-compatible stores like MinIO (in addition to virtual-hosted style)
- **Add S3 integration tests** — includes a `MinioProcess` test helper that spins up a local MinIO server, plus integration tests exercising the S3 client end-to-end
- **Add S3-backed `HttpObjectStoreService` tests** — integration tests verifying the zenserver object store works against an S3 backend
- **Refactor mock IMDS into `zenutil/cloud/`** — moved and generalized the mock IMDS server from `zencompute` so it can be reused by both compute and S3 credential tests
Diffstat (limited to 'src/zenutil/include')
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/cloudprovider.h | 19 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/imdscredentials.h | 58 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/minioprocess.h | 48 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/mockimds.h | 110 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/s3client.h | 215 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/sigv4.h | 116 |
6 files changed, 566 insertions, 0 deletions
diff --git a/src/zenutil/include/zenutil/cloud/cloudprovider.h b/src/zenutil/include/zenutil/cloud/cloudprovider.h new file mode 100644 index 000000000..5825eb308 --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/cloudprovider.h @@ -0,0 +1,19 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <string_view> + +namespace zen::compute { + +enum class CloudProvider +{ + None, + AWS, + Azure, + GCP +}; + +std::string_view ToString(CloudProvider Provider); + +} // namespace zen::compute diff --git a/src/zenutil/include/zenutil/cloud/imdscredentials.h b/src/zenutil/include/zenutil/cloud/imdscredentials.h new file mode 100644 index 000000000..33df5a1e2 --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/imdscredentials.h @@ -0,0 +1,58 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenutil/cloud/sigv4.h> + +#include <zenbase/refcount.h> +#include <zencore/logging.h> +#include <zencore/thread.h> +#include <zenhttp/httpclient.h> + +#include <chrono> +#include <string> + +namespace zen { + +struct ImdsCredentialProviderOptions +{ + std::string Endpoint = "http://169.254.169.254"; // IMDS base URL; override for testing + std::chrono::milliseconds ConnectTimeout{1000}; + std::chrono::milliseconds RequestTimeout{5000}; +}; + +/// Fetches and caches temporary AWS credentials from the EC2 Instance Metadata +/// Service (IMDSv2). Thread-safe; credentials are refreshed automatically before +/// they expire. +class ImdsCredentialProvider : public RefCounted +{ +public: + explicit ImdsCredentialProvider(const ImdsCredentialProviderOptions& Options = {}); + ~ImdsCredentialProvider(); + + /// Fetch or return cached credentials. Thread-safe. + /// Returns empty credentials (empty AccessKeyId) on failure. + SigV4Credentials GetCredentials(); + + /// Force a refresh on next GetCredentials() call. 
+ void InvalidateCache(); + +private: + bool FetchToken(); + bool FetchCredentials(); + + LoggerRef Log() { return m_Log; } + + LoggerRef m_Log; + HttpClient m_HttpClient; + + mutable RwLock m_Lock; + std::string m_ImdsToken; + SigV4Credentials m_CachedCredentials; + std::string m_RoleName; + std::chrono::steady_clock::time_point m_ExpiresAt; +}; + +void imdscredentials_forcelink(); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/cloud/minioprocess.h b/src/zenutil/include/zenutil/cloud/minioprocess.h new file mode 100644 index 000000000..7af350e60 --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/minioprocess.h @@ -0,0 +1,48 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> + +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> + +namespace zen { + +struct MinioProcessOptions +{ + uint16_t Port = 9000; + std::string RootUser = "minioadmin"; + std::string RootPassword = "minioadmin"; +}; + +class MinioProcess +{ +public: + explicit MinioProcess(const MinioProcessOptions& Options = {}); + ~MinioProcess(); + + MinioProcess(const MinioProcess&) = delete; + MinioProcess& operator=(const MinioProcess&) = delete; + + void SpawnMinioServer(); + void StopMinioServer(); + + /// Pre-create a bucket by creating a subdirectory in the MinIO data directory. + /// Can be called before or after SpawnMinioServer(). MinIO discovers these at startup + /// and also picks up new directories at runtime. 
+ void CreateBucket(std::string_view BucketName); + + uint16_t Port() const; + std::string_view RootUser() const; + std::string_view RootPassword() const; + std::string Endpoint() const; + +private: + struct Impl; + std::unique_ptr<Impl> m_Impl; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/cloud/mockimds.h b/src/zenutil/include/zenutil/cloud/mockimds.h new file mode 100644 index 000000000..d0c0155b0 --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/mockimds.h @@ -0,0 +1,110 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenhttp/httpserver.h> +#include <zenutil/cloud/cloudprovider.h> + +#include <string> + +#if ZEN_WITH_TESTS + +namespace zen::compute { + +/** + * Mock IMDS (Instance Metadata Service) for testing cloud metadata and + * credential providers. + * + * Implements an HttpService that responds to the same URL paths as the real + * cloud provider metadata endpoints (AWS IMDSv2, Azure IMDS, GCP metadata). + * Tests configure which provider is "active" and set the desired response + * values, then pass the mock server's address as the ImdsEndpoint to the + * CloudMetadata constructor. + * + * When a request arrives for a provider that is not the ActiveProvider, the + * mock returns 404, causing CloudMetadata to write a sentinel file and move + * on to the next provider — exactly like a failed probe on bare metal. + * + * All config fields are public and can be mutated between poll cycles to + * simulate state changes (e.g. a spot interruption appearing mid-run). + * + * Usage: + * MockImdsService Mock; + * Mock.ActiveProvider = CloudProvider::AWS; + * Mock.Aws.InstanceId = "i-test"; + * // ... stand up ASIO server, register Mock, create CloudMetadata with endpoint + */ +class MockImdsService : public HttpService +{ +public: + /** AWS IMDSv2 response configuration. 
*/ + struct AwsConfig + { + std::string Token = "mock-aws-token-v2"; + std::string InstanceId = "i-0123456789abcdef0"; + std::string AvailabilityZone = "us-east-1a"; + std::string LifeCycle = "on-demand"; // "spot" or "on-demand" + + // Empty string → endpoint returns 404 (instance not in an ASG). + // Non-empty → returned as the response body. "InService" means healthy; + // anything else (e.g. "Terminated:Wait") triggers termination detection. + std::string AutoscalingState; + + // Empty string → endpoint returns 404 (no spot interruption). + // Non-empty → returned as the response body, signalling a spot reclaim. + std::string SpotAction; + + // IAM credential fields for ImdsCredentialProvider testing (placeholder EXAMPLE values, not real secrets) + std::string IamRoleName = "test-role"; + std::string IamAccessKeyId = "ASIAIOSFODNN7EXAMPLE"; + std::string IamSecretAccessKey = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"; + std::string IamSessionToken = "FwoGZXIvYXdzEBYaDEXAMPLETOKEN"; + std::string IamExpiration = "2099-01-01T00:00:00Z"; + }; + + /** Azure IMDS response configuration. */ + struct AzureConfig + { + std::string VmId = "vm-12345678-1234-1234-1234-123456789abc"; + std::string Location = "eastus"; + std::string Priority = "Regular"; // "Spot" or "Regular" + + // Empty → instance is not in a VM Scale Set (no autoscaling). + std::string VmScaleSetName; + + // Empty → no scheduled events. Set to "Preempt", "Terminate", or + // "Reboot" to simulate a termination-class event. + std::string ScheduledEventType; + std::string ScheduledEventStatus = "Scheduled"; + }; + + /** GCP metadata response configuration. */ + struct GcpConfig + { + std::string InstanceId = "1234567890123456789"; + std::string Zone = "projects/123456/zones/us-central1-a"; + std::string Preemptible = "FALSE"; // "TRUE" or "FALSE" + std::string MaintenanceEvent = "NONE"; // "NONE" or event description + }; + + /** Which provider's endpoints respond successfully. + * Requests targeting other providers receive 404. 
+ */ + CloudProvider ActiveProvider = CloudProvider::None; + + AwsConfig Aws; + AzureConfig Azure; + GcpConfig Gcp; + + const char* BaseUri() const override; + void HandleRequest(HttpServerRequest& Request) override; + +private: + void HandleAwsRequest(HttpServerRequest& Request); + void HandleAzureRequest(HttpServerRequest& Request); + void HandleGcpRequest(HttpServerRequest& Request); +}; + +} // namespace zen::compute + +#endif // ZEN_WITH_TESTS diff --git a/src/zenutil/include/zenutil/cloud/s3client.h b/src/zenutil/include/zenutil/cloud/s3client.h new file mode 100644 index 000000000..47501c5b5 --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/s3client.h @@ -0,0 +1,215 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenutil/cloud/imdscredentials.h> +#include <zenutil/cloud/sigv4.h> + +#include <zencore/iobuffer.h> +#include <zencore/logging.h> +#include <zenhttp/httpclient.h> + +#include <zencore/thread.h> + +#include <string> +#include <string_view> +#include <vector> + +namespace zen { + +struct S3ClientOptions +{ + std::string Region = "us-east-1"; + std::string BucketName; + std::string Endpoint; // e.g., "https://s3.us-east-1.amazonaws.com". If empty, derived from Region. + + /// Use path-style addressing (endpoint/bucket/key) instead of virtual-hosted style + /// (bucket.endpoint/key). Required for S3-compatible services like MinIO that don't + /// support virtual-hosted style. + bool PathStyle = false; + + SigV4Credentials Credentials; + + /// When set, credentials are fetched from EC2 IMDS on demand. + /// Overrides the static Credentials field. 
+ Ref<ImdsCredentialProvider> CredentialProvider; + + std::chrono::milliseconds ConnectTimeout{5000}; + std::chrono::milliseconds Timeout{}; // Zero by default; NOTE(review): presumably zero means no overall request timeout — confirm in the implementation + uint8_t RetryCount = 3; +}; + +struct S3ObjectInfo +{ + std::string Key; + uint64_t Size = 0; + std::string ETag; + std::string LastModified; +}; + +/// Base result type for S3 operations. An empty Error string indicates success. 
+struct S3Result +{ + std::string Error; + + bool IsSuccess() const { return Error.empty(); } + explicit operator bool() const { return IsSuccess(); } +}; + +enum class HeadObjectResult +{ + Found, + NotFound, + Error, +}; + +/// Result of GetObject — carries the downloaded content. +struct S3GetObjectResult : S3Result +{ + IoBuffer Content; + + std::string_view AsText() const { return std::string_view(reinterpret_cast<const char*>(Content.GetData()), Content.GetSize()); } // Non-owning view of Content's bytes (no copy) +}; + +/// Result of HeadObject — carries object metadata and existence status (Status defaults to NotFound). +struct S3HeadObjectResult : S3Result +{ + S3ObjectInfo Info; + HeadObjectResult Status = HeadObjectResult::NotFound; +}; + +/// Result of ListObjects — carries the list of matching objects. +struct S3ListObjectsResult : S3Result +{ + std::vector<S3ObjectInfo> Objects; +}; + +/// Result of CreateMultipartUpload — carries the upload ID. +struct S3CreateMultipartUploadResult : S3Result +{ + std::string UploadId; +}; + +/// Result of UploadPart — carries the part ETag. +struct S3UploadPartResult : S3Result +{ + std::string ETag; +}; + +/// Client for S3-compatible object storage. +/// +/// Supports basic object operations (GET, PUT, DELETE, HEAD), listing, multipart +/// uploads, and pre-signed URL generation. Requests are authenticated with AWS +/// Signature Version 4; the signing key is cached per day to avoid redundant HMAC +/// derivation. +/// +/// Notes and limitations: +/// - Multipart uploads are sequential (no parallel part upload). +/// - XML responses are parsed with a minimal tag extractor that only decodes the five +/// standard XML entities; CDATA sections and nested/namespaced tags are not handled. +/// - Automatic credential refresh is supported via ImdsCredentialProvider. 
+class S3Client +{ +public: + explicit S3Client(const S3ClientOptions& Options); + ~S3Client(); + + /// Upload an object to S3 + S3Result PutObject(std::string_view Key, IoBuffer Content); + + /// Download an object from S3 + S3GetObjectResult GetObject(std::string_view Key); + + /// Delete an object from S3 + S3Result DeleteObject(std::string_view Key); + + /// Check if an object exists and get its metadata + S3HeadObjectResult HeadObject(std::string_view Key); + + /// List objects with the given prefix + /// @param MaxKeys Maximum number of keys to return (0 = default/1000) + S3ListObjectsResult ListObjects(std::string_view Prefix, uint32_t MaxKeys = 0); + + /// Multipart upload: initiate a multipart upload and return the upload ID + S3CreateMultipartUploadResult CreateMultipartUpload(std::string_view Key); + + /// Multipart upload: upload a single part + /// @param PartNumber Part number (1-based, 1 to 10000) + /// @param Content The part data (minimum 5 MB except for the last part) + S3UploadPartResult UploadPart(std::string_view Key, std::string_view UploadId, uint32_t PartNumber, IoBuffer Content); + + /// Multipart upload: complete a multipart upload by assembling previously uploaded parts + /// @param PartETags List of {part_number, etag} pairs from UploadPart calls (etag as carried in S3UploadPartResult::ETag) + S3Result CompleteMultipartUpload(std::string_view Key, + std::string_view UploadId, + const std::vector<std::pair<uint32_t, std::string>>& PartETags); + + /// Multipart upload: abort an in-progress multipart upload, discarding all uploaded parts + S3Result AbortMultipartUpload(std::string_view Key, std::string_view UploadId); + + /// High-level multipart upload: automatically splits content into parts and uploads + /// @param PartSize Size of each part in bytes (minimum 5 MB, default 8 MB) + S3Result PutObjectMultipart(std::string_view Key, IoBuffer Content, uint64_t PartSize = 8 * 1024 * 1024); + + /// Generate a pre-signed URL for downloading an object (GET) + /// @param Key The object key + 
/// @param ExpiresIn URL validity duration (default 1 hour, max 7 days) + std::string GeneratePresignedGetUrl(std::string_view Key, std::chrono::seconds ExpiresIn = std::chrono::hours(1)); + + /// Generate a pre-signed URL for uploading an object (PUT) + /// @param Key The object key + /// @param ExpiresIn URL validity duration (default 1 hour, max 7 days) + std::string GeneratePresignedPutUrl(std::string_view Key, std::chrono::seconds ExpiresIn = std::chrono::hours(1)); + + std::string_view BucketName() const { return m_BucketName; } + std::string_view Region() const { return m_Region; } + +private: + /// Shared implementation for pre-signed URL generation + std::string GeneratePresignedUrlForMethod(std::string_view Key, std::string_view Method, std::chrono::seconds ExpiresIn); + + LoggerRef Log() { return m_Log; } + + /// Build the endpoint URL for the bucket + std::string BuildEndpoint() const; + + /// Build the host header value + std::string BuildHostHeader() const; + + /// Build the S3 object path from a key, accounting for path-style addressing + std::string KeyToPath(std::string_view Key) const; + + /// Build the bucket root path ("/" for virtual-hosted, "/bucket/" for path-style) + std::string BucketRootPath() const; + + /// Sign a request and return headers with Authorization, x-amz-date, x-amz-content-sha256 + HttpClient::KeyValueMap SignRequest(std::string_view Method, + std::string_view Path, + std::string_view QueryString, + std::string_view PayloadHash); + + /// Get or compute the signing key for the given date stamp, caching across requests on the same day + Sha256Digest GetSigningKey(std::string_view DateStamp); + + /// Get the current credentials, either from the provider or from static config + SigV4Credentials GetCurrentCredentials(); + + LoggerRef m_Log; + std::string m_BucketName; + std::string m_Region; + std::string m_Endpoint; + std::string m_Host; + bool m_PathStyle; + SigV4Credentials m_Credentials; + Ref<ImdsCredentialProvider> 
m_CredentialProvider; + HttpClient m_HttpClient; + + // Cached signing key and the date stamp it was derived for (only changes once per day; guarded by m_SigningKeyLock for thread safety) + mutable RwLock m_SigningKeyLock; + std::string m_CachedDateStamp; + Sha256Digest m_CachedSigningKey{}; +}; + +void s3client_forcelink(); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/cloud/sigv4.h b/src/zenutil/include/zenutil/cloud/sigv4.h new file mode 100644 index 000000000..9ac08df76 --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/sigv4.h @@ -0,0 +1,116 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +#include <array> +#include <chrono> +#include <string> +#include <string_view> +#include <vector> + +namespace zen { + +/// SHA-256 digest (32 bytes) +using Sha256Digest = std::array<uint8_t, 32>; + +/// Compute SHA-256 hash of the given data +Sha256Digest ComputeSha256(const void* Data, size_t Size); +Sha256Digest ComputeSha256(std::string_view Data); + +/// Compute HMAC-SHA256 with the given key and data +Sha256Digest ComputeHmacSha256(const void* Key, size_t KeySize, const void* Data, size_t DataSize); +Sha256Digest ComputeHmacSha256(const Sha256Digest& Key, std::string_view Data); + +/// Convert a SHA-256 digest to a lowercase hex string (64 characters) +std::string Sha256ToHex(const Sha256Digest& Digest); + +/// Securely zero memory containing secret key material (prevents compiler from optimizing away) +void SecureZeroSecret(void* Data, size_t Size); + +/// AWS Signature Version 4 signing + +struct SigV4Credentials +{ + std::string AccessKeyId; + std::string SecretAccessKey; + std::string SessionToken; // Optional; required for temporary credentials (STS/SSO) +}; + +struct SigV4SignedHeaders +{ + /// The value for the "Authorization" header + std::string Authorization; + + /// The ISO 8601 date-time string used in signing (for x-amz-date header) + std::string AmzDate; + + /// The SHA-256 hex digest of the payload (for x-amz-content-sha256 header) + 
std::string PayloadHash; +}; + +/// Get the current UTC timestamp in ISO 8601 basic format (YYYYMMDDTHHMMSSZ) +std::string GetAmzTimestamp(); + +/// URI-encode a string per AWS requirements (RFC 3986 unreserved chars are not encoded) +/// @param EncodeSlash If false, '/' is left unencoded (use for URI paths) +std::string AwsUriEncode(std::string_view Input, bool EncodeSlash = true); + +/// Build a canonical query string from key=value pairs. +/// Parameters are URI-encoded and sorted by key name as required by SigV4. +/// Takes parameters by value so they can be sorted in place; pass an rvalue (std::move) to avoid a copy. +std::string BuildCanonicalQueryString(std::vector<std::pair<std::string, std::string>> Parameters); + +/// Sign an HTTP request using AWS Signature Version 4 +/// +/// @param Credentials AWS access key and secret key +/// @param Method HTTP method (GET, PUT, DELETE, HEAD, etc.) +/// @param Url The path portion of the URL (e.g., "/bucket/key") +/// @param CanonicalQueryString Pre-built canonical query string (use BuildCanonicalQueryString) +/// @param Region The AWS region (e.g., "us-east-1") +/// @param Service The AWS service (e.g., "s3") +/// @param AmzDate The ISO 8601 date-time string (from GetAmzTimestamp()) +/// @param Headers Sorted list of {lowercase-header-name, value} pairs to sign. +/// Must include "host" and "x-amz-content-sha256". +/// Should NOT include "authorization". +/// @param PayloadHash Hex SHA-256 hash of the request payload. Use +/// "UNSIGNED-PAYLOAD" for unsigned payloads. +/// @param SigningKey Optional pre-computed signing key. If null, derived from Credentials + date + Region + Service. 
+SigV4SignedHeaders SignRequestV4(const SigV4Credentials& Credentials, + std::string_view Method, + std::string_view Url, + std::string_view CanonicalQueryString, + std::string_view Region, + std::string_view Service, + std::string_view AmzDate, + const std::vector<std::pair<std::string, std::string>>& Headers, + std::string_view PayloadHash, + const Sha256Digest* SigningKey = nullptr); + +/// Generate a pre-signed URL using AWS Signature Version 4 query string authentication. +/// +/// The returned URL can be used by anyone (no credentials needed) until it expires. +/// +/// @param Credentials AWS access key and secret key +/// @param Method HTTP method the URL will be used with (typically "GET" or "PUT") +/// @param Scheme URL scheme ("https" or "http") +/// @param Host The host (e.g., "bucket.s3.us-east-1.amazonaws.com") +/// @param Path The path portion (e.g., "/key") +/// @param Region The AWS region (e.g., "us-east-1") +/// @param Service The AWS service (e.g., "s3") +/// @param ExpiresIn URL validity duration, in whole seconds +/// @param ExtraQueryParams Additional query parameters to include (e.g., response-content-type); defaults to none +std::string GeneratePresignedUrl(const SigV4Credentials& Credentials, + std::string_view Method, + std::string_view Scheme, + std::string_view Host, + std::string_view Path, + std::string_view Region, + std::string_view Service, + std::chrono::seconds ExpiresIn, + const std::vector<std::pair<std::string, std::string>>& ExtraQueryParams = {}); + +void sigv4_forcelink(); + +} // namespace zen |