diff options
| author | Dan Engelbrecht <[email protected]> | 2026-05-05 14:59:21 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-05-05 14:59:21 +0200 |
| commit | 46f456ffd4d0717a035253ff9076ca6ee664e536 (patch) | |
| tree | 69d7a9a43b9874fd3990c43aa5ff4135c35d53d9 /src/zenutil/include | |
| parent | watchdog ephemeral port exhaust (#1022) (diff) | |
| download | archived-zen-46f456ffd4d0717a035253ff9076ca6ee664e536.tar.xz archived-zen-46f456ffd4d0717a035253ff9076ca6ee664e536.zip | |
hub async s3 client (#1024)
- Feature: `AsyncHttpClient` adds cancellable request tokens, streaming GET to a file (`AsyncDownload`), zero-copy chunk-callback GET (`AsyncStream`), pull-mode body source for streaming `AsyncPut`, retry layer mirroring the synchronous client, and a submit-side in-flight cap (`HttpClientSettings::MaxConcurrentRequests`) so hub-scale fanout against a single host cannot stall queued handles into curl's connect-timeout window
- Feature: Hub hydration can route S3 transfers through a non-blocking `AsyncHttpClient` (curl_multi + asio) backed by a single io thread; hydrate and dehydrate now pipeline requests instead of blocking worker threads
- `--hub-hydration-async-enabled` (Lua: `hub.hydration.async.enabled`, default true)
- `--hub-hydration-async-max-concurrent-requests` (Lua: `hub.hydration.async.maxconcurrentrequests`, default `clamp(cpu*4, 128, 512)`)
- Feature: Hub provision/deprovision/obliterate now run as two phases on separate worker pools so per-module hydration cannot starve child-process spawn/despawn (and vice versa)
- New `--hub-instance-spawn-threads` (Lua: `hub.instance.spawnthreads`, default `clamp(cpu/8, 4, 16)`) drives child-process spawn/despawn
- `--hub-instance-provision-threads` (Lua: `hub.instance.provisionthreads`) now drives per-module hydrate/dehydrate scheduling only; default changed from `max(cpu/4, 2)` to `clamp(cpu/8, 4, 12)`
- `--hub-hydration-threads` (Lua: `hub.hydration.threads`) now controls per-file workers inside a single hydrate/dehydrate; default changed from `max(cpu/4, 2)` to `clamp(cpu/8, 4, 12)`
- Feature: `AsyncHttpClient` owns its `asio::io_context` and one io thread by default; the `(BaseUri, io_context&)` constructor is preserved for callers that want to share an externally-driven `io_context` across clients (caller MUST keep the loop running until the client destructs)
- Feature: `Hub::Configuration` C++ struct fields renamed (`OptionalProvisionWorkerPool`/`OptionalHydrationWorkerPool` -> `OptionalProvisionPool`/`OptionalSpawnPool`/`OptionalHydrationPool`). Embedders constructing `Hub` directly must update field names; provision and spawn pools must both be set or both null (asserted at construction).
- Bugfix: `S3Client` signing-key cache no longer returns stale signatures after IMDS-rotated credentials change `AccessKeyId`; cache is now keyed on `(DateStamp, AccessKeyId)`
Diffstat (limited to 'src/zenutil/include')
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/s3client.h | 42 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/s3requestbuilder.h | 71 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/s3response.h | 51 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/cloud/sigv4.h | 20 |
4 files changed, 147 insertions, 37 deletions
diff --git a/src/zenutil/include/zenutil/cloud/s3client.h b/src/zenutil/include/zenutil/cloud/s3client.h index 1ce2a768e..f09788b82 100644 --- a/src/zenutil/include/zenutil/cloud/s3client.h +++ b/src/zenutil/include/zenutil/cloud/s3client.h @@ -3,6 +3,7 @@ #pragma once #include <zenutil/cloud/imdscredentials.h> +#include <zenutil/cloud/s3requestbuilder.h> #include <zenutil/cloud/sigv4.h> #include <zencore/iobuffer.h> @@ -187,8 +188,8 @@ public: /// @param ExpiresIn URL validity duration (default 1 hour, max 7 days) std::string GeneratePresignedPutUrl(std::string_view Key, std::chrono::seconds ExpiresIn = std::chrono::hours(1)); - std::string_view BucketName() const { return m_BucketName; } - std::string_view Region() const { return m_Region; } + std::string_view BucketName() const { return m_Builder.BucketName(); } + std::string_view Region() const { return m_Builder.Region(); } private: /// Shared implementation for pre-signed URL generation @@ -196,31 +197,6 @@ private: LoggerRef Log() { return m_Log; } - /// Build the endpoint URL for the bucket - std::string BuildEndpoint() const; - - /// Build the host header value - std::string BuildHostHeader() const; - - /// Build the S3 object path from a key, accounting for path-style addressing - std::string KeyToPath(std::string_view Key) const; - - /// Build the bucket root path ("/" for virtual-hosted, "/bucket/" for path-style) - std::string BucketRootPath() const; - - /// Sign a request and return headers with Authorization, x-amz-date, x-amz-content-sha256. - /// Additional x-amz-* headers that must participate in the signature are passed via - /// ExtraSignedHeaders (lowercase name, value); they are also copied into the returned map. - HttpClient::KeyValueMap SignRequest(const SigV4Credentials& Credentials, - std::string_view Method, - std::string_view Path, - std::string_view QueryString, - std::string_view PayloadHash, - std::span<const std::pair<std::string, std::string>> ExtraSignedHeaders = {}); - - /// Get or compute the signing key for the given date stamp, caching across requests on the same day - Sha256Digest GetSigningKey(std::string_view DateStamp); - /// Get the current credentials, either from the provider or from static config SigV4Credentials GetCurrentCredentials(); @@ -242,20 +218,12 @@ private: std::string BuildNoCredentialsError(std::string Context); LoggerRef m_Log; - std::string m_BucketName; - std::string m_Region; - std::string m_Endpoint; - std::string m_Host; - bool m_PathStyle; + S3RequestBuilder m_Builder; + mutable RwLock m_CredentialsLock; SigV4Credentials m_Credentials; Ref<ImdsCredentialProvider> m_CredentialProvider; HttpClient m_HttpClient; bool m_Verbose = false; - - // Cached signing key (only changes once per day, protected by RwLock for thread safety) - mutable RwLock m_SigningKeyLock; - std::string m_CachedDateStamp; - Sha256Digest m_CachedSigningKey{}; }; void s3client_forcelink(); diff --git a/src/zenutil/include/zenutil/cloud/s3requestbuilder.h b/src/zenutil/include/zenutil/cloud/s3requestbuilder.h new file mode 100644 index 000000000..c46167fba --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/s3requestbuilder.h @@ -0,0 +1,71 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenutil/cloud/sigv4.h> + +#include <zencore/thread.h> +#include <zenhttp/httpclient.h> + +#include <span> +#include <string> +#include <string_view> + +namespace zen { + +// Stateless builder of signed S3 requests, shared between blocking S3Client +// (HttpClient-backed) and S3AsyncStorage (AsyncHttpClient-backed). Owns the +// per-day signing-key cache so identical signatures across consecutive +// requests do not redo HMAC derivation. +// +// Configuration (region, bucket, endpoint, addressing style) is fixed at +// construction. Credentials are passed per Sign() call. The cache is keyed on +// (DateStamp, AccessKeyId), so any AccessKeyId rotation invalidates the +// derived signing key. STS / IMDS rotate AccessKeyId and SecretAccessKey +// together, so this is sufficient there. Callers hand-rotating SecretAccessKey +// while reusing AccessKeyId would sign with a stale derived key - not a usage +// pattern this builder supports. +class S3RequestBuilder +{ +public: + S3RequestBuilder(std::string Region, std::string BucketName, std::string Endpoint, bool PathStyle); + + std::string_view Region() const { return m_Region; } + std::string_view BucketName() const { return m_BucketName; } + std::string_view Endpoint() const { return m_Endpoint; } + std::string_view Host() const { return m_Host; } + bool PathStyle() const { return m_PathStyle; } + + std::string KeyToPath(std::string_view Key) const; + std::string BucketRootPath() const; + + // Sign a request. Returns headers including Authorization, x-amz-date, + // x-amz-content-sha256, and x-amz-security-token (when SessionToken set). + // ExtraSignedHeaders are lowercase-name pairs that must participate in the + // signature; they are also copied into the returned map. + HttpClient::KeyValueMap SignRequest(const SigV4Credentials& Credentials, + std::string_view Method, + std::string_view Path, + std::string_view CanonicalQueryString, + std::string_view PayloadHash, + std::span<const std::pair<std::string, std::string>> ExtraSignedHeaders = {}); + +private: + Sha256Digest GetSigningKey(std::string_view DateStamp, const SigV4Credentials& Credentials); + + static std::string DeriveEndpoint(std::string_view Endpoint, std::string_view Region, std::string_view BucketName, bool PathStyle); + static std::string HostFromEndpoint(std::string_view Endpoint); + + std::string m_Region; + std::string m_BucketName; + std::string m_Endpoint; + std::string m_Host; + bool m_PathStyle; + + mutable RwLock m_SigningKeyLock; + std::string m_CachedDateStamp; + std::string m_CachedAccessKeyId; + Sha256Digest m_CachedSigningKey{}; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/cloud/s3response.h b/src/zenutil/include/zenutil/cloud/s3response.h new file mode 100644 index 000000000..9dec3215b --- /dev/null +++ b/src/zenutil/include/zenutil/cloud/s3response.h @@ -0,0 +1,51 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/string.h> +#include <zenhttp/httpclient.h> + +#include <string> +#include <string_view> + +namespace zen { + +// Helpers for parsing S3 (or S3-compatible) HTTP responses. Shared between +// the blocking S3Client and the async S3AsyncStorage path so XML/error +// handling stays consistent across implementations. +// +// The XML parser is intentionally minimal: only ListBucketResult / Error +// shapes are needed. CDATA, namespaces, and attributes are not handled. + +constexpr std::string_view S3EmptyPayloadHash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; + +// Find text between <Tag> and </Tag>. Returns a view into Xml; empty if not found. +std::string_view S3ExtractXmlValue(std::string_view Xml, std::string_view Tag); + +// Decode the five standard XML entities into Out. +void S3DecodeXmlEntities(std::string_view Input, StringBuilderBase& Out); + +// Convenience overload returning std::string. +std::string S3DecodeXmlEntities(std::string_view Input); + +// Append a canonical query string to a path: "Path?CanonicalQS" or "Path" when QS empty. +std::string S3BuildRequestPath(std::string_view Path, std::string_view CanonicalQS); + +// Case-insensitive header lookup in a response header map. Returns nullptr if not found. +const std::string* S3FindResponseHeader(const HttpClient::KeyValueMap& Headers, std::string_view Name); + +// Extract Code/Message from an S3 <Error> body. Returns true only when at least one of +// Code/Message parsed non-empty - malformed bodies (Error tag present but no parseable +// children) are reported as parse miss to keep formatted error strings non-empty. +bool S3ExtractError(std::string_view Body, std::string_view& OutCode, std::string_view& OutMessage); + +// Detect throttling: HTTP 503/429 or known S3 throttle codes (SlowDown, ServiceUnavailable, etc). +// Code is checked on both the HTTP status and the XML error code. +bool S3IsThrottled(const HttpClient::Response& Response, std::string_view ErrorCode); + +// Build a human-readable error message for a failed S3 response. Falls back to the +// generic HTTP/transport message when no <Error> body is available. Logs a +// distinct "S3 THROTTLED" warning when the response indicates throttling. +std::string S3ErrorMessage(std::string_view Prefix, const HttpClient::Response& Response); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/cloud/sigv4.h b/src/zenutil/include/zenutil/cloud/sigv4.h index 9ac08df76..a16daf2cf 100644 --- a/src/zenutil/include/zenutil/cloud/sigv4.h +++ b/src/zenutil/include/zenutil/cloud/sigv4.h @@ -19,6 +19,26 @@ using Sha256Digest = std::array<uint8_t, 32>; Sha256Digest ComputeSha256(const void* Data, size_t Size); Sha256Digest ComputeSha256(std::string_view Data); +/// Streaming SHA-256: feed data in chunks, finalize once. +/// Move-only; copying digest state is not supported by the underlying APIs. +class Sha256Stream +{ +public: + Sha256Stream(); + ~Sha256Stream(); + + Sha256Stream(const Sha256Stream&) = delete; + Sha256Stream& operator=(const Sha256Stream&) = delete; + Sha256Stream(Sha256Stream&& Other) noexcept; + Sha256Stream& operator=(Sha256Stream&& Other) noexcept; + + void Update(const void* Data, size_t Size); + Sha256Digest Finalize(); // single-use; further Update/Finalize is undefined + +private: + void* m_Ctx = nullptr; +}; + /// Compute HMAC-SHA256 with the given key and data Sha256Digest ComputeHmacSha256(const void* Key, size_t KeySize, const void* Data, size_t DataSize); Sha256Digest ComputeHmacSha256(const Sha256Digest& Key, std::string_view Data); |