aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/include
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2026-05-05 14:59:21 +0200
committerGitHub Enterprise <[email protected]>2026-05-05 14:59:21 +0200
commit46f456ffd4d0717a035253ff9076ca6ee664e536 (patch)
tree69d7a9a43b9874fd3990c43aa5ff4135c35d53d9 /src/zenutil/include
parentwatchdog ephemeral port exhaust (#1022) (diff)
downloadarchived-zen-46f456ffd4d0717a035253ff9076ca6ee664e536.tar.xz
archived-zen-46f456ffd4d0717a035253ff9076ca6ee664e536.zip
hub async s3 client (#1024)
- Feature: `AsyncHttpClient` adds cancellable request tokens, streaming GET to a file (`AsyncDownload`), zero-copy chunk-callback GET (`AsyncStream`), pull-mode body source for streaming `AsyncPut`, retry layer mirroring the synchronous client, and a submit-side in-flight cap (`HttpClientSettings::MaxConcurrentRequests`) so hub-scale fanout against a single host cannot stall queued handles into curl's connect-timeout window - Feature: Hub hydration can route S3 transfers through a non-blocking `AsyncHttpClient` (curl_multi + asio) backed by a single io thread; hydrate and dehydrate now pipeline requests instead of blocking worker threads - `--hub-hydration-async-enabled` (Lua: `hub.hydration.async.enabled`, default true) - `--hub-hydration-async-max-concurrent-requests` (Lua: `hub.hydration.async.maxconcurrentrequests`, default `clamp(cpu*4, 128, 512)`) - Feature: Hub provision/deprovision/obliterate now run as two phases on separate worker pools so per-module hydration cannot starve child-process spawn/despawn (and vice versa) - New `--hub-instance-spawn-threads` (Lua: `hub.instance.spawnthreads`, default `clamp(cpu/8, 4, 16)`) drives child-process spawn/despawn - `--hub-instance-provision-threads` (Lua: `hub.instance.provisionthreads`) now drives per-module hydrate/dehydrate scheduling only; default changed from `max(cpu/4, 2)` to `clamp(cpu/8, 4, 12)` - `--hub-hydration-threads` (Lua: `hub.hydration.threads`) now controls per-file workers inside a single hydrate/dehydrate; default changed from `max(cpu/4, 2)` to `clamp(cpu/8, 4, 12)` - Feature: `AsyncHttpClient` owns its `asio::io_context` and one io thread by default; the `(BaseUri, io_context&)` constructor is preserved for callers that want to share an externally-driven `io_context` across clients (caller MUST keep the loop running until the client destructs) - Feature: `Hub::Configuration` C++ struct fields renamed (`OptionalProvisionWorkerPool`/`OptionalHydrationWorkerPool` -> `OptionalProvisionPool`/`OptionalSpawnPool`/`OptionalHydrationPool`). Embedders constructing `Hub` directly must update field names; provision and spawn pools must both be set or both null (asserted at construction). - Bugfix: `S3Client` signing-key cache no longer returns stale signatures after IMDS-rotated credentials change `AccessKeyId`; cache is now keyed on `(DateStamp, AccessKeyId)`
Diffstat (limited to 'src/zenutil/include')
-rw-r--r--src/zenutil/include/zenutil/cloud/s3client.h42
-rw-r--r--src/zenutil/include/zenutil/cloud/s3requestbuilder.h71
-rw-r--r--src/zenutil/include/zenutil/cloud/s3response.h51
-rw-r--r--src/zenutil/include/zenutil/cloud/sigv4.h20
4 files changed, 147 insertions, 37 deletions
diff --git a/src/zenutil/include/zenutil/cloud/s3client.h b/src/zenutil/include/zenutil/cloud/s3client.h
index 1ce2a768e..f09788b82 100644
--- a/src/zenutil/include/zenutil/cloud/s3client.h
+++ b/src/zenutil/include/zenutil/cloud/s3client.h
@@ -3,6 +3,7 @@
#pragma once
#include <zenutil/cloud/imdscredentials.h>
+#include <zenutil/cloud/s3requestbuilder.h>
#include <zenutil/cloud/sigv4.h>
#include <zencore/iobuffer.h>
@@ -187,8 +188,8 @@ public:
/// @param ExpiresIn URL validity duration (default 1 hour, max 7 days)
std::string GeneratePresignedPutUrl(std::string_view Key, std::chrono::seconds ExpiresIn = std::chrono::hours(1));
- std::string_view BucketName() const { return m_BucketName; }
- std::string_view Region() const { return m_Region; }
+ std::string_view BucketName() const { return m_Builder.BucketName(); }
+ std::string_view Region() const { return m_Builder.Region(); }
private:
/// Shared implementation for pre-signed URL generation
@@ -196,31 +197,6 @@ private:
LoggerRef Log() { return m_Log; }
- /// Build the endpoint URL for the bucket
- std::string BuildEndpoint() const;
-
- /// Build the host header value
- std::string BuildHostHeader() const;
-
- /// Build the S3 object path from a key, accounting for path-style addressing
- std::string KeyToPath(std::string_view Key) const;
-
- /// Build the bucket root path ("/" for virtual-hosted, "/bucket/" for path-style)
- std::string BucketRootPath() const;
-
- /// Sign a request and return headers with Authorization, x-amz-date, x-amz-content-sha256.
- /// Additional x-amz-* headers that must participate in the signature are passed via
- /// ExtraSignedHeaders (lowercase name, value); they are also copied into the returned map.
- HttpClient::KeyValueMap SignRequest(const SigV4Credentials& Credentials,
- std::string_view Method,
- std::string_view Path,
- std::string_view QueryString,
- std::string_view PayloadHash,
- std::span<const std::pair<std::string, std::string>> ExtraSignedHeaders = {});
-
- /// Get or compute the signing key for the given date stamp, caching across requests on the same day
- Sha256Digest GetSigningKey(std::string_view DateStamp);
-
/// Get the current credentials, either from the provider or from static config
SigV4Credentials GetCurrentCredentials();
@@ -242,20 +218,12 @@ private:
std::string BuildNoCredentialsError(std::string Context);
LoggerRef m_Log;
- std::string m_BucketName;
- std::string m_Region;
- std::string m_Endpoint;
- std::string m_Host;
- bool m_PathStyle;
+ S3RequestBuilder m_Builder;
+ mutable RwLock m_CredentialsLock;
SigV4Credentials m_Credentials;
Ref<ImdsCredentialProvider> m_CredentialProvider;
HttpClient m_HttpClient;
bool m_Verbose = false;
-
- // Cached signing key (only changes once per day, protected by RwLock for thread safety)
- mutable RwLock m_SigningKeyLock;
- std::string m_CachedDateStamp;
- Sha256Digest m_CachedSigningKey{};
};
void s3client_forcelink();
diff --git a/src/zenutil/include/zenutil/cloud/s3requestbuilder.h b/src/zenutil/include/zenutil/cloud/s3requestbuilder.h
new file mode 100644
index 000000000..c46167fba
--- /dev/null
+++ b/src/zenutil/include/zenutil/cloud/s3requestbuilder.h
@@ -0,0 +1,71 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenutil/cloud/sigv4.h>
+
+#include <zencore/thread.h>
+#include <zenhttp/httpclient.h>
+
+#include <span>
+#include <string>
+#include <string_view>
+
+namespace zen {
+
+// Stateless builder of signed S3 requests, shared between blocking S3Client
+// (HttpClient-backed) and S3AsyncStorage (AsyncHttpClient-backed). Owns the
+// per-day signing-key cache so identical signatures across consecutive
+// requests do not redo HMAC derivation.
+//
+// Configuration (region, bucket, endpoint, addressing style) is fixed at
+// construction. Credentials are passed per Sign() call. The cache is keyed on
+// (DateStamp, AccessKeyId), so any AccessKeyId rotation invalidates the
+// derived signing key. STS / IMDS rotate AccessKeyId and SecretAccessKey
+// together, so this is sufficient there. Callers hand-rotating SecretAccessKey
+// while reusing AccessKeyId would sign with a stale derived key - not a usage
+// pattern this builder supports.
+class S3RequestBuilder
+{
+public:
+ S3RequestBuilder(std::string Region, std::string BucketName, std::string Endpoint, bool PathStyle);
+
+ std::string_view Region() const { return m_Region; }
+ std::string_view BucketName() const { return m_BucketName; }
+ std::string_view Endpoint() const { return m_Endpoint; }
+ std::string_view Host() const { return m_Host; }
+ bool PathStyle() const { return m_PathStyle; }
+
+ std::string KeyToPath(std::string_view Key) const;
+ std::string BucketRootPath() const;
+
+ // Sign a request. Returns headers including Authorization, x-amz-date,
+ // x-amz-content-sha256, and x-amz-security-token (when SessionToken set).
+ // ExtraSignedHeaders are lowercase-name pairs that must participate in the
+ // signature; they are also copied into the returned map.
+ HttpClient::KeyValueMap SignRequest(const SigV4Credentials& Credentials,
+ std::string_view Method,
+ std::string_view Path,
+ std::string_view CanonicalQueryString,
+ std::string_view PayloadHash,
+ std::span<const std::pair<std::string, std::string>> ExtraSignedHeaders = {});
+
+private:
+ Sha256Digest GetSigningKey(std::string_view DateStamp, const SigV4Credentials& Credentials);
+
+ static std::string DeriveEndpoint(std::string_view Endpoint, std::string_view Region, std::string_view BucketName, bool PathStyle);
+ static std::string HostFromEndpoint(std::string_view Endpoint);
+
+ std::string m_Region;
+ std::string m_BucketName;
+ std::string m_Endpoint;
+ std::string m_Host;
+ bool m_PathStyle;
+
+ mutable RwLock m_SigningKeyLock;
+ std::string m_CachedDateStamp;
+ std::string m_CachedAccessKeyId;
+ Sha256Digest m_CachedSigningKey{};
+};
+
+} // namespace zen
diff --git a/src/zenutil/include/zenutil/cloud/s3response.h b/src/zenutil/include/zenutil/cloud/s3response.h
new file mode 100644
index 000000000..9dec3215b
--- /dev/null
+++ b/src/zenutil/include/zenutil/cloud/s3response.h
@@ -0,0 +1,51 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/string.h>
+#include <zenhttp/httpclient.h>
+
+#include <string>
+#include <string_view>
+
+namespace zen {
+
+// Helpers for parsing S3 (or S3-compatible) HTTP responses. Shared between
+// the blocking S3Client and the async S3AsyncStorage path so XML/error
+// handling stays consistent across implementations.
+//
+// The XML parser is intentionally minimal: only ListBucketResult / Error
+// shapes are needed. CDATA, namespaces, and attributes are not handled.
+
+constexpr std::string_view S3EmptyPayloadHash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
+
+// Find text between <Tag> and </Tag>. Returns a view into Xml; empty if not found.
+std::string_view S3ExtractXmlValue(std::string_view Xml, std::string_view Tag);
+
+// Decode the five standard XML entities into Out.
+void S3DecodeXmlEntities(std::string_view Input, StringBuilderBase& Out);
+
+// Convenience overload returning std::string.
+std::string S3DecodeXmlEntities(std::string_view Input);
+
+// Append a canonical query string to a path: "Path?CanonicalQS" or "Path" when QS empty.
+std::string S3BuildRequestPath(std::string_view Path, std::string_view CanonicalQS);
+
+// Case-insensitive header lookup in a response header map. Returns nullptr if not found.
+const std::string* S3FindResponseHeader(const HttpClient::KeyValueMap& Headers, std::string_view Name);
+
+// Extract Code/Message from an S3 <Error> body. Returns true only when at least one of
+// Code/Message parsed non-empty - malformed bodies (Error tag present but no parseable
+// children) are reported as parse miss to keep formatted error strings non-empty.
+bool S3ExtractError(std::string_view Body, std::string_view& OutCode, std::string_view& OutMessage);
+
+// Detect throttling: HTTP 503/429 or known S3 throttle codes (SlowDown, ServiceUnavailable, etc).
+// Code is checked on both the HTTP status and the XML error code.
+bool S3IsThrottled(const HttpClient::Response& Response, std::string_view ErrorCode);
+
+// Build a human-readable error message for a failed S3 response. Falls back to the
+// generic HTTP/transport message when no <Error> body is available. Logs a
+// distinct "S3 THROTTLED" warning when the response indicates throttling.
+std::string S3ErrorMessage(std::string_view Prefix, const HttpClient::Response& Response);
+
+} // namespace zen
diff --git a/src/zenutil/include/zenutil/cloud/sigv4.h b/src/zenutil/include/zenutil/cloud/sigv4.h
index 9ac08df76..a16daf2cf 100644
--- a/src/zenutil/include/zenutil/cloud/sigv4.h
+++ b/src/zenutil/include/zenutil/cloud/sigv4.h
@@ -19,6 +19,26 @@ using Sha256Digest = std::array<uint8_t, 32>;
Sha256Digest ComputeSha256(const void* Data, size_t Size);
Sha256Digest ComputeSha256(std::string_view Data);
+/// Streaming SHA-256: feed data in chunks, finalize once.
+/// Move-only; copying digest state is not supported by the underlying APIs.
+class Sha256Stream
+{
+public:
+ Sha256Stream();
+ ~Sha256Stream();
+
+ Sha256Stream(const Sha256Stream&) = delete;
+ Sha256Stream& operator=(const Sha256Stream&) = delete;
+ Sha256Stream(Sha256Stream&& Other) noexcept;
+ Sha256Stream& operator=(Sha256Stream&& Other) noexcept;
+
+ void Update(const void* Data, size_t Size);
+ Sha256Digest Finalize(); // single-use; further Update/Finalize is undefined
+
+private:
+ void* m_Ctx = nullptr;
+};
+
/// Compute HMAC-SHA256 with the given key and data
Sha256Digest ComputeHmacSha256(const void* Key, size_t KeySize, const void* Data, size_t DataSize);
Sha256Digest ComputeHmacSha256(const Sha256Digest& Key, std::string_view Data);