aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/zen/zen.cpp3
-rw-r--r--src/zen/zen.h2
-rw-r--r--src/zencore/include/zencore/fmtutils.h23
-rw-r--r--src/zencore/include/zencore/thread.h7
-rw-r--r--src/zenhorde/xmake.lua2
-rw-r--r--src/zenhttp/clients/httpclientcommon.h6
-rw-r--r--src/zenhttp/clients/httpclientcpr.cpp1285
-rw-r--r--src/zenhttp/clients/httpclientcpr.h188
-rw-r--r--src/zenhttp/clients/httpclientcurl.cpp4
-rw-r--r--src/zenhttp/clients/httpclientcurl.h5
-rw-r--r--src/zenhttp/httpclient.cpp111
-rw-r--r--src/zenhttp/httpclient_test.cpp11
-rw-r--r--src/zenhttp/httpserver.cpp111
-rw-r--r--src/zenhttp/include/zenhttp/auth/authservice.h4
-rw-r--r--src/zenhttp/include/zenhttp/cprutils.h98
-rw-r--r--src/zenhttp/include/zenhttp/httpclient.h9
-rw-r--r--src/zenhttp/include/zenhttp/httpcommon.h3
-rw-r--r--src/zenhttp/include/zenhttp/httpserver.h8
-rw-r--r--src/zenhttp/include/zenhttp/httpstats.h1
-rw-r--r--src/zenhttp/monitoring/httpstats.cpp154
-rw-r--r--src/zenhttp/xmake.lua7
-rw-r--r--src/zenremotestore/builds/jupiterbuildstorage.cpp144
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h2
-rw-r--r--src/zenremotestore/zenremotestore.cpp2
-rw-r--r--src/zenserver-test/cache-tests.cpp8
-rw-r--r--src/zenserver-test/hub-tests.cpp322
-rw-r--r--src/zenserver/compute/computeserver.cpp2
-rw-r--r--src/zenserver/config/config.cpp2
-rw-r--r--src/zenserver/config/config.h2
-rw-r--r--src/zenserver/diag/logging.cpp2
-rw-r--r--src/zenserver/frontend/frontend.cpp30
-rw-r--r--src/zenserver/frontend/frontend.h21
-rw-r--r--src/zenserver/frontend/html/pages/builds.js88
-rw-r--r--src/zenserver/frontend/html/pages/hub.js15
-rw-r--r--src/zenserver/frontend/html/pages/objectstore.js48
-rw-r--r--src/zenserver/frontend/html/pages/page.js72
-rw-r--r--src/zenserver/frontend/html/pages/projects.js50
-rw-r--r--src/zenserver/frontend/html/pages/start.js115
-rw-r--r--src/zenserver/frontend/html/pages/workspaces.js236
-rw-r--r--src/zenserver/frontend/html/zen.css84
-rw-r--r--src/zenserver/hub/httphubservice.cpp221
-rw-r--r--src/zenserver/hub/httphubservice.h20
-rw-r--r--src/zenserver/hub/hub.cpp1909
-rw-r--r--src/zenserver/hub/hub.h182
-rw-r--r--src/zenserver/hub/hubinstancestate.h21
-rw-r--r--src/zenserver/hub/storageserverinstance.cpp174
-rw-r--r--src/zenserver/hub/storageserverinstance.h43
-rw-r--r--src/zenserver/hub/zenhubserver.cpp123
-rw-r--r--src/zenserver/hub/zenhubserver.h33
-rw-r--r--src/zenserver/proxy/httpproxystats.cpp12
-rw-r--r--src/zenserver/proxy/zenproxyserver.cpp2
-rw-r--r--src/zenserver/sessions/httpsessions.cpp30
-rw-r--r--src/zenserver/sessions/httpsessions.h5
-rw-r--r--src/zenserver/storage/admin/admin.h4
-rw-r--r--src/zenserver/storage/buildstore/httpbuildstore.cpp40
-rw-r--r--src/zenserver/storage/buildstore/httpbuildstore.h9
-rw-r--r--src/zenserver/storage/cache/httpstructuredcache.cpp149
-rw-r--r--src/zenserver/storage/cache/httpstructuredcache.h5
-rw-r--r--src/zenserver/storage/objectstore/objectstore.cpp90
-rw-r--r--src/zenserver/storage/objectstore/objectstore.h38
-rw-r--r--src/zenserver/storage/projectstore/httpprojectstore.cpp38
-rw-r--r--src/zenserver/storage/projectstore/httpprojectstore.h5
-rw-r--r--src/zenserver/storage/upstream/upstreamservice.h4
-rw-r--r--src/zenserver/storage/workspaces/httpworkspaces.cpp38
-rw-r--r--src/zenserver/storage/workspaces/httpworkspaces.h5
-rw-r--r--src/zenserver/storage/zenstorageserver.cpp4
-rw-r--r--src/zenstore/include/zenstore/projectstore.h1
-rw-r--r--src/zenstore/projectstore.cpp7
-rw-r--r--src/zentelemetry/include/zentelemetry/hyperloglog.h1
-rw-r--r--src/zenutil/consul/consul.cpp36
-rw-r--r--src/zenutil/include/zenutil/consul.h5
-rw-r--r--src/zenutil/xmake.lua4
72 files changed, 3564 insertions, 2981 deletions
diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp
index cbaf64e31..3277eb856 100644
--- a/src/zen/zen.cpp
+++ b/src/zen/zen.cpp
@@ -799,7 +799,7 @@ main(int argc, char** argv)
Options.add_options()("help", "Show command line help");
Options.add_options()("c, command", "Sub command", cxxopts::value<std::string>(SubCommand));
Options.add_options()("httpclient",
- "Select HTTP client implementation (e.g. 'curl', 'cpr')",
+ "Select HTTP client implementation",
cxxopts::value<std::string>(GlobalOptions.HttpClientBackend)->default_value("curl"));
int CoreLimit = 0;
@@ -949,6 +949,7 @@ main(int argc, char** argv)
.IsTest = false,
.NoConsoleOutput = GlobalOptions.LoggingConfig.NoConsoleOutput,
.QuietConsole = GlobalOptions.LoggingConfig.QuietConsole,
+ .ForceColor = GlobalOptions.LoggingConfig.ForceColor,
.AbsLogFile = GlobalOptions.LoggingConfig.AbsLogFile,
.LogId = GlobalOptions.LoggingConfig.LogId};
zen::InitializeLogging(LogOptions);
diff --git a/src/zen/zen.h b/src/zen/zen.h
index 97cc9af6f..64d9390a3 100644
--- a/src/zen/zen.h
+++ b/src/zen/zen.h
@@ -18,7 +18,7 @@ struct ZenCliOptions
ZenLoggingConfig LoggingConfig;
- std::string HttpClientBackend; // Choice of HTTP client implementation (e.g. "curl", "cpr")
+ std::string HttpClientBackend; // Choice of HTTP client implementation
// Arguments after " -- " on command line are passed through and not parsed
std::string PassthroughCommandLine;
diff --git a/src/zencore/include/zencore/fmtutils.h b/src/zencore/include/zencore/fmtutils.h
index 404e570fd..4ec05f901 100644
--- a/src/zencore/include/zencore/fmtutils.h
+++ b/src/zencore/include/zencore/fmtutils.h
@@ -15,6 +15,29 @@ ZEN_THIRD_PARTY_INCLUDES_END
#include <chrono>
#include <string_view>
+// Generic formatter for any type with a free ToString(T) function returning a
+// string-like type. This covers enum-to-string conversions (HttpResponseCode,
+// SessionState, etc.) without needing per-type fmt::formatter specializations.
+// ADL is used to find ToString, so it works across namespaces.
+
+template<typename T>
+concept HasFreeToString = requires(const T& v)
+{
+ {
+ ToString(v)
+ } -> std::convertible_to<std::string_view>;
+};
+
+template<HasFreeToString T>
+struct fmt::formatter<T> : fmt::formatter<std::string_view>
+{
+ template<typename FormatContext>
+ auto format(const T& Value, FormatContext& Ctx) const
+ {
+ return fmt::formatter<std::string_view>::format(ToString(Value), Ctx);
+ }
+};
+
// Custom formatting for some zencore types
template<typename T>
diff --git a/src/zencore/include/zencore/thread.h b/src/zencore/include/zencore/thread.h
index d7262324f..56ce5904b 100644
--- a/src/zencore/include/zencore/thread.h
+++ b/src/zencore/include/zencore/thread.h
@@ -190,6 +190,13 @@ class Latch
public:
Latch(std::ptrdiff_t Count) : Counter(Count) {}
+ void Reset(std::ptrdiff_t Count)
+ {
+ ZEN_ASSERT(Counter.load() == 0);
+ Complete.Reset();
+ Counter.store(Count);
+ }
+
void CountDown()
{
std::ptrdiff_t Old = Counter.fetch_sub(1);
diff --git a/src/zenhorde/xmake.lua b/src/zenhorde/xmake.lua
index 48d028e86..0e69e9c5f 100644
--- a/src/zenhorde/xmake.lua
+++ b/src/zenhorde/xmake.lua
@@ -14,7 +14,7 @@ target('zenhorde')
end
if is_plat("linux") or is_plat("macosx") then
- add_packages("openssl")
+ add_packages("openssl3")
end
if is_os("macosx") then
diff --git a/src/zenhttp/clients/httpclientcommon.h b/src/zenhttp/clients/httpclientcommon.h
index e8d969cc8..078d4a52f 100644
--- a/src/zenhttp/clients/httpclientcommon.h
+++ b/src/zenhttp/clients/httpclientcommon.h
@@ -21,7 +21,10 @@ public:
using Response = HttpClient::Response;
using KeyValueMap = HttpClient::KeyValueMap;
- [[nodiscard]] virtual Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) = 0;
+ [[nodiscard]] virtual Response Put(std::string_view Url,
+ const IoBuffer& Payload,
+ const KeyValueMap& AdditionalHeader = {},
+ const KeyValueMap& Parameters = {}) = 0;
[[nodiscard]] virtual Response Put(std::string_view Url, const KeyValueMap& Parameters = {}) = 0;
[[nodiscard]] virtual Response Get(std::string_view Url,
const KeyValueMap& AdditionalHeader = {},
@@ -59,6 +62,7 @@ public:
LoggerRef Log() { return m_Log; }
std::string_view GetBaseUri() const { return m_BaseUri; }
+ void SetBaseUri(std::string_view NewBaseUri) { m_BaseUri = NewBaseUri; }
std::string_view GetSessionId() const { return m_SessionId; }
bool Authenticate();
diff --git a/src/zenhttp/clients/httpclientcpr.cpp b/src/zenhttp/clients/httpclientcpr.cpp
deleted file mode 100644
index bd6de3ff7..000000000
--- a/src/zenhttp/clients/httpclientcpr.cpp
+++ /dev/null
@@ -1,1285 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include "httpclientcpr.h"
-
-#include <zencore/compactbinary.h>
-#include <zencore/compactbinarybuilder.h>
-#include <zencore/compactbinarypackage.h>
-#include <zencore/compactbinaryutil.h>
-#include <zencore/compress.h>
-#include <zencore/filesystem.h>
-#include <zencore/iobuffer.h>
-#include <zencore/iohash.h>
-#include <zencore/session.h>
-#include <zencore/stream.h>
-#include <zenhttp/packageformat.h>
-#include <algorithm>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <cpr/ssl_options.h>
-#include <cpr/unix_socket.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-HttpClientBase*
-CreateCprHttpClient(std::string_view BaseUri, const HttpClientSettings& ConnectionSettings, std::function<bool()>&& CheckIfAbortFunction)
-{
- return new CprHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction));
-}
-
-static std::atomic<uint32_t> HttpClientRequestIdCounter{0};
-
-//////////////////////////////////////////////////////////////////////////
-
-static HttpClientErrorCode
-MapCprError(cpr::ErrorCode Code)
-{
- switch (Code)
- {
- case cpr::ErrorCode::OK:
- return HttpClientErrorCode::kOK;
- case cpr::ErrorCode::CONNECTION_FAILURE:
- return HttpClientErrorCode::kConnectionFailure;
- case cpr::ErrorCode::HOST_RESOLUTION_FAILURE:
- return HttpClientErrorCode::kHostResolutionFailure;
- case cpr::ErrorCode::PROXY_RESOLUTION_FAILURE:
- return HttpClientErrorCode::kProxyResolutionFailure;
- case cpr::ErrorCode::INTERNAL_ERROR:
- return HttpClientErrorCode::kInternalError;
- case cpr::ErrorCode::NETWORK_RECEIVE_ERROR:
- return HttpClientErrorCode::kNetworkReceiveError;
- case cpr::ErrorCode::NETWORK_SEND_FAILURE:
- return HttpClientErrorCode::kNetworkSendFailure;
- case cpr::ErrorCode::OPERATION_TIMEDOUT:
- return HttpClientErrorCode::kOperationTimedOut;
- case cpr::ErrorCode::SSL_CONNECT_ERROR:
- return HttpClientErrorCode::kSSLConnectError;
- case cpr::ErrorCode::SSL_LOCAL_CERTIFICATE_ERROR:
- case cpr::ErrorCode::SSL_REMOTE_CERTIFICATE_ERROR:
- return HttpClientErrorCode::kSSLCertificateError;
- case cpr::ErrorCode::SSL_CACERT_ERROR:
- return HttpClientErrorCode::kSSLCACertError;
- case cpr::ErrorCode::GENERIC_SSL_ERROR:
- return HttpClientErrorCode::kGenericSSLError;
- case cpr::ErrorCode::REQUEST_CANCELLED:
- return HttpClientErrorCode::kRequestCancelled;
- default:
- return HttpClientErrorCode::kOtherError;
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-//
-// CPR helpers
-
-static cpr::Body
-AsCprBody(const CbObject& Obj)
-{
- return cpr::Body((const char*)Obj.GetBuffer().GetData(), Obj.GetBuffer().GetSize());
-}
-
-static cpr::Body
-AsCprBody(const IoBuffer& Obj)
-{
- return cpr::Body((const char*)Obj.GetData(), Obj.GetSize());
-}
-
-static bool
-ShouldRetry(const cpr::Response& Response)
-{
- switch (Response.error.code)
- {
- case cpr::ErrorCode::OK:
- break;
- case cpr::ErrorCode::INTERNAL_ERROR:
- case cpr::ErrorCode::NETWORK_RECEIVE_ERROR:
- case cpr::ErrorCode::NETWORK_SEND_FAILURE:
- case cpr::ErrorCode::OPERATION_TIMEDOUT:
- return true;
- default:
- return false;
- }
- switch ((HttpResponseCode)Response.status_code)
- {
- case HttpResponseCode::RequestTimeout:
- case HttpResponseCode::TooManyRequests:
- case HttpResponseCode::InternalServerError:
- case HttpResponseCode::BadGateway:
- case HttpResponseCode::ServiceUnavailable:
- case HttpResponseCode::GatewayTimeout:
- return true;
- default:
- return false;
- }
-};
-
-static std::pair<std::string, std::string>
-HeaderContentType(ZenContentType ContentType)
-{
- return std::make_pair("Content-Type", std::string(MapContentTypeToString(ContentType)));
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-CprHttpClient::CprHttpClient(std::string_view BaseUri,
- const HttpClientSettings& Connectionsettings,
- std::function<bool()>&& CheckIfAbortFunction)
-: HttpClientBase(BaseUri, Connectionsettings, std::move(CheckIfAbortFunction))
-{
-}
-
-bool
-CprHttpClient::ShouldLogErrorCode(HttpResponseCode ResponseCode) const
-{
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- // Quiet
- return false;
- }
- const auto& Expected = m_ConnectionSettings.ExpectedErrorCodes;
- return std::find(Expected.begin(), Expected.end(), ResponseCode) == Expected.end();
-}
-
-CprHttpClient::~CprHttpClient()
-{
- ZEN_TRACE_CPU("CprHttpClient::~CprHttpClient");
- m_SessionLock.WithExclusiveLock([&] {
- for (auto CprSession : m_Sessions)
- {
- delete CprSession;
- }
- m_Sessions.clear();
- });
-}
-
-HttpClient::Response
-CprHttpClient::ResponseWithPayload(std::string_view SessionId,
- cpr::Response&& HttpResponse,
- const HttpResponseCode WorkResponseCode,
- IoBuffer&& Payload,
- std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions)
-{
- // This ends up doing a memcpy, would be good to get rid of it by streaming results
- // into buffer directly
- IoBuffer ResponseBuffer = Payload ? std::move(Payload) : IoBuffer(IoBuffer::Clone, HttpResponse.text.data(), HttpResponse.text.size());
-
- if (auto It = HttpResponse.header.find("Content-Type"); It != HttpResponse.header.end())
- {
- const HttpContentType ContentType = ParseContentType(It->second);
- ResponseBuffer.SetContentType(ContentType);
- }
-
- if (!IsHttpSuccessCode(WorkResponseCode) && WorkResponseCode != HttpResponseCode::NotFound)
- {
- if (ShouldLogErrorCode(WorkResponseCode))
- {
- ZEN_WARN("HttpClient request failed (session: {}): {}", SessionId, HttpResponse);
- }
- }
-
- std::sort(BoundaryPositions.begin(),
- BoundaryPositions.end(),
- [](const HttpClient::Response::MultipartBoundary& Lhs, const HttpClient::Response::MultipartBoundary& Rhs) {
- return Lhs.RangeOffset < Rhs.RangeOffset;
- });
-
- return HttpClient::Response{.StatusCode = WorkResponseCode,
- .ResponsePayload = std::move(ResponseBuffer),
- .Header = HttpClient::KeyValueMap(HttpResponse.header.begin(), HttpResponse.header.end()),
- .UploadedBytes = gsl::narrow<int64_t>(HttpResponse.uploaded_bytes),
- .DownloadedBytes = gsl::narrow<int64_t>(HttpResponse.downloaded_bytes),
- .ElapsedSeconds = HttpResponse.elapsed,
- .Ranges = std::move(BoundaryPositions)};
-}
-
-HttpClient::Response
-CprHttpClient::CommonResponse(std::string_view SessionId,
- cpr::Response&& HttpResponse,
- IoBuffer&& Payload,
- std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions)
-{
- const HttpResponseCode WorkResponseCode = HttpResponseCode(HttpResponse.status_code);
- if (HttpResponse.error)
- {
- const bool Quiet = m_CheckIfAbortFunction && m_CheckIfAbortFunction();
- if (!Quiet)
- {
- if (HttpResponse.error.code != cpr::ErrorCode::OPERATION_TIMEDOUT &&
- HttpResponse.error.code != cpr::ErrorCode::CONNECTION_FAILURE &&
- HttpResponse.error.code != cpr::ErrorCode::REQUEST_CANCELLED)
- {
- ZEN_WARN("HttpClient client failure (session: {}): {}", SessionId, HttpResponse);
- }
- }
-
- // Client side failure code
- return HttpClient::Response{
- .StatusCode = WorkResponseCode,
- .ResponsePayload = IoBufferBuilder::MakeCloneFromMemory(HttpResponse.text.data(), HttpResponse.text.size()),
- .Header = HttpClient::KeyValueMap(HttpResponse.header.begin(), HttpResponse.header.end()),
- .UploadedBytes = gsl::narrow<int64_t>(HttpResponse.uploaded_bytes),
- .DownloadedBytes = gsl::narrow<int64_t>(HttpResponse.downloaded_bytes),
- .ElapsedSeconds = HttpResponse.elapsed,
- .Error =
- HttpClient::ErrorContext{.ErrorCode = MapCprError(HttpResponse.error.code), .ErrorMessage = HttpResponse.error.message}};
- }
-
- if (WorkResponseCode == HttpResponseCode::NoContent || (HttpResponse.text.empty() && !Payload))
- {
- return HttpClient::Response{.StatusCode = WorkResponseCode,
- .Header = HttpClient::KeyValueMap(HttpResponse.header.begin(), HttpResponse.header.end()),
- .UploadedBytes = gsl::narrow<int64_t>(HttpResponse.uploaded_bytes),
- .DownloadedBytes = gsl::narrow<int64_t>(HttpResponse.downloaded_bytes),
- .ElapsedSeconds = HttpResponse.elapsed};
- }
- else
- {
- return ResponseWithPayload(SessionId, std::move(HttpResponse), WorkResponseCode, std::move(Payload), std::move(BoundaryPositions));
- }
-}
-
-bool
-CprHttpClient::ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile>& PayloadFile)
-{
- ZEN_TRACE_CPU("ValidatePayload");
- IoBuffer ResponseBuffer = (Response.text.empty() && PayloadFile) ? PayloadFile->BorrowIoBuffer()
- : IoBuffer(IoBuffer::Wrap, Response.text.data(), Response.text.size());
-
- if (auto ContentLength = Response.header.find("Content-Length"); ContentLength != Response.header.end())
- {
- std::optional<uint64_t> ExpectedContentSize = ParseInt<uint64_t>(ContentLength->second);
- if (!ExpectedContentSize.has_value())
- {
- Response.error =
- cpr::Error(/*CURLE_READ_ERROR*/ 26, fmt::format("Can not parse Content-Length header. Value: '{}'", ContentLength->second));
- return false;
- }
- if (ExpectedContentSize.value() != ResponseBuffer.GetSize())
- {
- Response.error = cpr::Error(
- /*CURLE_READ_ERROR*/ 26,
- fmt::format("Payload size {} does not match Content-Length {}", ResponseBuffer.GetSize(), ContentLength->second));
- return false;
- }
- }
-
- if (Response.status_code == (long)HttpResponseCode::PartialContent)
- {
- return true;
- }
-
- if (auto JupiterHash = Response.header.find("X-Jupiter-IoHash"); JupiterHash != Response.header.end())
- {
- IoHash ExpectedPayloadHash;
- if (IoHash::TryParse(JupiterHash->second, ExpectedPayloadHash))
- {
- IoHash PayloadHash = IoHash::HashBuffer(ResponseBuffer);
- if (PayloadHash != ExpectedPayloadHash)
- {
- Response.error = cpr::Error(/*CURLE_READ_ERROR*/ 26,
- fmt::format("Payload hash {} does not match X-Jupiter-IoHash {}",
- PayloadHash.ToHexString(),
- ExpectedPayloadHash.ToHexString()));
- return false;
- }
- }
- }
-
- if (auto ContentType = Response.header.find("Content-Type"); ContentType != Response.header.end())
- {
- if (ContentType->second == "application/x-ue-comp")
- {
- IoHash RawHash;
- uint64_t RawSize;
- if (CompressedBuffer::ValidateCompressedHeader(ResponseBuffer, RawHash, RawSize, /*OutOptionalTotalCompressedSize*/ nullptr))
- {
- return true;
- }
- else
- {
- Response.error = cpr::Error(/*CURLE_READ_ERROR*/ 26, "Compressed binary failed validation");
- return false;
- }
- }
- if (ContentType->second == "application/x-ue-cb")
- {
- if (CbValidateError Error = ValidateCompactBinary(ResponseBuffer.GetView(), CbValidateMode::Default);
- Error == CbValidateError::None)
- {
- return true;
- }
- else
- {
- Response.error = cpr::Error(/*CURLE_READ_ERROR*/ 26, fmt::format("Compact binary failed validation: {}", ToString(Error)));
- return false;
- }
- }
- }
-
- return true;
-}
-
-cpr::Response
-CprHttpClient::DoWithRetry(std::string_view SessionId,
- std::function<cpr::Response()>&& Func,
- std::function<bool(cpr::Response& Result)>&& Validate)
-{
- uint8_t Attempt = 0;
- cpr::Response Result = Func();
- while (Attempt < m_ConnectionSettings.RetryCount)
- {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return Result;
- }
- if (!ShouldRetry(Result))
- {
- if (Result.error || !IsHttpSuccessCode(Result.status_code))
- {
- break;
- }
- if (Validate(Result))
- {
- break;
- }
- }
- Sleep(100 * (Attempt + 1));
- Attempt++;
- if (ShouldLogErrorCode(HttpResponseCode(Result.status_code)))
- {
- ZEN_INFO("{} Attempt {}/{}",
- CommonResponse(SessionId, std::move(Result), {}).ErrorMessage("Retry"),
- Attempt,
- m_ConnectionSettings.RetryCount + 1);
- }
- Result = Func();
- }
- return Result;
-}
-
-cpr::Response
-CprHttpClient::DoWithRetry(std::string_view SessionId,
- std::function<cpr::Response()>&& Func,
- std::unique_ptr<detail::TempPayloadFile>& PayloadFile)
-{
- uint8_t Attempt = 0;
- cpr::Response Result = Func();
- while (Attempt < m_ConnectionSettings.RetryCount)
- {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return Result;
- }
- if (!ShouldRetry(Result))
- {
- if (Result.error || !IsHttpSuccessCode(Result.status_code))
- {
- break;
- }
- if (ValidatePayload(Result, PayloadFile))
- {
- break;
- }
- }
- Sleep(100 * (Attempt + 1));
- Attempt++;
- if (ShouldLogErrorCode(HttpResponseCode(Result.status_code)))
- {
- ZEN_INFO("{} Attempt {}/{}",
- CommonResponse(SessionId, std::move(Result), {}).ErrorMessage("Retry"),
- Attempt,
- m_ConnectionSettings.RetryCount + 1);
- }
- Result = Func();
- }
- return Result;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-CprHttpClient::Session
-CprHttpClient::AllocSession(const std::string_view BaseUrl,
- const std::string_view ResourcePath,
- const HttpClientSettings& ConnectionSettings,
- const KeyValueMap& AdditionalHeader,
- const KeyValueMap& Parameters,
- const std::string_view SessionId,
- std::optional<std::string> AccessToken)
-{
- ZEN_TRACE_CPU("CprHttpClient::AllocSession");
- cpr::Session* CprSession = nullptr;
- m_SessionLock.WithExclusiveLock([&] {
- if (!m_Sessions.empty())
- {
- CprSession = m_Sessions.back();
- m_Sessions.pop_back();
- }
- });
-
- if (CprSession == nullptr)
- {
- CprSession = new cpr::Session();
- CprSession->SetConnectTimeout(ConnectionSettings.ConnectTimeout);
- CprSession->SetTimeout(ConnectionSettings.Timeout);
- if (ConnectionSettings.AssumeHttp2)
- {
- CprSession->SetHttpVersion(cpr::HttpVersion{cpr::HttpVersionCode::VERSION_2_0_PRIOR_KNOWLEDGE});
- }
- if (ConnectionSettings.Verbose)
- {
- // CprSession->SetVerbose(cpr::Verbose{ true });
- CprSession->SetDebugCallback(cpr::DebugCallback{
- [this](cpr::DebugCallback::InfoType type, std::string data, intptr_t userdata) {
- cpr::Session* CprSession = (cpr::Session*)userdata;
- ZEN_UNUSED(CprSession);
- switch (type)
- {
- case cpr::DebugCallback::InfoType::TEXT:
- if (data.find("need more data"sv) == std::string::npos)
- {
- ZEN_INFO("TEXT: {}", data);
- }
- break;
- case cpr::DebugCallback::InfoType::HEADER_IN:
- ZEN_INFO("HIN : {}", data);
- break;
- case cpr::DebugCallback::InfoType::HEADER_OUT:
- if (std::string::size_type TokenPos = data.find("Authorization: Bearer "sv); TokenPos != std::string::npos)
- {
- TokenPos += 22;
- std::string::size_type TokenEndPos = data.find_first_of("\r\n", TokenPos);
- if (TokenEndPos == std::string::npos)
- {
- TokenEndPos = data.length();
- }
- std::string Copy = data;
- Copy.replace(Copy.begin() + TokenPos,
- Copy.begin() + TokenEndPos,
- fmt::format("[{} char token]", TokenEndPos - TokenPos));
- ZEN_INFO("HOUT: {}", Copy);
- }
- else
- {
- ZEN_INFO("HOUT: {}", data);
- }
- break;
- case cpr::DebugCallback::InfoType::DATA_IN:
- // ZEN_INFO("DATA_IN: {}", data);
- break;
- case cpr::DebugCallback::InfoType::DATA_OUT:
- // ZEN_INFO("DATA_OUT: {}", data);
- break;
- case cpr::DebugCallback::InfoType::SSL_DATA_IN:
- // ZEN_INFO("SSL_DATA_IN: {}", data);
- break;
- case cpr::DebugCallback::InfoType::SSL_DATA_OUT:
- // ZEN_INFO("SSL_DATA_OUT: {}", data);
- break;
- }
- },
- (intptr_t)CprSession});
- }
- }
-
- if (!AdditionalHeader->empty())
- {
- CprSession->SetHeader(cpr::Header(AdditionalHeader->begin(), AdditionalHeader->end()));
- }
- if (!SessionId.empty())
- {
- CprSession->UpdateHeader({{"UE-Session", std::string(SessionId)}});
- }
- if (ConnectionSettings.ForbidReuseConnection)
- {
- CprSession->UpdateHeader({{"Connection", "close"}});
- }
-
- if (AccessToken.has_value())
- {
- CprSession->UpdateHeader({{"Authorization", AccessToken.value()}});
- }
- if (!Parameters->empty())
- {
- cpr::Parameters Tmp;
- for (auto It = Parameters->begin(); It != Parameters->end(); It++)
- {
- Tmp.Add({It->first, It->second});
- }
- CprSession->SetParameters(Tmp);
- }
- else
- {
- CprSession->SetParameters({});
- }
-
- if (!ConnectionSettings.UnixSocketPath.empty())
- {
- CprSession->SetUnixSocket(cpr::UnixSocket(PathToUtf8(ConnectionSettings.UnixSocketPath)));
- }
-
- if (ConnectionSettings.InsecureSsl || !ConnectionSettings.CaBundlePath.empty())
- {
- cpr::SslOptions SslOpts;
- if (ConnectionSettings.InsecureSsl)
- {
- SslOpts.SetOption(cpr::ssl::VerifyHost{false});
- SslOpts.SetOption(cpr::ssl::VerifyPeer{false});
- }
- if (!ConnectionSettings.CaBundlePath.empty())
- {
- SslOpts.SetOption(cpr::ssl::CaInfo{ConnectionSettings.CaBundlePath});
- }
- CprSession->SetSslOptions(SslOpts);
- }
-
- ExtendableStringBuilder<128> UrlBuffer;
- UrlBuffer << BaseUrl << ResourcePath;
- CprSession->SetUrl(UrlBuffer.c_str());
-
- return Session(this, CprSession);
-}
-
-void
-CprHttpClient::ReleaseSession(cpr::Session* CprSession)
-{
- ZEN_TRACE_CPU("CprHttpClient::ReleaseSession");
- CprSession->SetUrl({});
- CprSession->SetHeader({});
- CprSession->SetBody({});
- m_SessionLock.WithExclusiveLock([&] { m_Sessions.push_back(CprSession); });
-}
-
-CprHttpClient::Response
-CprHttpClient::TransactPackage(std::string_view Url, CbPackage Package, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::TransactPackage");
-
- Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
-
- // First, list of offered chunks for filtering on the server end
-
- std::vector<IoHash> AttachmentsToSend;
- std::span<const CbAttachment> Attachments = Package.GetAttachments();
-
- const uint32_t RequestId = ++HttpClientRequestIdCounter;
- auto RequestIdString = fmt::to_string(RequestId);
-
- if (Attachments.empty() == false)
- {
- CbObjectWriter Writer;
- Writer.BeginArray("offer");
-
- for (const CbAttachment& Attachment : Attachments)
- {
- Writer.AddHash(Attachment.GetHash());
- }
-
- Writer.EndArray();
-
- BinaryWriter MemWriter;
- Writer.Save(MemWriter);
-
- Sess->UpdateHeader({HeaderContentType(HttpContentType::kCbPackageOffer), {"UE-Request", RequestIdString}});
- Sess->SetBody(cpr::Body{(const char*)MemWriter.Data(), MemWriter.Size()});
-
- cpr::Response FilterResponse = Sess.Post();
-
- if (FilterResponse.status_code == 200)
- {
- IoBuffer ResponseBuffer(IoBuffer::Wrap, FilterResponse.text.data(), FilterResponse.text.size());
- CbValidateError ValidationError = CbValidateError::None;
- if (CbObject ResponseObject = ValidateAndReadCompactBinaryObject(std::move(ResponseBuffer), ValidationError);
- ValidationError == CbValidateError::None)
- {
- for (CbFieldView& Entry : ResponseObject["need"])
- {
- ZEN_ASSERT(Entry.IsHash());
- AttachmentsToSend.push_back(Entry.AsHash());
- }
- }
- }
- }
-
- // Prepare package for send
-
- CbPackage SendPackage;
- SendPackage.SetObject(Package.GetObject(), Package.GetObjectHash());
-
- for (const IoHash& AttachmentCid : AttachmentsToSend)
- {
- const CbAttachment* Attachment = Package.FindAttachment(AttachmentCid);
-
- if (Attachment)
- {
- SendPackage.AddAttachment(*Attachment);
- }
- else
- {
- // This should be an error -- server asked to have something we can't find
- }
- }
-
- // Transmit package payload
-
- CompositeBuffer Message = FormatPackageMessageBuffer(SendPackage);
- SharedBuffer FlatMessage = Message.Flatten();
-
- Sess->UpdateHeader({HeaderContentType(HttpContentType::kCbPackage), {"UE-Request", RequestIdString}});
- Sess->SetBody(cpr::Body{(const char*)FlatMessage.GetData(), FlatMessage.GetSize()});
-
- cpr::Response FilterResponse = Sess.Post();
-
- if (!IsHttpSuccessCode(FilterResponse.status_code))
- {
- return {.StatusCode = HttpResponseCode(FilterResponse.status_code)};
- }
-
- IoBuffer ResponseBuffer(IoBuffer::Clone, FilterResponse.text.data(), FilterResponse.text.size());
-
- if (auto It = FilterResponse.header.find("Content-Type"); It != FilterResponse.header.end())
- {
- HttpContentType ContentType = ParseContentType(It->second);
-
- ResponseBuffer.SetContentType(ContentType);
- }
-
- return {.StatusCode = HttpResponseCode(FilterResponse.status_code), .ResponsePayload = std::move(ResponseBuffer)};
-}
-
-//////////////////////////////////////////////////////////////////////////
-//
-// Standard HTTP verbs
-//
-
-CprHttpClient::Response
-CprHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Put");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- Sess->SetBody(AsCprBody(Payload));
- Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())});
- return Sess.Put();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Put(std::string_view Url, const KeyValueMap& Parameters)
-{
- ZEN_TRACE_CPU("CprHttpClient::Put");
-
- return CommonResponse(m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess = AllocSession(m_BaseUri,
- Url,
- m_ConnectionSettings,
- {{"Content-Length", "0"}},
- Parameters,
- m_SessionId,
- GetAccessToken());
- return Sess.Put();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Get(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters)
-{
- ZEN_TRACE_CPU("CprHttpClient::Get");
- return CommonResponse(
- m_SessionId,
- DoWithRetry(
- m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken());
- return Sess.Get();
- },
- [this](cpr::Response& Result) {
- std::unique_ptr<detail::TempPayloadFile> NoTempFile;
- return ValidatePayload(Result, NoTempFile);
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Head(std::string_view Url, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Head");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- return Sess.Head();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Delete(std::string_view Url, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Delete");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- return Sess.Delete();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Post(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters)
-{
- ZEN_TRACE_CPU("CprHttpClient::PostNoPayload");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken());
- return Sess.Post();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Post(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader)
-{
- return Post(Url, Payload, Payload.GetContentType(), AdditionalHeader);
-}
-
-CprHttpClient::Response
-CprHttpClient::Post(std::string_view Url, const IoBuffer& Payload, ZenContentType ContentType, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::PostWithPayload");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(
- m_SessionId,
- [&]() {
- Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- Sess->UpdateHeader({HeaderContentType(ContentType)});
-
- IoBufferFileReference FileRef = {nullptr, 0, 0};
- if (Payload.GetFileReference(FileRef))
- {
- uint64_t Offset = 0;
- detail::BufferedReadFileStream Buffer(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, 512u * 1024u);
- auto ReadCallback = [&Payload, &Offset, &Buffer](char* buffer, size_t& size, intptr_t) {
- size = Min<size_t>(size, Payload.GetSize() - Offset);
- Buffer.Read(buffer, size);
- Offset += size;
- return true;
- };
- return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback));
- }
- Sess->SetBody(AsCprBody(Payload));
- return Sess.Post();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Post(std::string_view Url,
- CbObject Payload,
- const KeyValueMap& AdditionalHeader,
- const std::filesystem::path& TempFolderPath)
-{
- ZEN_TRACE_CPU("CprHttpClient::PostObjectPayload");
-
- std::string PayloadString;
- std::unique_ptr<detail::TempPayloadFile> PayloadFile;
-
- cpr::Response Response = DoWithRetry(
- m_SessionId,
- [&]() {
- PayloadString.clear();
- PayloadFile.reset();
-
- Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
-
- Sess->SetBody(AsCprBody(Payload));
- Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)});
-
- std::vector<std::pair<std::string, std::string>> ReceivedHeaders;
- auto HeaderCallback = [&](std::string header, intptr_t) {
- const std::pair<std::string_view, std::string_view> Header = detail::GetHeaderKeyAndValue(header);
- if (StrCaseCompare(std::string(Header.first).c_str(), "Content-Length") == 0)
- {
- std::optional<size_t> ContentLength = ParseInt<size_t>(Header.second);
- if (ContentLength.has_value())
- {
- if (!TempFolderPath.empty() && ContentLength.value() > m_ConnectionSettings.MaximumInMemoryDownloadSize)
- {
- PayloadFile = std::make_unique<detail::TempPayloadFile>();
- std::error_code Ec = PayloadFile->Open(TempFolderPath, ContentLength.value());
- if (Ec)
- {
- ZEN_WARN("Failed to create temp file in '{}' for HttpClient::Post. Reason: {}",
- TempFolderPath.string(),
- Ec.message());
- PayloadFile.reset();
- }
- }
- else
- {
- PayloadString.reserve(ContentLength.value());
- }
- }
- }
- if (!Header.first.empty())
- {
- ReceivedHeaders.emplace_back(std::move(Header));
- }
- return 1;
- };
-
- auto DownloadCallback = [&](std::string data, intptr_t) {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return false;
- }
-
- if (PayloadFile)
- {
- ZEN_ASSERT(PayloadString.empty());
- std::error_code Ec = PayloadFile->Write(data);
- if (Ec)
- {
- ZEN_WARN("Failed to write to temp file in '{}' for HttpClient::Post. Reason: {}",
- TempFolderPath.string(),
- Ec.message());
- return false;
- }
- }
- else
- {
- PayloadString.append(data);
- }
- return true;
- };
- cpr::Response Response = Sess.Post({}, cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback});
- for (const std::pair<std::string, std::string>& H : ReceivedHeaders)
- {
- Response.header.insert_or_assign(H.first, H.second);
- }
- if (!PayloadString.empty())
- {
- Response.text = std::move(PayloadString);
- }
- return Response;
- },
- PayloadFile);
- return CommonResponse(m_SessionId, std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{});
-}
-
-CprHttpClient::Response
-CprHttpClient::Post(std::string_view Url, CbPackage Pkg, const KeyValueMap& AdditionalHeader)
-{
- return Post(Url, zen::FormatPackageMessageBuffer(Pkg), ZenContentType::kCbPackage, AdditionalHeader);
-}
-
-CprHttpClient::Response
-CprHttpClient::Post(std::string_view Url, const CompositeBuffer& Payload, ZenContentType ContentType, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Post");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- Sess->UpdateHeader({HeaderContentType(ContentType)});
-
- detail::CompositeBufferReadStream Reader(Payload, 512u * 1024u);
- auto ReadCallback = [this, &Reader](char* buffer, size_t& size, intptr_t) {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return false;
- }
- size = Reader.Read(buffer, size);
- return true;
- };
- return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback));
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Upload(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Upload");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(
- m_SessionId,
- [&]() {
- Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())});
-
- IoBufferFileReference FileRef = {nullptr, 0, 0};
- if (Payload.GetFileReference(FileRef))
- {
- uint64_t Offset = 0;
- detail::BufferedReadFileStream Buffer(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, 512u * 1024u);
- auto ReadCallback = [this, &Payload, &Offset, &Buffer](char* buffer, size_t& size, intptr_t) {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return false;
- }
-
- size = Min<size_t>(size, Payload.GetSize() - Offset);
- Buffer.Read(buffer, size);
- Offset += size;
- return true;
- };
- return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback));
- }
- Sess->SetBody(AsCprBody(Payload));
- return Sess.Put();
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Upload(std::string_view Url, const CompositeBuffer& Payload, ZenContentType ContentType, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Upload");
-
- return CommonResponse(
- m_SessionId,
- DoWithRetry(m_SessionId,
- [&]() {
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- Sess->UpdateHeader({HeaderContentType(ContentType)});
-
- detail::CompositeBufferReadStream Reader(Payload, 512u * 1024u);
- auto ReadCallback = [this, &Reader](char* buffer, size_t& size, intptr_t) {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return false;
- }
- size = Reader.Read(buffer, size);
- return true;
- };
- return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback));
- }),
- {});
-}
-
-CprHttpClient::Response
-CprHttpClient::Download(std::string_view Url, const std::filesystem::path& TempFolderPath, const KeyValueMap& AdditionalHeader)
-{
- ZEN_TRACE_CPU("CprHttpClient::Download");
-
- std::string PayloadString;
- std::unique_ptr<detail::TempPayloadFile> PayloadFile;
-
- HttpContentType ContentType = HttpContentType::kUnknownContentType;
- detail::MultipartBoundaryParser BoundaryParser;
- bool IsMultiRangeResponse = false;
-
- cpr::Response Response = DoWithRetry(
- m_SessionId,
- [&]() {
- // Reset state from any previous attempt
- PayloadString.clear();
- PayloadFile.reset();
- BoundaryParser.Boundaries.clear();
- ContentType = HttpContentType::kUnknownContentType;
- IsMultiRangeResponse = false;
-
- auto DownloadCallback = [&](std::string data, intptr_t) {
- if (m_CheckIfAbortFunction && m_CheckIfAbortFunction())
- {
- return false;
- }
-
- if (IsMultiRangeResponse)
- {
- BoundaryParser.ParseInput(data);
- }
-
- if (PayloadFile)
- {
- ZEN_ASSERT(PayloadString.empty());
- std::error_code Ec = PayloadFile->Write(data);
- if (Ec)
- {
- ZEN_WARN("Failed to write to temp file in '{}' for HttpClient::Download. Reason: {}",
- TempFolderPath.string(),
- Ec.message());
- return false;
- }
- }
- else
- {
- PayloadString.append(data);
- }
- return true;
- };
-
- uint64_t RequestedContentLength = (uint64_t)-1;
- if (auto RangeIt = AdditionalHeader.Entries.find("Range"); RangeIt != AdditionalHeader.Entries.end())
- {
- if (RangeIt->second.starts_with("bytes"))
- {
- std::string_view RangeValue(RangeIt->second);
- size_t RangeStartPos = RangeValue.find('=', 5);
- if (RangeStartPos != std::string::npos)
- {
- RangeStartPos++;
- while (RangeStartPos < RangeValue.length() && RangeValue[RangeStartPos] == ' ')
- {
- RangeStartPos++;
- }
- RequestedContentLength = 0;
-
- while (RangeStartPos < RangeValue.length())
- {
- size_t RangeEnd = RangeValue.find_first_of(", \r\n", RangeStartPos);
- if (RangeEnd == std::string::npos)
- {
- RangeEnd = RangeValue.length();
- }
-
- std::string_view RangeString = RangeValue.substr(RangeStartPos, RangeEnd - RangeStartPos);
- size_t RangeSplitPos = RangeString.find('-');
- if (RangeSplitPos != std::string::npos)
- {
- std::optional<size_t> RequestedRangeStart = ParseInt<size_t>(RangeString.substr(0, RangeSplitPos));
- std::optional<size_t> RequestedRangeEnd = ParseInt<size_t>(RangeString.substr(RangeSplitPos + 1));
- if (RequestedRangeStart.has_value() && RequestedRangeEnd.has_value())
- {
- RequestedContentLength += RequestedRangeEnd.value() - RequestedRangeStart.value() + 1;
- }
- }
- RangeStartPos = RangeEnd;
- while (RangeStartPos != RangeValue.length() &&
- (RangeValue[RangeStartPos] == ',' || RangeValue[RangeStartPos] == ' '))
- {
- RangeStartPos++;
- }
- }
- }
- }
- }
-
- cpr::Response Response;
- {
- std::vector<std::pair<std::string, std::string>> ReceivedHeaders;
- auto HeaderCallback = [&](std::string header, intptr_t) {
- if (RequestedContentLength != (uint64_t)-1 && RequestedContentLength > m_ConnectionSettings.MaximumInMemoryDownloadSize)
- {
- ZEN_DEBUG("Multirange request");
- }
- const std::pair<std::string_view, std::string_view> Header = detail::GetHeaderKeyAndValue(header);
- const std::string Key(Header.first);
- if (StrCaseCompare(Key.c_str(), "Content-Length") == 0)
- {
- std::optional<size_t> ContentLength = ParseInt<size_t>(Header.second);
- if (ContentLength.has_value())
- {
- if (!TempFolderPath.empty() && ContentLength.value() > m_ConnectionSettings.MaximumInMemoryDownloadSize)
- {
- PayloadFile = std::make_unique<detail::TempPayloadFile>();
- std::error_code Ec = PayloadFile->Open(TempFolderPath, ContentLength.value());
- if (Ec)
- {
- ZEN_WARN("Failed to create temp file in '{}' for HttpClient::Download. Reason: {}",
- TempFolderPath.string(),
- Ec.message());
- PayloadFile.reset();
- }
- }
- else
- {
- PayloadString.reserve(ContentLength.value());
- }
- }
- }
- else if (StrCaseCompare(Key.c_str(), "Content-Type") == 0)
- {
- IsMultiRangeResponse = BoundaryParser.Init(Header.second);
- if (!IsMultiRangeResponse)
- {
- ContentType = ParseContentType(Header.second);
- }
- }
- else if (StrCaseCompare(Key.c_str(), "Content-Range") == 0)
- {
- if (!IsMultiRangeResponse)
- {
- std::pair<uint64_t, uint64_t> Range = detail::ParseContentRange(Header.second);
- if (Range.second != 0)
- {
- BoundaryParser.Boundaries.push_back(HttpClient::Response::MultipartBoundary{.OffsetInPayload = 0,
- .RangeOffset = Range.first,
- .RangeLength = Range.second,
- .ContentType = ContentType});
- }
- }
- }
- if (!Header.first.empty())
- {
- ReceivedHeaders.emplace_back(std::move(Header));
- }
- return 1;
- };
-
- Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken());
- Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback});
- for (const std::pair<std::string, std::string>& H : ReceivedHeaders)
- {
- Response.header.insert_or_assign(H.first, H.second);
- }
- }
- if (m_ConnectionSettings.AllowResume)
- {
- auto SupportsRanges = [](const cpr::Response& Response) -> bool {
- if (Response.header.find("Content-Range") != Response.header.end())
- {
- return true;
- }
- if (auto It = Response.header.find("Accept-Ranges"); It != Response.header.end())
- {
- return It->second == "bytes"sv;
- }
- return false;
- };
-
- auto ShouldResume = [&SupportsRanges, &IsMultiRangeResponse](const cpr::Response& Response) -> bool {
- if (IsMultiRangeResponse)
- {
- return false;
- }
- if (ShouldRetry(Response))
- {
- return SupportsRanges(Response);
- }
- return false;
- };
-
- if (ShouldResume(Response))
- {
- auto It = Response.header.find("Content-Length");
- if (It != Response.header.end())
- {
- uint64_t ContentLength = RequestedContentLength;
- if (ContentLength == uint64_t(-1))
- {
- if (auto ParsedContentLength = ParseInt<int64_t>(It->second); ParsedContentLength.has_value())
- {
- ContentLength = ParsedContentLength.value();
- }
- }
-
- std::vector<std::pair<std::string, std::string>> ReceivedHeaders;
-
- auto HeaderCallback = [&](std::string header, intptr_t) {
- const std::pair<std::string_view, std::string_view> Header = detail::GetHeaderKeyAndValue(header);
- if (!Header.first.empty())
- {
- ReceivedHeaders.emplace_back(std::move(Header));
- }
-
- if (StrCaseCompare(std::string(Header.first).c_str(), "Content-Range") == 0)
- {
- if (Header.second.starts_with("bytes "sv))
- {
- size_t RangeStartEnd = Header.second.find('-', 6);
- if (RangeStartEnd != std::string::npos)
- {
- const auto Start = ParseInt<uint64_t>(Header.second.substr(6, RangeStartEnd - 6));
- if (Start)
- {
- uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length();
- if (Start.value() == DownloadedSize)
- {
- return 1;
- }
- else if (Start.value() > DownloadedSize)
- {
- return 0;
- }
- if (PayloadFile)
- {
- PayloadFile->ResetWritePos(Start.value());
- }
- else
- {
- PayloadString = PayloadString.substr(0, Start.value());
- }
- return 1;
- }
- }
- }
- return 0;
- }
- return 1;
- };
-
- KeyValueMap HeadersWithRange(AdditionalHeader);
- do
- {
- uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length();
-
- std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength - 1);
- if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end())
- {
- if (RangeIt->second == Range)
- {
- // If we didn't make any progress, abort
- break;
- }
- }
- HeadersWithRange.Entries.insert_or_assign("Range", Range);
-
- Session Sess =
- AllocSession(m_BaseUri, Url, m_ConnectionSettings, HeadersWithRange, {}, m_SessionId, GetAccessToken());
- Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback});
- for (const std::pair<std::string, std::string>& H : ReceivedHeaders)
- {
- Response.header.insert_or_assign(H.first, H.second);
- }
- ReceivedHeaders.clear();
- } while (ShouldResume(Response));
- }
- }
- }
-
- if (!PayloadString.empty())
- {
- Response.text = std::move(PayloadString);
- }
- return Response;
- },
- PayloadFile);
-
- return CommonResponse(m_SessionId,
- std::move(Response),
- PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{},
- std::move(BoundaryParser.Boundaries));
-}
-
-} // namespace zen
diff --git a/src/zenhttp/clients/httpclientcpr.h b/src/zenhttp/clients/httpclientcpr.h
deleted file mode 100644
index 509ca5ae2..000000000
--- a/src/zenhttp/clients/httpclientcpr.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include "httpclientcommon.h"
-
-#include <zencore/logging.h>
-#include <zenhttp/cprutils.h>
-#include <zenhttp/httpclient.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <cpr/body.h>
-#include <cpr/session.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-class CprHttpClient : public HttpClientBase
-{
-public:
- CprHttpClient(std::string_view BaseUri, const HttpClientSettings& Connectionsettings, std::function<bool()>&& CheckIfAbortFunction);
- ~CprHttpClient();
-
- // HttpClientBase
-
- [[nodiscard]] virtual Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Put(std::string_view Url, const KeyValueMap& Parameters = {}) override;
- [[nodiscard]] virtual Response Get(std::string_view Url,
- const KeyValueMap& AdditionalHeader = {},
- const KeyValueMap& Parameters = {}) override;
- [[nodiscard]] virtual Response Head(std::string_view Url, const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Delete(std::string_view Url, const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Post(std::string_view Url,
- const KeyValueMap& AdditionalHeader = {},
- const KeyValueMap& Parameters = {}) override;
- [[nodiscard]] virtual Response Post(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Post(std::string_view Url,
- const IoBuffer& Payload,
- ZenContentType ContentType,
- const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Post(std::string_view Url,
- CbObject Payload,
- const KeyValueMap& AdditionalHeader = {},
- const std::filesystem::path& TempFolderPath = {}) override;
- [[nodiscard]] virtual Response Post(std::string_view Url, CbPackage Payload, const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Post(std::string_view Url,
- const CompositeBuffer& Payload,
- ZenContentType ContentType,
- const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Upload(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override;
- [[nodiscard]] virtual Response Upload(std::string_view Url,
- const CompositeBuffer& Payload,
- ZenContentType ContentType,
- const KeyValueMap& AdditionalHeader = {}) override;
-
- [[nodiscard]] virtual Response Download(std::string_view Url,
- const std::filesystem::path& TempFolderPath,
- const KeyValueMap& AdditionalHeader = {}) override;
-
- [[nodiscard]] virtual Response TransactPackage(std::string_view Url,
- CbPackage Package,
- const KeyValueMap& AdditionalHeader = {}) override;
-
-private:
- struct Session
- {
- Session(CprHttpClient* InOuter, cpr::Session* InSession) : Outer(InOuter), CprSession(InSession) {}
- ~Session() { Outer->ReleaseSession(CprSession); }
-
- inline cpr::Session* operator->() const { return CprSession; }
- inline cpr::Response Get()
- {
- ZEN_TRACE_CPU("HttpClient::Impl::Get");
- cpr::Response Result = CprSession->Get();
- ZEN_TRACE("GET {}", Result);
- return Result;
- }
- inline cpr::Response Download(cpr::WriteCallback&& Write, std::optional<cpr::HeaderCallback>&& Header = {})
- {
- ZEN_TRACE_CPU("HttpClient::Impl::Download");
- if (Header)
- {
- CprSession->SetHeaderCallback(std::move(Header.value()));
- }
- cpr::Response Result = CprSession->Download(Write);
- ZEN_TRACE("GET {}", Result);
- CprSession->SetHeaderCallback({});
- CprSession->SetWriteCallback({});
- return Result;
- }
- inline cpr::Response Head()
- {
- ZEN_TRACE_CPU("HttpClient::Impl::Head");
- cpr::Response Result = CprSession->Head();
- ZEN_TRACE("HEAD {}", Result);
- return Result;
- }
- inline cpr::Response Put(std::optional<cpr::ReadCallback>&& Read = {})
- {
- ZEN_TRACE_CPU("HttpClient::Impl::Put");
- if (Read)
- {
- CprSession->SetReadCallback(std::move(Read.value()));
- }
- cpr::Response Result = CprSession->Put();
- ZEN_TRACE("PUT {}", Result);
- CprSession->SetReadCallback({});
- return Result;
- }
- inline cpr::Response Post(std::optional<cpr::ReadCallback>&& Read = {},
- std::optional<cpr::WriteCallback>&& Write = {},
- std::optional<cpr::HeaderCallback>&& Header = {})
- {
- ZEN_TRACE_CPU("HttpClient::Impl::Post");
- if (Read)
- {
- CprSession->SetReadCallback(std::move(Read.value()));
- }
- if (Write)
- {
- CprSession->SetWriteCallback(std::move(Write.value()));
- }
- if (Header)
- {
- CprSession->SetHeaderCallback(std::move(Header.value()));
- }
- cpr::Response Result = CprSession->Post();
- ZEN_TRACE("POST {}", Result);
- CprSession->SetHeaderCallback({});
- CprSession->SetWriteCallback({});
- CprSession->SetReadCallback({});
- return Result;
- }
- inline cpr::Response Delete()
- {
- ZEN_TRACE_CPU("HttpClient::Impl::Delete");
- cpr::Response Result = CprSession->Delete();
- ZEN_TRACE("DELETE {}", Result);
- return Result;
- }
-
- LoggerRef Log() { return Outer->Log(); }
-
- private:
- CprHttpClient* Outer;
- cpr::Session* CprSession;
-
- Session(Session&&) = delete;
- Session& operator=(Session&&) = delete;
- };
-
- Session AllocSession(const std::string_view BaseUrl,
- const std::string_view Url,
- const HttpClientSettings& ConnectionSettings,
- const KeyValueMap& AdditionalHeader,
- const KeyValueMap& Parameters,
- const std::string_view SessionId,
- std::optional<std::string> AccessToken);
-
- RwLock m_SessionLock;
- std::vector<cpr::Session*> m_Sessions;
-
- void ReleaseSession(cpr::Session*);
-
- cpr::Response DoWithRetry(std::string_view SessionId,
- std::function<cpr::Response()>&& Func,
- std::unique_ptr<detail::TempPayloadFile>& PayloadFile);
- cpr::Response DoWithRetry(
- std::string_view SessionId,
- std::function<cpr::Response()>&& Func,
- std::function<bool(cpr::Response& Result)>&& Validate = [](cpr::Response&) { return true; });
-
- bool ShouldLogErrorCode(HttpResponseCode ResponseCode) const;
- bool ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile>& PayloadFile);
-
- HttpClient::Response CommonResponse(std::string_view SessionId,
- cpr::Response&& HttpResponse,
- IoBuffer&& Payload,
- std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions = {});
-
- HttpClient::Response ResponseWithPayload(std::string_view SessionId,
- cpr::Response&& HttpResponse,
- const HttpResponseCode WorkResponseCode,
- IoBuffer&& Payload,
- std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions);
-};
-
-} // namespace zen
diff --git a/src/zenhttp/clients/httpclientcurl.cpp b/src/zenhttp/clients/httpclientcurl.cpp
index e76157254..d150b44c6 100644
--- a/src/zenhttp/clients/httpclientcurl.cpp
+++ b/src/zenhttp/clients/httpclientcurl.cpp
@@ -980,7 +980,7 @@ CurlHttpClient::TransactPackage(std::string_view Url, CbPackage Package, const K
//
CurlHttpClient::Response
-CurlHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader)
+CurlHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters)
{
ZEN_TRACE_CPU("CurlHttpClient::Put");
@@ -989,7 +989,7 @@ CurlHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValu
DoWithRetry(
m_SessionId,
[&]() -> CurlResult {
- Session Sess = AllocSession(Url, {});
+ Session Sess = AllocSession(Url, Parameters);
CURL* H = Sess.Get();
Sess.SetHeaders(
diff --git a/src/zenhttp/clients/httpclientcurl.h b/src/zenhttp/clients/httpclientcurl.h
index b7fa52e6c..bdeb46633 100644
--- a/src/zenhttp/clients/httpclientcurl.h
+++ b/src/zenhttp/clients/httpclientcurl.h
@@ -21,7 +21,10 @@ public:
// HttpClientBase
- [[nodiscard]] virtual Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override;
+ [[nodiscard]] virtual Response Put(std::string_view Url,
+ const IoBuffer& Payload,
+ const KeyValueMap& AdditionalHeader = {},
+ const KeyValueMap& Parameters = {}) override;
[[nodiscard]] virtual Response Put(std::string_view Url, const KeyValueMap& Parameters = {}) override;
[[nodiscard]] virtual Response Get(std::string_view Url,
const KeyValueMap& AdditionalHeader = {},
diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp
index 13c86e9ae..ace7a3c7f 100644
--- a/src/zenhttp/httpclient.cpp
+++ b/src/zenhttp/httpclient.cpp
@@ -36,12 +36,6 @@
namespace zen {
-#if ZEN_WITH_CPR
-extern HttpClientBase* CreateCprHttpClient(std::string_view BaseUri,
- const HttpClientSettings& ConnectionSettings,
- std::function<bool()>&& CheckIfAbortFunction);
-#endif
-
extern HttpClientBase* CreateCurlHttpClient(std::string_view BaseUri,
const HttpClientSettings& ConnectionSettings,
std::function<bool()>&& CheckIfAbortFunction);
@@ -57,14 +51,7 @@ SetDefaultHttpClientBackend(HttpClientBackend Backend)
void
SetDefaultHttpClientBackend(std::string_view Backend)
{
-#if ZEN_WITH_CPR
- if (Backend == "cpr")
- {
- g_DefaultHttpClientBackend = HttpClientBackend::kCpr;
- }
- else
-#endif
- if (Backend == "curl")
+ if (Backend == "curl")
{
g_DefaultHttpClientBackend = HttpClientBackend::kCurl;
}
@@ -378,22 +365,7 @@ HttpClient::HttpClient(std::string_view BaseUri, const HttpClientSettings& Conne
, m_ConnectionSettings(ConnectionSettings)
{
m_SessionId = GetSessionIdString();
-
- HttpClientBackend EffectiveBackend =
- ConnectionSettings.Backend != HttpClientBackend::kDefault ? ConnectionSettings.Backend : g_DefaultHttpClientBackend;
-
- switch (EffectiveBackend)
- {
-#if ZEN_WITH_CPR
- case HttpClientBackend::kCpr:
- m_Inner = CreateCprHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction));
- break;
-#endif
- case HttpClientBackend::kCurl:
- default:
- m_Inner = CreateCurlHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction));
- break;
- }
+ m_Inner = CreateCurlHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction));
}
HttpClient::~HttpClient()
@@ -402,6 +374,13 @@ HttpClient::~HttpClient()
}
void
+HttpClient::SetBaseUri(std::string_view NewBaseUri)
+{
+ m_BaseUri = NewBaseUri;
+ m_Inner->SetBaseUri(NewBaseUri);
+}
+
+void
HttpClient::SetSessionId(const Oid& SessionId)
{
if (SessionId == Oid::Zero)
@@ -415,9 +394,12 @@ HttpClient::SetSessionId(const Oid& SessionId)
}
HttpClient::Response
-HttpClient::Put(std::string_view Url, const IoBuffer& Payload, const HttpClient::KeyValueMap& AdditionalHeader)
+HttpClient::Put(std::string_view Url,
+ const IoBuffer& Payload,
+ const HttpClient::KeyValueMap& AdditionalHeader,
+ const HttpClient::KeyValueMap& Parameters)
{
- return m_Inner->Put(Url, Payload, AdditionalHeader);
+ return m_Inner->Put(Url, Payload, AdditionalHeader, Parameters);
}
HttpClient::Response
@@ -977,6 +959,71 @@ TEST_CASE("httpclient.password")
AsioServer->RequestExit();
}
}
+TEST_CASE("httpclient.setbaseuri")
+{
+ struct TestHttpService : public HttpService
+ {
+ explicit TestHttpService(std::string_view Identity) : m_Identity(Identity) {}
+
+ virtual const char* BaseUri() const override { return "/test/"; }
+ virtual void HandleRequest(HttpServerRequest& Req) override
+ {
+ Req.WriteResponse(HttpResponseCode::OK, HttpContentType::kText, m_Identity);
+ }
+
+ std::string m_Identity;
+ };
+
+ ScopedTemporaryDirectory TmpDir1;
+ ScopedTemporaryDirectory TmpDir2;
+ TestHttpService Service1("server-one");
+ TestHttpService Service2("server-two");
+
+ Ref<HttpServer> Server1 = CreateHttpAsioServer(AsioConfig{});
+ Ref<HttpServer> Server2 = CreateHttpAsioServer(AsioConfig{});
+
+ int Port1 = Server1->Initialize(0, TmpDir1.Path());
+ int Port2 = Server2->Initialize(0, TmpDir2.Path());
+ REQUIRE(Port1 != -1);
+ REQUIRE(Port2 != -1);
+
+ Server1->RegisterService(Service1);
+ Server2->RegisterService(Service2);
+
+ std::thread Thread1([&]() { Server1->Run(false); });
+ std::thread Thread2([&]() { Server2->Run(false); });
+
+ auto _ = MakeGuard([&]() {
+ if (Thread1.joinable())
+ {
+ Thread1.join();
+ }
+ if (Thread2.joinable())
+ {
+ Thread2.join();
+ }
+ Server1->Close();
+ Server2->Close();
+ });
+
+ HttpClient Client(fmt::format("127.0.0.1:{}", Port1), HttpClientSettings{}, {});
+ CHECK_EQ(Client.GetBaseUri(), fmt::format("127.0.0.1:{}", Port1));
+
+ HttpClient::Response Resp1 = Client.Get("/test/hello");
+ CHECK(Resp1.IsSuccess());
+ CHECK_EQ(Resp1.AsText(), "server-one");
+
+ Client.SetBaseUri(fmt::format("127.0.0.1:{}", Port2));
+ CHECK_EQ(Client.GetBaseUri(), fmt::format("127.0.0.1:{}", Port2));
+
+ HttpClient::Response Resp2 = Client.Get("/test/hello");
+ CHECK(Resp2.IsSuccess());
+ CHECK_EQ(Resp2.AsText(), "server-two");
+
+ Server1->RequestExit();
+ Server2->RequestExit();
+}
+
TEST_SUITE_END();
void
diff --git a/src/zenhttp/httpclient_test.cpp b/src/zenhttp/httpclient_test.cpp
index 7a657c464..af653cbb2 100644
--- a/src/zenhttp/httpclient_test.cpp
+++ b/src/zenhttp/httpclient_test.cpp
@@ -492,6 +492,17 @@ TEST_CASE("httpclient.put")
CHECK_EQ(Resp.StatusCode, HttpResponseCode::Created);
CHECK_EQ(Resp.AsText(), "resource created");
}
+
+ SUBCASE("PUT with payload and query parameters")
+ {
+ const char* Payload = "put payload data";
+ IoBuffer Buf(IoBuffer::Clone, Payload, strlen(Payload));
+ Buf.SetContentType(ZenContentType::kText);
+
+ HttpClient::Response Resp = Client.Put("/api/test/echo/uri", Buf, {}, {{"key", "value"}});
+ CHECK(Resp.IsSuccess());
+ CHECK_EQ(Resp.AsText(), "echo/uri\nkey=value");
+ }
}
TEST_CASE("httpclient.upload")
diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp
index a46c5b851..e05c9815f 100644
--- a/src/zenhttp/httpserver.cpp
+++ b/src/zenhttp/httpserver.cpp
@@ -329,6 +329,10 @@ ReasonStringForHttpResultCode(int HttpCode)
return "Continue"sv;
case 101:
return "Switching Protocols"sv;
+ case 102:
+ return "Processing"sv;
+ case 103:
+ return "Early Hints"sv;
// 2xx Success
@@ -338,12 +342,20 @@ ReasonStringForHttpResultCode(int HttpCode)
return "Created"sv;
case 202:
return "Accepted"sv;
+ case 203:
+ return "Non-Authoritative Information"sv;
case 204:
return "No Content"sv;
case 205:
return "Reset Content"sv;
case 206:
return "Partial Content"sv;
+ case 207:
+ return "Multi-Status"sv;
+ case 208:
+ return "Already Reported"sv;
+ case 226:
+ return "IM Used"sv;
// 3xx Redirection
@@ -424,6 +436,8 @@ ReasonStringForHttpResultCode(int HttpCode)
return "Too Many Requests"sv;
case 431:
return "Request Header Fields Too Large"sv;
+ case 451:
+ return "Unavailable For Legal Reasons"sv;
// 5xx Server errors
@@ -798,7 +812,18 @@ HttpRequestRouter::HandleRequest(zen::HttpServerRequest& Request)
const HttpVerb Verb = Request.RequestVerb();
- std::string_view Uri = Request.RelativeUri();
+ std::string_view Uri = Request.RelativeUri();
+
+ // Strip the separator slash left over after the service prefix is removed.
+ // When a service has BaseUri "/foo", the prefix length is set to len("/foo") = 4.
+ // Stripping 4 chars from "/foo/bar" yields "/bar" — the path separator becomes
+ // the first character of the relative URI. Remove it so patterns like "bar" or
+ // "{id}" match without needing to account for the leading slash.
+ if (!Uri.empty() && Uri.front() == '/')
+ {
+ Uri.remove_prefix(1);
+ }
+
HttpRouterRequest RouterRequest(Request);
for (const MatcherEndpoint& Handler : m_MatcherEndpoints)
@@ -974,6 +999,12 @@ HttpServer::SetHttpRequestFilter(IHttpRequestFilter* RequestFilter)
OnSetHttpRequestFilter(RequestFilter);
}
+void
+HttpServer::HandleStatsRequest(HttpServerRequest& Request)
+{
+ Request.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
CbObject
HttpServer::CollectStats()
{
@@ -1004,12 +1035,6 @@ HttpServer::CollectStats()
return Cbo.Save();
}
-void
-HttpServer::HandleStatsRequest(HttpServerRequest& Request)
-{
- Request.WriteResponse(HttpResponseCode::OK, CollectStats());
-}
-
//////////////////////////////////////////////////////////////////////////
HttpRpcHandler::HttpRpcHandler()
@@ -1446,6 +1471,78 @@ TEST_CASE("http.common")
}
}
+ SUBCASE("router-leading-slash")
+ {
+ // Verify that HandleRequest strips the leading slash that server implementations
+ // leave in RelativeUri() when the service base URI has no trailing slash.
+ // e.g. BaseUri "/stats" + prefix-strip of "/stats/foo" yields "/foo", not "foo".
+
+ bool HandledLiteral = false;
+ bool HandledPattern = false;
+ bool HandledTwoSeg = false;
+ std::vector<std::string> Captures;
+ auto Reset = [&] {
+ HandledLiteral = HandledPattern = HandledTwoSeg = false;
+ Captures.clear();
+ };
+
+ TestHttpService Service;
+ HttpRequestRouter r;
+
+ r.AddMatcher("seg", [](std::string_view In) -> bool { return !In.empty() && In.find('/') == std::string_view::npos; });
+
+ r.RegisterRoute(
+ "activity_counters",
+ [&](auto& /*Req*/) { HandledLiteral = true; },
+ HttpVerb::kGet);
+
+ r.RegisterRoute(
+ "{seg}",
+ [&](auto& Req) {
+ HandledPattern = true;
+ Captures = {std::string(Req.GetCapture(1))};
+ },
+ HttpVerb::kGet);
+
+ r.RegisterRoute(
+ "prefix/{seg}",
+ [&](auto& Req) {
+ HandledTwoSeg = true;
+ Captures = {std::string(Req.GetCapture(1))};
+ },
+ HttpVerb::kGet);
+
+ // Single-segment literal with leading slash — simulates real server RelativeUri
+ {
+ Reset();
+ TestHttpServerRequest req{Service, "/activity_counters"sv};
+ r.HandleRequest(req);
+ CHECK(HandledLiteral);
+ CHECK(!HandledPattern);
+ }
+
+ // Single-segment pattern with leading slash
+ {
+ Reset();
+ TestHttpServerRequest req{Service, "/hello"sv};
+ r.HandleRequest(req);
+ CHECK(!HandledLiteral);
+ CHECK(HandledPattern);
+ REQUIRE_EQ(Captures.size(), 1);
+ CHECK_EQ(Captures[0], "hello"sv);
+ }
+
+ // Two-segment route with leading slash — first literal segment
+ {
+ Reset();
+ TestHttpServerRequest req{Service, "/prefix/world"sv};
+ r.HandleRequest(req);
+ CHECK(HandledTwoSeg);
+ REQUIRE_EQ(Captures.size(), 1);
+ CHECK_EQ(Captures[0], "world"sv);
+ }
+ }
+
SUBCASE("content-type")
{
for (uint8_t i = 0; i < uint8_t(HttpContentType::kCOUNT); ++i)
diff --git a/src/zenhttp/include/zenhttp/auth/authservice.h b/src/zenhttp/include/zenhttp/auth/authservice.h
index 64b86e21f..ee67c0f5b 100644
--- a/src/zenhttp/include/zenhttp/auth/authservice.h
+++ b/src/zenhttp/include/zenhttp/auth/authservice.h
@@ -8,14 +8,14 @@ namespace zen {
class AuthMgr;
-class HttpAuthService final : public zen::HttpService
+class HttpAuthService final : public HttpService
{
public:
HttpAuthService(AuthMgr& AuthMgr);
virtual ~HttpAuthService();
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
private:
AuthMgr& m_AuthMgr;
diff --git a/src/zenhttp/include/zenhttp/cprutils.h b/src/zenhttp/include/zenhttp/cprutils.h
deleted file mode 100644
index 3cfe652c5..000000000
--- a/src/zenhttp/include/zenhttp/cprutils.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#if ZEN_WITH_CPR
-
-# include <zencore/compactbinary.h>
-# include <zencore/compactbinaryvalidation.h>
-# include <zencore/iobuffer.h>
-# include <zencore/string.h>
-# include <zenhttp/formatters.h>
-# include <zenhttp/httpclient.h>
-# include <zenhttp/httpcommon.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <cpr/response.h>
-# include <fmt/format.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-template<>
-struct fmt::formatter<cpr::Response>
-{
- constexpr auto parse(format_parse_context& Ctx) -> decltype(Ctx.begin()) { return Ctx.end(); }
-
- template<typename FormatContext>
- auto format(const cpr::Response& Response, FormatContext& Ctx) const -> decltype(Ctx.out())
- {
- using namespace std::literals;
-
- if (Response.error)
- {
- return fmt::format_to(Ctx.out(),
- "Failed: Url: {}, Reason: ({}) '{}'",
- Response.url.str(),
- int(Response.error.code),
- Response.error.message);
- }
- else
- {
- const zen::NiceTimeSpanMs NiceResponseTime(uint64_t(Response.elapsed * 1000));
-
- if (zen::IsHttpSuccessCode(Response.status_code))
- {
- return fmt::format_to(Ctx.out(),
- "OK: Url: {}, Status: ({}) '{}', Bytes: {}/{} (Up/Down), Elapsed: {}",
- Response.url.str(),
- Response.status_code,
- zen::ToString(zen::HttpResponseCode(Response.status_code)),
- Response.uploaded_bytes,
- Response.downloaded_bytes,
- NiceResponseTime.c_str());
- }
- else
- {
- const auto It = Response.header.find("Content-Type");
- const std::string_view ContentType = It != Response.header.end() ? It->second : "<None>"sv;
-
- if (ContentType == "application/x-ue-cb"sv)
- {
- zen::IoBuffer Body(zen::IoBuffer::Wrap, Response.text.data(), Response.text.size());
- zen::CbObjectView Obj(Body.Data());
- zen::ExtendableStringBuilder<256> Sb;
- std::string_view Json = Obj.ToJson(Sb).ToView();
-
- return fmt::format_to(
- Ctx.out(),
- "Failed: Url: {}, Status: ({}) '{}', Reason: '{}'. Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}'",
- Response.url.str(),
- Response.status_code,
- zen::ToString(zen::HttpResponseCode(Response.status_code)),
- Response.reason,
- Response.uploaded_bytes,
- Response.downloaded_bytes,
- NiceResponseTime.c_str(),
- Json);
- }
- else
- {
- zen::BodyLogFormatter Body(Response.text);
-
- return fmt::format_to(
- Ctx.out(),
- "Failed: Url: {}, Status: ({}) '{}', Reason: '{}'. Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}'",
- Response.url.str(),
- Response.status_code,
- zen::ToString(zen::HttpResponseCode(Response.status_code)),
- Response.reason,
- Response.uploaded_bytes,
- Response.downloaded_bytes,
- NiceResponseTime.c_str(),
- Body.GetText());
- }
- }
- }
- }
-};
-
-#endif // ZEN_WITH_CPR
diff --git a/src/zenhttp/include/zenhttp/httpclient.h b/src/zenhttp/include/zenhttp/httpclient.h
index 9531b9366..e199b700f 100644
--- a/src/zenhttp/include/zenhttp/httpclient.h
+++ b/src/zenhttp/include/zenhttp/httpclient.h
@@ -52,9 +52,6 @@ enum class HttpClientErrorCode : int
enum class HttpClientBackend : uint8_t
{
kDefault,
-#if ZEN_WITH_CPR
- kCpr,
-#endif
kCurl,
};
@@ -326,7 +323,10 @@ public:
return std::make_pair("Accept", MapContentTypeToString(ContentType));
}
- [[nodiscard]] Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {});
+ [[nodiscard]] Response Put(std::string_view Url,
+ const IoBuffer& Payload,
+ const KeyValueMap& AdditionalHeader = {},
+ const KeyValueMap& Parameters = {});
[[nodiscard]] Response Put(std::string_view Url, const KeyValueMap& Parameters = {});
[[nodiscard]] Response Get(std::string_view Url, const KeyValueMap& AdditionalHeader = {}, const KeyValueMap& Parameters = {});
[[nodiscard]] Response Head(std::string_view Url, const KeyValueMap& AdditionalHeader = {});
@@ -361,6 +361,7 @@ public:
LoggerRef Log() { return m_Log; }
std::string_view GetBaseUri() const { return m_BaseUri; }
std::string_view GetSessionId() const { return m_SessionId; }
+ void SetBaseUri(std::string_view NewBaseUri);
void SetSessionId(const Oid& SessionId);
bool Authenticate();
diff --git a/src/zenhttp/include/zenhttp/httpcommon.h b/src/zenhttp/include/zenhttp/httpcommon.h
index 8fca35ac5..f9a99f3cc 100644
--- a/src/zenhttp/include/zenhttp/httpcommon.h
+++ b/src/zenhttp/include/zenhttp/httpcommon.h
@@ -91,6 +91,7 @@ enum class HttpResponseCode
//!< were not for the fact that the condition has evaluated to false.
UseProxy = 305, //!< \deprecated \parblock Due to security concerns regarding in-band configuration of a proxy. \endparblock
//!< The requested resource MUST be accessed through the proxy given by the Location field.
+ SwitchProxy = 306, //!< \deprecated No longer used. Originally meant subsequent requests should use the specified proxy.
TemporaryRedirect = 307, //!< Indicates that the target resource resides temporarily under a different URI and the user agent MUST NOT
//!< change the request method if it performs an automatic redirection to that URI.
PermanentRedirect = 308, //!< The target resource has been assigned a new permanent URI and any future references to this resource
@@ -133,12 +134,14 @@ enum class HttpResponseCode
ExpectationFailed = 417, //!< Indicates that the expectation given in the request's Expect header field could not be met by at least
//!< one of the inbound servers.
ImATeapot = 418, //!< Any attempt to brew coffee with a teapot should result in the error code 418 I'm a teapot.
+ MisdirectedRequest = 421, //!< Indicates that the request was directed at a server that is not able to produce a response.
UnprocessableEntity = 422, //!< Means the server understands the content type of the request entity (hence a 415(Unsupported Media
//!< Type) status code is inappropriate), and the syntax of the request entity is correct (thus a 400 (Bad
//!< Request) status code is inappropriate) but was unable to process the contained instructions.
Locked = 423, //!< Means the source or destination resource of a method is locked.
FailedDependency = 424, //!< Means that the method could not be performed on the resource because the requested action depended on
//!< another action and that action failed.
+ TooEarly = 425, //!< Indicates that the server is unwilling to risk processing a request that might be replayed.
UpgradeRequired = 426, //!< Indicates that the server refuses to perform the request using the current protocol but might be willing to
//!< do so after the client upgrades to a different protocol.
PreconditionRequired = 428, //!< Indicates that the origin server requires the request to be conditional.
diff --git a/src/zenhttp/include/zenhttp/httpserver.h b/src/zenhttp/include/zenhttp/httpserver.h
index 633eb06be..5eaed6004 100644
--- a/src/zenhttp/include/zenhttp/httpserver.h
+++ b/src/zenhttp/include/zenhttp/httpserver.h
@@ -220,6 +220,12 @@ struct IHttpStatsProvider
* not override this will be skipped in WebSocket broadcasts.
*/
virtual CbObject CollectStats() { return {}; }
+
+ /** Return a number indicating activity. Implementations should increase
+ * the number whenever activity is detected; for example, by returning the
+ * number of received requests.
+ *
+ * The default implementation returns 0.
+ */
+ virtual uint64_t GetActivityCounter() { return 0; }
};
struct IHttpStatsService
@@ -302,8 +308,8 @@ public:
}
// IHttpStatsProvider
- virtual CbObject CollectStats() override;
virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
private:
std::vector<HttpService*> m_KnownServices;
diff --git a/src/zenhttp/include/zenhttp/httpstats.h b/src/zenhttp/include/zenhttp/httpstats.h
index 460315faf..bce771c75 100644
--- a/src/zenhttp/include/zenhttp/httpstats.h
+++ b/src/zenhttp/include/zenhttp/httpstats.h
@@ -62,6 +62,7 @@ private:
std::atomic<bool> m_PushEnabled{false};
void BroadcastStats();
+ void Initialize();
// Thread-based push (when no io_context is provided)
std::thread m_PushThread;
diff --git a/src/zenhttp/monitoring/httpstats.cpp b/src/zenhttp/monitoring/httpstats.cpp
index 283cedca7..7e6207e56 100644
--- a/src/zenhttp/monitoring/httpstats.cpp
+++ b/src/zenhttp/monitoring/httpstats.cpp
@@ -16,6 +16,7 @@ HttpStatsService::HttpStatsService(bool EnableWebSockets) : m_Log(logging::Get("
m_PushEnabled.store(true);
m_PushThread = std::thread([this] { PushThreadFunction(); });
}
+ Initialize();
}
HttpStatsService::HttpStatsService(asio::io_context& IoContext, bool EnableWebSockets) : m_Log(logging::Get("stats"))
@@ -26,6 +27,110 @@ HttpStatsService::HttpStatsService(asio::io_context& IoContext, bool EnableWebSo
m_PushTimer = std::make_unique<asio::steady_timer>(IoContext);
EnqueuePushTimer();
}
+ Initialize();
+}
+
+// Registers the URI matcher and the routes served by the stats service on m_Router.
+// Called from both constructors. The literal "activity_counters" route is registered
+// before the "{handler_id}" pattern route; keep that order when adding routes.
+void
+HttpStatsService::Initialize()
+{
+    // Accepts a non-empty capture consisting solely of alphanumeric characters
+    // (per std::isalnum) and '$'.
+    m_Router.AddMatcher("handler_id", [](std::string_view Str) -> bool {
+        if (Str.empty())
+        {
+            return false;
+        }
+        for (const char C : Str)
+        {
+            // std::isalnum has undefined behavior for negative arguments other than EOF,
+            // so convert through unsigned char (bytes >= 0x80 are negative where char is signed).
+            if (!std::isalnum(static_cast<unsigned char>(C)) && C != '$')
+            {
+                return false;
+            }
+        }
+        return true;
+    });
+
+    // GET activity_counters: reports each provider's non-zero activity counter plus the sum
+    // over all providers.
+    m_Router.RegisterRoute(
+        "activity_counters",
+        [this](HttpRouterRequest& Request) {
+            CbObjectWriter Obj;
+
+            std::uint64_t SumActivity = 0;
+
+            // Snapshot the counters while holding the shared lock; the response is built
+            // after the lock has been released.
+            std::vector<std::pair<std::string, uint64_t>> Activities;
+            {
+                RwLock::SharedLockScope _(m_Lock);
+                Activities.reserve(m_Providers.size());
+                for (const auto& It : m_Providers)
+                {
+                    const std::string&  HandlerName = It.first;
+                    IHttpStatsProvider* Provider    = It.second;
+                    ZEN_ASSERT(Provider != nullptr);
+                    const uint64_t ProviderActivityCounter = Provider->GetActivityCounter();
+                    if (ProviderActivityCounter != 0)
+                    {
+                        // Providers reporting zero are left out of the list
+                        Activities.emplace_back(HandlerName, ProviderActivityCounter);
+                    }
+                    SumActivity += ProviderActivityCounter;
+                }
+            }
+
+            Obj.BeginArray("providers");
+            for (const std::pair<std::string, uint64_t>& Activity : Activities)
+            {
+                Obj.BeginObject();
+                {
+                    Obj.AddString("provider", Activity.first);
+                    Obj.AddInteger("activity_counter", Activity.second);
+                }
+                Obj.EndObject();
+            }
+            Obj.EndArray();
+
+            Obj.AddInteger("sum", SumActivity);
+
+            Request.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
+        },
+        HttpVerb::kGet);
+
+    // HEAD/GET {handler_id}: forwards the request to the provider registered under that
+    // name, or responds 404 when there is none.
+    m_Router.RegisterRoute(
+        "{handler_id}",
+        [this](HttpRouterRequest& Request) {
+            std::string_view Handler = Request.GetCapture(1);
+            // The shared lock stays held while the provider handles the request
+            RwLock::SharedLockScope _(m_Lock);
+            if (auto It = m_Providers.find(std::string{Handler}); It != end(m_Providers))
+            {
+                return It->second->HandleStatsRequest(Request.ServerRequest());
+            }
+            Request.ServerRequest().WriteResponse(HttpResponseCode::NotFound);
+        },
+        HttpVerb::kHead | HttpVerb::kGet);
+
+    // HEAD/GET on the empty route: lists the names of all registered providers.
+    m_Router.RegisterRoute(
+        "",
+        [this](HttpRouterRequest& Request) {
+            CbObjectWriter Cbo;
+
+            Cbo.BeginArray("providers");
+
+            {
+                RwLock::SharedLockScope _(m_Lock);
+                for (auto& Kv : m_Providers)
+                {
+                    Cbo << Kv.first;
+                }
+            }
+
+            Cbo.EndArray();
+
+            Request.ServerRequest().WriteResponse(HttpResponseCode::OK, Cbo.Save());
+        },
+        HttpVerb::kHead | HttpVerb::kGet);
}
HttpStatsService::~HttpStatsService()
@@ -82,54 +187,7 @@ void
HttpStatsService::HandleRequest(HttpServerRequest& Request)
{
ZEN_TRACE_CPU("HttpStatsService::HandleRequest");
- using namespace std::literals;
-
- std::string_view Key = Request.RelativeUri();
-
- switch (Request.RequestVerb())
- {
- case HttpVerb::kHead:
- case HttpVerb::kGet:
- {
- if (Key.empty())
- {
- CbObjectWriter Cbo;
-
- Cbo.BeginArray("providers");
-
- {
- RwLock::SharedLockScope _(m_Lock);
- for (auto& Kv : m_Providers)
- {
- Cbo << Kv.first;
- }
- }
-
- Cbo.EndArray();
-
- Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
- }
- else if (Key[0] == '/')
- {
- Key.remove_prefix(1);
- size_t SlashPos = Key.find_first_of("/?");
- if (SlashPos != std::string::npos)
- {
- Key = Key.substr(0, SlashPos);
- }
-
- RwLock::SharedLockScope _(m_Lock);
- if (auto It = m_Providers.find(std::string{Key}); It != end(m_Providers))
- {
- return It->second->HandleStatsRequest(Request);
- }
- }
- }
-
- [[fallthrough]];
- default:
- return;
- }
+ m_Router.HandleRequest(Request);
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/zenhttp/xmake.lua b/src/zenhttp/xmake.lua
index b4c65ea96..7b050ae35 100644
--- a/src/zenhttp/xmake.lua
+++ b/src/zenhttp/xmake.lua
@@ -9,12 +9,7 @@ target('zenhttp')
add_files("servers/wshttpsys.cpp", {unity_ignored=true})
add_includedirs("include", {public=true})
add_deps("zencore", "zentelemetry", "transport-sdk", "asio")
- if has_config("zencpr") then
- add_deps("cpr")
- else
- remove_files("clients/httpclientcpr.cpp")
- end
- add_packages("http_parser", "json11")
+ add_packages("http_parser", "json11", "libcurl")
add_options("httpsys")
if is_plat("linux", "macosx") then
diff --git a/src/zenremotestore/builds/jupiterbuildstorage.cpp b/src/zenremotestore/builds/jupiterbuildstorage.cpp
index c3f7b9e71..ad4c4bc89 100644
--- a/src/zenremotestore/builds/jupiterbuildstorage.cpp
+++ b/src/zenremotestore/builds/jupiterbuildstorage.cpp
@@ -14,7 +14,7 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
ZEN_THIRD_PARTY_INCLUDES_END
-#include <regex>
+#include <string_view>
namespace zen {
@@ -572,35 +572,135 @@ ParseBuildStorageUrl(std::string_view InUrl,
Url.erase(ApiString, ExtendedApiString.length());
}
- const std::string ArtifactURLRegExString = R"((http[s]?:\/\/.*?)\/(.*?)\/(.*?)\/(.*))";
- const std::regex ArtifactURLRegEx(ArtifactURLRegExString, std::regex::ECMAScript | std::regex::icase);
- std::match_results<std::string_view::const_iterator> MatchResults;
- std::string_view UrlToParse(Url);
- if (regex_match(begin(UrlToParse), end(UrlToParse), MatchResults, ArtifactURLRegEx) && MatchResults.size() == 5)
- {
- auto GetMatch = [&MatchResults](uint32_t Index) -> std::string_view {
- ZEN_ASSERT(Index < MatchResults.size());
+ // Parse URL of the form: http[s]://host/namespace/bucket/buildid
+ std::string_view Remaining(Url);
- const auto& Match = MatchResults[Index];
+ // Find the end of the scheme (e.g. "http://" or "https://")
+ size_t SchemeEnd = Remaining.find("://");
+ if (SchemeEnd == std::string_view::npos)
+ {
+ return false;
+ }
+ SchemeEnd += 3; // skip past "://"
- return std::string_view(&*Match.first, Match.second - Match.first);
- };
+ // Find the first '/' after the host
+ size_t HostEnd = Remaining.find('/', SchemeEnd);
+ if (HostEnd == std::string_view::npos)
+ {
+ return false;
+ }
- const std::string_view Host = GetMatch(1);
- const std::string_view Namespace = GetMatch(2);
- const std::string_view Bucket = GetMatch(3);
- const std::string_view BuildId = GetMatch(4);
+ // Find the '/' after namespace
+ size_t NamespaceEnd = Remaining.find('/', HostEnd + 1);
+ if (NamespaceEnd == std::string_view::npos)
+ {
+ return false;
+ }
- OutHost = Host;
- OutNamespace = Namespace;
- OutBucket = Bucket;
- OutBuildId = BuildId;
- return true;
+ // Find the '/' after bucket
+ size_t BucketEnd = Remaining.find('/', NamespaceEnd + 1);
+ if (BucketEnd == std::string_view::npos)
+ {
+ return false;
}
- else
+
+ // BuildId must be non-empty
+ if (BucketEnd + 1 >= Remaining.size())
{
return false;
}
+
+ OutHost = Remaining.substr(0, HostEnd);
+ OutNamespace = Remaining.substr(HostEnd + 1, NamespaceEnd - HostEnd - 1);
+ OutBucket = Remaining.substr(NamespaceEnd + 1, BucketEnd - NamespaceEnd - 1);
+ OutBuildId = Remaining.substr(BucketEnd + 1);
+ return true;
+}
+
+} // namespace zen
+
+#if ZEN_WITH_TESTS
+
+# include <zencore/testing.h>
+
+namespace zen {
+
+// Intentionally empty. Called from zenremotestore_forcelinktests(); presumably it exists
+// only so the linker keeps this translation unit and its test cases - TODO confirm.
+void
+jupiterbuildstorage_forcelink()
+{
+}
} // namespace zen
+
+TEST_SUITE_BEGIN("remotestore.jupiterbuildstorage");
+
+TEST_CASE("ParseBuildStorageUrl.ValidUrl")
+{
+    // Plain form: scheme://host/namespace/bucket/buildid
+    constexpr std::string_view Url = "https://horde.devtools.epicgames.com/mynamespace/mybucket/mybuildid";
+
+    std::string Host;
+    std::string Namespace;
+    std::string Bucket;
+    std::string BuildId;
+
+    const bool Result = zen::ParseBuildStorageUrl(Url, Host, Namespace, Bucket, BuildId);
+
+    // The scheme is expected to stay attached to the host component
+    CHECK(Result);
+    CHECK(Host == "https://horde.devtools.epicgames.com");
+    CHECK(Namespace == "mynamespace");
+    CHECK(Bucket == "mybucket");
+    CHECK(BuildId == "mybuildid");
+}
+
+TEST_CASE("ParseBuildStorageUrl.ValidUrlWithApiPrefix")
+{
+    // Same URL as the plain form but with "/api/v2/builds" between host and namespace;
+    // the parsed components are expected to match the plain form.
+    constexpr std::string_view Url = "https://horde.devtools.epicgames.com/api/v2/builds/mynamespace/mybucket/mybuildid";
+
+    std::string Host;
+    std::string Namespace;
+    std::string Bucket;
+    std::string BuildId;
+
+    const bool Result = zen::ParseBuildStorageUrl(Url, Host, Namespace, Bucket, BuildId);
+
+    CHECK(Result);
+    CHECK(Host == "https://horde.devtools.epicgames.com");
+    CHECK(Namespace == "mynamespace");
+    CHECK(Bucket == "mybucket");
+    CHECK(BuildId == "mybuildid");
+}
+
+TEST_CASE("ParseBuildStorageUrl.HttpScheme")
+{
+    // Plain "http" (no TLS) must be accepted as well
+    constexpr std::string_view Url = "http://localhost/ns/bucket/build123";
+
+    std::string Host;
+    std::string Namespace;
+    std::string Bucket;
+    std::string BuildId;
+
+    const bool Result = zen::ParseBuildStorageUrl(Url, Host, Namespace, Bucket, BuildId);
+
+    CHECK(Result);
+    CHECK(Host == "http://localhost");
+    CHECK(Namespace == "ns");
+    CHECK(Bucket == "bucket");
+    CHECK(BuildId == "build123");
+}
+
+TEST_CASE("ParseBuildStorageUrl.BuildIdWithSlashes")
+{
+    // Everything after the bucket separator belongs to the build id, slashes included
+    constexpr std::string_view Url = "https://host/ns/bucket/build/with/slashes";
+
+    std::string Host;
+    std::string Namespace;
+    std::string Bucket;
+    std::string BuildId;
+
+    const bool Result = zen::ParseBuildStorageUrl(Url, Host, Namespace, Bucket, BuildId);
+
+    CHECK(Result);
+    CHECK(Host == "https://host");
+    CHECK(Namespace == "ns");
+    CHECK(Bucket == "bucket");
+    CHECK(BuildId == "build/with/slashes");
+}
+
+// A URL that ends right after the bucket's trailing '/' has an empty build id
+// and must be rejected.
+TEST_CASE("ParseBuildStorageUrl.MissingBuildId")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns/bucket/", Host, Namespace, Bucket, BuildId));
+}
+
+// With no '/' following the namespace there is no bucket (or build id) component,
+// so parsing must fail.
+TEST_CASE("ParseBuildStorageUrl.MissingBucket")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns", Host, Namespace, Bucket, BuildId));
+}
+
+// Input without a "://" scheme separator must be rejected even when the
+// remaining path components are all present.
+TEST_CASE("ParseBuildStorageUrl.NoScheme")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ CHECK_FALSE(zen::ParseBuildStorageUrl("host/ns/bucket/buildid", Host, Namespace, Bucket, BuildId));
+}
+
+TEST_SUITE_END();
+
+#endif // ZEN_WITH_TESTS
diff --git a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
index 888ec8ead..270835521 100644
--- a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
+++ b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
@@ -22,4 +22,6 @@ bool ParseBuildStorageUrl(std::string_view InUrl,
std::string& OutBucket,
std::string& OutBuildId);
+void jupiterbuildstorage_forcelink();
+
} // namespace zen
diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp
index a0bb17260..0b205b296 100644
--- a/src/zenremotestore/zenremotestore.cpp
+++ b/src/zenremotestore/zenremotestore.cpp
@@ -5,6 +5,7 @@
#include <zenremotestore/builds/buildmanifest.h>
#include <zenremotestore/builds/buildsavedstate.h>
#include <zenremotestore/builds/buildstorageoperations.h>
+#include <zenremotestore/builds/jupiterbuildstorage.h>
#include <zenremotestore/chunking/chunkedcontent.h>
#include <zenremotestore/chunking/chunkedfile.h>
#include <zenremotestore/chunking/chunkingcache.h>
@@ -20,6 +21,7 @@ zenremotestore_forcelinktests()
{
buildmanifest_forcelink();
buildsavedstate_forcelink();
+ jupiterbuildstorage_forcelink();
buildstorageoperations_forcelink();
chunkblock_forcelink();
chunkedcontent_forcelink();
diff --git a/src/zenserver-test/cache-tests.cpp b/src/zenserver-test/cache-tests.cpp
index 334dd04ab..14748e214 100644
--- a/src/zenserver-test/cache-tests.cpp
+++ b/src/zenserver-test/cache-tests.cpp
@@ -1193,14 +1193,10 @@ TEST_CASE("zcache.rpc")
// CbPackage Package;
// CHECK(Request.Format(Package));
- // IoBuffer Body = FormatPackageMessageBuffer(Package).Flatten().AsIoBuffer();
- // cpr::Response Result = cpr::Post(cpr::Url{fmt::format("{}/$rpc", LocalCfg.BaseUri)},
- // cpr::Header{{"Content-Type", "application/x-ue-cbpkg"}, {"Accept", "application/x-ue-cbpkg"}},
- // cpr::Body{(const char*)Body.GetData(), Body.GetSize()});
+ // IoBuffer Body = FormatPackageMessageBuffer(Package).Flatten().AsIoBuffer();
+ // // TODO: rewrite using HttpClient instead of removed CPR dependency
- // CHECK(Result.status_code == 200);
// cacherequests::PutCacheRecordsResult ParsedResult;
- // CbPackage Response = ParsePackageMessage(zen::IoBuffer(zen::IoBuffer::Wrap, Result.text.data(), Result.text.size()));
// CHECK(!Response.IsNull());
// CHECK(ParsedResult.Parse(Response));
// for (bool ResponseSuccess : ParsedResult.Success)
diff --git a/src/zenserver-test/hub-tests.cpp b/src/zenserver-test/hub-tests.cpp
index dbe6fa785..b2da552fc 100644
--- a/src/zenserver-test/hub-tests.cpp
+++ b/src/zenserver-test/hub-tests.cpp
@@ -33,6 +33,77 @@ using namespace std::literals;
static const HttpClientSettings kFastTimeout{.ConnectTimeout = std::chrono::milliseconds(200)};
+// Polls the hub's "modules/<ModuleId>" endpoint every 100 ms until the reported "state"
+// equals ExpectedState or TimeoutMs has elapsed. One last query is made after the
+// timeout; its outcome is the return value in that case.
+static bool
+WaitForModuleState(HttpClient& Client, std::string_view ModuleId, std::string_view ExpectedState, int TimeoutMs = 10000)
+{
+    Stopwatch      Timer;
+    const uint64_t TimeoutLimitMs = static_cast<uint64_t>(TimeoutMs);
+    const auto     ModulePath     = fmt::format("modules/{}", ModuleId);
+
+    // True when the request succeeds and the module reports the expected state
+    auto InExpectedState = [&]() -> bool {
+        HttpClient::Response Reply = Client.Get(ModulePath);
+        return Reply && Reply.AsObject()["state"].AsString() == ExpectedState;
+    };
+
+    for (; Timer.GetElapsedTimeMs() < TimeoutLimitMs; Sleep(100))
+    {
+        if (InExpectedState())
+        {
+            return true;
+        }
+    }
+    return InExpectedState();
+}
+
+// Posts "modules/<ModuleId>/provision" and returns the response. A 409 Conflict is
+// retried every 100 ms until TimeoutMs has elapsed: it covers the window in which an
+// async deprovision has already removed the module from InstanceLookup but not yet from
+// DeprovisioningModules (which CanProvisionInstance checks). Any other outcome, success
+// or failure, is returned immediately; on timeout the last Conflict response is returned.
+static HttpClient::Response
+ProvisionModule(HttpClient& Client, std::string_view ModuleId, int TimeoutMs = 10000)
+{
+    Stopwatch            Timer;
+    const uint64_t       TimeoutLimitMs = static_cast<uint64_t>(TimeoutMs);
+    const auto           ProvisionPath  = fmt::format("modules/{}/provision", ModuleId);
+    HttpClient::Response Result;
+
+    for (;;)
+    {
+        Result = Client.Post(ProvisionPath);
+
+        const bool IsConflict = !Result && Result.StatusCode == HttpResponseCode::Conflict;
+        if (!IsConflict)
+        {
+            return Result;
+        }
+
+        Sleep(100);
+        if (Timer.GetElapsedTimeMs() >= TimeoutLimitMs)
+        {
+            return Result;
+        }
+    }
+}
+
+// Polls Path on Client every 100 ms until a request fails, i.e. the port no longer
+// accepts connections because the process has terminated, or until TimeoutMs elapses
+// (followed by one final attempt).
+// Needed after async deprovision: WaitForModuleGone returns as soon as the module
+// leaves m_InstanceLookup (synchronous), but the background worker that kills the
+// process may not have run yet.
+static bool
+WaitForPortUnreachable(HttpClient& Client, std::string_view Path = "/health/", int TimeoutMs = 10000)
+{
+    Stopwatch      Timer;
+    const uint64_t TimeoutLimitMs = static_cast<uint64_t>(TimeoutMs);
+
+    auto IsUnreachable = [&]() -> bool { return !Client.Get(Path); };
+
+    for (; Timer.GetElapsedTimeMs() < TimeoutLimitMs; Sleep(100))
+    {
+        if (IsUnreachable())
+        {
+            return true;
+        }
+    }
+    return IsUnreachable();
+}
+
+// Polls the hub's "modules/<ModuleId>" endpoint every 100 ms until it answers
+// 404 Not Found or TimeoutMs has elapsed. One last query is made after the timeout;
+// its outcome is the return value in that case.
+static bool
+WaitForModuleGone(HttpClient& Client, std::string_view ModuleId, int TimeoutMs = 10000)
+{
+    Stopwatch      Timer;
+    const uint64_t TimeoutLimitMs = static_cast<uint64_t>(TimeoutMs);
+    const auto     ModulePath     = fmt::format("modules/{}", ModuleId);
+
+    auto IsGone = [&]() -> bool { return Client.Get(ModulePath).StatusCode == HttpResponseCode::NotFound; };
+
+    for (; Timer.GetElapsedTimeMs() < TimeoutLimitMs; Sleep(100))
+    {
+        if (IsGone())
+        {
+            return true;
+        }
+    }
+    return IsGone();
+}
+
TEST_SUITE_BEGIN("server.hub");
TEST_CASE("hub.lifecycle.children")
@@ -65,9 +136,7 @@ TEST_CASE("hub.lifecycle.children")
AbcPort = AbcResult["port"].AsUInt16(0);
CHECK_NE(AbcPort, 0);
- Result = Client.Get("modules/abc");
- REQUIRE(Result);
- CHECK_EQ(Result.AsObject()["state"].AsString(), "provisioned"sv);
+ REQUIRE(WaitForModuleState(Client, "abc", "provisioned"));
// This should be a fresh instance with no contents
@@ -91,6 +160,8 @@ TEST_CASE("hub.lifecycle.children")
DefPort = DefResult["port"].AsUInt16(0);
REQUIRE_NE(DefPort, 0);
+ REQUIRE(WaitForModuleState(Client, "def", "provisioned"));
+
// This should be a fresh instance with no contents
HttpClient DefClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout);
@@ -110,21 +181,24 @@ TEST_CASE("hub.lifecycle.children")
Result = Client.Post("modules/ghi/provision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleState(Client, "ghi", "provisioned"));
// Tear down instances
Result = Client.Post("modules/abc/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "abc"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
Result = Client.Post("modules/def/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "def"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
Result = Client.Post("modules/ghi/deprovision");
@@ -132,7 +206,7 @@ TEST_CASE("hub.lifecycle.children")
// re-provision to verify that (de)hydration preserved state
{
- Result = Client.Post("modules/abc/provision");
+ Result = ProvisionModule(Client, "abc");
REQUIRE(Result);
CbObject AbcResult = Result.AsObject();
@@ -140,6 +214,8 @@ TEST_CASE("hub.lifecycle.children")
AbcPort = AbcResult["port"].AsUInt16(0);
REQUIRE_NE(AbcPort, 0);
+ REQUIRE(WaitForModuleState(Client, "abc", "provisioned"));
+
// This should contain the content from the previous run
HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout);
@@ -156,7 +232,7 @@ TEST_CASE("hub.lifecycle.children")
}
{
- Result = Client.Post("modules/def/provision");
+ Result = ProvisionModule(Client, "def");
REQUIRE(Result);
CbObject DefResult = Result.AsObject();
@@ -164,6 +240,8 @@ TEST_CASE("hub.lifecycle.children")
DefPort = DefResult["port"].AsUInt16(0);
REQUIRE_NE(DefPort, 0);
+ REQUIRE(WaitForModuleState(Client, "def", "provisioned"));
+
// This should contain the content from the previous run
HttpClient DefClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout);
@@ -181,22 +259,24 @@ TEST_CASE("hub.lifecycle.children")
Result = Client.Post("modules/abc/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "abc"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
Result = Client.Post("modules/def/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "def"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
// re-provision to verify that (de)hydration preserved state, including
// state which was generated after the very first dehydration
{
- Result = Client.Post("modules/abc/provision");
+ Result = ProvisionModule(Client, "abc");
REQUIRE(Result);
CbObject AbcResult = Result.AsObject();
@@ -204,6 +284,8 @@ TEST_CASE("hub.lifecycle.children")
AbcPort = AbcResult["port"].AsUInt16(0);
REQUIRE_NE(AbcPort, 0);
+ REQUIRE(WaitForModuleState(Client, "abc", "provisioned"));
+
// This should contain the content from the previous two runs
HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout);
@@ -221,7 +303,7 @@ TEST_CASE("hub.lifecycle.children")
}
{
- Result = Client.Post("modules/def/provision");
+ Result = ProvisionModule(Client, "def");
REQUIRE(Result);
CbObject DefResult = Result.AsObject();
@@ -229,6 +311,8 @@ TEST_CASE("hub.lifecycle.children")
DefPort = DefResult["port"].AsUInt16(0);
REQUIRE_NE(DefPort, 0);
+ REQUIRE(WaitForModuleState(Client, "def", "provisioned"));
+
// This should contain the content from the previous two runs
HttpClient DefClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout);
@@ -247,16 +331,18 @@ TEST_CASE("hub.lifecycle.children")
Result = Client.Post("modules/abc/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "abc"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
Result = Client.Post("modules/def/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "def"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
// final sanity check that the hub is still responsive and all modules are gone
@@ -308,14 +394,45 @@ TEST_CASE("hub.consul.hub.registration")
ConsulProc.SpawnConsulAgent();
ZenServerInstance Instance(TestEnv, ZenServerInstance::ServerMode::kHubServer);
- const uint16_t PortNumber =
- Instance.SpawnServerAndWaitUntilReady("--consul-endpoint=http://localhost:8500/ --instance-id=test-instance");
+ const uint16_t PortNumber = Instance.SpawnServerAndWaitUntilReady(
+ "--consul-endpoint=http://localhost:8500/ --instance-id=test-instance "
+ "--consul-health-interval-seconds=5 --consul-deregister-after-seconds=60");
REQUIRE(PortNumber != 0);
consul::ConsulClient Client("http://localhost:8500/");
-
REQUIRE(WaitForConsulService(Client, "zen-hub-test-instance", true, 5000));
+ // Verify custom intervals flowed through to the registered check
+ {
+ std::string JsonError;
+ CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), JsonError);
+ REQUIRE(JsonError.empty());
+
+ CbObjectView HubCheck;
+ for (CbFieldView F : ChecksRoot)
+ {
+ if (!F.IsObject())
+ {
+ continue;
+ }
+ for (CbFieldView C : F.AsObjectView())
+ {
+ CbObjectView Check = C.AsObjectView();
+ if (Check["ServiceID"sv].AsString() == "zen-hub-test-instance"sv)
+ {
+ HubCheck = Check;
+ break;
+ }
+ }
+ }
+ REQUIRE(HubCheck);
+ // Consul does not reflect DeregisterCriticalServiceAfter back in /v1/agent/checks for
+ // service-embedded checks; Definition is always an empty object. Only Type and Interval
+ // are accessible at the top level.
+ CHECK_EQ(HubCheck["Type"sv].AsString(), "http"sv);
+ CHECK_EQ(HubCheck["Interval"sv].AsString(), "5s"sv);
+ }
+
Instance.Shutdown();
CHECK(!Client.HasService("zen-hub-test-instance"));
@@ -393,16 +510,15 @@ TEST_CASE("hub.consul.provision.registration")
HttpClient::Response Result = HubClient.Post("modules/testmod/provision");
REQUIRE(Result);
- CHECK(Client.HasService("testmod"));
- {
- const uint16_t ModulePort = Result.AsObject()["port"].AsUInt16(0);
- REQUIRE(ModulePort != 0);
+ // Service is registered in Consul during Provisioning (before the child process starts),
+ // so this returns as soon as the state transition fires, not when the server is ready.
+ REQUIRE(WaitForConsulService(Client, "testmod", true, 10000));
- {
- HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
- CHECK(ModClient.Get("/health/"));
- }
+ const uint16_t ModulePort = Result.AsObject()["port"].AsUInt16(0);
+ REQUIRE(ModulePort != 0);
+ // Consul fields are set during Provisioning and can be verified before the server is ready.
+ {
std::string JsonError;
CbFieldIterator ServicesRoot = LoadCompactBinaryFromJson(Client.GetAgentServicesJson(), JsonError);
REQUIRE(JsonError.empty());
@@ -417,7 +533,7 @@ TEST_CASE("hub.consul.provision.registration")
}
REQUIRE(ServicesMap);
- // Verify fields registered by OnProvisioned
+ // Verify fields registered by OnModuleStateChanged
{
CbObjectView ModService = ServicesMap["testmod"].AsObjectView();
CHECK_EQ(ModService["ID"sv].AsString(), "testmod"sv);
@@ -455,8 +571,75 @@ TEST_CASE("hub.consul.provision.registration")
CHECK_EQ(HubService["Port"sv].AsDouble(0), double(PortNumber));
}
+ // Verify the hub health check is registered (from startup, with an active interval); only its Type can be asserted here.
+ {
+ std::string ChecksJsonError;
+ CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), ChecksJsonError);
+ REQUIRE(ChecksJsonError.empty());
+
+ CbObjectView HubCheck;
+ for (CbFieldView F : ChecksRoot)
+ {
+ if (!F.IsObject())
+ {
+ continue;
+ }
+ for (CbFieldView C : F.AsObjectView())
+ {
+ CbObjectView Check = C.AsObjectView();
+ if (Check["ServiceID"sv].AsString() == "zen-hub-test-instance"sv)
+ {
+ HubCheck = Check;
+ }
+ }
+ }
+ REQUIRE(HubCheck);
+ // Consul does not reflect HTTP URL back in /v1/agent/checks for service-embedded checks.
+ CHECK_EQ(HubCheck["Type"sv].AsString(), "http"sv);
+ }
+ }
+
+ // Wait for Provisioned before touching the module's HTTP endpoint.
+ REQUIRE(WaitForModuleState(HubClient, "testmod", "provisioned"));
+
+ // Verify the module health check is registered. No health check is registered during Provisioning
+ // (to avoid Consul marking the service critical before the child process is ready); it is added
+ // on transition to Provisioned.
+ {
+ std::string ChecksJsonError;
+ CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), ChecksJsonError);
+ REQUIRE(ChecksJsonError.empty());
+
+ CbObjectView ModCheck;
+ for (CbFieldView F : ChecksRoot)
+ {
+ if (!F.IsObject())
+ {
+ continue;
+ }
+ for (CbFieldView C : F.AsObjectView())
+ {
+ CbObjectView Check = C.AsObjectView();
+ if (Check["ServiceID"sv].AsString() == "testmod"sv)
+ {
+ ModCheck = Check;
+ }
+ }
+ }
+ REQUIRE(ModCheck);
+ // Consul does not reflect HTTP URL back in /v1/agent/checks for service-embedded checks.
+ CHECK_EQ(ModCheck["Type"sv].AsString(), "http"sv);
+ }
+
+ {
+ HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
+ CHECK(ModClient.Get("/health/"));
+ }
+
+ {
Result = HubClient.Post("modules/testmod/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForConsulService(Client, "testmod", false, 10000));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
@@ -482,13 +665,12 @@ TEST_CASE("hub.hibernate.lifecycle")
// Provision
HttpClient::Response Result = Client.Post("modules/testmod/provision");
REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv);
const uint16_t ModulePort = Result.AsObject()["port"].AsUInt16(0);
REQUIRE_NE(ModulePort, 0);
- Result = Client.Get("modules/testmod");
- REQUIRE(Result);
- CHECK_EQ(Result.AsObject()["state"].AsString(), "provisioned"sv);
+ REQUIRE(WaitForModuleState(Client, "testmod", "provisioned"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
CHECK(ModClient.Get("/health/"));
@@ -502,11 +684,10 @@ TEST_CASE("hub.hibernate.lifecycle")
// Hibernate - state should become "hibernated", server should be unreachable
Result = Client.Post("modules/testmod/hibernate");
REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv);
- Result = Client.Get("modules/testmod");
- REQUIRE(Result);
- CHECK_EQ(Result.AsObject()["state"].AsString(), "hibernated"sv);
+ REQUIRE(WaitForModuleState(Client, "testmod", "hibernated"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
CHECK(!ModClient.Get("/health/"));
@@ -515,11 +696,10 @@ TEST_CASE("hub.hibernate.lifecycle")
// Wake - state should return to "provisioned", server should be reachable, data should be intact
Result = Client.Post("modules/testmod/wake");
REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv);
- Result = Client.Get("modules/testmod");
- REQUIRE(Result);
- CHECK_EQ(Result.AsObject()["state"].AsString(), "provisioned"sv);
+ REQUIRE(WaitForModuleState(Client, "testmod", "provisioned"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
CHECK(ModClient.Get("/health/"));
@@ -532,17 +712,20 @@ TEST_CASE("hub.hibernate.lifecycle")
// Deprovision - server should become unreachable
Result = Client.Post("modules/testmod/deprovision");
REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+ REQUIRE(WaitForModuleGone(Client, "testmod"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
// Re-provision - server should be reachable on its (potentially new) port
- Result = Client.Post("modules/testmod/provision");
+ Result = ProvisionModule(Client, "testmod");
REQUIRE(Result);
CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv);
const uint16_t ModulePort2 = Result.AsObject()["port"].AsUInt16(0);
REQUIRE_NE(ModulePort2, 0);
+ REQUIRE(WaitForModuleState(Client, "testmod", "provisioned"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort2), kFastTimeout);
CHECK(ModClient.Get("/health/"));
@@ -551,9 +734,10 @@ TEST_CASE("hub.hibernate.lifecycle")
// Final deprovision - server should become unreachable
Result = Client.Post("modules/testmod/deprovision");
REQUIRE(Result);
+ REQUIRE(WaitForModuleGone(Client, "testmod"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort2), kFastTimeout);
- CHECK(!ModClient.Get("/health/"));
+ CHECK(WaitForPortUnreachable(ModClient));
}
}
@@ -574,24 +758,76 @@ TEST_CASE("hub.hibernate.errors")
CHECK(!Result);
CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound);
- // Double-hibernate: first call succeeds, second returns 400 (wrong state)
+ Result = Client.Post("modules/unknown/deprovision");
+ CHECK(!Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound);
+
+ Result = Client.Delete("modules/unknown");
+ CHECK(!Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound);
+
+ // Double-provision: second call while first is in-flight returns 202 Accepted with the same port.
Result = Client.Post("modules/errmod/provision");
REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+ const uint16_t ErrmodPort = Result.AsObject()["port"].AsUInt16(0);
+ REQUIRE_NE(ErrmodPort, 0);
+ // Provisioning the same module while in-flight returns 202 Accepted with the allocated port.
+ // Evaluated synchronously before WorkerPool dispatch, so safe regardless of timing.
+ Result = Client.Post("modules/errmod/provision");
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+ CHECK_EQ(Result.AsObject()["port"].AsUInt16(0), ErrmodPort);
+
+ REQUIRE(WaitForModuleState(Client, "errmod", "provisioned"));
+
+ // Already provisioned: provision and wake both return 200 Completed.
+ Result = Client.Post("modules/errmod/provision");
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::OK);
+
+ Result = Client.Post("modules/errmod/wake");
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::OK);
+
+ // Double-hibernate: second call while first is in-flight returns 202 Accepted.
Result = Client.Post("modules/errmod/hibernate");
REQUIRE(Result);
Result = Client.Post("modules/errmod/hibernate");
- CHECK(!Result);
- CHECK_EQ(Result.StatusCode, HttpResponseCode::BadRequest);
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+
+ REQUIRE(WaitForModuleState(Client, "errmod", "hibernated"));
- // Wake on provisioned: succeeds (state restored), then waking again returns 400
+ // Already hibernated: hibernate returns 200 Completed.
+ Result = Client.Post("modules/errmod/hibernate");
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::OK);
+
+ // Double-wake: second call while first is in-flight returns 202 Accepted.
Result = Client.Post("modules/errmod/wake");
REQUIRE(Result);
Result = Client.Post("modules/errmod/wake");
- CHECK(!Result);
- CHECK_EQ(Result.StatusCode, HttpResponseCode::BadRequest);
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+
+ // Double-deprovision: second call while first is in-flight returns 202 Accepted.
+ // errmod2 is a fresh module to avoid waiting on the still-waking errmod.
+ Result = Client.Post("modules/errmod2/provision");
+ REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+ REQUIRE(WaitForModuleState(Client, "errmod2", "provisioned"));
+
+ Result = Client.Post("modules/errmod2/deprovision");
+ REQUIRE(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
+
+ Result = Client.Post("modules/errmod2/deprovision");
+ CHECK(Result);
+ CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted);
}
TEST_SUITE_END();
diff --git a/src/zenserver/compute/computeserver.cpp b/src/zenserver/compute/computeserver.cpp
index d1875f41a..1673cea6c 100644
--- a/src/zenserver/compute/computeserver.cpp
+++ b/src/zenserver/compute/computeserver.cpp
@@ -481,7 +481,7 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig)
ServerConfig.DataDir / "functions",
ServerConfig.MaxConcurrentActions);
- m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService);
+ m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService);
# if ZEN_WITH_NOMAD
// Nomad provisioner
diff --git a/src/zenserver/config/config.cpp b/src/zenserver/config/config.cpp
index 15f6f79f3..daad154bc 100644
--- a/src/zenserver/config/config.cpp
+++ b/src/zenserver/config/config.cpp
@@ -417,7 +417,7 @@ ZenServerCmdLineOptions::AddCliOptions(cxxopts::Options& options, ZenServerConfi
options.add_option("network",
"",
"httpclient",
- "Select HTTP client implementation (e.g. 'curl', 'cpr')",
+ "Select HTTP client implementation",
cxxopts::value<std::string>(ServerOptions.HttpClient.Backend)->default_value("curl"),
"<http client>");
diff --git a/src/zenserver/config/config.h b/src/zenserver/config/config.h
index 5078fe71a..d35a1a8c7 100644
--- a/src/zenserver/config/config.h
+++ b/src/zenserver/config/config.h
@@ -40,7 +40,7 @@ struct ZenSentryConfig
struct HttpClientConfig
{
- std::string Backend = "cpr"; // Choice of HTTP client implementation (e.g. "curl", "cpr")
+ std::string Backend = "curl"; // Choice of HTTP client implementation
};
struct ZenServerConfig
diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp
index 7513e56f7..f3d8dbfe3 100644
--- a/src/zenserver/diag/logging.cpp
+++ b/src/zenserver/diag/logging.cpp
@@ -111,8 +111,8 @@ InitializeServerLogging(const ZenServerConfig& InOptions, bool WithCacheService)
const zen::Oid ServerSessionId = zen::GetSessionId();
- static constinit logging::LogPoint SessionIdPoint{{}, logging::Info, "server session id: {}"};
logging::Registry::Instance().ApplyAll([&](auto Logger) {
+ static constinit logging::LogPoint SessionIdPoint{{}, logging::Info, "server session id: {}"};
ZEN_MEMSCOPE(ELLMTag::Logging);
Logger->Log(SessionIdPoint, fmt::make_format_args(ServerSessionId));
});
diff --git a/src/zenserver/frontend/frontend.cpp b/src/zenserver/frontend/frontend.cpp
index 697cc014e..52ec5b8b3 100644
--- a/src/zenserver/frontend/frontend.cpp
+++ b/src/zenserver/frontend/frontend.cpp
@@ -9,6 +9,7 @@
#include <zencore/logging.h>
#include <zencore/string.h>
#include <zencore/trace.h>
+#include <zenhttp/httpstats.h>
ZEN_THIRD_PARTY_INCLUDES_START
#if ZEN_PLATFORM_WINDOWS
@@ -28,8 +29,9 @@ static unsigned char gHtmlZipData[] = {
namespace zen {
////////////////////////////////////////////////////////////////////////////////
-HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpStatusService& StatusService)
+HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpStatsService& StatsService, HttpStatusService& StatusService)
: m_Directory(Directory)
+, m_StatsService(StatsService)
, m_StatusService(StatusService)
{
ZEN_TRACE_CPU("HttpFrontendService::HttpFrontendService");
@@ -94,12 +96,14 @@ HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpSt
{
ZEN_INFO("front-end is NOT AVAILABLE");
}
+ m_StatsService.RegisterHandler("dashboard", *this);
m_StatusService.RegisterHandler("dashboard", *this);
}
HttpFrontendService::~HttpFrontendService()
{
m_StatusService.UnregisterHandler("dashboard", *this);
+ m_StatsService.UnregisterHandler("dashboard", *this);
}
const char*
@@ -122,6 +126,8 @@ HttpFrontendService::HandleRequest(zen::HttpServerRequest& Request)
{
using namespace std::literals;
+ metrics::OperationTiming::Scope $(m_HttpRequests);
+
ExtendableStringBuilder<256> UriBuilder;
std::string_view Uri = Request.RelativeUriWithExtension();
@@ -230,4 +236,26 @@ HttpFrontendService::HandleRequest(zen::HttpServerRequest& Request)
}
}
+void
+HttpFrontendService::HandleStatsRequest(HttpServerRequest& Request)  // Stats request handler: replies with HttpResponseCode::OK and the object returned by CollectStats().
+{
+ Request.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
+CbObject
+HttpFrontendService::CollectStats()  // Builds and returns the stats object for the front-end service.
+{
+ ZEN_TRACE_CPU("HttpFrontendService::Stats");
+ CbObjectWriter Cbo;
+
+ EmitSnapshot("requests", m_HttpRequests, Cbo);  // passes the m_HttpRequests timing metric to EmitSnapshot, labelled "requests"; the emitted layout is defined by EmitSnapshot
+ return Cbo.Save();
+}
+
+uint64_t
+HttpFrontendService::GetActivityCounter()  // Returns Count() of m_HttpRequests, the timing metric that HandleRequest scopes around each request.
+{
+ return m_HttpRequests.Count();
+}
+
} // namespace zen
diff --git a/src/zenserver/frontend/frontend.h b/src/zenserver/frontend/frontend.h
index 0ae3170ad..e0b86f1de 100644
--- a/src/zenserver/frontend/frontend.h
+++ b/src/zenserver/frontend/frontend.h
@@ -11,20 +11,27 @@
namespace zen {
-class HttpFrontendService final : public zen::HttpService, public IHttpStatusProvider
+class HttpStatsService;
+
+class HttpFrontendService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider
{
public:
- HttpFrontendService(std::filesystem::path Directory, HttpStatusService& StatusService);
+ HttpFrontendService(std::filesystem::path Directory, HttpStatsService& StatsService, HttpStatusService& StatusService);
virtual ~HttpFrontendService();
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
private:
- std::unique_ptr<ZipFs> m_ZipFs;
- std::filesystem::path m_Directory;
- std::filesystem::path m_DocsDirectory;
- HttpStatusService& m_StatusService;
+ std::unique_ptr<ZipFs> m_ZipFs;
+ std::filesystem::path m_Directory;
+ std::filesystem::path m_DocsDirectory;
+ HttpStatsService& m_StatsService;
+ HttpStatusService& m_StatusService;
+ metrics::OperationTiming m_HttpRequests;
};
} // namespace zen
diff --git a/src/zenserver/frontend/html/pages/builds.js b/src/zenserver/frontend/html/pages/builds.js
new file mode 100644
index 000000000..095f0bf29
--- /dev/null
+++ b/src/zenserver/frontend/html/pages/builds.js
@@ -0,0 +1,88 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+"use strict";
+
+import { ZenPage } from "./page.js"
+import { Fetcher } from "../util/fetcher.js"
+import { Friendly } from "../util/friendly.js"
+
+////////////////////////////////////////////////////////////////////////////////
+export class Page extends ZenPage // Dashboard page titled "build store": shows an HTTP Requests tile and a Build Store tile built from the "builds" stats.
+{
+ generate_crumbs() {} // empty body: this page adds no crumbs (presumably overrides the ZenPage default - confirm)
+
+ async main() // Builds the page once, renders an initial stats snapshot, then re-renders on pushed stats updates.
+ {
+ this.set_title("build store");
+
+ // Build Store Stats section, with a "raw yaml" link that opens /stats/builds.yaml in a new tab
+ const stats_section = this.add_section("Build Store Stats");
+ stats_section.tag().classify("dropall").text("raw yaml \u2192").on_click(() => {
+ window.open("/stats/builds.yaml", "_blank");
+ });
+ this._stats_grid = stats_section.tag().classify("grid").classify("stats-tiles"); // kept on the instance so _render_stats can rebuild it
+
+ const stats = await new Fetcher().resource("stats", "builds").json(); // initial snapshot
+ if (stats)
+ {
+ this._render_stats(stats);
+ }
+
+ this.connect_stats_ws((all_stats) => { // callback receives stats for all providers; only the "builds" entry is used
+ const s = all_stats["builds"];
+ if (s)
+ {
+ this._render_stats(s);
+ }
+ });
+ }
+
+ _render_stats(stats) // Clears the tile grid and rebuilds every tile from the given "builds" stats object.
+ {
+ const grid = this._stats_grid;
+ const safe = (obj, path) => path.split(".").reduce((a, b) => a && a[b], obj); // dotted-path lookup that yields the first falsy value met instead of throwing
+
+ grid.inner().innerHTML = ""; // drop the previously rendered tiles
+
+ // HTTP Requests tile (shared renderer; bad-request count defaults to 0 when absent)
+ this._render_http_requests_tile(grid, safe(stats, "requests"), safe(stats, "store.badrequestcount") || 0);
+
+ // Build Store tile, rendered only when blob or metadata stats are present
+ {
+ const blobs = safe(stats, "store.blobs");
+ const metadata = safe(stats, "store.metadata");
+ if (blobs || metadata)
+ {
+ const tile = grid.tag().classify("card").classify("stats-tile");
+ tile.tag().classify("card-title").text("Build Store");
+ const columns = tile.tag().classify("tile-columns");
+
+ const left = columns.tag().classify("tile-metrics"); // left column: disk size and blob counters
+ this._metric(left, Friendly.bytes(safe(stats, "store.size.disk") || 0), "disk", true);
+ if (blobs)
+ {
+ this._metric(left, Friendly.sep(blobs.count || 0), "blobs");
+ this._metric(left, Friendly.sep(blobs.readcount || 0), "blob reads");
+ this._metric(left, Friendly.sep(blobs.writecount || 0), "blob writes");
+ const blobHitRatio = (blobs.readcount || 0) > 0 // hitcount / readcount as a percentage with one decimal; "-" when there were no reads
+ ? (((blobs.hitcount || 0) / blobs.readcount) * 100).toFixed(1) + "%"
+ : "-";
+ this._metric(left, blobHitRatio, "blob hit ratio");
+ }
+
+ const right = columns.tag().classify("tile-metrics"); // right column: metadata counters
+ if (metadata)
+ {
+ this._metric(right, Friendly.sep(metadata.count || 0), "metadata entries", true);
+ this._metric(right, Friendly.sep(metadata.readcount || 0), "meta reads");
+ this._metric(right, Friendly.sep(metadata.writecount || 0), "meta writes");
+ const metaHitRatio = (metadata.readcount || 0) > 0 // same ratio rule as for blobs
+ ? (((metadata.hitcount || 0) / metadata.readcount) * 100).toFixed(1) + "%"
+ : "-";
+ this._metric(right, metaHitRatio, "meta hit ratio");
+ }
+ }
+ }
+ }
+
+}
diff --git a/src/zenserver/frontend/html/pages/hub.js b/src/zenserver/frontend/html/pages/hub.js
index 149a5c79c..78e3a090c 100644
--- a/src/zenserver/frontend/html/pages/hub.js
+++ b/src/zenserver/frontend/html/pages/hub.js
@@ -178,7 +178,7 @@ export class Page extends ZenPage
try
{
const [stats, status] = await Promise.all([
- new Fetcher().resource("/hub/stats").json(),
+ new Fetcher().resource("stats", "hub").json(),
new Fetcher().resource("/hub/status").json(),
]);
@@ -198,6 +198,9 @@ export class Page extends ZenPage
const max = data.maxInstanceCount || 0;
const limit = data.instanceLimit || 0;
+ // HTTP Requests tile
+ this._render_http_requests_tile(grid, data.requests);
+
{
const tile = grid.tag().classify("card").classify("stats-tile");
tile.tag().classify("card-title").text("Active Modules");
@@ -611,14 +614,4 @@ export class Page extends ZenPage
await fetch(`/hub/modules/${moduleId}/${action}`, { method: "POST" });
}
- _metric(parent, value, label, hero = false)
- {
- const m = parent.tag().classify("tile-metric");
- if (hero)
- {
- m.classify("tile-metric-hero");
- }
- m.tag().classify("metric-value").text(value);
- m.tag().classify("metric-label").text(label);
- }
}
diff --git a/src/zenserver/frontend/html/pages/objectstore.js b/src/zenserver/frontend/html/pages/objectstore.js
index 69e0a91b3..6b4890614 100644
--- a/src/zenserver/frontend/html/pages/objectstore.js
+++ b/src/zenserver/frontend/html/pages/objectstore.js
@@ -30,13 +30,16 @@ export class Page extends ZenPage
{
try
{
- const data = await new Fetcher().resource("/obj/").json();
- this._render(data);
+ const [data, stats] = await Promise.all([
+ new Fetcher().resource("/obj/").json(),
+ new Fetcher().resource("stats", "obj").json().catch(() => null),
+ ]);
+ this._render(data, stats);
}
catch (e) { /* service unavailable */ }
}
- _render(data)
+ _render(data, stats)
{
const buckets = data.buckets || [];
@@ -53,32 +56,17 @@ export class Page extends ZenPage
const total_objects = buckets.reduce((sum, b) => sum + (b.object_count || 0), 0);
const total_size = buckets.reduce((sum, b) => sum + (b.size || 0), 0);
- {
- const tile = grid.tag().classify("card").classify("stats-tile");
- tile.tag().classify("card-title").text("Buckets");
- const body = tile.tag().classify("tile-metrics");
- this._metric(body, Friendly.sep(buckets.length), "total", true);
- }
-
- {
- const tile = grid.tag().classify("card").classify("stats-tile");
- tile.tag().classify("card-title").text("Objects");
- const body = tile.tag().classify("tile-metrics");
- this._metric(body, Friendly.sep(total_objects), "total", true);
- }
+ // HTTP Requests tile
+ this._render_http_requests_tile(grid, stats && stats.requests);
{
const tile = grid.tag().classify("card").classify("stats-tile");
- tile.tag().classify("card-title").text("Storage");
+ tile.tag().classify("card-title").text("Object Store");
const body = tile.tag().classify("tile-metrics");
- this._metric(body, Friendly.bytes(total_size), "total size", true);
- }
-
- {
- const tile = grid.tag().classify("card").classify("stats-tile");
- tile.tag().classify("card-title").text("Served");
- const body = tile.tag().classify("tile-metrics");
- this._metric(body, Friendly.bytes(data.total_bytes_served || 0), "total bytes served", true);
+ this._metric(body, Friendly.sep(buckets.length), "buckets", true);
+ this._metric(body, Friendly.sep(total_objects), "objects");
+ this._metric(body, Friendly.bytes(total_size), "storage");
+ this._metric(body, Friendly.bytes(data.total_bytes_served || 0), "bytes served");
}
}
@@ -219,14 +207,4 @@ export class Page extends ZenPage
}
}
- _metric(parent, value, label, hero = false)
- {
- const m = parent.tag().classify("tile-metric");
- if (hero)
- {
- m.classify("tile-metric-hero");
- }
- m.tag().classify("metric-value").text(value);
- m.tag().classify("metric-label").text(label);
- }
}
diff --git a/src/zenserver/frontend/html/pages/page.js b/src/zenserver/frontend/html/pages/page.js
index d969d651d..cf8d3e3dd 100644
--- a/src/zenserver/frontend/html/pages/page.js
+++ b/src/zenserver/frontend/html/pages/page.js
@@ -4,6 +4,7 @@
import { WidgetHost } from "../util/widgets.js"
import { Fetcher } from "../util/fetcher.js"
+import { Friendly } from "../util/friendly.js"
////////////////////////////////////////////////////////////////////////////////
export class PageBase extends WidgetHost
@@ -148,8 +149,10 @@ export class ZenPage extends PageBase
const service_dashboards = [
{ base_uri: "/sessions/", label: "Sessions", href: "/dashboard/?page=sessions" },
{ base_uri: "/z$/", label: "Cache", href: "/dashboard/?page=cache" },
+ { base_uri: "/builds/", label: "Build Store", href: "/dashboard/?page=builds" },
{ base_uri: "/prj/", label: "Projects", href: "/dashboard/?page=projects" },
{ base_uri: "/obj/", label: "Object Store", href: "/dashboard/?page=objectstore" },
+ { base_uri: "/ws/", label: "Workspaces", href: "/dashboard/?page=workspaces" },
{ base_uri: "/compute/", label: "Compute", href: "/dashboard/?page=compute" },
{ base_uri: "/orch/", label: "Orchestrator", href: "/dashboard/?page=orchestrator" },
{ base_uri: "/hub/", label: "Hub", href: "/dashboard/?page=hub" },
@@ -265,4 +268,73 @@ export class ZenPage extends PageBase
new_crumb(auto_name);
}
+
+ _metric(parent, value, label, hero = false) // Appends one metric (a value element plus a label element) under `parent`; `hero` adds the "tile-metric-hero" class.
+ {
+ const m = parent.tag().classify("tile-metric"); // container element for this metric
+ if (hero)
+ {
+ m.classify("tile-metric-hero");
+ }
+ m.tag().classify("metric-value").text(value);
+ m.tag().classify("metric-label").text(label);
+ }
+
+ _render_http_requests_tile(grid, req, bad_requests = undefined) // Adds an "HTTP Requests" tile to `grid` from request metrics `req`; `bad_requests` is optional.
+ {
+ if (!req) // no request metrics available: render nothing
+ {
+ return;
+ }
+ const tile = grid.tag().classify("card").classify("stats-tile");
+ tile.tag().classify("card-title").text("HTTP Requests");
+ const columns = tile.tag().classify("tile-columns");
+
+ const left = columns.tag().classify("tile-metrics"); // left column: request count and rates
+ const reqData = req.requests || req; // accept either an object with a nested `requests` member or the metrics object itself
+ this._metric(left, Friendly.sep(reqData.count || 0), "total requests", true);
+ if (reqData.rate_mean > 0) // each rate row is shown only when the rate is positive
+ {
+ this._metric(left, Friendly.sep(reqData.rate_mean, 1) + "/s", "req/sec (mean)");
+ }
+ if (reqData.rate_1 > 0)
+ {
+ this._metric(left, Friendly.sep(reqData.rate_1, 1) + "/s", "req/sec (1m)");
+ }
+ if (reqData.rate_5 > 0)
+ {
+ this._metric(left, Friendly.sep(reqData.rate_5, 1) + "/s", "req/sec (5m)");
+ }
+ if (reqData.rate_15 > 0)
+ {
+ this._metric(left, Friendly.sep(reqData.rate_15, 1) + "/s", "req/sec (15m)");
+ }
+ if (bad_requests !== undefined) // row appears only when the caller passed a count (a passed 0 is still shown)
+ {
+ this._metric(left, Friendly.sep(bad_requests), "bad requests");
+ }
+
+ const right = columns.tag().classify("tile-metrics"); // right column: latency figures
+ this._metric(right, Friendly.duration(reqData.t_avg || 0), "avg latency", true);
+ if (reqData.t_p75) // each latency row below is shown only when its value is truthy
+ {
+ this._metric(right, Friendly.duration(reqData.t_p75), "p75");
+ }
+ if (reqData.t_p95)
+ {
+ this._metric(right, Friendly.duration(reqData.t_p95), "p95");
+ }
+ if (reqData.t_p99)
+ {
+ this._metric(right, Friendly.duration(reqData.t_p99), "p99");
+ }
+ if (reqData.t_p999)
+ {
+ this._metric(right, Friendly.duration(reqData.t_p999), "p999");
+ }
+ if (reqData.t_max)
+ {
+ this._metric(right, Friendly.duration(reqData.t_max), "max");
+ }
+ }
}
diff --git a/src/zenserver/frontend/html/pages/projects.js b/src/zenserver/frontend/html/pages/projects.js
index a3c0d1555..2469bf70b 100644
--- a/src/zenserver/frontend/html/pages/projects.js
+++ b/src/zenserver/frontend/html/pages/projects.js
@@ -159,44 +159,7 @@ export class Page extends ZenPage
grid.inner().innerHTML = "";
// HTTP Requests tile
- {
- const req = safe(stats, "requests");
- if (req)
- {
- const tile = grid.tag().classify("card").classify("stats-tile");
- tile.tag().classify("card-title").text("HTTP Requests");
- const columns = tile.tag().classify("tile-columns");
-
- const left = columns.tag().classify("tile-metrics");
- const reqData = req.requests || req;
- this._metric(left, Friendly.sep(safe(stats, "store.requestcount") || 0), "total requests", true);
- if (reqData.rate_mean > 0)
- {
- this._metric(left, Friendly.sep(reqData.rate_mean, 1) + "/s", "req/sec (mean)");
- }
- if (reqData.rate_1 > 0)
- {
- this._metric(left, Friendly.sep(reqData.rate_1, 1) + "/s", "req/sec (1m)");
- }
- const badRequests = safe(stats, "store.badrequestcount") || 0;
- this._metric(left, Friendly.sep(badRequests), "bad requests");
-
- const right = columns.tag().classify("tile-metrics");
- this._metric(right, Friendly.duration(reqData.t_avg || 0), "avg latency", true);
- if (reqData.t_p75)
- {
- this._metric(right, Friendly.duration(reqData.t_p75), "p75");
- }
- if (reqData.t_p95)
- {
- this._metric(right, Friendly.duration(reqData.t_p95), "p95");
- }
- if (reqData.t_p99)
- {
- this._metric(right, Friendly.duration(reqData.t_p99), "p99");
- }
- }
- }
+ this._render_http_requests_tile(grid, safe(stats, "requests"), safe(stats, "store.badrequestcount") || 0);
// Store Operations tile
{
@@ -268,17 +231,6 @@ export class Page extends ZenPage
}
}
- _metric(parent, value, label, hero = false)
- {
- const m = parent.tag().classify("tile-metric");
- if (hero)
- {
- m.classify("tile-metric-hero");
- }
- m.tag().classify("metric-value").text(value);
- m.tag().classify("metric-label").text(label);
- }
-
async view_project(project_id)
{
// Toggle off if already selected
diff --git a/src/zenserver/frontend/html/pages/start.js b/src/zenserver/frontend/html/pages/start.js
index df70ea2f4..e5b4d14f1 100644
--- a/src/zenserver/frontend/html/pages/start.js
+++ b/src/zenserver/frontend/html/pages/start.js
@@ -36,6 +36,15 @@ export class Page extends ZenPage
all_stats[provider] = await new Fetcher().resource("stats", provider).json();
}));
+ this._http_panel = section.tag().classify("card").classify("stats-tile").classify("stats-http-panel");
+ this._http_panel.inner().addEventListener("click", () => { window.location = "?page=metrics"; });
+ this._http_panel.tag().classify("http-title").text("HTTP");
+ const req_section = this._http_panel.tag().classify("http-section");
+ req_section.tag().classify("http-section-label").text("Requests");
+ this._http_req_metrics = req_section.tag().classify("tile-metrics");
+ const ws_section = this._http_panel.tag().classify("http-section");
+ ws_section.tag().classify("http-section-label").text("Websockets");
+ this._http_ws_metrics = ws_section.tag().classify("tile-metrics");
this._stats_grid = section.tag().classify("grid").classify("stats-tiles");
this._safe_lookup = safe_lookup;
this._render_stats(all_stats);
@@ -113,7 +122,6 @@ export class Page extends ZenPage
);
var cell = row.get_cell(0);
cell.tag().text(namespace).on_click(() => this.view_zcache(namespace));
- row.get_cell(1).tag().text(namespace);
cell = row.get_cell(-1);
const action_tb = new Toolbar(cell, true);
@@ -143,44 +151,43 @@ export class Page extends ZenPage
const grid = this._stats_grid;
const safe_lookup = this._safe_lookup;
- // Clear existing tiles
+ // Clear and repopulate service tiles grid
grid.inner().innerHTML = "";
- // HTTP tile — aggregate request stats across all providers
- {
- const tile = grid.tag().classify("card").classify("stats-tile");
- tile.tag().classify("card-title").text("HTTP");
- const columns = tile.tag().classify("tile-columns");
-
- // Left column: request stats
- const left = columns.tag().classify("tile-metrics");
-
- let total_requests = 0;
- let total_rate = 0;
- for (const p in all_stats)
- {
- total_requests += (safe_lookup(all_stats[p], "requests.count") || 0);
- total_rate += (safe_lookup(all_stats[p], "requests.rate_1") || 0);
- }
+ // HTTP panel — update metrics containers built once in main()
+ const left = this._http_req_metrics;
+ left.inner().innerHTML = "";
- this._add_tile_metric(left, Friendly.sep(total_requests), "total requests", true);
- if (total_rate > 0)
- this._add_tile_metric(left, Friendly.sep(total_rate, 1) + "/s", "req/sec (1m)");
+ let total_requests = 0;
+ let total_rate = 0;
+ for (const p in all_stats)
+ {
+ total_requests += (safe_lookup(all_stats[p], "requests.count") || 0);
+ total_rate += (safe_lookup(all_stats[p], "requests.rate_1") || 0);
+ }
- // Right column: websocket stats
- const ws = all_stats["http"] ? (all_stats["http"]["websockets"] || {}) : {};
- const right = columns.tag().classify("tile-metrics");
+ this._add_tile_metric(left, Friendly.sep(total_requests), "total requests", true);
+ if (total_rate > 0)
+ {
+ this._add_tile_metric(left, Friendly.sep(total_rate, 1) + "/s", "req/sec (1m)");
+ }
- this._add_tile_metric(right, Friendly.sep(ws.active_connections || 0), "ws connections", true);
- const ws_frames = (ws.frames_received || 0) + (ws.frames_sent || 0);
- if (ws_frames > 0)
- this._add_tile_metric(right, Friendly.sep(ws_frames), "ws frames");
- const ws_bytes = (ws.bytes_received || 0) + (ws.bytes_sent || 0);
- if (ws_bytes > 0)
- this._add_tile_metric(right, Friendly.bytes(ws_bytes), "ws traffic");
+ const right = this._http_ws_metrics;
+ right.inner().innerHTML = "";
- tile.on_click(() => { window.location = "?page=metrics"; });
+ const ws = all_stats["http"] ? (all_stats["http"]["websockets"] || {}) : {};
+ this._add_tile_metric(right, Friendly.sep(ws.active_connections || 0), "ws connections", true);
+ const ws_frames = (ws.frames_received || 0) + (ws.frames_sent || 0);
+ if (ws_frames > 0)
+ {
+ this._add_tile_metric(right, Friendly.sep(ws_frames), "ws frames");
}
+ const ws_bytes = (ws.bytes_received || 0) + (ws.bytes_sent || 0);
+ if (ws_bytes > 0)
+ {
+ this._add_tile_metric(right, Friendly.bytes(ws_bytes), "ws traffic");
+ }
+
// Cache tile (z$)
if (all_stats["z$"])
@@ -198,7 +205,7 @@ export class Page extends ZenPage
this._add_tile_metric(body, safe_lookup(s, "cache.size.disk", Friendly.bytes) || "-", "disk");
this._add_tile_metric(body, safe_lookup(s, "cache.size.memory", Friendly.bytes) || "-", "memory");
- tile.on_click(() => { window.location = "?page=stat&provider=z$"; });
+ tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=z$"; });
}
// Project Store tile (prj)
@@ -210,9 +217,9 @@ export class Page extends ZenPage
const body = tile.tag().classify("tile-metrics");
this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true);
- this._add_tile_metric(body, safe_lookup(s, "store.size.disk", Friendly.bytes) || "-", "disk");
+ this._add_tile_metric(body, safe_lookup(s, "project_count", Friendly.sep) || "-", "projects");
- tile.on_click(() => { window.location = "?page=stat&provider=prj"; });
+ tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=prj"; });
}
// Build Store tile (builds)
@@ -226,7 +233,7 @@ export class Page extends ZenPage
this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true);
this._add_tile_metric(body, safe_lookup(s, "store.size.disk", Friendly.bytes) || "-", "disk");
- tile.on_click(() => { window.location = "?page=stat&provider=builds"; });
+ tile.inner().addEventListener("click", () => { window.location = "?page=builds"; });
}
// Proxy tile
@@ -250,7 +257,37 @@ export class Page extends ZenPage
this._add_tile_metric(body, Friendly.sep(mappings.length), "mappings");
this._add_tile_metric(body, Friendly.bytes(totalBytes), "traffic");
- tile.on_click(() => { window.location = "?page=proxy"; });
+ tile.inner().addEventListener("click", () => { window.location = "?page=proxy"; });
+ }
+
+ // Hub tile
+ if (all_stats["hub"])
+ {
+ const s = all_stats["hub"];
+ const tile = grid.tag().classify("card").classify("stats-tile");
+ tile.tag().classify("card-title").text("Hub");
+ const body = tile.tag().classify("tile-metrics");
+
+ const current = safe_lookup(s, "currentInstanceCount") || 0;
+ const limit = safe_lookup(s, "instanceLimit") || safe_lookup(s, "maxInstanceCount") || 0;
+ this._add_tile_metric(body, `${current} / ${limit}`, "instances", true);
+ this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests");
+
+ tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=hub"; });
+ }
+
+ // Object Store tile (obj)
+ if (all_stats["obj"])
+ {
+ const s = all_stats["obj"];
+ const tile = grid.tag().classify("card").classify("stats-tile");
+ tile.tag().classify("card-title").text("Object Store");
+ const body = tile.tag().classify("tile-metrics");
+
+ this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true);
+ this._add_tile_metric(body, safe_lookup(s, "total_bytes_served", Friendly.bytes) || "-", "bytes served");
+
+ tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=obj"; });
}
// Workspace tile (ws)
@@ -262,9 +299,9 @@ export class Page extends ZenPage
const body = tile.tag().classify("tile-metrics");
this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true);
- this._add_tile_metric(body, safe_lookup(s, "workspaces.filescount", Friendly.sep) || "-", "files");
+ this._add_tile_metric(body, safe_lookup(s, "workspaces", Friendly.sep) || "-", "workspaces");
- tile.on_click(() => { window.location = "?page=stat&provider=ws"; });
+ tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=ws"; });
}
}
diff --git a/src/zenserver/frontend/html/pages/workspaces.js b/src/zenserver/frontend/html/pages/workspaces.js
new file mode 100644
index 000000000..d31fd7373
--- /dev/null
+++ b/src/zenserver/frontend/html/pages/workspaces.js
@@ -0,0 +1,236 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+"use strict";
+
+import { ZenPage } from "./page.js"
+import { Fetcher } from "../util/fetcher.js"
+
+////////////////////////////////////////////////////////////////////////////////
+/**
+ * Workspaces page.
+ *
+ * Shows a stats tile grid for the "ws" stats entry and a table of the
+ * workspaces returned by "/ws/". Every workspace gets a main row plus a
+ * collapsible detail row holding a nested table of its shares. Rows are
+ * cached per workspace id and updated in place, so the expanded/collapsed
+ * state of a row survives a reload of the list.
+ */
+export class Page extends ZenPage
+{
+    /**
+     * Builds the static page skeleton (stats section, toolbar, workspace
+     * table) and then performs the initial load of the workspace list.
+     */
+    async main()
+    {
+        this.set_title("workspaces");
+
+        // Workspace Service Stats
+        const stats_section = this.add_section("Workspace Service Stats");
+        this._stats_grid = stats_section.tag().classify("grid").classify("stats-tiles");
+
+        // Initial snapshot. A failed fetch resolves to null and is skipped.
+        const stats = await new Fetcher().resource("stats", "ws").json().catch(() => null);
+        if (stats) { this._render_stats(stats); }
+
+        // Re-render whenever the stats callback delivers a payload with a "ws" entry.
+        this.connect_stats_ws((all_stats) => {
+            const s = all_stats["ws"];
+            if (s) { this._render_stats(s); }
+        });
+
+        // State. Initialised before the refresh button is wired up so that its
+        // click handler can never observe undefined fields.
+        this._expanded = new Set();  // workspace ids with shares panel open
+        this._row_cache = new Map(); // workspace id -> row refs, for in-place DOM updates
+        this._loading = false;
+
+        const section = this.add_section("Workspaces");
+        const host = section.tag();
+
+        // Toolbar: refresh button
+        const toolbar = host.tag().classify("module-bulk-bar");
+        this._btn_refresh = toolbar.tag("button").classify("module-bulk-btn").inner();
+        this._btn_refresh.textContent = "\u21BB Refresh";
+        this._btn_refresh.addEventListener("click", () => this._do_refresh());
+
+        // Workspace table (raw DOM — in-place row updates require stable element refs)
+        const table = document.createElement("table");
+        table.className = "module-table";
+        table.appendChild(this._ws_make_thead(["WORKSPACE ID", "ROOT PATH"]));
+        this._tbody = document.createElement("tbody");
+        table.appendChild(this._tbody);
+        host.inner().appendChild(table);
+
+        await this._load();
+    }
+
+    /**
+     * Fetches the workspace list from "/ws/" and renders it.
+     *
+     * A call made while another load is in flight returns immediately. The
+     * refresh button is disabled for the duration of the load. Fetch or
+     * render errors are swallowed on purpose, leaving the table as it was.
+     */
+    async _load()
+    {
+        if (this._loading) { return; }
+        this._loading = true;
+        this._btn_refresh.disabled = true;
+        try
+        {
+            const data = await new Fetcher().resource("/ws/").json();
+            const workspaces = data.workspaces || [];
+            this._render(workspaces);
+        }
+        catch (e) { /* service unavailable */ }
+        finally
+        {
+            this._loading = false;
+            this._btn_refresh.disabled = false;
+        }
+    }
+
+    /**
+     * Click handler of the refresh button: requests "/ws/refresh" and then
+     * reloads the list, whether or not that request succeeded.
+     */
+    async _do_refresh()
+    {
+        if (this._loading) { return; }
+        // Disabled here already; _load() re-enables the button when it finishes.
+        this._btn_refresh.disabled = true;
+        try
+        {
+            await new Fetcher().resource("/ws/refresh").text();
+        }
+        catch (e) { /* ignore */ }
+        await this._load();
+    }
+
+    /**
+     * Reconciles the table with a freshly fetched workspace list.
+     *
+     * @param workspaces array of workspace objects; the fields read here are
+     *                   `id`, `root_path` and `shares`
+     */
+    _render(workspaces)
+    {
+        // Use the same normalisation ("" for a missing id) as the cache key
+        // below. Keying this lookup by the raw id would make a workspace
+        // without an id look "gone" on every reload and drop its row state.
+        const live_ids = new Set(workspaces.map(w => w.id || ""));
+
+        // Remove rows for workspaces no longer present
+        for (const [id, row] of this._row_cache)
+        {
+            if (!live_ids.has(id))
+            {
+                row.tr.remove();
+                row.detail_tr.remove();
+                this._row_cache.delete(id);
+                this._expanded.delete(id);
+            }
+        }
+
+        // Create or update rows, then reorder tbody to match response order.
+        // appendChild on a node that is already attached moves it, so appending
+        // every row pair in response order yields the response ordering.
+        for (const ws of workspaces)
+        {
+            const id = ws.id || "";
+            const shares = ws.shares || [];
+
+            let row = this._row_cache.get(id);
+            if (row)
+            {
+                // Update in-place — preserves DOM node identity so expanded state is kept
+                row.root_path_node.nodeValue = ws.root_path || "";
+                this._ws_sync_expanded(id, row.detail_tr, row.btn_expand);
+
+                // Only rebuild the nested table when the share list actually changed.
+                const shares_json = JSON.stringify(shares);
+                if (shares_json !== row.shares_json)
+                {
+                    row.shares_json = shares_json;
+                    this._render_shares(row.sh_tbody, shares);
+                }
+            }
+            else
+            {
+                row = this._ws_create_row(id, ws.root_path || "", shares);
+                this._row_cache.set(id, row);
+            }
+
+            this._tbody.appendChild(row.tr);
+            this._tbody.appendChild(row.detail_tr);
+        }
+    }
+
+    /**
+     * Clears the stats grid and renders the HTTP requests tile from
+     * `stats.requests`.
+     */
+    _render_stats(stats)
+    {
+        const grid = this._stats_grid;
+        grid.inner().innerHTML = "";
+
+        // HTTP Requests tile
+        this._render_http_requests_tile(grid, stats.requests);
+    }
+
+    /**
+     * Replaces the contents of a shares table body.
+     *
+     * @param sh_tbody the <tbody> element to fill
+     * @param shares   array of share objects (`id`, `share_path`, `alias`);
+     *                 an empty array renders a single "No shares" row
+     */
+    _render_shares(sh_tbody, shares)
+    {
+        sh_tbody.innerHTML = "";
+        if (shares.length === 0)
+        {
+            const tr = document.createElement("tr");
+            const td = document.createElement("td");
+            td.colSpan = 3;
+            td.className = "ws-no-shares-cell";
+            td.textContent = "No shares";
+            tr.appendChild(td);
+            sh_tbody.appendChild(tr);
+            return;
+        }
+        for (const share of shares)
+        {
+            const tr = document.createElement("tr");
+            for (const text of [share.id || "", share.share_path || "", share.alias || ""])
+            {
+                const td = document.createElement("td");
+                td.textContent = text;
+                tr.appendChild(td);
+            }
+            sh_tbody.appendChild(tr);
+        }
+    }
+
+    /** Creates a <thead> element with one <th> per label. */
+    _ws_make_thead(labels)
+    {
+        const thead = document.createElement("thead");
+        const hrow = document.createElement("tr");
+        for (const label of labels)
+        {
+            const th = document.createElement("th");
+            th.textContent = label;
+            hrow.appendChild(th);
+        }
+        thead.appendChild(hrow);
+        return thead;
+    }
+
+    /**
+     * Makes the detail row visibility and the expand button glyph reflect
+     * whether `id` is currently in the expanded set.
+     */
+    _ws_sync_expanded(id, detail_tr, btn_expand)
+    {
+        const open = this._expanded.has(id);
+        detail_tr.style.display = open ? "" : "none";
+        btn_expand.textContent = open ? "\u25BE" : "\u25B8";
+    }
+
+    /**
+     * Builds the (detached) main row and detail row for one workspace.
+     *
+     * @returns the row record stored in the row cache: the two <tr> elements,
+     *          the text node holding the root path, the nested shares <tbody>,
+     *          the expand button and the JSON of the rendered share list
+     */
+    _ws_create_row(id, root_path, shares)
+    {
+        const tr = document.createElement("tr");
+        const detail_tr = document.createElement("tr");
+        detail_tr.className = "module-metrics-row";
+
+        const btn_expand = document.createElement("button");
+        btn_expand.className = "module-expand-btn";
+        this._ws_sync_expanded(id, detail_tr, btn_expand);
+        btn_expand.addEventListener("click", () => {
+            if (this._expanded.has(id))
+            {
+                this._expanded.delete(id);
+            }
+            else
+            {
+                this._expanded.add(id);
+            }
+            this._ws_sync_expanded(id, detail_tr, btn_expand);
+        });
+
+        const id_wrap = document.createElement("span");
+        id_wrap.className = "ws-id-wrap";
+        id_wrap.appendChild(btn_expand);
+        id_wrap.appendChild(document.createTextNode("\u00A0" + id));
+        const td_id = document.createElement("td");
+        td_id.appendChild(id_wrap);
+        tr.appendChild(td_id);
+
+        // Kept as a text node so later updates can assign nodeValue directly.
+        const root_path_node = document.createTextNode(root_path);
+        const td_root = document.createElement("td");
+        td_root.appendChild(root_path_node);
+        tr.appendChild(td_root);
+
+        // Detail row: nested shares table
+        const sh_table = document.createElement("table");
+        sh_table.className = "module-table ws-share-table";
+        sh_table.appendChild(this._ws_make_thead(["SHARE ID", "SHARE PATH", "ALIAS"]));
+        const sh_tbody = document.createElement("tbody");
+        sh_table.appendChild(sh_tbody);
+        const detail_td = document.createElement("td");
+        detail_td.colSpan = 2;
+        detail_td.className = "ws-detail-cell";
+        detail_td.appendChild(sh_table);
+        detail_tr.appendChild(detail_td);
+
+        this._render_shares(sh_tbody, shares);
+
+        return { tr, detail_tr, root_path_node, sh_tbody, btn_expand, shares_json: JSON.stringify(shares) };
+    }
+}
diff --git a/src/zenserver/frontend/html/zen.css b/src/zenserver/frontend/html/zen.css
index b4f7270fc..d9f7491ea 100644
--- a/src/zenserver/frontend/html/zen.css
+++ b/src/zenserver/frontend/html/zen.css
@@ -803,18 +803,17 @@ zen-banner + zen-nav::part(nav-bar) {
/* stats tiles -------------------------------------------------------------- */
-.stats-tiles {
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+.grid.stats-tiles {
+ grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
}
.stats-tile {
cursor: pointer;
- transition: border-color 0.15s, background 0.15s;
+ transition: border-color 0.15s;
}
.stats-tile:hover {
border-color: var(--theme_p0);
- background: var(--theme_p4);
}
.stats-tile-detailed {
@@ -873,6 +872,81 @@ zen-banner + zen-nav::part(nav-bar) {
font-size: 28px;
}
+/* HTTP summary panel ------------------------------------------------------- */
+
+.stats-http-panel {
+ display: grid;
+ grid-template-columns: 20% 1fr 1fr;
+ align-items: center;
+ margin-bottom: 16px;
+}
+
+.http-title {
+ font-size: 22px;
+ font-weight: 700;
+ color: var(--theme_bright);
+ text-transform: uppercase;
+ letter-spacing: 1px;
+ line-height: 1;
+}
+
+.http-section {
+ display: flex;
+ flex-direction: column;
+ gap: 8px;
+ padding: 0 24px;
+ border-left: 1px solid var(--theme_g2);
+}
+
+.http-section-label {
+ font-size: 11px;
+ font-weight: 600;
+ color: var(--theme_g1);
+ text-transform: uppercase;
+ letter-spacing: 0.5px;
+}
+
+.stats-http-panel .tile-metrics {
+ flex-direction: row;
+ align-items: center;
+ gap: 20px;
+}
+
+/* workspaces page ---------------------------------------------------------- */
+
+.ws-id-wrap {
+ display: inline-flex;
+ align-items: center;
+ font-family: 'SF Mono', 'Cascadia Mono', Consolas, 'DejaVu Sans Mono', monospace;
+ font-size: 14px;
+}
+
+.ws-share-table {
+ width: 100%;
+ margin: 4px 0;
+}
+
+.ws-share-table th {
+ padding: 4px;
+}
+
+.ws-share-table td {
+ font-family: 'SF Mono', 'Cascadia Mono', Consolas, 'DejaVu Sans Mono', monospace;
+ font-size: 13px;
+ padding: 4px;
+}
+
+.ws-share-table td.ws-no-shares-cell {
+ color: var(--theme_g1);
+ font-style: italic;
+ font-family: inherit;
+ padding: 4px 8px;
+}
+
+.module-metrics-row td.ws-detail-cell {
+ padding-left: 24px;
+}
+
/* start -------------------------------------------------------------------- */
#start {
@@ -1030,7 +1104,7 @@ html:has(#map) {
.card-title {
font-size: 14px;
font-weight: 600;
- color: var(--theme_g1);
+ color: var(--theme_g0);
margin-bottom: 12px;
text-transform: uppercase;
letter-spacing: 0.5px;
diff --git a/src/zenserver/hub/httphubservice.cpp b/src/zenserver/hub/httphubservice.cpp
index 34f4294e4..ebefcf2e3 100644
--- a/src/zenserver/hub/httphubservice.cpp
+++ b/src/zenserver/hub/httphubservice.cpp
@@ -8,10 +8,45 @@
#include <zencore/compactbinarybuilder.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
+#include <zenhttp/httpstats.h>
namespace zen {
-HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub)
+namespace {
+    // Translates a failed hub operation into an HTTP response.
+    //
+    // A Rejected result is answered with 409 Conflict and a NotFound result
+    // with 404 Not Found; the hub's message is sent as a plain-text body when
+    // it is non-empty. Returns true when a response was written (the caller
+    // must not write another one) and false for every other response code,
+    // in which case nothing is written.
+    bool HandleFailureResults(HttpServerRequest& Request, const Hub::Response& Resp)
+    {
+        const bool IsRejected = (Resp.ResponseCode == Hub::EResponseCode::Rejected);
+        const bool IsNotFound = (Resp.ResponseCode == Hub::EResponseCode::NotFound);
+        if (!IsRejected && !IsNotFound)
+        {
+            return false;
+        }
+
+        const HttpResponseCode FailureCode = IsRejected ? HttpResponseCode::Conflict : HttpResponseCode::NotFound;
+        if (Resp.Message.empty())
+        {
+            Request.WriteResponse(FailureCode);
+        }
+        else
+        {
+            Request.WriteResponse(FailureCode, HttpContentType::kText, Resp.Message);
+        }
+        return true;
+    }
+} // namespace
+
+HttpHubService::HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpStatusService& StatusService)
+: m_Hub(Hub)
+, m_StatsService(StatsService)
+, m_StatusService(StatusService)
{
using namespace std::literals;
@@ -83,144 +118,113 @@ HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub)
[this](HttpRouterRequest& Req) {
std::string_view ModuleId = Req.GetCapture(1);
- std::string FailureReason = "unknown";
- HttpResponseCode ResponseCode = HttpResponseCode::OK;
-
try
{
HubProvisionedInstanceInfo Info;
- if (m_Hub.Provision(ModuleId, /* out */ Info, /* out */ FailureReason))
- {
- CbObjectWriter Obj;
- Obj << "moduleId" << ModuleId;
- Obj << "baseUri" << Info.BaseUri;
- Obj << "port" << Info.Port;
- Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
+ Hub::Response Resp = m_Hub.Provision(ModuleId, Info);
- return;
- }
- else
+ if (HandleFailureResults(Req.ServerRequest(), Resp))
{
- ResponseCode = HttpResponseCode::BadRequest;
+ return;
}
+
+ const HttpResponseCode HttpCode =
+ (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK;
+ CbObjectWriter Obj;
+ Obj << "moduleId" << ModuleId;
+ Obj << "baseUri" << Info.BaseUri;
+ Obj << "port" << Info.Port;
+ return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save());
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Exception while provisioning module '{}': {}", ModuleId, Ex.what());
-
- FailureReason = Ex.what();
- ResponseCode = HttpResponseCode::InternalServerError;
+ throw;
}
-
- Req.ServerRequest().WriteResponse(ResponseCode, HttpContentType::kText, FailureReason);
},
HttpVerb::kPost);
m_Router.RegisterRoute(
"modules/{moduleid}/deprovision",
[this](HttpRouterRequest& Req) {
- std::string_view ModuleId = Req.GetCapture(1);
- std::string FailureReason = "unknown";
+ std::string_view ModuleId = Req.GetCapture(1);
try
{
- if (!m_Hub.Deprovision(std::string(ModuleId), /* out */ FailureReason))
+ Hub::Response Resp = m_Hub.Deprovision(std::string(ModuleId));
+
+ if (HandleFailureResults(Req.ServerRequest(), Resp))
{
- if (FailureReason.empty())
- {
- return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound);
- }
- else
- {
- return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason);
- }
+ return;
}
+ const HttpResponseCode HttpCode =
+ (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK;
CbObjectWriter Obj;
Obj << "moduleId" << ModuleId;
-
- return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
+ return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save());
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what());
-
- FailureReason = Ex.what();
+ throw;
}
-
- Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason);
},
HttpVerb::kPost);
m_Router.RegisterRoute(
"modules/{moduleid}/hibernate",
[this](HttpRouterRequest& Req) {
- std::string_view ModuleId = Req.GetCapture(1);
- std::string FailureReason = "unknown";
+ std::string_view ModuleId = Req.GetCapture(1);
try
{
- if (!m_Hub.Hibernate(std::string(ModuleId), /* out */ FailureReason))
+ Hub::Response Resp = m_Hub.Hibernate(std::string(ModuleId));
+
+ if (HandleFailureResults(Req.ServerRequest(), Resp))
{
- if (FailureReason.empty())
- {
- return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound);
- }
- else
- {
- return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason);
- }
+ return;
}
+ const HttpResponseCode HttpCode =
+ (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK;
CbObjectWriter Obj;
Obj << "moduleId" << ModuleId;
-
- return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
+ return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save());
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Exception while hibernating module '{}': {}", ModuleId, Ex.what());
-
- FailureReason = Ex.what();
+ throw;
}
-
- Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason);
},
HttpVerb::kPost);
m_Router.RegisterRoute(
"modules/{moduleid}/wake",
[this](HttpRouterRequest& Req) {
- std::string_view ModuleId = Req.GetCapture(1);
- std::string FailureReason = "unknown";
+ std::string_view ModuleId = Req.GetCapture(1);
try
{
- if (!m_Hub.Wake(std::string(ModuleId), /* out */ FailureReason))
+ Hub::Response Resp = m_Hub.Wake(std::string(ModuleId));
+
+ if (HandleFailureResults(Req.ServerRequest(), Resp))
{
- if (FailureReason.empty())
- {
- return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound);
- }
- else
- {
- return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason);
- }
+ return;
}
+ const HttpResponseCode HttpCode =
+ (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK;
CbObjectWriter Obj;
Obj << "moduleId" << ModuleId;
-
- return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
+ return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save());
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Exception while waking module '{}': {}", ModuleId, Ex.what());
-
- FailureReason = Ex.what();
+ throw;
}
-
- Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason);
},
HttpVerb::kPost);
@@ -234,10 +238,15 @@ HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub)
Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
},
HttpVerb::kGet);
+
+ m_StatsService.RegisterHandler("hub", *this);
+ m_StatusService.RegisterHandler("hub", *this);
}
HttpHubService::~HttpHubService()
{
+ m_StatusService.UnregisterHandler("hub", *this);
+ m_StatsService.UnregisterHandler("hub", *this);
}
const char*
@@ -254,9 +263,50 @@ HttpHubService::SetNotificationEndpoint(std::string_view UpstreamNotificationEnd
}
void
-HttpHubService::HandleRequest(zen::HttpServerRequest& Request)
+HttpHubService::HandleRequest(HttpServerRequest& Request)
+{
+ using namespace std::literals;
+
+ metrics::OperationTiming::Scope $(m_HttpRequests);
+ if (m_Router.HandleRequest(Request) == false)
+ {
+ ZEN_WARN("No route found for {0}", Request.RelativeUri());
+ return Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, "Not found"sv);
+ }
+}
+
+void
+HttpHubService::HandleStatusRequest(HttpServerRequest& Request)
{
- m_Router.HandleRequest(Request);
+ CbObjectWriter Cbo;
+ Cbo << "ok" << true;
+ Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+}
+
+void
+HttpHubService::HandleStatsRequest(HttpServerRequest& Request)
+{
+ Request.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
+CbObject
+HttpHubService::CollectStats()
+{
+ CbObjectWriter Cbo;
+
+ EmitSnapshot("requests", m_HttpRequests, Cbo);
+
+ Cbo << "currentInstanceCount" << m_Hub.GetInstanceCount();
+ Cbo << "maxInstanceCount" << m_Hub.GetMaxInstanceCount();
+ Cbo << "instanceLimit" << m_Hub.GetConfig().InstanceLimit;
+
+ return Cbo.Save();
+}
+
+uint64_t
+HttpHubService::GetActivityCounter()
+{
+ return m_HttpRequests.Count();
}
void
@@ -288,27 +338,27 @@ HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view
if (InstanceInfo.State == HubInstanceState::Provisioned || InstanceInfo.State == HubInstanceState::Hibernated ||
InstanceInfo.State == HubInstanceState::Crashed)
{
- std::string FailureReason;
try
{
- if (!m_Hub.Deprovision(std::string(ModuleId), FailureReason))
+ Hub::Response Resp = m_Hub.Deprovision(std::string(ModuleId));
+
+ if (HandleFailureResults(Request, Resp))
{
- if (FailureReason.empty())
- {
- Request.WriteResponse(HttpResponseCode::NotFound);
- }
- else
- {
- Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason);
- }
return;
}
+
+ // TODO: nuke all related storage
+
+ const HttpResponseCode HttpCode =
+ (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK;
+ CbObjectWriter Obj;
+ Obj << "moduleId" << ModuleId;
+ return Request.WriteResponse(HttpCode, Obj.Save());
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what());
- Request.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, Ex.what());
- return;
+ throw;
}
}
@@ -316,7 +366,6 @@ HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view
CbObjectWriter Obj;
Obj << "moduleId" << ModuleId;
- Obj << "state" << ToString(InstanceInfo.State);
Request.WriteResponse(HttpResponseCode::OK, Obj.Save());
}
diff --git a/src/zenserver/hub/httphubservice.h b/src/zenserver/hub/httphubservice.h
index d08eeea2a..1bb1c303e 100644
--- a/src/zenserver/hub/httphubservice.h
+++ b/src/zenserver/hub/httphubservice.h
@@ -3,9 +3,11 @@
#pragma once
#include <zenhttp/httpserver.h>
+#include <zenhttp/httpstatus.h>
namespace zen {
+class HttpStatsService;
class Hub;
/** ZenServer Hub Service
@@ -14,25 +16,33 @@ class Hub;
* use in UEFN content worker style scenarios.
*
*/
-class HttpHubService : public zen::HttpService
+class HttpHubService : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider
{
public:
- HttpHubService(Hub& Hub);
+ HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpStatusService& StatusService);
~HttpHubService();
HttpHubService(const HttpHubService&) = delete;
HttpHubService& operator=(const HttpHubService&) = delete;
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
void SetNotificationEndpoint(std::string_view UpstreamNotificationEndpoint, std::string_view InstanceId);
private:
- HttpRequestRouter m_Router;
-
Hub& m_Hub;
+ HttpRequestRouter m_Router;
+ metrics::OperationTiming m_HttpRequests;
+
+ HttpStatsService& m_StatsService;
+ HttpStatusService& m_StatusService;
+
void HandleModuleGet(HttpServerRequest& Request, std::string_view ModuleId);
void HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId);
};
diff --git a/src/zenserver/hub/hub.cpp b/src/zenserver/hub/hub.cpp
index 6a2609443..6c44e2333 100644
--- a/src/zenserver/hub/hub.cpp
+++ b/src/zenserver/hub/hub.cpp
@@ -10,6 +10,8 @@
#include <zencore/logging.h>
#include <zencore/scopeguard.h>
#include <zencore/timer.h>
+#include <zencore/workthreadpool.h>
+#include <zenhttp/httpclient.h>
ZEN_THIRD_PARTY_INCLUDES_START
#include <EASTL/fixed_vector.h>
@@ -20,8 +22,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
# include <zencore/filesystem.h>
# include <zencore/testing.h>
# include <zencore/testutils.h>
-# include <zencore/workthreadpool.h>
-# include <zenhttp/httpclient.h>
#endif
#include <numeric>
@@ -122,10 +122,17 @@ private:
//////////////////////////////////////////////////////////////////////////
-Hub::Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback)
+Hub::Hub(const Configuration& Config,
+ ZenServerEnvironment&& RunEnvironment,
+ WorkerThreadPool* OptionalWorkerPool,
+ AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback)
: m_Config(Config)
, m_RunEnvironment(std::move(RunEnvironment))
+, m_WorkerPool(OptionalWorkerPool)
+, m_BackgroundWorkLatch(1)
, m_ModuleStateChangeCallback(std::move(ModuleStateChangeCallback))
+, m_ActiveInstances(Config.InstanceLimit)
+, m_FreeActiveInstanceIndexes(Config.InstanceLimit)
{
m_HostMetrics = GetSystemMetrics();
m_ResourceLimits.DiskUsageBytes = 1000ull * 1024 * 1024 * 1024;
@@ -148,10 +155,7 @@ Hub::Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, Asy
ZEN_ASSERT(uint64_t(Config.BasePortNumber) + Config.InstanceLimit <= std::numeric_limits<uint16_t>::max());
m_InstanceLookup.reserve(Config.InstanceLimit);
- m_ActiveInstances.reserve(Config.InstanceLimit);
-
- m_FreePorts.resize(Config.InstanceLimit);
- std::iota(m_FreePorts.begin(), m_FreePorts.end(), Config.BasePortNumber);
+ std::iota(m_FreeActiveInstanceIndexes.begin(), m_FreeActiveInstanceIndexes.end(), 0);
#if ZEN_PLATFORM_WINDOWS
if (m_Config.UseJobObject)
@@ -175,7 +179,10 @@ Hub::~Hub()
try
{
// Safety call - should normally be properly Shutdown by owner
- Shutdown();
+ if (!m_ShutdownFlag.load())
+ {
+ Shutdown();
+ }
}
catch (const std::exception& e)
{
@@ -196,54 +203,53 @@ Hub::Shutdown()
m_WatchDog = {};
- m_Lock.WithExclusiveLock([this] {
- for (auto& [ModuleId, ActiveInstanceIndex] : m_InstanceLookup)
- {
- std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex];
- {
- StorageServerInstance::ExclusiveLockedPtr Instance(InstanceRaw->LockExclusive(/*Wait*/ true));
+ bool Expected = false;
+ bool WaitForBackgroundWork = m_ShutdownFlag.compare_exchange_strong(Expected, true);
+ if (WaitForBackgroundWork && m_WorkerPool)
+ {
+ m_BackgroundWorkLatch.CountDown();
+ m_BackgroundWorkLatch.Wait();
+ // Shutdown flag is set and all background work is drained, safe to shut down remaining instances
- uint16_t BasePort = Instance.GetBasePort();
- std::string BaseUri; // TODO?
- HubInstanceState OldState = Instance.GetState();
- HubInstanceState NewState = OldState;
- InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri);
+ m_BackgroundWorkLatch.Reset(1);
+ }
- try
- {
- (void)Instance.Deprovision();
- }
- catch (const std::exception& Ex)
- {
- ZEN_WARN("Failed to deprovision instance for module '{}' during hub shutdown: {}", ModuleId, Ex.what());
- }
- // Instance is being destroyed; always report Unprovisioned so callbacks (e.g. Consul) fire.
- NewState = HubInstanceState::Unprovisioned;
- Instance = {};
+ EnumerateModules([&](std::string_view ModuleId, const InstanceInfo& Info) {
+ ZEN_UNUSED(Info);
+ try
+ {
+ const Response DepResp = InternalDeprovision(std::string(ModuleId), [](ActiveInstance& Instance) {
+ ZEN_UNUSED(Instance);
+ return true;
+ });
+ if (DepResp.ResponseCode != EResponseCode::Completed && DepResp.ResponseCode != EResponseCode::Accepted)
+ {
+ ZEN_WARN("Deprovision instance for module '{}' during hub shutdown rejected: {}", ModuleId, DepResp.Message);
}
- InstanceRaw.reset();
}
- m_InstanceLookup.clear();
- m_ActiveInstances.clear();
- m_FreeActiveInstanceIndexes.clear();
+ catch (const std::exception& Ex)
+ {
+ ZEN_WARN("Failed to deprovision instance for module '{}' during hub shutdown: {}", ModuleId, Ex.what());
+ }
});
+
+ if (WaitForBackgroundWork && m_WorkerPool)
+ {
+ m_BackgroundWorkLatch.CountDown();
+ m_BackgroundWorkLatch.Wait();
+ }
}
-bool
-Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, std::string& OutReason)
+Hub::Response
+Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo)
{
+ ZEN_ASSERT(!m_ShutdownFlag.load());
StorageServerInstance::ExclusiveLockedPtr Instance;
- bool IsNewInstance = false;
- uint16_t AllocatedPort = 0;
+ bool IsNewInstance = false;
+ size_t ActiveInstanceIndex = (size_t)-1;
+ HubInstanceState OldState = HubInstanceState::Unprovisioned;
{
RwLock::ExclusiveLockScope _(m_Lock);
- auto RestoreAllocatedPort = MakeGuard([this, ModuleId, &IsNewInstance, &AllocatedPort]() {
- if (IsNewInstance && AllocatedPort != 0 && !m_InstanceLookup.contains(std::string(ModuleId)))
- {
- m_FreePorts.push_back(AllocatedPort);
- AllocatedPort = 0;
- }
- });
if (auto It = m_InstanceLookup.find(std::string(ModuleId)); It == m_InstanceLookup.end())
{
@@ -252,51 +258,53 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, s
{
ZEN_WARN("Cannot provision new storage server instance for module '{}': {}", ModuleId, Reason);
- OutReason = Reason;
-
- return false;
+ return Response{EResponseCode::Rejected, Reason};
}
IsNewInstance = true;
- AllocatedPort = m_FreePorts.front();
- ZEN_ASSERT(AllocatedPort != 0);
- m_FreePorts.pop_front();
+ ActiveInstanceIndex = m_FreeActiveInstanceIndexes.front();
+ m_FreeActiveInstanceIndexes.pop_front();
+ ZEN_ASSERT(m_ActiveInstances.size() > ActiveInstanceIndex);
- auto NewInstance = std::make_unique<StorageServerInstance>(
- m_RunEnvironment,
- StorageServerInstance::Configuration{.BasePort = AllocatedPort,
- .HydrationTempPath = m_HydrationTempPath,
- .HydrationTargetSpecification = m_HydrationTargetSpecification,
- .HttpThreadCount = m_Config.InstanceHttpThreadCount,
- .CoreLimit = m_Config.InstanceCoreLimit,
- .ConfigPath = m_Config.InstanceConfigPath},
- ModuleId);
+ try
+ {
+ auto NewInstance = std::make_unique<StorageServerInstance>(
+ m_RunEnvironment,
+ StorageServerInstance::Configuration{.BasePort = GetInstanceIndexAssignedPort(ActiveInstanceIndex),
+ .HydrationTempPath = m_HydrationTempPath,
+ .HydrationTargetSpecification = m_HydrationTargetSpecification,
+ .HttpThreadCount = m_Config.InstanceHttpThreadCount,
+ .CoreLimit = m_Config.InstanceCoreLimit,
+ .ConfigPath = m_Config.InstanceConfigPath},
+ ModuleId);
#if ZEN_PLATFORM_WINDOWS
- if (m_JobObject.IsValid())
- {
- NewInstance->SetJobObject(&m_JobObject);
- }
+ if (m_JobObject.IsValid())
+ {
+ NewInstance->SetJobObject(&m_JobObject);
+ }
#endif
- Instance = NewInstance->LockExclusive(/*Wait*/ true);
+ Instance = NewInstance->LockExclusive(/*Wait*/ true);
- size_t ActiveInstanceIndex = (size_t)-1;
- if (!m_FreeActiveInstanceIndexes.empty())
- {
- ActiveInstanceIndex = m_FreeActiveInstanceIndexes.back();
- m_FreeActiveInstanceIndexes.pop_back();
- ZEN_ASSERT(m_ActiveInstances.size() > ActiveInstanceIndex);
- m_ActiveInstances[ActiveInstanceIndex] = std::move(NewInstance);
+ m_ActiveInstances[ActiveInstanceIndex].Instance = std::move(NewInstance);
+ m_InstanceLookup.insert_or_assign(std::string(ModuleId), ActiveInstanceIndex);
+ // Set Provisioning while both hub lock and instance lock are held so that any
+ // concurrent Deprovision sees the in-flight state, not Unprovisioned.
+ OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioning);
}
- else
+ catch (const std::exception&)
{
- ActiveInstanceIndex = m_ActiveInstances.size();
- m_ActiveInstances.emplace_back(std::move(NewInstance));
+ Instance = {};
+ m_ActiveInstances[ActiveInstanceIndex].Instance.reset();
+ m_ActiveInstances[ActiveInstanceIndex].State.store(HubInstanceState::Unprovisioned);
+ m_InstanceLookup.erase(std::string(ModuleId));
+ m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
+ throw;
}
- ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1);
- m_InstanceLookup.insert_or_assign(std::string(ModuleId), ActiveInstanceIndex);
+
+ OutInfo.Port = GetInstanceIndexAssignedPort(ActiveInstanceIndex);
ZEN_INFO("Created new storage server instance for module '{}'", ModuleId);
@@ -308,317 +316,623 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, s
}
else
{
- const size_t ActiveInstanceIndex = It->second;
+ ActiveInstanceIndex = It->second;
ZEN_ASSERT(m_ActiveInstances.size() > ActiveInstanceIndex);
- if (m_RecoveringModules.contains(std::string(ModuleId)))
+ HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+
+ std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance;
+ ZEN_ASSERT(InstanceRaw);
+
+ OutInfo.Port = InstanceRaw->GetBasePort();
+
+ switch (CurrentState)
{
- OutReason = fmt::format("Module '{}' is currently recovering from a crash", ModuleId);
- ZEN_WARN("Attempted to provision module '{}' which is currently recovering", ModuleId);
- return false;
+ case HubInstanceState::Provisioning:
+ return Response{EResponseCode::Accepted};
+ case HubInstanceState::Crashed:
+ case HubInstanceState::Unprovisioned:
+ break;
+ case HubInstanceState::Provisioned:
+ return Response{EResponseCode::Completed};
+ case HubInstanceState::Hibernated:
+ _.ReleaseNow();
+ return Wake(std::string(ModuleId));
+ default:
+ return Response{EResponseCode::Rejected,
+ fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))};
}
- std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex];
- Instance = InstanceRaw->LockExclusive(/*Wait*/ true);
- AllocatedPort = InstanceRaw->GetBasePort();
- }
+ Instance = InstanceRaw->LockExclusive(/*Wait*/ true);
- m_ProvisioningModules.emplace(std::string(ModuleId));
+ // Re-validate state after acquiring the instance lock: a concurrent Provision may have
+ // completed between our hub-lock read and LockExclusive, transitioning the state away
+ // from Crashed/Unprovisioned.
+ HubInstanceState ActualState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+ if (ActualState != HubInstanceState::Crashed && ActualState != HubInstanceState::Unprovisioned)
+ {
+ Instance = {};
+ if (ActualState == HubInstanceState::Provisioned)
+ {
+ return Response{EResponseCode::Completed};
+ }
+ if (ActualState == HubInstanceState::Provisioning)
+ {
+ return Response{EResponseCode::Accepted};
+ }
+ return Response{
+ EResponseCode::Rejected,
+ fmt::format("Module '{}' state changed to '{}' before provision could proceed", ModuleId, ToString(ActualState))};
+ }
+ // Set Provisioning while both hub lock and instance lock are held so that any
+ // concurrent Deprovision sees the in-flight state, not Crashed/Unprovisioned.
+ OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioning);
+ }
}
+ // NOTE: done while not holding the hub lock, to avoid blocking other operations.
+ // Both hub-lock paths above set OldState and updated the state to Provisioning before
+ // releasing the hub lock, so concurrent operations already see the in-flight state.
+
ZEN_ASSERT(Instance);
+ ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1);
- uint16_t BasePort = Instance.GetBasePort();
- std::string BaseUri; // TODO?
- HubInstanceState OldState = Instance.GetState();
- HubInstanceState NewState = OldState;
- InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri);
+ NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Provisioning, OutInfo.Port, {});
- auto RemoveProvisioningModule = MakeGuard([&] {
- RwLock::ExclusiveLockScope _(m_Lock);
- m_ProvisioningModules.erase(std::string(ModuleId));
- if (IsNewInstance && AllocatedPort != 0 && !m_InstanceLookup.contains(std::string(ModuleId)))
+ if (m_WorkerPool)
+ {
+ m_BackgroundWorkLatch.AddCount(1);
+ try
{
- m_FreePorts.push_back(AllocatedPort);
- AllocatedPort = 0;
+ m_WorkerPool->ScheduleWork(
+ [this,
+ ModuleId = std::string(ModuleId),
+ ActiveInstanceIndex,
+ OldState,
+ IsNewInstance,
+ Instance = std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance))]() {
+ auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); });
+ try
+ {
+ CompleteProvision(*Instance, ActiveInstanceIndex, OldState, IsNewInstance);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Failed async provision of module '{}': {}", ModuleId, Ex.what());
+ }
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
}
- });
-
- // NOTE: this is done while not holding the hub lock, as provisioning may take time
- // and we don't want to block other operations. We track which modules are being
- // provisioned using m_ProvisioningModules, and reject attempts to provision/deprovision
- // those modules while in this state.
+ catch (const std::exception& DispatchEx)
+ {
+ // Dispatch failed: undo latch increment and roll back state.
+ ZEN_ERROR("Failed async dispatch provision of module '{}': {}", ModuleId, DispatchEx.what());
+ m_BackgroundWorkLatch.CountDown();
- try
- {
- (void)Instance.Provision(); // false = already in target state (idempotent); not an error
- NewState = Instance.GetState();
- Instance = {};
- }
- catch (const std::exception& Ex)
- {
- ZEN_ERROR("Failed to provision storage server instance for module '{}': {}", ModuleId, Ex.what());
- NewState = Instance.GetState();
- Instance = {};
+ // dispatch failed before the lambda ran, so ActiveInstance::State is still Provisioning
+ NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, OldState, OutInfo.Port, {});
- if (IsNewInstance)
- {
- // Clean up failed instance provisioning
std::unique_ptr<StorageServerInstance> DestroyInstance;
{
- RwLock::ExclusiveLockScope _(m_Lock);
- if (auto It = m_InstanceLookup.find(std::string(ModuleId)); It != m_InstanceLookup.end())
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex);
+ if (IsNewInstance)
{
- const size_t ActiveInstanceIndex = It->second;
- ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex]);
- ZEN_ASSERT(DestroyInstance);
- ZEN_ASSERT(!m_ActiveInstances[ActiveInstanceIndex]);
+ DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance);
m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
- m_InstanceLookup.erase(It);
+ m_InstanceLookup.erase(std::string(ModuleId));
}
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState);
}
- try
- {
- DestroyInstance.reset();
- NewState = HubInstanceState::Unprovisioned;
- }
- catch (const std::exception& DestroyEx)
- {
- ZEN_ERROR("Failed to destroy instance for failed provision module '{}': {}", ModuleId, DestroyEx.what());
- }
+ DestroyInstance.reset();
+
+ throw;
}
- throw;
+ }
+ else
+ {
+ CompleteProvision(Instance, ActiveInstanceIndex, OldState, IsNewInstance);
}
- OutReason.clear();
- OutInfo.Port = AllocatedPort;
- // TODO: base URI? Would need to know what host name / IP to use
-
- return true;
+ return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed};
}
-bool
-Hub::Deprovision(const std::string& ModuleId, std::string& OutReason)
+void
+Hub::CompleteProvision(StorageServerInstance::ExclusiveLockedPtr& Instance,
+ size_t ActiveInstanceIndex,
+ HubInstanceState OldState,
+ bool IsNewInstance)
{
- std::unique_ptr<StorageServerInstance> RawInstance;
- StorageServerInstance::ExclusiveLockedPtr Instance;
+ const std::string ModuleId(Instance.GetModuleId());
+ const uint16_t Port = Instance.GetBasePort();
+ std::string BaseUri; // TODO?
+ if (m_ShutdownFlag.load() == false)
{
- RwLock::ExclusiveLockScope _(m_Lock);
-
- if (m_ProvisioningModules.contains(ModuleId))
+ try
{
- OutReason = fmt::format("Module '{}' is currently being provisioned", ModuleId);
-
- ZEN_WARN("Attempted to deprovision module '{}' which is currently being provisioned", ModuleId);
-
- return false;
+ switch (OldState)
+ {
+ case HubInstanceState::Crashed:
+ case HubInstanceState::Unprovisioned:
+ Instance.Provision();
+ break;
+ case HubInstanceState::Hibernated:
+ ZEN_ASSERT(false); // unreachable: Provision redirects Hibernated->Wake before setting Provisioning
+ break;
+ default:
+ ZEN_ASSERT(false);
+ }
+ UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioned);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, HubInstanceState::Provisioned, Port, BaseUri);
+ Instance = {};
+ return;
}
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Failed to provision storage server instance for module '{}': {}", ModuleId, Ex.what());
+ // Instance will be notified and removed below.
+ }
+ }
- if (m_RecoveringModules.contains(ModuleId))
+ if (IsNewInstance)
+ {
+ NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, HubInstanceState::Unprovisioned, Port, {});
+ Instance = {};
+ std::unique_ptr<StorageServerInstance> DestroyInstance;
{
- OutReason = fmt::format("Module '{}' is currently recovering from a crash", ModuleId);
- ZEN_WARN("Attempted to deprovision module '{}' which is currently recovering", ModuleId);
- return false;
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex);
+ DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance);
+ m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
+ m_InstanceLookup.erase(std::string(ModuleId));
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned);
}
+ DestroyInstance.reset();
+ }
+ else
+ {
+ // OldState = Crashed: restore without cleanup (instance stays in lookup)
+ NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, OldState, Port, {});
+ UpdateInstanceState(Instance, ActiveInstanceIndex, OldState);
+ Instance = {};
+ }
+}
+
+Hub::Response
+Hub::Deprovision(const std::string& ModuleId)
+{
+ ZEN_ASSERT(!m_ShutdownFlag.load());
+ return InternalDeprovision(ModuleId, [](ActiveInstance& Instance) {
+ ZEN_UNUSED(Instance);
+ return true;
+ });
+}
+
+Hub::Response
+Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveInstance& Instance)>&& DeprovisionGate)
+{
+ StorageServerInstance::ExclusiveLockedPtr Instance;
+ size_t ActiveInstanceIndex = (size_t)-1;
+ {
+ RwLock::ExclusiveLockScope _(m_Lock);
if (auto It = m_InstanceLookup.find(ModuleId); It == m_InstanceLookup.end())
{
ZEN_WARN("Attempted to deprovision non-existent module '{}'", ModuleId);
- OutReason.clear(); // empty = not found (-> 404)
- return false;
+ return Response{EResponseCode::NotFound};
}
else
{
- const size_t ActiveInstanceIndex = It->second;
+ ActiveInstanceIndex = It->second;
ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- RawInstance = std::move(m_ActiveInstances[ActiveInstanceIndex]);
+
+ if (!DeprovisionGate(m_ActiveInstances[ActiveInstanceIndex]))
+ {
+ return Response{EResponseCode::Rejected, fmt::format("Module '{}' deprovision denied by gate", ModuleId)};
+ }
+
+ HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+
+ switch (CurrentState)
+ {
+ case HubInstanceState::Deprovisioning:
+ return Response{EResponseCode::Accepted};
+ case HubInstanceState::Crashed:
+ case HubInstanceState::Hibernated:
+ case HubInstanceState::Provisioned:
+ break;
+ case HubInstanceState::Unprovisioned:
+ return Response{EResponseCode::Completed};
+ case HubInstanceState::Recovering:
+ // Recovering is watchdog-managed; reject to avoid interfering with the in-progress
+ // recovery. The watchdog will transition to Provisioned or Unprovisioned, after
+ // which deprovision can be retried.
+ return Response{EResponseCode::Rejected, fmt::format("Module '{}' is currently recovering from a crash", ModuleId)};
+ default:
+ return Response{EResponseCode::Rejected,
+ fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))};
+ }
+
+ std::unique_ptr<StorageServerInstance>& RawInstance = m_ActiveInstances[ActiveInstanceIndex].Instance;
ZEN_ASSERT(RawInstance != nullptr);
- m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
- m_InstanceLookup.erase(It);
- m_DeprovisioningModules.emplace(ModuleId);
Instance = RawInstance->LockExclusive(/*Wait*/ true);
}
}
- ZEN_ASSERT(RawInstance);
+ // NOTE: done while not holding the hub lock, to avoid blocking other operations.
+ // The exclusive instance lock acquired above prevents concurrent LockExclusive callers
+ // from modifying instance state. The state transition to Deprovisioning happens below,
+ // after the hub lock is released.
+
ZEN_ASSERT(Instance);
+ ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1);
- uint16_t BasePort = Instance.GetBasePort();
- std::string BaseUri; // TODO?
- HubInstanceState OldState = Instance.GetState();
- HubInstanceState NewState = OldState;
- InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri);
+ HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Deprovisioning);
+ const uint16_t Port = Instance.GetBasePort();
+ NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Deprovisioning, Port, {});
- // The module is deprovisioned outside the hub lock to avoid blocking other operations.
- //
- // To ensure that no new provisioning can occur while we're deprovisioning,
- // we add the module ID to m_DeprovisioningModules and remove it once
- // deprovisioning is complete.
+ if (m_WorkerPool)
+ {
+ std::shared_ptr<StorageServerInstance::ExclusiveLockedPtr> SharedInstancePtr =
+ std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance));
- auto _ = MakeGuard([&] {
+ m_BackgroundWorkLatch.AddCount(1);
+ try
{
- RwLock::ExclusiveLockScope _(m_Lock);
- m_DeprovisioningModules.erase(ModuleId);
- m_FreePorts.push_back(BasePort);
+ m_WorkerPool->ScheduleWork(
+ [this, ModuleId = std::string(ModuleId), ActiveInstanceIndex, Instance = std::move(SharedInstancePtr)]() mutable {
+ auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); });
+ try
+ {
+ CompleteDeprovision(*Instance, ActiveInstanceIndex);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Failed async deprovision of module '{}': {}", ModuleId, Ex.what());
+ }
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
}
- });
+ catch (const std::exception& DispatchEx)
+ {
+ // Dispatch failed: undo latch increment and roll back state.
+ ZEN_ERROR("Failed async dispatch deprovision of module '{}': {}", ModuleId, DispatchEx.what());
+ m_BackgroundWorkLatch.CountDown();
+
+ NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, OldState, Port, {});
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex);
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState);
+ }
+
+ throw;
+ }
+ }
+ else
+ {
+ CompleteDeprovision(Instance, ActiveInstanceIndex);
+ }
+
+ return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed};
+}
+
+void
+Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex)
+{
+ const std::string ModuleId(Instance.GetModuleId());
+ const uint16_t Port = Instance.GetBasePort();
try
{
- (void)Instance.Deprovision();
+ Instance.Deprovision();
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Failed to deprovision storage server instance for module '{}': {}", ModuleId, Ex.what());
- // The module is already removed from m_InstanceLookup; treat as gone so callbacks fire correctly.
- NewState = HubInstanceState::Unprovisioned;
+ // Effectively unreachable: Shutdown() never throws and Dehydrate() failures are swallowed
+ // by DeprovisionLocked. Kept as a safety net; if somehow reached, transition to Crashed
+ // so the watchdog can attempt recovery.
Instance = {};
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Crashed);
+ }
+ NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, HubInstanceState::Crashed, Port, {});
throw;
}
- NewState = Instance.GetState();
+
+ NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, HubInstanceState::Unprovisioned, Port, {});
Instance = {};
- OutReason.clear();
- return true;
+ std::unique_ptr<StorageServerInstance> DeleteInstance;
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ auto It = m_InstanceLookup.find(std::string(ModuleId));
+ ZEN_ASSERT_SLOW(It != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(It->second == ActiveInstanceIndex);
+ DeleteInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance);
+ m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
+ m_InstanceLookup.erase(It);
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned);
+ }
+ DeleteInstance.reset();
}
-bool
-Hub::Hibernate(const std::string& ModuleId, std::string& OutReason)
+Hub::Response
+Hub::Hibernate(const std::string& ModuleId)
{
+ ZEN_ASSERT(!m_ShutdownFlag.load());
+
StorageServerInstance::ExclusiveLockedPtr Instance;
+ size_t ActiveInstanceIndex = (size_t)-1;
{
RwLock::ExclusiveLockScope _(m_Lock);
- if (m_ProvisioningModules.contains(ModuleId) || m_DeprovisioningModules.contains(ModuleId) ||
- m_HibernatingModules.contains(ModuleId) || m_WakingModules.contains(ModuleId) || m_RecoveringModules.contains(ModuleId))
- {
- OutReason = fmt::format("Module '{}' is currently changing state", ModuleId);
- return false;
- }
-
auto It = m_InstanceLookup.find(ModuleId);
if (It == m_InstanceLookup.end())
{
- OutReason.clear(); // empty = not found (-> 404)
- return false;
+ return Response{EResponseCode::NotFound};
}
- const size_t ActiveInstanceIndex = It->second;
+ ActiveInstanceIndex = It->second;
ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- Instance = m_ActiveInstances[ActiveInstanceIndex]->LockExclusive(/*Wait*/ true);
- m_HibernatingModules.emplace(ModuleId);
+
+ HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+
+ switch (CurrentState)
+ {
+ case HubInstanceState::Hibernating:
+ return Response{EResponseCode::Accepted};
+ case HubInstanceState::Provisioned:
+ break;
+ case HubInstanceState::Hibernated:
+ return Response{EResponseCode::Completed};
+ default:
+ return Response{EResponseCode::Rejected,
+ fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))};
+ }
+
+ std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance;
+ ZEN_ASSERT(InstanceRaw);
+
+ Instance = InstanceRaw->LockExclusive(/*Wait*/ true);
+
+ // Re-validate state after acquiring the instance lock: WatchDog may have transitioned
+ // Provisioned -> Crashed between our hub-lock read and the LockExclusive call above.
+
+ HubInstanceState ActualState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+ if (ActualState != HubInstanceState::Provisioned)
+ {
+ Instance = {};
+ return Response{
+ EResponseCode::Rejected,
+ fmt::format("Module '{}' state changed to '{}' before hibernate could proceed", ModuleId, ToString(ActualState))};
+ }
}
+ // NOTE: done while not holding the hub lock, to avoid blocking other operations.
+ // Any concurrent caller that acquired the hub lock and saw Provisioned will now block on
+ // LockExclusive(Wait=true); by the time it acquires the lock, UpdateInstanceState below
+ // will have already changed the state and the re-validate above will reject it.
+
ZEN_ASSERT(Instance);
- uint16_t BasePort = Instance.GetBasePort();
- std::string BaseUri; // TODO?
- HubInstanceState OldState = Instance.GetState();
- HubInstanceState NewState = OldState;
- InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri);
+ ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1);
- auto RemoveHibernatingModule = MakeGuard([&] {
- RwLock::ExclusiveLockScope _(m_Lock);
- m_HibernatingModules.erase(ModuleId);
- });
+ HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Hibernating);
+ const uint16_t Port = Instance.GetBasePort();
+ NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Hibernating, Port, {});
- // NOTE: done while not holding the hub lock, as hibernation may take time.
- // m_HibernatingModules tracks which modules are being hibernated, blocking
- // concurrent Hibernate/Wake/Provision/Deprovision attempts on the same module.
- try
+ if (m_WorkerPool)
{
- if (!Instance.Hibernate())
+ m_BackgroundWorkLatch.AddCount(1);
+ try
{
- OutReason = fmt::format("Module '{}' cannot be hibernated from state '{}'", ModuleId, ToString(Instance.GetState()));
- NewState = Instance.GetState();
- return false;
+ m_WorkerPool->ScheduleWork(
+ [this,
+ ModuleId = std::string(ModuleId),
+ ActiveInstanceIndex,
+ OldState,
+ Instance = std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance))]() {
+ auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); });
+ try
+ {
+ CompleteHibernate(*Instance, ActiveInstanceIndex, OldState);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Failed async hibernate of module '{}': {}", ModuleId, Ex.what());
+ }
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
}
- NewState = Instance.GetState();
+ catch (const std::exception& DispatchEx)
+ {
+ // Dispatch failed: undo latch increment and roll back state.
+ ZEN_ERROR("Failed async dispatch hibernate of module '{}': {}", ModuleId, DispatchEx.what());
+ m_BackgroundWorkLatch.CountDown();
+
+ NotifyStateUpdate(ModuleId, HubInstanceState::Hibernating, OldState, Port, {});
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex);
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState);
+ }
+
+ throw;
+ }
+ }
+ else
+ {
+ CompleteHibernate(Instance, ActiveInstanceIndex, OldState);
+ }
+
+ return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed};
+}
+
+void
+Hub::CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState)
+{
+ const std::string ModuleId(Instance.GetModuleId());
+ const uint16_t Port = Instance.GetBasePort();
+
+ try
+ {
+ Instance.Hibernate();
+ UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Hibernated);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Hibernating, HubInstanceState::Hibernated, Port, {});
Instance = {};
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Failed to hibernate storage server instance for module '{}': {}", ModuleId, Ex.what());
- NewState = Instance.GetState();
+ UpdateInstanceState(Instance, ActiveInstanceIndex, OldState);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Hibernating, OldState, Port, {});
Instance = {};
throw;
}
-
- OutReason.clear();
-
- return true;
}
-bool
-Hub::Wake(const std::string& ModuleId, std::string& OutReason)
+Hub::Response
+Hub::Wake(const std::string& ModuleId)
{
+ ZEN_ASSERT(!m_ShutdownFlag.load());
+
StorageServerInstance::ExclusiveLockedPtr Instance;
+ size_t ActiveInstanceIndex = (size_t)-1;
{
RwLock::ExclusiveLockScope _(m_Lock);
- if (m_ProvisioningModules.contains(ModuleId) || m_DeprovisioningModules.contains(ModuleId) ||
- m_HibernatingModules.contains(ModuleId) || m_WakingModules.contains(ModuleId) || m_RecoveringModules.contains(ModuleId))
- {
- OutReason = fmt::format("Module '{}' is currently changing state", ModuleId);
- return false;
- }
-
auto It = m_InstanceLookup.find(ModuleId);
if (It == m_InstanceLookup.end())
{
- OutReason.clear(); // empty = not found (-> 404)
- return false;
+ return Response{EResponseCode::NotFound};
}
- const size_t ActiveInstanceIndex = It->second;
+ ActiveInstanceIndex = It->second;
ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- Instance = m_ActiveInstances[ActiveInstanceIndex]->LockExclusive(/*Wait*/ true);
- m_WakingModules.emplace(ModuleId);
+
+ HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+
+ switch (CurrentState)
+ {
+ case HubInstanceState::Waking:
+ return Response{EResponseCode::Accepted};
+ case HubInstanceState::Hibernated:
+ break;
+ case HubInstanceState::Provisioned:
+ return Response{EResponseCode::Completed};
+ default:
+ return Response{EResponseCode::Rejected,
+ fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))};
+ }
+
+ std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance;
+ ZEN_ASSERT(InstanceRaw);
+
+ Instance = InstanceRaw->LockExclusive(/*Wait*/ true);
+
+ // Re-validate state after acquiring the instance lock: a concurrent Wake or Deprovision may
+ // have transitioned Hibernated -> something else between our hub-lock read and LockExclusive.
+ HubInstanceState ActualState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+ if (ActualState != HubInstanceState::Hibernated)
+ {
+ Instance = {};
+ return Response{EResponseCode::Rejected,
+ fmt::format("Module '{}' state changed to '{}' before wake could proceed", ModuleId, ToString(ActualState))};
+ }
}
+ // NOTE: done while not holding the hub lock, to avoid blocking other operations.
+ // Any concurrent caller that acquired the hub lock and saw Hibernated will now block on
+ // LockExclusive(Wait=true); by the time it acquires the lock, UpdateInstanceState below
+ // will have already changed the state and the re-validate above will reject it.
+
ZEN_ASSERT(Instance);
+ ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1);
- uint16_t BasePort = Instance.GetBasePort();
- std::string BaseUri; // TODO?
- HubInstanceState OldState = Instance.GetState();
- HubInstanceState NewState = OldState;
- InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri);
+ HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Waking);
+ const uint16_t Port = Instance.GetBasePort();
+ NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Waking, Port, {});
- auto RemoveWakingModule = MakeGuard([&] {
- RwLock::ExclusiveLockScope _(m_Lock);
- m_WakingModules.erase(ModuleId);
- });
-
- // NOTE: done while not holding the hub lock, as waking may take time.
- // m_WakingModules tracks which modules are being woken, blocking
- // concurrent Hibernate/Wake/Provision/Deprovision attempts on the same module.
- try
+ if (m_WorkerPool)
{
- if (!Instance.Wake())
+ m_BackgroundWorkLatch.AddCount(1);
+ try
{
- OutReason = fmt::format("Module '{}' cannot be woken from state '{}'", ModuleId, ToString(Instance.GetState()));
- NewState = Instance.GetState();
- return false;
+ m_WorkerPool->ScheduleWork(
+ [this,
+ ModuleId = std::string(ModuleId),
+ ActiveInstanceIndex,
+ OldState,
+ Instance = std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance))]() {
+ auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); });
+ try
+ {
+ CompleteWake(*Instance, ActiveInstanceIndex, OldState);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Failed async wake of module '{}': {}", ModuleId, Ex.what());
+ }
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ catch (const std::exception& DispatchEx)
+ {
+ // Dispatch failed: undo latch increment and roll back state.
+ ZEN_ERROR("Failed async dispatch wake of module '{}': {}", ModuleId, DispatchEx.what());
+ m_BackgroundWorkLatch.CountDown();
+
+ NotifyStateUpdate(ModuleId, HubInstanceState::Waking, OldState, Port, {});
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex);
+ UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState);
+ }
+
+ throw;
}
- NewState = Instance.GetState();
+ }
+ else
+ {
+ CompleteWake(Instance, ActiveInstanceIndex, OldState);
+ }
+
+ return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed};
+}
+
+void
+Hub::CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState)
+{
+ const std::string ModuleId(Instance.GetModuleId());
+ const uint16_t Port = Instance.GetBasePort();
+
+ try
+ {
+ Instance.Wake();
+ UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioned);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Waking, HubInstanceState::Provisioned, Port, {});
Instance = {};
}
catch (const std::exception& Ex)
{
ZEN_ERROR("Failed to wake storage server instance for module '{}': {}", ModuleId, Ex.what());
- NewState = Instance.GetState();
+ UpdateInstanceState(Instance, ActiveInstanceIndex, OldState);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Waking, OldState, Port, {});
Instance = {};
throw;
}
-
- OutReason.clear();
-
- return true;
}
bool
@@ -631,10 +945,10 @@ Hub::Find(std::string_view ModuleId, InstanceInfo* OutInstanceInfo)
{
const size_t ActiveInstanceIndex = It->second;
ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex];
+ const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex].Instance;
ZEN_ASSERT(Instance);
InstanceInfo Info{
- Instance->GetState(),
+ m_ActiveInstances[ActiveInstanceIndex].State.load(),
std::chrono::system_clock::now() // TODO
};
Instance->GetProcessMetrics(Info.Metrics);
@@ -655,10 +969,10 @@ Hub::EnumerateModules(std::function<void(std::string_view ModuleId, const Instan
RwLock::SharedLockScope _(m_Lock);
for (auto& [ModuleId, ActiveInstanceIndex] : m_InstanceLookup)
{
- const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex];
+ const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex].Instance;
ZEN_ASSERT(Instance);
InstanceInfo Info{
- Instance->GetState(),
+ m_ActiveInstances[ActiveInstanceIndex].State.load(),
std::chrono::system_clock::now() // TODO
};
Instance->GetProcessMetrics(Info.Metrics);
@@ -703,144 +1017,316 @@ Hub::UpdateStats()
bool
Hub::CanProvisionInstance(std::string_view ModuleId, std::string& OutReason)
{
- if (m_DeprovisioningModules.contains(std::string(ModuleId)))
- {
- OutReason = fmt::format("module '{}' is currently being deprovisioned", ModuleId);
-
- return false;
- }
-
- if (m_ProvisioningModules.contains(std::string(ModuleId)))
- {
- OutReason = fmt::format("module '{}' is currently being provisioned", ModuleId);
-
- return false;
- }
-
- if (gsl::narrow_cast<int>(m_InstanceLookup.size()) >= m_Config.InstanceLimit)
+ ZEN_UNUSED(ModuleId);  // after this change the module id no longer takes part in the decision
+ if (m_FreeActiveInstanceIndexes.empty())  // the only remaining check: reject (with the "instance limit" reason) when no free instance slot is left
{
OutReason = fmt::format("instance limit ({}) exceeded", m_Config.InstanceLimit);
return false;
}
- // Since deprovisioning happens outside the lock and we don't return the port until the instance is fully shut down, we might be below
- // the instance count limit but with no free ports available
- if (m_FreePorts.empty())
- {
- OutReason = fmt::format("no free ports available, deprovisioning of instances might be in flight ({})",
- m_Config.InstanceLimit - m_InstanceLookup.size());
-
- return false;
- }
-
// TODO: handle additional resource metrics
return true;
}
+uint16_t  // Port for an instance slot: the configured base port number plus the slot index.
+Hub::GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const
+{
+ return gsl::narrow<uint16_t>(m_Config.BasePortNumber + ActiveInstanceIndex);  // gsl::narrow (not narrow_cast) is used for the conversion to uint16_t
+}
+
+HubInstanceState  // Stores NewState for the slot and returns the state that was replaced.
+Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState)  // NOTE(review): the "Locked" suffix suggests the caller already holds a lock - confirm which one
+{
+ ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
+ ZEN_ASSERT_SLOW([](HubInstanceState From, HubInstanceState To) {  // immediately-invoked lambda: true when From -> To is an allowed transition
+ switch (From)
+ {
+ case HubInstanceState::Unprovisioned:
+ return To == HubInstanceState::Provisioning;
+ case HubInstanceState::Provisioned:
+ return To == HubInstanceState::Hibernating || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Crashed;
+ case HubInstanceState::Hibernated:
+ return To == HubInstanceState::Waking || To == HubInstanceState::Deprovisioning;
+ case HubInstanceState::Crashed:
+ return To == HubInstanceState::Provisioning || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Recovering;
+ case HubInstanceState::Provisioning:
+ return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned || To == HubInstanceState::Crashed;
+ case HubInstanceState::Hibernating:
+ return To == HubInstanceState::Hibernated || To == HubInstanceState::Provisioned;
+ case HubInstanceState::Waking:
+ return To == HubInstanceState::Provisioned || To == HubInstanceState::Hibernated;
+ case HubInstanceState::Deprovisioning:
+ return To == HubInstanceState::Unprovisioned || To == HubInstanceState::Provisioned || To == HubInstanceState::Hibernated ||
+ To == HubInstanceState::Crashed;
+ case HubInstanceState::Recovering:
+ return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned;
+ }
+ return false;  // reached only when From matches none of the cases above
+ }(m_ActiveInstances[ActiveInstanceIndex].State.load(), NewState));
+ m_ActiveInstances[ActiveInstanceIndex].LastKnownActivitySum.store(0);  // every state change resets the activity baseline...
+ m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.store(std::chrono::system_clock::now());  // ...and restarts the inactivity clock from "now"
+ return m_ActiveInstances[ActiveInstanceIndex].State.exchange(NewState);  // exchange() writes NewState and yields the previous value
+}
+
void
Hub::AttemptRecoverInstance(std::string_view ModuleId)
{
StorageServerInstance::ExclusiveLockedPtr Instance;
- StorageServerInstance* RawInstance = nullptr;
+ size_t ActiveInstanceIndex = (size_t)-1;  // sentinel; replaced once the module lookup below succeeds
{
RwLock::ExclusiveLockScope _(m_Lock);
- if (m_RecoveringModules.contains(std::string(ModuleId)) || m_ProvisioningModules.contains(std::string(ModuleId)) ||
- m_DeprovisioningModules.contains(std::string(ModuleId)) || m_HibernatingModules.contains(std::string(ModuleId)) ||
- m_WakingModules.contains(std::string(ModuleId)))
+ auto It = m_InstanceLookup.find(std::string(ModuleId));
+ if (It == m_InstanceLookup.end())  // unknown module: nothing to recover
{
return;
}
- auto It = m_InstanceLookup.find(std::string(ModuleId));
- if (It == m_InstanceLookup.end())
+ ActiveInstanceIndex = It->second;
+ ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
+ std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance;
+ ZEN_ASSERT(InstanceRaw);
+ HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load();
+ if (CurrentState != HubInstanceState::Crashed)  // recovery is only started from the Crashed state
{
return;
}
- const size_t ActiveInstanceIndex = It->second;
- ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- RawInstance = m_ActiveInstances[ActiveInstanceIndex].get();
- Instance = RawInstance->LockExclusive(/*Wait*/ true);
- m_RecoveringModules.emplace(std::string(ModuleId));
+ Instance = m_ActiveInstances[ActiveInstanceIndex].Instance->LockExclusive(/*Wait*/ false);  // the old code passed Wait = true here; the new code passes false and handles a null result
+ if (!Instance)
+ {
+ // Instance lock is held by another operation; the watchdog will retry on the next cycle if the state is still Crashed.
+ return;
+ }
+
+ ZEN_ASSERT(!Instance.IsRunning());
+
+ (void)UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Recovering);  // Crashed -> Recovering, done while m_Lock is still held
}
ZEN_ASSERT(Instance);
+ ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1);
+ ZEN_ASSERT_SLOW(m_ActiveInstances[ActiveInstanceIndex].State.load() == HubInstanceState::Recovering);
- uint16_t BasePort = Instance.GetBasePort();
- std::string BaseUri; // TODO?
- HubInstanceState OldState = Instance.GetState();
- HubInstanceState NewState = OldState;
- InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri);
-
- auto RemoveRecoveringModule = MakeGuard([&] {
- RwLock::ExclusiveLockScope _(m_Lock);
- m_RecoveringModules.erase(std::string(ModuleId));
- });
+ NotifyStateUpdate(ModuleId, HubInstanceState::Crashed, HubInstanceState::Recovering, Instance.GetBasePort(), /*BaseUri*/ {});  // sent after the m_Lock scope above has ended
- // Re-validate: state may have changed between releasing shared lock and acquiring exclusive lock
- if (Instance.GetState() != HubInstanceState::Provisioned || Instance.IsRunning())
+ // Dehydrate before trying to recover so any salvageable data is preserved.
+ try
{
- return;
+ Instance.Deprovision();
}
-
- if (Instance.RecoverFromCrash())
+ catch (const std::exception& Ex)  // Deprovision() failed: give up on recovery and release the slot
{
- NewState = Instance.GetState();
+ ZEN_ERROR("Failed to deprovision instance for module '{}' during crash recovery cleanup: {}", ModuleId, Ex.what());
+ NotifyStateUpdate(ModuleId, HubInstanceState::Recovering, HubInstanceState::Unprovisioned, Instance.GetBasePort(), /*BaseUri*/ {});  // NOTE(review): announced before the stored state is set to Unprovisioned further down
Instance = {};
+ std::unique_ptr<StorageServerInstance> DestroyInstance;
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ auto It = m_InstanceLookup.find(std::string(ModuleId));
+ ZEN_ASSERT_SLOW(It != m_InstanceLookup.end());  // NOTE(review): if ZEN_ASSERT_SLOW is compiled out in some builds, a missing entry would reach erase(It) unchecked - confirm
+ ZEN_ASSERT_SLOW(ActiveInstanceIndex == It->second);
+
+ DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance);  // take ownership out of the slot
+ m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);  // slot index becomes available again
+ m_InstanceLookup.erase(It);
+ (void)UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned);  // overload taking the hub lock scope rather than the instance lock
+ }
+ DestroyInstance.reset();  // the instance object is destroyed only after the HubLock scope has closed
return;
}
- // Restart threw but data dir is intact - run Dehydrate via Deprovision before cleanup.
try
{
- (void)Instance.Deprovision();
+ Instance.Provision();  // second stage: provision again after the Deprovision() call above succeeded
+ UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioned);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Recovering, HubInstanceState::Provisioned, Instance.GetBasePort(), /*BaseUri*/ {});
+ Instance = {};
}
catch (const std::exception& Ex)
{
- ZEN_ERROR("Failed to deprovision instance for module '{}' during crash recovery cleanup: {}", ModuleId, Ex.what());
+ ZEN_ERROR("Failed to reprovision instance for module '{}' during crash recovery reprovision: {}", ModuleId, Ex.what());
+ NotifyStateUpdate(ModuleId, HubInstanceState::Recovering, HubInstanceState::Unprovisioned, Instance.GetBasePort(), /*BaseUri*/ {});
+ Instance = {};
+ std::unique_ptr<StorageServerInstance> DestroyInstance;  // from here on: the same slot-release sequence as in the Deprovision() failure path above
+ {
+ RwLock::ExclusiveLockScope HubLock(m_Lock);
+ auto It = m_InstanceLookup.find(std::string(ModuleId));
+ ZEN_ASSERT_SLOW(It != m_InstanceLookup.end());
+ ZEN_ASSERT_SLOW(ActiveInstanceIndex == It->second);
+
+ DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance);
+ m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
+ m_InstanceLookup.erase(It);
+ (void)UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned);
+ }
+ DestroyInstance.reset();
+ return;
}
- NewState = Instance.GetState();
- Instance = {};
+}
- std::unique_ptr<StorageServerInstance> DestroyInstance;
+bool  // Returns false only when a Provisioned instance is found not running (it is then marked Crashed); every other branch returns true.
+Hub::CheckInstanceStatus(HttpClient& ActivityCheckClient,
+ StorageServerInstance::SharedLockedPtr&& LockedInstance,
+ size_t ActiveInstanceIndex)
+{
+ HubInstanceState InstanceState = m_ActiveInstances[ActiveInstanceIndex].State.load();  // snapshot; later steps compare against it to detect concurrent state changes
+ if (LockedInstance.IsRunning())
{
- RwLock::ExclusiveLockScope _(m_Lock);
- if (auto It = m_InstanceLookup.find(std::string(ModuleId)); It != m_InstanceLookup.end())
+ LockedInstance.UpdateMetrics();
+ if (InstanceState == HubInstanceState::Provisioned)  // idle detection applies to running instances in the Provisioned state only
{
- const size_t ActiveInstanceIndex = It->second;
- ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size());
- DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex]);
- m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex);
- m_InstanceLookup.erase(It);
+ const std::string ModuleId(LockedInstance.GetModuleId());
+
+ const uint16_t Port = LockedInstance.GetBasePort();
+ const uint64_t PreviousActivitySum = m_ActiveInstances[ActiveInstanceIndex].LastKnownActivitySum.load();
+ const std::chrono::system_clock::time_point LastActivityTime = m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.load();
+
+ const std::chrono::system_clock::time_point Now = std::chrono::system_clock::now();
+
+ // We do the activity check without holding a lock to the instance
+ LockedInstance = {};
+
+ uint64_t ActivitySum = PreviousActivitySum;  // stays equal to the previous value unless the HTTP query below delivers a new one
+
+ std::chrono::system_clock::time_point NextCheckTime =
+ LastActivityTime + m_Config.WatchDog.ProvisionedInactivityTimeout - m_Config.WatchDog.InactivityCheckMargin;  // querying starts InactivityCheckMargin before the timeout would expire
+ if (Now >= NextCheckTime)
+ {
+ ActivityCheckClient.SetBaseUri(fmt::format("http://localhost:{}", Port));  // the shared client is re-pointed at this instance's port
+ HttpClient::Response Result =
+ ActivityCheckClient.Get("/stats/activity_counters", HttpClient::Accept(HttpContentType::kCbObject));
+ if (Result.IsSuccess())  // NOTE(review): on failure ActivitySum keeps the previous value, so a failed request is handled like "no new activity"
+ {
+ CbObject Response = Result.AsObject();
+ if (Response)
+ {
+ ActivitySum = Response["sum"].AsUInt64();
+ }
+ }
+ }
+
+ if (ActivitySum != PreviousActivitySum)  // counter moved: record the new baseline
+ {
+ m_Lock.WithSharedLock([this, InstanceState, PreviousActivitySum, &LastActivityTime, ActivitySum, Now, ModuleId]() {
+ if (auto It = m_InstanceLookup.find(ModuleId); It != m_InstanceLookup.end())
+ {
+ const uint64_t ActiveInstanceIndex = It->second;  // looked up again by module id; shadows the function parameter of the same name
+ ActiveInstance& Instance = m_ActiveInstances[ActiveInstanceIndex];
+
+ HubInstanceState CurrentState = Instance.State.load();
+ if (CurrentState == InstanceState)
+ {
+ if (Instance.LastActivityTime.load() == LastActivityTime &&
+ Instance.LastKnownActivitySum.load() == PreviousActivitySum)  // write only if both fields still hold the values read at the start of this check
+ {
+ Instance.LastActivityTime.store(Now);
+ Instance.LastKnownActivitySum.store(ActivitySum);
+ }
+ }
+ }
+ });
+ }
+ else if (LastActivityTime + m_Config.WatchDog.ProvisionedInactivityTimeout < Now)  // counter unchanged and the inactivity timeout has elapsed
+ {
+ ZEN_INFO("Instance {} has not been active for {}, attempting deprovision...",
+ ModuleId,
+ NiceTimeSpanMs(std::chrono::duration_cast<std::chrono::milliseconds>(Now - LastActivityTime).count()));
+ (void)InternalDeprovision(
+ ModuleId,
+ [ModuleId, InstanceState, LastActivityTime, PreviousActivitySum](ActiveInstance& Instance) -> bool {  // predicate: true only if state and activity fields are unchanged since the snapshot
+ HubInstanceState CurrentState = Instance.State.load();
+ if (CurrentState != InstanceState)
+ {
+ ZEN_INFO("Instance {} idle deprovision aborted - state changed to {}", ModuleId, ToString(CurrentState));
+ return false;
+ }
+ if (Instance.LastActivityTime.load() != LastActivityTime ||
+ Instance.LastKnownActivitySum.load() != PreviousActivitySum)
+ {
+ ZEN_INFO("Instance {} idle deprovision aborted due to activity", ModuleId);
+ return false;
+ }
+ return true;
+ });
+ }
}
- m_FreePorts.push_back(BasePort);
- m_RecoveringModules.erase(std::string(ModuleId));
+
+ return true;  // running instance: reported as ok regardless of the idle handling above
}
- RemoveRecoveringModule.Dismiss();
+ else if (InstanceState == HubInstanceState::Provisioned)
+ {
+ // Process is not running but state says it should be - instance died unexpectedly.
+ const std::string ModuleId(LockedInstance.GetModuleId());
+ const uint16_t Port = LockedInstance.GetBasePort();
+ UpdateInstanceState(LockedInstance, ActiveInstanceIndex, HubInstanceState::Crashed);
+ NotifyStateUpdate(ModuleId, HubInstanceState::Provisioned, HubInstanceState::Crashed, Port, {});
+ LockedInstance = {};
- try
+ return false;  // the one "not ok" result; Hub::WatchDog reacts by calling AttemptRecoverInstance
+ }
+ else if (InstanceState == HubInstanceState::Hibernated)
{
- DestroyInstance.reset();
- NewState = HubInstanceState::Unprovisioned;
+ // Process is not running - no HTTP activity check is possible.
+ // Use a pure time-based check; the margin window does not apply here.
+ const std::string ModuleId = std::string(LockedInstance.GetModuleId());
+ const std::chrono::system_clock::time_point LastActivityTime = m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.load();
+ const uint64_t PreviousActivitySum = m_ActiveInstances[ActiveInstanceIndex].LastKnownActivitySum.load();
+ const std::chrono::system_clock::time_point Now = std::chrono::system_clock::now();
+ LockedInstance = {};
+
+ if (LastActivityTime + m_Config.WatchDog.HibernatedInactivityTimeout < Now)  // separate timeout setting for hibernated instances
+ {
+ ZEN_INFO("Hibernated instance {} has not been active for {}, attempting deprovision...",
+ ModuleId,
+ NiceTimeSpanMs(std::chrono::duration_cast<std::chrono::milliseconds>(Now - LastActivityTime).count()));
+ (void)InternalDeprovision(
+ ModuleId,
+ [ModuleId, InstanceState, LastActivityTime, PreviousActivitySum](ActiveInstance& Instance) -> bool {  // same unchanged-since-snapshot predicate as in the Provisioned branch
+ HubInstanceState CurrentState = Instance.State.load();
+ if (CurrentState != InstanceState)
+ {
+ ZEN_INFO("Hibernated instance {} idle deprovision aborted - state changed to {}", ModuleId, ToString(CurrentState));
+ return false;
+ }
+ if (Instance.LastActivityTime.load() != LastActivityTime || Instance.LastKnownActivitySum.load() != PreviousActivitySum)
+ {
+ ZEN_INFO("Hibernated instance {} idle deprovision aborted due to activity", ModuleId);
+ return false;
+ }
+ return true;
+ });
+ }
+ return true;
}
- catch (const std::exception& Ex)
+ else
{
- ZEN_ERROR("Failed to destroy recovered instance for module '{}': {}", ModuleId, Ex.what());
+ // transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering) - expected, skip.
+ // Crashed is handled above via AttemptRecoverInstance; it appears here only when the instance
+ // lock was busy on a previous cycle and recovery is already pending.
+ return true;
}
}
void
Hub::WatchDog()
{
- constexpr uint64_t WatchDogWakeupTimeMs = 5000;
- constexpr uint64_t WatchDogProcessingTimeMs = 500;
+ const uint64_t CycleIntervalMs = std::chrono::duration_cast<std::chrono::milliseconds>(m_Config.WatchDog.CycleInterval).count();
+ const uint64_t CycleProcessingBudgetMs =
+ std::chrono::duration_cast<std::chrono::milliseconds>(m_Config.WatchDog.CycleProcessingBudget).count();
+ const uint64_t InstanceCheckThrottleMs =
+ std::chrono::duration_cast<std::chrono::milliseconds>(m_Config.WatchDog.InstanceCheckThrottle).count();
+
+ HttpClient ActivityCheckClient("http://localhost",
+ HttpClientSettings{.ConnectTimeout = m_Config.WatchDog.ActivityCheckConnectTimeout,
+ .Timeout = m_Config.WatchDog.ActivityCheckRequestTimeout},
+ [&]() -> bool { return m_WatchDogEvent.Wait(0); });
size_t CheckInstanceIndex = SIZE_MAX; // first increment wraps to 0
- while (!m_WatchDogEvent.Wait(WatchDogWakeupTimeMs))
+ while (!m_WatchDogEvent.Wait(gsl::narrow<int>(CycleIntervalMs)))
{
try
{
@@ -850,7 +1336,7 @@ Hub::WatchDog()
Stopwatch Timer;
bool ShuttingDown = false;
- while (SlotsRemaining > 0 && Timer.GetElapsedTimeMs() < WatchDogProcessingTimeMs && !ShuttingDown)
+ while (SlotsRemaining > 0 && Timer.GetElapsedTimeMs() < CycleProcessingBudgetMs && !ShuttingDown)
{
StorageServerInstance::SharedLockedPtr LockedInstance;
m_Lock.WithSharedLock([this, &CheckInstanceIndex, &LockedInstance, &SlotsRemaining]() {
@@ -863,7 +1349,7 @@ Hub::WatchDog()
{
CheckInstanceIndex = 0;
}
- StorageServerInstance* Instance = m_ActiveInstances[CheckInstanceIndex].get();
+ StorageServerInstance* Instance = m_ActiveInstances[CheckInstanceIndex].Instance.get();
if (Instance)
{
LockedInstance = Instance->LockShared(/*Wait*/ false);
@@ -878,22 +1364,18 @@ Hub::WatchDog()
continue;
}
- if (LockedInstance.IsRunning())
+ std::string ModuleId(LockedInstance.GetModuleId());
+
+ bool InstanceIsOk = CheckInstanceStatus(ActivityCheckClient, std::move(LockedInstance), CheckInstanceIndex);
+ if (InstanceIsOk)
{
- LockedInstance.UpdateMetrics();
+ ShuttingDown = m_WatchDogEvent.Wait(gsl::narrow<int>(InstanceCheckThrottleMs));
}
- else if (LockedInstance.GetState() == HubInstanceState::Provisioned)
+ else
{
- // Process is not running but state says it should be - instance died unexpectedly.
- const std::string ModuleId(LockedInstance.GetModuleId());
- LockedInstance = {};
+ ZEN_WARN("Instance for module '{}' is not running, attempting recovery", ModuleId);
AttemptRecoverInstance(ModuleId);
}
- // else: transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering, Crashed) - expected, skip.
- LockedInstance = {};
-
- // Rate-limit: pause briefly between live-instance checks and respond to shutdown.
- ShuttingDown = m_WatchDogEvent.Wait(5);
}
}
catch (const std::exception& Ex)
@@ -905,11 +1387,11 @@ Hub::WatchDog()
}
void
-Hub::OnStateUpdate(std::string_view ModuleId,
- HubInstanceState OldState,
- HubInstanceState& NewState,
- uint16_t BasePort,
- std::string_view BaseUri)
+Hub::NotifyStateUpdate(std::string_view ModuleId,
+ HubInstanceState OldState,
+ HubInstanceState NewState,
+ uint16_t BasePort,
+ std::string_view BaseUri)
{
if (m_ModuleStateChangeCallback && OldState != NewState)
{
@@ -942,9 +1424,10 @@ namespace hub_testutils {
std::unique_ptr<Hub> MakeHub(const std::filesystem::path& BaseDir,
Hub::Configuration Config = {},
- Hub::AsyncModuleStateChangeCallbackFunc StateChangeCallback = {})
+ Hub::AsyncModuleStateChangeCallbackFunc StateChangeCallback = {},
+ WorkerThreadPool* WorkerPool = nullptr)  // new trailing parameter with a nullptr default, so existing three-argument call sites still compile
{
- return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), std::move(StateChangeCallback));
+ return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), WorkerPool, std::move(StateChangeCallback));  // WorkerPool is passed as the third Hub constructor argument, ahead of the callback
}
struct CallbackRecord
@@ -978,6 +1461,42 @@ namespace hub_testutils {
}
};
+ // Poll until Find() returns false for the given module (i.e. async deprovision completes).
+ static bool WaitForInstanceGone(Hub& HubInstance,  // true once Find() no longer reports the module; false if it is still present after Timeout
+ std::string_view ModuleId,
+ std::chrono::milliseconds PollInterval = std::chrono::milliseconds(50),
+ std::chrono::seconds Timeout = std::chrono::seconds(30))
+ {
+ const auto Deadline = std::chrono::steady_clock::now() + Timeout;  // deadline is computed on steady_clock
+ while (std::chrono::steady_clock::now() < Deadline)
+ {
+ if (!HubInstance.Find(ModuleId))
+ {
+ return true;
+ }
+ std::this_thread::sleep_for(PollInterval);
+ }
+ return !HubInstance.Find(ModuleId);  // one final check after the deadline has passed
+ }
+
+ // Poll until GetInstanceCount() reaches ExpectedCount (i.e. all async deprovisions complete).
+ static bool WaitForInstanceCount(Hub& HubInstance,  // true once the count equals ExpectedCount; false if it still differs after Timeout
+ int ExpectedCount,
+ std::chrono::milliseconds PollInterval = std::chrono::milliseconds(50),
+ std::chrono::seconds Timeout = std::chrono::seconds(30))
+ {
+ const auto Deadline = std::chrono::steady_clock::now() + Timeout;  // deadline is computed on steady_clock
+ while (std::chrono::steady_clock::now() < Deadline)
+ {
+ if (HubInstance.GetInstanceCount() == ExpectedCount)  // exact match required, not "at most"
+ {
+ return true;
+ }
+ std::this_thread::sleep_for(PollInterval);
+ }
+ return HubInstance.GetInstanceCount() == ExpectedCount;  // one final check after the deadline has passed
+ }
+
} // namespace hub_testutils
TEST_CASE("hub.provision_basic")
@@ -989,10 +1508,8 @@ TEST_CASE("hub.provision_basic")
CHECK_FALSE(HubInstance->Find("module_a"));
HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool ProvisionResult = HubInstance->Provision("module_a", Info, Reason);
- REQUIRE_MESSAGE(ProvisionResult, Reason);
- CHECK(Reason.empty());
+ const Hub::Response ProvisionResult = HubInstance->Provision("module_a", Info);
+ REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message);
CHECK_NE(Info.Port, 0);
CHECK_EQ(HubInstance->GetInstanceCount(), 1);
Hub::InstanceInfo InstanceInfo;
@@ -1004,9 +1521,8 @@ TEST_CASE("hub.provision_basic")
CHECK(ModClient.Get("/health/"));
}
- const bool DeprovisionResult = HubInstance->Deprovision("module_a", Reason);
- CHECK(DeprovisionResult);
- CHECK(Reason.empty());
+ const Hub::Response DeprovisionResult = HubInstance->Deprovision("module_a");
+ CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed);
CHECK_EQ(HubInstance->GetInstanceCount(), 0);
CHECK_FALSE(HubInstance->Find("module_a"));
@@ -1037,9 +1553,8 @@ TEST_CASE("hub.provision_config")
CHECK_FALSE(HubInstance->Find("module_a"));
HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool ProvisionResult = HubInstance->Provision("module_a", Info, Reason);
- REQUIRE_MESSAGE(ProvisionResult, Reason);
+ const Hub::Response ProvisionResult = HubInstance->Provision("module_a", Info);
+ REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message);
CHECK_NE(Info.Port, 0);
CHECK_EQ(HubInstance->GetInstanceCount(), 1);
Hub::InstanceInfo InstanceInfo;
@@ -1056,8 +1571,8 @@ TEST_CASE("hub.provision_config")
CHECK(ModClient.Get("/health/"));
}
- const bool DeprovisionResult = HubInstance->Deprovision("module_a", Reason);
- CHECK(DeprovisionResult);
+ const Hub::Response DeprovisionResult = HubInstance->Deprovision("module_a");
+ CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed);
CHECK_EQ(HubInstance->GetInstanceCount(), 0);
CHECK_FALSE(HubInstance->Find("module_a"));
@@ -1076,10 +1591,9 @@ TEST_CASE("hub.provision_callbacks")
std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, CaptureInstance.CaptureFunc());
HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool ProvisionResult = HubInstance->Provision("cb_module", Info, Reason);
- REQUIRE_MESSAGE(ProvisionResult, Reason);
+ const Hub::Response ProvisionResult = HubInstance->Provision("cb_module", Info);
+ REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message);
{
RwLock::SharedLockScope _(CaptureInstance.CallbackMutex);
@@ -1094,8 +1608,8 @@ TEST_CASE("hub.provision_callbacks")
CHECK(ModClient.Get("/health/"));
}
- const bool DeprovisionResult = HubInstance->Deprovision("cb_module", Reason);
- CHECK(DeprovisionResult);
+ const Hub::Response DeprovisionResult = HubInstance->Deprovision("cb_module");
+ CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed);
{
HttpClient ModClient(fmt::format("http://localhost:{}", Info.Port), kFastTimeout);
@@ -1111,6 +1625,49 @@ TEST_CASE("hub.provision_callbacks")
}
}
+TEST_CASE("hub.provision_callback_sequence")  // Checks the exact order of state-change callbacks for one provision followed by one deprovision.
+{
+ ScopedTemporaryDirectory TempDir;
+
+ struct TransitionRecord  // one observed (old state, new state) pair
+ {
+ HubInstanceState OldState;
+ HubInstanceState NewState;
+ };
+ RwLock CaptureMutex;
+ std::vector<TransitionRecord> Transitions;  // declared before HubInstance, so it is destroyed after the hub that holds the capturing callback
+
+ auto CaptureFunc =
+ [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info, HubInstanceState OldState, HubInstanceState NewState) {
+ ZEN_UNUSED(ModuleId);
+ ZEN_UNUSED(Info);
+ CaptureMutex.WithExclusiveLock([&]() { Transitions.push_back({OldState, NewState}); });  // appends happen under the exclusive lock
+ };
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, std::move(CaptureFunc));
+
+ HubProvisionedInstanceInfo Info;
+ {
+ const Hub::Response R = HubInstance->Provision("seq_module", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+ {
+ const Hub::Response R = HubInstance->Deprovision("seq_module");
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+
+ RwLock::SharedLockScope _(CaptureMutex);  // shared lock held while the recorded transitions are read
+ REQUIRE_EQ(Transitions.size(), 4u);  // exactly four transitions expected; the indexed checks below rely on this size
+ CHECK_EQ(Transitions[0].OldState, HubInstanceState::Unprovisioned);
+ CHECK_EQ(Transitions[0].NewState, HubInstanceState::Provisioning);
+ CHECK_EQ(Transitions[1].OldState, HubInstanceState::Provisioning);
+ CHECK_EQ(Transitions[1].NewState, HubInstanceState::Provisioned);
+ CHECK_EQ(Transitions[2].OldState, HubInstanceState::Provisioned);
+ CHECK_EQ(Transitions[2].NewState, HubInstanceState::Deprovisioning);
+ CHECK_EQ(Transitions[3].OldState, HubInstanceState::Deprovisioning);
+ CHECK_EQ(Transitions[3].NewState, HubInstanceState::Unprovisioned);
+}
+
TEST_CASE("hub.instance_limit")
{
ScopedTemporaryDirectory TempDir;
@@ -1121,27 +1678,24 @@ TEST_CASE("hub.instance_limit")
std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool FirstResult = HubInstance->Provision("limit_a", Info, Reason);
- REQUIRE_MESSAGE(FirstResult, Reason);
+ const Hub::Response FirstResult = HubInstance->Provision("limit_a", Info);
+ REQUIRE_MESSAGE(FirstResult.ResponseCode == Hub::EResponseCode::Completed, FirstResult.Message);
- const bool SecondResult = HubInstance->Provision("limit_b", Info, Reason);
- REQUIRE_MESSAGE(SecondResult, Reason);
+ const Hub::Response SecondResult = HubInstance->Provision("limit_b", Info);
+ REQUIRE_MESSAGE(SecondResult.ResponseCode == Hub::EResponseCode::Completed, SecondResult.Message);
CHECK_EQ(HubInstance->GetInstanceCount(), 2);
- Reason.clear();
- const bool ThirdResult = HubInstance->Provision("limit_c", Info, Reason);
- CHECK_FALSE(ThirdResult);
+ const Hub::Response ThirdResult = HubInstance->Provision("limit_c", Info);
+ CHECK(ThirdResult.ResponseCode == Hub::EResponseCode::Rejected);
CHECK_EQ(HubInstance->GetInstanceCount(), 2);
- CHECK_NE(Reason.find("instance limit"), std::string::npos);
+ CHECK_NE(ThirdResult.Message.find("instance limit"), std::string::npos);
- HubInstance->Deprovision("limit_a", Reason);
+ HubInstance->Deprovision("limit_a");
CHECK_EQ(HubInstance->GetInstanceCount(), 1);
- Reason.clear();
- const bool FourthResult = HubInstance->Provision("limit_d", Info, Reason);
- CHECK_MESSAGE(FourthResult, Reason);
+ const Hub::Response FourthResult = HubInstance->Provision("limit_d", Info);
+ CHECK_MESSAGE(FourthResult.ResponseCode == Hub::EResponseCode::Completed, FourthResult.Message);
CHECK_EQ(HubInstance->GetInstanceCount(), 2);
}
@@ -1151,10 +1705,15 @@ TEST_CASE("hub.enumerate_modules")
std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
HubProvisionedInstanceInfo Info;
- std::string Reason;
- REQUIRE_MESSAGE(HubInstance->Provision("enum_a", Info, Reason), Reason);
- REQUIRE_MESSAGE(HubInstance->Provision("enum_b", Info, Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Provision("enum_a", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+ {
+ const Hub::Response R = HubInstance->Provision("enum_b", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
std::vector<std::string> Ids;
int ProvisionedCount = 0;
@@ -1172,7 +1731,7 @@ TEST_CASE("hub.enumerate_modules")
CHECK(FoundA);
CHECK(FoundB);
- HubInstance->Deprovision("enum_a", Reason);
+ HubInstance->Deprovision("enum_a");
Ids.clear();
ProvisionedCount = 0;
HubInstance->EnumerateModules([&](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) {
@@ -1195,17 +1754,22 @@ TEST_CASE("hub.max_instance_count")
CHECK_EQ(HubInstance->GetMaxInstanceCount(), 0);
HubProvisionedInstanceInfo Info;
- std::string Reason;
- REQUIRE_MESSAGE(HubInstance->Provision("max_a", Info, Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Provision("max_a", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
CHECK_GE(HubInstance->GetMaxInstanceCount(), 1);
- REQUIRE_MESSAGE(HubInstance->Provision("max_b", Info, Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Provision("max_b", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
CHECK_GE(HubInstance->GetMaxInstanceCount(), 2);
const int MaxAfterTwo = HubInstance->GetMaxInstanceCount();
- HubInstance->Deprovision("max_a", Reason);
+ HubInstance->Deprovision("max_a");
CHECK_EQ(HubInstance->GetInstanceCount(), 1);
CHECK_EQ(HubInstance->GetMaxInstanceCount(), MaxAfterTwo);
}
@@ -1228,8 +1792,8 @@ TEST_CASE("hub.concurrent_callbacks")
for (int I = 0; I < kHalf; ++I)
{
HubProvisionedInstanceInfo Info;
- std::string Reason;
- REQUIRE_MESSAGE(HubInstance->Provision(fmt::format("pre_{}", I), Info, Reason), Reason);
+ const Hub::Response ProvR = HubInstance->Provision(fmt::format("pre_{}", I), Info);
+ REQUIRE_MESSAGE(ProvR.ResponseCode == Hub::EResponseCode::Completed, ProvR.Message);
}
CHECK_EQ(HubInstance->GetInstanceCount(), kHalf);
@@ -1253,23 +1817,21 @@ TEST_CASE("hub.concurrent_callbacks")
for (int I = 0; I < kHalf; ++I)
{
- ProvisionFutures[I] = Provisioners.EnqueueTask(std::packaged_task<void()>([&, I] {
- HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool Result =
- HubInstance->Provision(fmt::format("new_{}", I), Info, Reason);
- ProvisionResults[I] = Result ? 1 : 0;
- ProvisionReasons[I] = Reason;
- }),
- WorkerThreadPool::EMode::EnableBacklog);
-
- DeprovisionFutures[I] = Deprovisioneers.EnqueueTask(std::packaged_task<void()>([&, I] {
- std::string Reason;
- const bool Result =
- HubInstance->Deprovision(fmt::format("pre_{}", I), Reason);
- DeprovisionResults[I] = Result ? 1 : 0;
- }),
- WorkerThreadPool::EMode::EnableBacklog);
+ ProvisionFutures[I] =
+ Provisioners.EnqueueTask(std::packaged_task<void()>([&, I] {
+ HubProvisionedInstanceInfo Info;
+ const Hub::Response Result = HubInstance->Provision(fmt::format("new_{}", I), Info);
+ ProvisionResults[I] = (Result.ResponseCode == Hub::EResponseCode::Completed) ? 1 : 0;
+ ProvisionReasons[I] = Result.Message;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
+
+ DeprovisionFutures[I] =
+ Deprovisioneers.EnqueueTask(std::packaged_task<void()>([&, I] {
+ const Hub::Response Result = HubInstance->Deprovision(fmt::format("pre_{}", I));
+ DeprovisionResults[I] = (Result.ResponseCode == Hub::EResponseCode::Completed) ? 1 : 0;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
}
for (std::future<void>& F : ProvisionFutures)
@@ -1324,14 +1886,13 @@ TEST_CASE("hub.job_object")
std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool ProvisionResult = HubInstance->Provision("jobobj_a", Info, Reason);
- REQUIRE_MESSAGE(ProvisionResult, Reason);
+ const Hub::Response ProvisionResult = HubInstance->Provision("jobobj_a", Info);
+ REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message);
CHECK_NE(Info.Port, 0);
- const bool DeprovisionResult = HubInstance->Deprovision("jobobj_a", Reason);
- CHECK(DeprovisionResult);
+ const Hub::Response DeprovisionResult = HubInstance->Deprovision("jobobj_a");
+ CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed);
CHECK_EQ(HubInstance->GetInstanceCount(), 0);
}
@@ -1344,14 +1905,13 @@ TEST_CASE("hub.job_object")
std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
HubProvisionedInstanceInfo Info;
- std::string Reason;
- const bool ProvisionResult = HubInstance->Provision("nojobobj_a", Info, Reason);
- REQUIRE_MESSAGE(ProvisionResult, Reason);
+ const Hub::Response ProvisionResult = HubInstance->Provision("nojobobj_a", Info);
+ REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message);
CHECK_NE(Info.Port, 0);
- const bool DeprovisionResult = HubInstance->Deprovision("nojobobj_a", Reason);
- CHECK(DeprovisionResult);
+ const Hub::Response DeprovisionResult = HubInstance->Deprovision("nojobobj_a");
+ CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed);
CHECK_EQ(HubInstance->GetInstanceCount(), 0);
}
}
@@ -1366,11 +1926,12 @@ TEST_CASE("hub.hibernate_wake")
HubProvisionedInstanceInfo ProvInfo;
Hub::InstanceInfo Info;
- std::string Reason;
// Provision
- REQUIRE_MESSAGE(HubInstance->Provision("hib_a", ProvInfo, Reason), Reason);
- CHECK(Reason.empty());
+ {
+ const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
REQUIRE(HubInstance->Find("hib_a", &Info));
CHECK_EQ(Info.State, HubInstanceState::Provisioned);
{
@@ -1379,9 +1940,8 @@ TEST_CASE("hub.hibernate_wake")
}
// Hibernate
- const bool HibernateResult = HubInstance->Hibernate("hib_a", Reason);
- REQUIRE_MESSAGE(HibernateResult, Reason);
- CHECK(Reason.empty());
+ const Hub::Response HibernateResult = HubInstance->Hibernate("hib_a");
+ REQUIRE_MESSAGE(HibernateResult.ResponseCode == Hub::EResponseCode::Completed, HibernateResult.Message);
REQUIRE(HubInstance->Find("hib_a", &Info));
CHECK_EQ(Info.State, HubInstanceState::Hibernated);
{
@@ -1390,9 +1950,8 @@ TEST_CASE("hub.hibernate_wake")
}
// Wake
- const bool WakeResult = HubInstance->Wake("hib_a", Reason);
- REQUIRE_MESSAGE(WakeResult, Reason);
- CHECK(Reason.empty());
+ const Hub::Response WakeResult = HubInstance->Wake("hib_a");
+ REQUIRE_MESSAGE(WakeResult.ResponseCode == Hub::EResponseCode::Completed, WakeResult.Message);
REQUIRE(HubInstance->Find("hib_a", &Info));
CHECK_EQ(Info.State, HubInstanceState::Provisioned);
{
@@ -1401,9 +1960,8 @@ TEST_CASE("hub.hibernate_wake")
}
// Deprovision
- const bool DeprovisionResult = HubInstance->Deprovision("hib_a", Reason);
- CHECK(DeprovisionResult);
- CHECK(Reason.empty());
+ const Hub::Response DeprovisionResult = HubInstance->Deprovision("hib_a");
+ CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed);
CHECK_FALSE(HubInstance->Find("hib_a"));
{
HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout);
@@ -1419,53 +1977,167 @@ TEST_CASE("hub.hibernate_wake_errors")
std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
HubProvisionedInstanceInfo ProvInfo;
- std::string Reason;
- // Hibernate/wake on a non-existent module - should return false with empty reason (-> 404)
- CHECK_FALSE(HubInstance->Hibernate("never_provisioned", Reason));
- CHECK(Reason.empty());
+ // Hibernate/wake on a non-existent module - returns NotFound (-> 404)
+ CHECK(HubInstance->Hibernate("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound);
+ CHECK(HubInstance->Wake("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound);
- CHECK_FALSE(HubInstance->Wake("never_provisioned", Reason));
- CHECK(Reason.empty());
+ // Double-hibernate: second hibernate on already-hibernated module returns Completed (idempotent)
+ {
+ const Hub::Response R = HubInstance->Provision("err_b", ProvInfo);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+ {
+ const Hub::Response R = HubInstance->Hibernate("err_b");
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+
+ {
+ const Hub::Response HibResp = HubInstance->Hibernate("err_b");
+ CHECK(HibResp.ResponseCode == Hub::EResponseCode::Completed);
+ }
+
+ // Wake on provisioned: succeeds (-> Provisioned), then wake again returns Completed (idempotent)
+ {
+ const Hub::Response R = HubInstance->Wake("err_b");
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+
+ {
+ const Hub::Response WakeResp = HubInstance->Wake("err_b");
+ CHECK(WakeResp.ResponseCode == Hub::EResponseCode::Completed);
+ }
+
+ // Deprovision not-found - returns NotFound (-> 404)
+ CHECK(HubInstance->Deprovision("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound);
+}
+
+TEST_CASE("hub.async_hibernate_wake")
+{
+ ScopedTemporaryDirectory TempDir;
- // Double-hibernate: first hibernate succeeds, second returns false with non-empty reason (-> 400)
- REQUIRE_MESSAGE(HubInstance->Provision("err_b", ProvInfo, Reason), Reason);
- CHECK(Reason.empty());
- REQUIRE_MESSAGE(HubInstance->Hibernate("err_b", Reason), Reason);
- CHECK(Reason.empty());
+ Hub::Configuration Config;
+ Config.BasePortNumber = 23000;
- Reason.clear();
- CHECK_FALSE(HubInstance->Hibernate("err_b", Reason));
- CHECK_FALSE(Reason.empty());
+ WorkerThreadPool WorkerPool(2, "hub_async_hib_wake");
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool);
- // Wake on provisioned: succeeds (-> Provisioned), then wake again returns false (-> 400)
- REQUIRE_MESSAGE(HubInstance->Wake("err_b", Reason), Reason);
- CHECK(Reason.empty());
+ HubProvisionedInstanceInfo ProvInfo;
+ Hub::InstanceInfo Info;
+
+ constexpr auto kPollInterval = std::chrono::milliseconds(50);
+ constexpr auto kTimeout = std::chrono::seconds(30);
+
+ // Provision and wait until Provisioned
+ {
+ const Hub::Response R = HubInstance->Provision("async_hib_a", ProvInfo);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message);
+ }
+ {
+ const auto Deadline = std::chrono::steady_clock::now() + kTimeout;
+ bool Ready = false;
+ while (std::chrono::steady_clock::now() < Deadline)
+ {
+ if (HubInstance->Find("async_hib_a", &Info) && Info.State == HubInstanceState::Provisioned)
+ {
+ Ready = true;
+ break;
+ }
+ std::this_thread::sleep_for(kPollInterval);
+ }
+ REQUIRE_MESSAGE(Ready, "Instance did not reach Provisioned state within timeout");
+ }
+
+ // Hibernate asynchronously and poll until Hibernated
+ {
+ const Hub::Response R = HubInstance->Hibernate("async_hib_a");
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message);
+ }
+ {
+ const auto Deadline = std::chrono::steady_clock::now() + kTimeout;
+ bool Hibernated = false;
+ while (std::chrono::steady_clock::now() < Deadline)
+ {
+ if (HubInstance->Find("async_hib_a", &Info) && Info.State == HubInstanceState::Hibernated)
+ {
+ Hibernated = true;
+ break;
+ }
+ std::this_thread::sleep_for(kPollInterval);
+ }
+ REQUIRE_MESSAGE(Hibernated, "Instance did not reach Hibernated state within timeout");
+ }
+ {
+ HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout);
+ CHECK(!ModClient.Get("/health/"));
+ }
- Reason.clear();
- CHECK_FALSE(HubInstance->Wake("err_b", Reason));
- CHECK_FALSE(Reason.empty());
+ // Wake asynchronously and poll until Provisioned
+ {
+ const Hub::Response R = HubInstance->Wake("async_hib_a");
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message);
+ }
+ {
+ const auto Deadline = std::chrono::steady_clock::now() + kTimeout;
+ bool Woken = false;
+ while (std::chrono::steady_clock::now() < Deadline)
+ {
+ if (HubInstance->Find("async_hib_a", &Info) && Info.State == HubInstanceState::Provisioned)
+ {
+ Woken = true;
+ break;
+ }
+ std::this_thread::sleep_for(kPollInterval);
+ }
+ REQUIRE_MESSAGE(Woken, "Instance did not reach Provisioned state after wake within timeout");
+ }
+ {
+ HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout);
+ CHECK(ModClient.Get("/health/"));
+ }
- // Deprovision not-found - should return false with empty reason (-> 404)
- CHECK_FALSE(HubInstance->Deprovision("never_provisioned", Reason));
- CHECK(Reason.empty());
+ // Deprovision asynchronously and poll until the instance is gone
+ {
+ const Hub::Response R = HubInstance->Deprovision("async_hib_a");
+ CHECK_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message);
+ }
+ REQUIRE_MESSAGE(hub_testutils::WaitForInstanceGone(*HubInstance, "async_hib_a"), "Instance did not deprovision within timeout");
}
TEST_CASE("hub.recover_process_crash")
{
ScopedTemporaryDirectory TempDir;
- std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
+
+ struct TransitionRecord
+ {
+ HubInstanceState OldState;
+ HubInstanceState NewState;
+ };
+ RwLock CaptureMutex;
+ std::vector<TransitionRecord> Transitions;
+ auto CaptureFunc = [&](std::string_view, const HubProvisionedInstanceInfo&, HubInstanceState OldState, HubInstanceState NewState) {
+ CaptureMutex.WithExclusiveLock([&]() { Transitions.push_back({OldState, NewState}); });
+ };
+
+ // Fast watchdog cycle so crash detection is near-instant instead of waiting up to the 3s default.
+ Hub::Configuration Config;
+ Config.WatchDog.CycleInterval = std::chrono::milliseconds(10);
+ Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1);
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, std::move(CaptureFunc));
HubProvisionedInstanceInfo Info;
- std::string Reason;
- REQUIRE_MESSAGE(HubInstance->Provision("module_a", Info, Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Provision("module_a", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
// Kill the child process to simulate a crash, then poll until the watchdog detects it,
// recovers the instance, and the new process is serving requests.
HubInstance->TerminateModuleForTesting("module_a");
- constexpr auto kPollIntervalMs = std::chrono::milliseconds(200);
- constexpr auto kTimeoutMs = std::chrono::seconds(20);
+ constexpr auto kPollIntervalMs = std::chrono::milliseconds(50);
+ constexpr auto kTimeoutMs = std::chrono::seconds(15);
const auto Deadline = std::chrono::steady_clock::now() + kTimeoutMs;
// A successful HTTP health check on the same port confirms the new process is up.
@@ -1486,22 +2158,50 @@ TEST_CASE("hub.recover_process_crash")
}
}
CHECK_MESSAGE(Recovered, "Instance did not recover within timeout");
+
+ // Verify the full crash/recovery callback sequence
+ {
+ RwLock::SharedLockScope _(CaptureMutex);
+ REQUIRE_GE(Transitions.size(), 3u);
+ // Find the Provisioned->Crashed transition
+ const auto CrashedIt = std::find_if(Transitions.begin(), Transitions.end(), [](const TransitionRecord& R) {
+ return R.OldState == HubInstanceState::Provisioned && R.NewState == HubInstanceState::Crashed;
+ });
+ REQUIRE_NE(CrashedIt, Transitions.end());
+ // Recovery sequence follows: Crashed->Recovering, Recovering->Provisioned
+ const auto RecoveringIt = CrashedIt + 1;
+ REQUIRE_NE(RecoveringIt, Transitions.end());
+ CHECK_EQ(RecoveringIt->OldState, HubInstanceState::Crashed);
+ CHECK_EQ(RecoveringIt->NewState, HubInstanceState::Recovering);
+ const auto RecoveredIt = RecoveringIt + 1;
+ REQUIRE_NE(RecoveredIt, Transitions.end());
+ CHECK_EQ(RecoveredIt->OldState, HubInstanceState::Recovering);
+ CHECK_EQ(RecoveredIt->NewState, HubInstanceState::Provisioned);
+ }
}
TEST_CASE("hub.recover_process_crash_then_deprovision")
{
ScopedTemporaryDirectory TempDir;
- std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
+
+ // Fast watchdog cycle so crash detection is near-instant instead of waiting up to the 3s default.
+ Hub::Configuration Config;
+ Config.WatchDog.CycleInterval = std::chrono::milliseconds(10);
+ Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1);
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
HubProvisionedInstanceInfo Info;
- std::string Reason;
- REQUIRE_MESSAGE(HubInstance->Provision("module_a", Info, Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Provision("module_a", Info);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
// Kill the child process, wait for the watchdog to detect and recover the instance.
HubInstance->TerminateModuleForTesting("module_a");
- constexpr auto kPollIntervalMs = std::chrono::milliseconds(200);
- constexpr auto kTimeoutMs = std::chrono::seconds(20);
+ constexpr auto kPollIntervalMs = std::chrono::milliseconds(50);
+ constexpr auto kTimeoutMs = std::chrono::seconds(15);
const auto Deadline = std::chrono::steady_clock::now() + kTimeoutMs;
bool Recovered = false;
@@ -1518,16 +2218,273 @@ TEST_CASE("hub.recover_process_crash_then_deprovision")
REQUIRE_MESSAGE(Recovered, "Instance did not recover within timeout");
// After recovery, deprovision should succeed and a re-provision should work.
- CHECK_MESSAGE(HubInstance->Deprovision("module_a", Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Deprovision("module_a");
+ CHECK_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
CHECK_EQ(HubInstance->GetInstanceCount(), 0);
HubProvisionedInstanceInfo NewInfo;
- CHECK_MESSAGE(HubInstance->Provision("module_a", NewInfo, Reason), Reason);
+ {
+ const Hub::Response R = HubInstance->Provision("module_a", NewInfo);
+ CHECK_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
CHECK_NE(NewInfo.Port, 0);
HttpClient NewClient(fmt::format("http://localhost:{}", NewInfo.Port), kFastTimeout);
CHECK_MESSAGE(NewClient.Get("/health/"), "Re-provisioned instance is not serving requests");
}
+TEST_CASE("hub.async_provision_concurrent")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ constexpr int kModuleCount = 8;
+
+ Hub::Configuration Config;
+ Config.BasePortNumber = 22800;
+ Config.InstanceLimit = kModuleCount;
+
+ WorkerThreadPool WorkerPool(4, "hub_async_concurrent");
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool);
+
+ std::vector<HubProvisionedInstanceInfo> Infos(kModuleCount);
+ std::vector<std::string> Reasons(kModuleCount);
+ std::vector<int> Results(kModuleCount, 0);
+
+ {
+ WorkerThreadPool Callers(kModuleCount, "hub_async_callers");
+ std::vector<std::future<void>> Futures(kModuleCount);
+
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ Futures[I] = Callers.EnqueueTask(std::packaged_task<void()>([&, I] {
+ const Hub::Response Resp = HubInstance->Provision(fmt::format("async_c{}", I), Infos[I]);
+ Results[I] = (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? 1 : 0;
+ Reasons[I] = Resp.Message;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ for (std::future<void>& F : Futures)
+ {
+ F.get();
+ }
+ }
+
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ REQUIRE_MESSAGE(Results[I] != 0, Reasons[I]);
+ CHECK_NE(Infos[I].Port, 0);
+ }
+
+ // Poll until all instances reach Provisioned state
+ constexpr auto kPollInterval = std::chrono::milliseconds(50);
+ constexpr auto kTimeout = std::chrono::seconds(30);
+ const auto Deadline = std::chrono::steady_clock::now() + kTimeout;
+
+ bool AllProvisioned = false;
+ while (std::chrono::steady_clock::now() < Deadline)
+ {
+ int ProvisionedCount = 0;
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ Hub::InstanceInfo InstanceInfo;
+ if (HubInstance->Find(fmt::format("async_c{}", I), &InstanceInfo) && InstanceInfo.State == HubInstanceState::Provisioned)
+ {
+ ++ProvisionedCount;
+ }
+ }
+ if (ProvisionedCount == kModuleCount)
+ {
+ AllProvisioned = true;
+ break;
+ }
+ std::this_thread::sleep_for(kPollInterval);
+ }
+ CHECK_MESSAGE(AllProvisioned, "Not all instances reached Provisioned state within timeout");
+
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ HttpClient ModClient(fmt::format("http://localhost:{}", Infos[I].Port), kFastTimeout);
+ CHECK_MESSAGE(ModClient.Get("/health/"), fmt::format("async_c{} not serving requests", I));
+ }
+
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ const Hub::Response DepResp = HubInstance->Deprovision(fmt::format("async_c{}", I));
+ CHECK_MESSAGE(DepResp.ResponseCode == Hub::EResponseCode::Accepted, DepResp.Message);
+ }
+ REQUIRE_MESSAGE(hub_testutils::WaitForInstanceCount(*HubInstance, 0), "Not all instances deprovisioned within timeout");
+}
+
+TEST_CASE("hub.async_provision_shutdown_waits")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ constexpr int kModuleCount = 8;
+
+ Hub::Configuration Config;
+ Config.InstanceLimit = kModuleCount;
+ Config.BasePortNumber = 22900;
+
+ WorkerThreadPool WorkerPool(2, "hub_async_shutdown");
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool);
+
+ std::vector<HubProvisionedInstanceInfo> Infos(kModuleCount);
+
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ const Hub::Response ProvResult = HubInstance->Provision(fmt::format("async_c{}", I), Infos[I]);
+ REQUIRE_MESSAGE(ProvResult.ResponseCode == Hub::EResponseCode::Accepted, ProvResult.Message);
+ REQUIRE_NE(Infos[I].Port, 0);
+ }
+
+ // Shut down without polling for Provisioned; Shutdown() must drain the latch and clean up.
+ HubInstance->Shutdown();
+
+ CHECK_EQ(HubInstance->GetInstanceCount(), 0);
+
+ for (int I = 0; I < kModuleCount; ++I)
+ {
+ HttpClient ModClient(fmt::format("http://localhost:{}", Infos[I].Port), kFastTimeout);
+ CHECK_FALSE(ModClient.Get("/health/"));
+ }
+}
+
+TEST_CASE("hub.async_provision_rejected")
+{
+ // Rejection from CanProvisionInstance fires synchronously even when a WorkerPool is present.
+ ScopedTemporaryDirectory TempDir;
+
+ Hub::Configuration Config;
+ Config.InstanceLimit = 1;
+ Config.BasePortNumber = 23100;
+
+ WorkerThreadPool WorkerPool(2, "hub_async_rejected");
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool);
+
+ HubProvisionedInstanceInfo Info;
+
+ // First provision: dispatched to WorkerPool, returns Accepted
+ const Hub::Response FirstResult = HubInstance->Provision("async_r1", Info);
+ REQUIRE_MESSAGE(FirstResult.ResponseCode == Hub::EResponseCode::Accepted, FirstResult.Message);
+ REQUIRE_NE(Info.Port, 0);
+
+ // Second provision: CanProvisionInstance rejects synchronously (limit reached), returns Rejected
+ HubProvisionedInstanceInfo Info2;
+ const Hub::Response SecondResult = HubInstance->Provision("async_r2", Info2);
+ CHECK(SecondResult.ResponseCode == Hub::EResponseCode::Rejected);
+ CHECK_FALSE(SecondResult.Message.empty());
+ CHECK_NE(SecondResult.Message.find("instance limit"), std::string::npos);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 1);
+}
+
+TEST_CASE("hub.instance.inactivity.deprovision")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ // Aggressive watchdog settings to keep test duration short.
+ // Provisioned timeout (2s) > Hibernated timeout (1s) - this is the key invariant under test.
+ // Margin (1s) means the HTTP activity check fires at LastActivityTime+1s for Provisioned instances.
+ // The Hibernated branch ignores the margin and uses a direct time-based check.
+ Hub::Configuration Config;
+ Config.BasePortNumber = 23200;
+ Config.InstanceLimit = 3;
+ Config.WatchDog.CycleInterval = std::chrono::milliseconds(10);
+ Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1);
+ Config.WatchDog.ProvisionedInactivityTimeout = std::chrono::seconds(2);
+ Config.WatchDog.HibernatedInactivityTimeout = std::chrono::seconds(1);
+ Config.WatchDog.InactivityCheckMargin = std::chrono::seconds(1);
+ Config.WatchDog.ActivityCheckConnectTimeout = std::chrono::milliseconds(200);
+ Config.WatchDog.ActivityCheckRequestTimeout = std::chrono::milliseconds(500);
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
+
+ // Provision in order: idle first, idle_hib second (then hibernate), persistent last.
+ // idle_hib uses the shorter Hibernated timeout (1s) and expires before idle (2s provisioned).
+ // persistent gets real HTTP PUTs so its activity timer is reset; it must still be alive
+ // after both idle instances are gone.
+
+ HubProvisionedInstanceInfo IdleInfo;
+ {
+ const Hub::Response R = HubInstance->Provision("idle", IdleInfo);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+
+ HubProvisionedInstanceInfo IdleHibInfo;
+ {
+ const Hub::Response R = HubInstance->Provision("idle_hib", IdleHibInfo);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ const Hub::Response H = HubInstance->Hibernate("idle_hib");
+ REQUIRE_MESSAGE(H.ResponseCode == Hub::EResponseCode::Completed, H.Message);
+ }
+
+ HubProvisionedInstanceInfo PersistentInfo;
+ {
+ const Hub::Response R = HubInstance->Provision("persistent", PersistentInfo);
+ REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message);
+ }
+
+ auto PokeInstance = [&](uint16_t Port) {
+ // Make a real storage request to increment the instance's activity sum.
+ // The watchdog detects the changed sum on the next cycle and resets LastActivityTime.
+ {
+ HttpClient PersistentClient(fmt::format("http://localhost:{}", Port),
+ HttpClientSettings{.ConnectTimeout = std::chrono::milliseconds(200)});
+ // time_since_epoch() yields a per-call tick without the signed overflow of subtracting time_point::min() from now().
+ uint64_t Tick = static_cast<uint64_t>(
+ std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count());
+ IoHash Key = IoHash::HashBuffer(&Tick, sizeof(Tick));
+ const HttpClient::Response PutResult =
+ PersistentClient.Put(fmt::format("/z$/ns1/b/{}", Key),
+ IoBufferBuilder::MakeFromMemory(MakeMemoryView(std::string_view("keepalive"))));
+ CHECK(PutResult);
+ }
+ };
+
+ PokeInstance(IdleInfo.Port);
+ PokeInstance(PersistentInfo.Port);
+
+ Sleep(100);
+
+ // Phase 1: immediately after setup all three instances must still be alive.
+ // No timeout has elapsed yet (only 100ms have passed).
+ CHECK_MESSAGE(HubInstance->Find("idle"), "idle was deprovisioned within 100ms - its 2s provisioned timeout has not elapsed");
+
+ CHECK_MESSAGE(HubInstance->Find("idle_hib"), "idle_hib was deprovisioned within 100ms - its 1s hibernated timeout has not elapsed");
+
+ CHECK_MESSAGE(HubInstance->Find("persistent"),
+ "persistent was deprovisioned within 100ms - its 2s provisioned timeout has not elapsed");
+
+ // Phase 2: idle_hib must be deprovisioned by the watchdog within its 1s hibernated timeout.
+ // idle must remain alive - its 2s provisioned timeout has not elapsed yet.
+ CHECK_MESSAGE(hub_testutils::WaitForInstanceGone(*HubInstance, "idle_hib", std::chrono::milliseconds(100), std::chrono::seconds(3)),
+ "idle_hib was not deprovisioned within its 1s hibernated timeout");
+
+ CHECK_MESSAGE(!HubInstance->Find("idle_hib"), "idle_hib should be gone after its 1s hibernated timeout elapsed");
+
+ CHECK_MESSAGE(HubInstance->Find("idle"),
+ "idle was deprovisioned before its 2s provisioned timeout - only idle_hib's 1s hibernated timeout has elapsed");
+
+ CHECK_MESSAGE(HubInstance->Find("persistent"),
+ "persistent was incorrectly deprovisioned - its activity timer was reset by PokeInstance");
+
+ PokeInstance(PersistentInfo.Port);
+
+ // Phase 3: idle must be deprovisioned by the watchdog within its 2s provisioned timeout.
+ // persistent must remain alive - its activity timer was reset by PokeInstance.
+ CHECK_MESSAGE(hub_testutils::WaitForInstanceGone(*HubInstance, "idle", std::chrono::milliseconds(100), std::chrono::seconds(4)),
+ "idle was not deprovisioned within its 2s provisioned timeout");
+
+ CHECK_MESSAGE(!HubInstance->Find("idle_hib"), "idle_hib should still be gone - it was deprovisioned in phase 2");
+
+ CHECK_MESSAGE(!HubInstance->Find("idle"), "idle should be gone after its 2s provisioned timeout elapsed");
+
+ CHECK_MESSAGE(HubInstance->Find("persistent"),
+ "persistent was incorrectly deprovisioned - its activity timer was reset by PokeInstance");
+
+ HubInstance->Shutdown();
+}
+
TEST_SUITE_END();
void
@@ -1539,7 +2496,7 @@ Hub::TerminateModuleForTesting(const std::string& ModuleId)
{
return;
}
- StorageServerInstance::SharedLockedPtr Locked = m_ActiveInstances[It->second]->LockShared(/*Wait*/ true);
+ StorageServerInstance::SharedLockedPtr Locked = m_ActiveInstances[It->second].Instance->LockShared(/*Wait*/ true);
if (Locked)
{
Locked.TerminateForTesting();
diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h
index 28e77e729..c343b19e2 100644
--- a/src/zenserver/hub/hub.h
+++ b/src/zenserver/hub/hub.h
@@ -4,21 +4,23 @@
#include "hubinstancestate.h"
#include "resourcemetrics.h"
+#include "storageserverinstance.h"
#include <zencore/system.h>
#include <zenutil/zenserverprocess.h>
+#include <chrono>
#include <deque>
#include <filesystem>
#include <functional>
#include <memory>
#include <thread>
#include <unordered_map>
-#include <unordered_set>
namespace zen {
-class StorageServerInstance;
+class HttpClient;
+class WorkerThreadPool;
/**
* Hub
@@ -35,6 +37,19 @@ struct HubProvisionedInstanceInfo
class Hub
{
public:
+ struct WatchDogConfiguration
+ {
+ std::chrono::milliseconds CycleInterval = std::chrono::seconds(3);
+ std::chrono::milliseconds CycleProcessingBudget = std::chrono::milliseconds(500);
+ std::chrono::milliseconds InstanceCheckThrottle = std::chrono::milliseconds(5);
+ std::chrono::seconds ProvisionedInactivityTimeout = std::chrono::minutes(10);
+ std::chrono::seconds HibernatedInactivityTimeout = std::chrono::minutes(30);
+ std::chrono::seconds InactivityCheckMargin = std::chrono::minutes(1);
+
+ std::chrono::milliseconds ActivityCheckConnectTimeout = std::chrono::milliseconds(100);
+ std::chrono::milliseconds ActivityCheckRequestTimeout = std::chrono::milliseconds(200);
+ };
+
struct Configuration
{
/** Enable or disable the use of a Windows Job Object for child process management.
@@ -51,6 +66,8 @@ public:
int InstanceCoreLimit = 0; // Automatic
std::filesystem::path InstanceConfigPath;
std::string HydrationTargetSpecification;
+
+ WatchDogConfiguration WatchDog;
};
typedef std::function<
@@ -59,6 +76,7 @@ public:
Hub(const Configuration& Config,
ZenServerEnvironment&& RunEnvironment,
+ WorkerThreadPool* OptionalWorkerPool = nullptr,
AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback = {});
~Hub();
@@ -78,42 +96,49 @@ public:
*/
void Shutdown();
+ enum class EResponseCode
+ {
+ NotFound,
+ Rejected,
+ Accepted,
+ Completed
+ };
+
+ struct Response
+ {
+ EResponseCode ResponseCode = EResponseCode::Rejected;
+ std::string Message;
+ };
+
/**
* Provision a storage server instance for the given module ID.
*
* @param ModuleId The ID of the module to provision.
- * @param OutInfo If successful, information about the provisioned instance will be returned here.
- * @param OutReason If unsuccessful, the reason will be returned here.
+ * @param OutInfo On success, information about the provisioned instance is returned here.
*/
- bool Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, std::string& OutReason);
+ Response Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo);
/**
* Deprovision a storage server instance for the given module ID.
*
* @param ModuleId The ID of the module to deprovision.
- * @param OutReason If unsuccessful, the reason will be returned here.
- * @return true if the instance was found and deprovisioned, false otherwise.
*/
- bool Deprovision(const std::string& ModuleId, std::string& OutReason);
+ Response Deprovision(const std::string& ModuleId);
/**
* Hibernate a storage server instance for the given module ID.
* The instance is shut down but its data is preserved; it can be woken later.
*
* @param ModuleId The ID of the module to hibernate.
- * @param OutReason If unsuccessful, the reason will be returned here (empty = not found).
- * @return true if the instance was hibernated, false otherwise.
*/
- bool Hibernate(const std::string& ModuleId, std::string& OutReason);
+ Response Hibernate(const std::string& ModuleId);
/**
* Wake a hibernated storage server instance for the given module ID.
*
* @param ModuleId The ID of the module to wake.
- * @param OutReason If unsuccessful, the reason will be returned here (empty = not found).
- * @return true if the instance was woken, false otherwise.
*/
- bool Wake(const std::string& ModuleId, std::string& OutReason);
+ Response Wake(const std::string& ModuleId);
/**
* Find info about storage server instance for the given module ID.
@@ -144,6 +169,9 @@ public:
private:
const Configuration m_Config;
ZenServerEnvironment m_RunEnvironment;
+ WorkerThreadPool* m_WorkerPool = nullptr;
+ Latch m_BackgroundWorkLatch;
+ std::atomic<bool> m_ShutdownFlag = false;
AsyncModuleStateChangeCallbackFunc m_ModuleStateChangeCallback;
@@ -153,64 +181,86 @@ private:
#if ZEN_PLATFORM_WINDOWS
JobObject m_JobObject;
#endif
- RwLock m_Lock;
- std::unordered_map<std::string, size_t> m_InstanceLookup;
- std::unordered_set<std::string> m_DeprovisioningModules;
- std::unordered_set<std::string> m_ProvisioningModules;
- std::unordered_set<std::string> m_HibernatingModules;
- std::unordered_set<std::string> m_WakingModules;
- std::unordered_set<std::string> m_RecoveringModules;
- std::vector<std::unique_ptr<StorageServerInstance>> m_ActiveInstances;
- std::vector<size_t> m_FreeActiveInstanceIndexes;
- ResourceMetrics m_ResourceLimits;
- SystemMetrics m_HostMetrics;
- std::atomic<int> m_MaxInstanceCount = 0;
- std::deque<uint16_t> m_FreePorts;
- std::thread m_WatchDog;
+ RwLock m_Lock;
+ std::unordered_map<std::string, size_t> m_InstanceLookup;
- Event m_WatchDogEvent;
- void WatchDog();
- void AttemptRecoverInstance(std::string_view ModuleId);
+ struct ActiveInstance
+ {
+ // Invariant: Instance == nullptr if and only if State == Unprovisioned.
+ // Both fields are only created/destroyed under the hub's exclusive lock.
+ // State is an atomic because the watchdog reads it under a shared instance lock
+ // without holding the hub lock.
+ std::unique_ptr<StorageServerInstance> Instance;
+ std::atomic<HubInstanceState> State = HubInstanceState::Unprovisioned;
+ // TODO: We should move current metrics here (from StorageServerInstance)
+
+ // Read and updated by WatchDog; an update to State triggers a reset of both
+ std::atomic<uint64_t> LastKnownActivitySum = 0;
+ std::atomic<std::chrono::system_clock::time_point> LastActivityTime = std::chrono::system_clock::time_point::min();
+ };
- void UpdateStats();
- void UpdateCapacityMetrics();
- bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason);
+ // UpdateInstanceState is overloaded to accept a locked instance pointer (exclusive or shared) or the hub exclusive
+ // lock scope as a proof token that the caller holds an appropriate lock before mutating ActiveInstance::State.
+ // State mutation and notification (NotifyStateUpdate) are intentionally decoupled - see NotifyStateUpdate below.
- class InstanceStateUpdateGuard
+ HubInstanceState UpdateInstanceState(const StorageServerInstance::ExclusiveLockedPtr& Instance,
+ size_t ActiveInstanceIndex,
+ HubInstanceState NewState)
{
- public:
- InstanceStateUpdateGuard(Hub& InHub,
- std::string_view ModuleId,
- HubInstanceState OldState,
- HubInstanceState& NewState,
- uint16_t BasePort,
- const std::string& BaseUri)
- : m_Hub(InHub)
- , m_ModuleId(ModuleId)
- , m_OldState(OldState)
- , m_NewState(NewState)
- , m_BasePort(BasePort)
- , m_BaseUri(BaseUri)
- {
- }
- ~InstanceStateUpdateGuard() { m_Hub.OnStateUpdate(m_ModuleId, m_OldState, m_NewState, m_BasePort, m_BaseUri); }
-
- private:
- Hub& m_Hub;
- const std::string m_ModuleId;
- HubInstanceState m_OldState;
- HubInstanceState& m_NewState;
- uint16_t m_BasePort;
- const std::string m_BaseUri;
- };
+ ZEN_ASSERT(Instance);
+ return UpdateInstanceStateLocked(ActiveInstanceIndex, NewState);
+ }
+ HubInstanceState UpdateInstanceState(const StorageServerInstance::SharedLockedPtr& Instance,
+ size_t ActiveInstanceIndex,
+ HubInstanceState NewState)
+ {
+ ZEN_ASSERT(Instance);
+ return UpdateInstanceStateLocked(ActiveInstanceIndex, NewState);
+ }
+ HubInstanceState UpdateInstanceState(const RwLock::ExclusiveLockScope& HubLock, size_t ActiveInstanceIndex, HubInstanceState NewState)
+ {
+ ZEN_UNUSED(HubLock);
+ return UpdateInstanceStateLocked(ActiveInstanceIndex, NewState);
+ }
+ HubInstanceState UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState);
+
+ std::vector<ActiveInstance> m_ActiveInstances;
+ std::deque<size_t> m_FreeActiveInstanceIndexes;
+ ResourceMetrics m_ResourceLimits;
+ SystemMetrics m_HostMetrics;
+ std::atomic<int> m_MaxInstanceCount = 0;
+ std::thread m_WatchDog;
- void OnStateUpdate(std::string_view ModuleId,
- HubInstanceState OldState,
- HubInstanceState& NewState,
- uint16_t BasePort,
- std::string_view BaseUri);
+ Event m_WatchDogEvent;
+ void WatchDog();
+ bool CheckInstanceStatus(HttpClient& ActivityHttpClient,
+ StorageServerInstance::SharedLockedPtr&& LockedInstance,
+ size_t ActiveInstanceIndex);
+ void AttemptRecoverInstance(std::string_view ModuleId);
- friend class InstanceStateUpdateGuard;
+ void UpdateStats();
+ void UpdateCapacityMetrics();
+ bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason);
+ uint16_t GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const;
+
+ Response InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveInstance& Instance)>&& DeprovisionGate);
+ void CompleteProvision(StorageServerInstance::ExclusiveLockedPtr& Instance,
+ size_t ActiveInstanceIndex,
+ HubInstanceState OldState,
+ bool IsNewInstance);
+ void CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex);
+ void CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState);
+ void CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState);
+
+ // Notifications may fire slightly out of sync with the Hub's internal State flag.
+ // The guarantee is that notifications are sent in the correct order, but the State
+ // flag may be updated either before or after the notification fires depending on the
+ // code path. Callers must not assume a specific ordering between the two.
+ void NotifyStateUpdate(std::string_view ModuleId,
+ HubInstanceState OldState,
+ HubInstanceState NewState,
+ uint16_t BasePort,
+ std::string_view BaseUri);
};
#if ZEN_WITH_TESTS
diff --git a/src/zenserver/hub/hubinstancestate.h b/src/zenserver/hub/hubinstancestate.h
index 2dee89ff0..c895f75d1 100644
--- a/src/zenserver/hub/hubinstancestate.h
+++ b/src/zenserver/hub/hubinstancestate.h
@@ -9,15 +9,18 @@ namespace zen {
enum class HubInstanceState : uint32_t
{
- Unprovisioned, // Initial state; process not running
- Provisioning, // Hydrating and spawning process
- Provisioned, // Process running and serving requests
- Hibernating, // Shutting down process, preserving data on disk
- Hibernated, // Process stopped, data preserved; can be woken
- Waking, // Starting process from preserved data
- Deprovisioning, // Shutting down process and cleaning up data
- Crashed, // Process died unexpectedly while Provisioned; recovery pending
- Recovering, // Attempting in-place restart after a crash
+ // Stable states - possible to initiate state change to a different stable state via the transitioning states
+ Unprovisioned, // Initial state; process not running
+ Provisioned, // Process running and serving requests
+ Hibernated, // Process stopped, data preserved; can be woken
+ Crashed, // Process died unexpectedly while Provisioned; recovery pending
+
+ // Transitioning states - there is explicit ownership during these states and it may not be stolen
+ Provisioning, // Unprovisioned -> Provisioned (Hydrating and spawning process)
+ Hibernating, // Provisioned -> Hibernated (Shutting down process, preserving data on disk)
+ Waking, // Hibernated -> Provisioned (Starting process from preserved data)
+ Deprovisioning, // Provisioned/Hibernated/Crashed -> Unprovisioned (Shutting down process and cleaning up data)
+ Recovering, // Crashed -> Provisioned/Unprovisioned (Attempting in-place restart after a crash)
};
std::string_view ToString(HubInstanceState State);
diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp
index 99f0c29f3..6b139dbf1 100644
--- a/src/zenserver/hub/storageserverinstance.cpp
+++ b/src/zenserver/hub/storageserverinstance.cpp
@@ -69,177 +69,86 @@ StorageServerInstance::GetProcessMetrics(ProcessMetrics& OutMetrics) const
OutMetrics.PeakPagefileUsage = m_PeakPagefileUsage.load();
}
-bool
+void
StorageServerInstance::ProvisionLocked()
{
- if (m_State.load() == HubInstanceState::Provisioned)
+ if (m_ServerInstance.IsRunning())
{
ZEN_WARN("Storage server instance for module '{}' is already provisioned", m_ModuleId);
- return false;
- }
-
- if (m_State.load() == HubInstanceState::Crashed)
- {
- ZEN_WARN("Storage server instance for module '{}' is in crashed state; re-provisioning from scratch", m_ModuleId);
- m_State = HubInstanceState::Unprovisioned;
- }
-
- if (m_State.load() == HubInstanceState::Hibernated)
- {
- return WakeLocked();
- }
-
- if (m_State.load() != HubInstanceState::Unprovisioned)
- {
- ZEN_WARN("Storage server instance for module '{}' is in unexpected state '{}', cannot provision",
- m_ModuleId,
- ToString(m_State.load()));
- return false;
+ return;
}
ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_BaseDir);
-
- m_State = HubInstanceState::Provisioning;
try
{
Hydrate();
SpawnServerProcess();
- m_State = HubInstanceState::Provisioned;
- return true;
}
- catch (...)
+ catch (const std::exception& Ex)
{
- m_State = HubInstanceState::Unprovisioned;
+ ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during provisioning. Reason: {}",
+ m_ModuleId,
+ m_BaseDir,
+ Ex.what());
throw;
}
}
-bool
+void
StorageServerInstance::DeprovisionLocked()
{
- const HubInstanceState CurrentState = m_State.load();
- if (CurrentState != HubInstanceState::Provisioned && CurrentState != HubInstanceState::Crashed &&
- CurrentState != HubInstanceState::Hibernated)
+ if (m_ServerInstance.IsRunning())
{
- ZEN_WARN("Attempted to deprovision storage server instance for module '{}' which is not provisioned (state: '{}')",
- m_ModuleId,
- ToString(CurrentState));
- return false;
- }
-
- ZEN_INFO("Deprovisioning storage server instance for module '{}'", m_ModuleId);
-
- m_State = HubInstanceState::Deprovisioning;
- if (CurrentState == HubInstanceState::Provisioned)
- {
- try
- {
- m_ServerInstance.Shutdown();
- }
- catch (...)
- {
- m_State = HubInstanceState::Provisioned; // Shutdown failed; process may still be running
- throw;
- }
+ // m_ServerInstance.Shutdown() never throws.
+ m_ServerInstance.Shutdown();
}
- // Crashed or Hibernated: process already dead; skip Shutdown
+ // Crashed or Hibernated: process already dead; skip Shutdown.
+ // Dehydrate preserves instance state for future re-provisioning. Failure means saved state
+ // may be stale or absent, but the process is already dead so the slot can still be released.
+ // Swallow the exception and proceed with cleanup rather than leaving the module stuck.
try
{
Dehydrate();
}
- catch (...)
+ catch (const std::exception& Ex)
{
- m_State = HubInstanceState::Crashed; // Dehydrate failed; process is already dead
- throw;
+ ZEN_WARN("Dehydration of module {} failed during deprovisioning, current state not saved. Reason: {}", m_ModuleId, Ex.what());
}
-
- m_State = HubInstanceState::Unprovisioned;
- return true;
}
-bool
+void
StorageServerInstance::HibernateLocked()
{
// Signal server to shut down, but keep data around for later wake
- if (m_State.load() != HubInstanceState::Provisioned)
- {
- ZEN_WARN("Attempted to hibernate storage server instance for module '{}' which is not provisioned (state: '{}')",
- m_ModuleId,
- ToString(m_State.load()));
- return false;
- }
-
if (!m_ServerInstance.IsRunning())
{
- ZEN_WARN("Attempted to hibernate storage server instance for module '{}' which is not running", m_ModuleId);
- return false;
+ return;
}
- m_State = HubInstanceState::Hibernating;
- try
- {
- m_ServerInstance.Shutdown();
- m_State = HubInstanceState::Hibernated;
- return true;
- }
- catch (...)
- {
- m_State = HubInstanceState::Provisioned; // Shutdown failed; instance is still running
- throw;
- }
+ // m_ServerInstance.Shutdown() never throws.
+ m_ServerInstance.Shutdown();
}
-bool
+void
StorageServerInstance::WakeLocked()
{
// Start server in-place using existing data
- if (m_State.load() != HubInstanceState::Hibernated)
+ if (m_ServerInstance.IsRunning())
{
- ZEN_WARN("Attempted to wake storage server instance for module '{}' which is not hibernated (state: '{}')",
- m_ModuleId,
- ToString(m_State.load()));
- return false;
+ return;
}
- ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId);
-
- m_State = HubInstanceState::Waking;
try
{
SpawnServerProcess();
- m_State = HubInstanceState::Provisioned;
- return true;
- }
- catch (...)
- {
- m_State = HubInstanceState::Hibernated;
- throw;
- }
-}
-
-bool
-StorageServerInstance::RecoverCrashedLocked()
-{
- ZEN_ASSERT(m_State.load() == HubInstanceState::Provisioned);
- ZEN_ASSERT(!m_ServerInstance.IsRunning());
-
- ZEN_WARN("Storage server instance for module '{}' has crashed; attempting in-place recovery", m_ModuleId);
- m_State = HubInstanceState::Recovering;
- try
- {
- SpawnServerProcess();
- m_State = HubInstanceState::Provisioned;
- ZEN_INFO("Storage server instance for module '{}' recovered successfully", m_ModuleId);
- return true;
}
catch (const std::exception& Ex)
{
- ZEN_ERROR("Failed to restart module '{}': {}", m_ModuleId, Ex.what());
- m_State = HubInstanceState::Crashed;
- return false;
+ ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", m_ModuleId, m_BaseDir, Ex.what());
+ throw;
}
}
@@ -337,13 +246,13 @@ bool
StorageServerInstance::SharedLockedPtr::IsRunning() const
{
ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->m_State.load() == HubInstanceState::Provisioned && m_Instance->m_ServerInstance.IsRunning();
+ return m_Instance->m_ServerInstance.IsRunning();
}
void
StorageServerInstance::UpdateMetricsLocked()
{
- if (m_State.load() == HubInstanceState::Provisioned)
+ if (m_ServerInstance.IsRunning())
{
ProcessMetrics Metrics;
zen::GetProcessMetrics(m_ServerInstance.GetProcessHandle(), Metrics);
@@ -436,42 +345,35 @@ bool
StorageServerInstance::ExclusiveLockedPtr::IsRunning() const
{
ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->m_State.load() == HubInstanceState::Provisioned && m_Instance->m_ServerInstance.IsRunning();
+ return m_Instance->m_ServerInstance.IsRunning();
}
-bool
+void
StorageServerInstance::ExclusiveLockedPtr::Provision()
{
ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->ProvisionLocked();
+ m_Instance->ProvisionLocked();
}
-bool
+void
StorageServerInstance::ExclusiveLockedPtr::Deprovision()
{
ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->DeprovisionLocked();
+ m_Instance->DeprovisionLocked();
}
-bool
+void
StorageServerInstance::ExclusiveLockedPtr::Hibernate()
{
ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->HibernateLocked();
+ m_Instance->HibernateLocked();
}
-bool
+void
StorageServerInstance::ExclusiveLockedPtr::Wake()
{
ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->WakeLocked();
-}
-
-bool
-StorageServerInstance::ExclusiveLockedPtr::RecoverFromCrash()
-{
- ZEN_ASSERT(m_Instance != nullptr);
- return m_Instance->RecoverCrashedLocked();
+ m_Instance->WakeLocked();
}
} // namespace zen
diff --git a/src/zenserver/hub/storageserverinstance.h b/src/zenserver/hub/storageserverinstance.h
index a0ca496dc..94c47630c 100644
--- a/src/zenserver/hub/storageserverinstance.h
+++ b/src/zenserver/hub/storageserverinstance.h
@@ -2,7 +2,6 @@
#pragma once
-#include "hubinstancestate.h"
#include "resourcemetrics.h"
#include <zenutil/zenserverprocess.h>
@@ -38,8 +37,7 @@ public:
const ResourceMetrics& GetResourceMetrics() const { return m_ResourceMetrics; }
inline std::string_view GetModuleId() const { return m_ModuleId; }
- inline HubInstanceState GetState() const { return m_State.load(); }
- inline uint16_t GetBasePort() const { return m_Config.BasePort; };
+ inline uint16_t GetBasePort() const { return m_Config.BasePort; }
void GetProcessMetrics(ProcessMetrics& OutMetrics) const;
#if ZEN_PLATFORM_WINDOWS
@@ -63,12 +61,7 @@ public:
operator bool() const { return m_Instance != nullptr; }
std::string_view GetModuleId() const;
- HubInstanceState GetState() const
- {
- ZEN_ASSERT(m_Instance);
- return m_Instance->m_State.load();
- }
- uint16_t GetBasePort() const
+ uint16_t GetBasePort() const
{
ZEN_ASSERT(m_Instance);
return m_Instance->GetBasePort();
@@ -114,12 +107,7 @@ public:
operator bool() const { return m_Instance != nullptr; }
std::string_view GetModuleId() const;
- HubInstanceState GetState() const
- {
- ZEN_ASSERT(m_Instance);
- return m_Instance->m_State.load();
- }
- uint16_t GetBasePort() const
+ uint16_t GetBasePort() const
{
ZEN_ASSERT(m_Instance);
return m_Instance->GetBasePort();
@@ -132,15 +120,10 @@ public:
return m_Instance->m_ResourceMetrics;
}
- // For Provision, Deprovision, Hibernate, Wake:
- // true = operation performed (state changed)
- // false = precondition not met (wrong state), nothing attempted
- // throws = operation attempted but failed; m_State corrected before throw
- [[nodiscard]] bool Provision();
- [[nodiscard]] bool Deprovision();
- [[nodiscard]] bool Hibernate();
- [[nodiscard]] bool Wake();
- [[nodiscard]] bool RecoverFromCrash(); // true = recovered; false = spawn failed (Crashed), caller must Deprovision() + cleanup
+ void Provision();
+ void Deprovision();
+ void Hibernate();
+ void Wake();
private:
RwLock* m_Lock = nullptr;
@@ -150,12 +133,11 @@ public:
[[nodiscard]] ExclusiveLockedPtr LockExclusive(bool Wait) { return ExclusiveLockedPtr(m_Lock, this, Wait); }
private:
- [[nodiscard]] bool ProvisionLocked();
- [[nodiscard]] bool DeprovisionLocked();
+ void ProvisionLocked();
+ void DeprovisionLocked();
- [[nodiscard]] bool HibernateLocked();
- [[nodiscard]] bool WakeLocked();
- [[nodiscard]] bool RecoverCrashedLocked(); // true = recovered (Provisioned); false = spawn failed (Crashed)
+ void HibernateLocked();
+ void WakeLocked();
void UpdateMetricsLocked();
@@ -164,8 +146,7 @@ private:
std::string m_ModuleId;
ZenServerInstance m_ServerInstance;
- std::atomic<HubInstanceState> m_State{HubInstanceState::Unprovisioned};
- std::filesystem::path m_BaseDir;
+ std::filesystem::path m_BaseDir;
std::filesystem::path m_TempDir;
ResourceMetrics m_ResourceMetrics;
diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp
index f9ff655ec..314031246 100644
--- a/src/zenserver/hub/zenhubserver.cpp
+++ b/src/zenserver/hub/zenhubserver.cpp
@@ -16,6 +16,7 @@
#include <zencore/windows.h>
#include <zenhttp/httpapiservice.h>
#include <zenutil/service.h>
+#include <zenutil/workerpools.h>
ZEN_THIRD_PARTY_INCLUDES_START
#include <cxxopts.hpp>
@@ -73,6 +74,20 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options)
Options.add_option("hub",
"",
+ "consul-health-interval-seconds",
+ "Interval in seconds between Consul health checks",
+ cxxopts::value<uint32_t>(m_ServerOptions.ConsulHealthIntervalSeconds)->default_value("10"),
+ "<seconds>");
+
+ Options.add_option("hub",
+ "",
+ "consul-deregister-after-seconds",
+ "Seconds after which Consul deregisters an unhealthy service",
+ cxxopts::value<uint32_t>(m_ServerOptions.ConsulDeregisterAfterSeconds)->default_value("30"),
+ "<seconds>");
+
+ Options.add_option("hub",
+ "",
"hub-base-port-number",
"Base port number for provisioned instances",
cxxopts::value<uint16_t>(m_ServerOptions.HubBasePortNumber)->default_value("21000"),
@@ -132,6 +147,62 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options)
cxxopts::value<bool>(m_ServerOptions.HubUseJobObject)->default_value("true"),
"");
#endif // ZEN_PLATFORM_WINDOWS
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-cycle-interval-ms",
+ "Interval between watchdog cycles in milliseconds",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.CycleIntervalMs)->default_value("3000"),
+ "<ms>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-cycle-processing-budget-ms",
+ "Maximum processing time budget per watchdog cycle in milliseconds",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.CycleProcessingBudgetMs)->default_value("500"),
+ "<ms>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-instance-check-throttle-ms",
+ "Delay between checking successive instances per watchdog cycle in milliseconds",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.InstanceCheckThrottleMs)->default_value("5"),
+ "<ms>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-provisioned-inactivity-timeout-seconds",
+ "Seconds of inactivity after which a provisioned instance is deprovisioned",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ProvisionedInactivityTimeoutSeconds)->default_value("600"),
+ "<seconds>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-hibernated-inactivity-timeout-seconds",
+ "Seconds of inactivity after which a hibernated instance is deprovisioned",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.HibernatedInactivityTimeoutSeconds)->default_value("1800"),
+ "<seconds>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-inactivity-check-margin-seconds",
+ "Margin in seconds subtracted from inactivity timeout before triggering an activity check",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.InactivityCheckMarginSeconds)->default_value("60"),
+ "<seconds>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-activity-check-connect-timeout-ms",
+ "Connect timeout in milliseconds for instance activity check requests",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ActivityCheckConnectTimeoutMs)->default_value("100"),
+ "<ms>");
+
+ Options.add_option("hub",
+ "",
+ "hub-watchdog-activity-check-request-timeout-ms",
+ "Request timeout in milliseconds for instance activity check requests",
+ cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ActivityCheckRequestTimeoutMs)->default_value("200"),
+ "<ms>");
}
void
@@ -180,7 +251,8 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId,
{
return;
}
- if (NewState == HubInstanceState::Provisioned)
+
+ if (NewState == HubInstanceState::Provisioning || NewState == HubInstanceState::Provisioned)
{
consul::ServiceRegistrationInfo ServiceInfo{
.ServiceId = std::string(ModuleId),
@@ -190,8 +262,12 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId,
.Tags = std::vector<std::pair<std::string, std::string>>{std::make_pair("module", std::string(ModuleId)),
std::make_pair("zen-hub", std::string(HubInstanceId)),
std::make_pair("version", std::string(ZEN_CFG_VERSION))},
- .HealthIntervalSeconds = 10,
- .DeregisterAfterSeconds = 30};
+ .HealthIntervalSeconds = NewState == HubInstanceState::Provisioning
+ ? 0u
+ : m_ConsulHealthIntervalSeconds, // Disable health checks while not finished provisioning
+ .DeregisterAfterSeconds = NewState == HubInstanceState::Provisioning
+ ? 0u
+ : m_ConsulDeregisterAfterSeconds}; // Disable health checks while not finished provisioning
if (!m_ConsulClient->RegisterService(ServiceInfo))
{
@@ -218,7 +294,7 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId,
ZEN_INFO("Deregistered storage server instance for module '{}' at port {} from Consul", ModuleId, Info.Port);
}
}
- // Transitional states (Provisioning, Deprovisioning, Hibernating, Waking, Recovering, Crashed)
+ // Remaining transitional states (Deprovisioning, Hibernating, Waking, Recovering), Crashed
// and Hibernated are intentionally ignored.
}
@@ -300,21 +376,32 @@ ZenHubServer::InitializeState(const ZenHubServerConfig& ServerConfig)
void
ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig)
{
- ZEN_UNUSED(ServerConfig);
-
ZEN_INFO("instantiating Hub");
m_Hub = std::make_unique<Hub>(
- Hub::Configuration{.UseJobObject = ServerConfig.HubUseJobObject,
- .BasePortNumber = ServerConfig.HubBasePortNumber,
- .InstanceLimit = ServerConfig.HubInstanceLimit,
- .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount,
- .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit,
- .InstanceConfigPath = ServerConfig.HubInstanceConfigPath,
- .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification},
+ Hub::Configuration{
+ .UseJobObject = ServerConfig.HubUseJobObject,
+ .BasePortNumber = ServerConfig.HubBasePortNumber,
+ .InstanceLimit = ServerConfig.HubInstanceLimit,
+ .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount,
+ .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit,
+ .InstanceConfigPath = ServerConfig.HubInstanceConfigPath,
+ .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification,
+ .WatchDog =
+ {
+ .CycleInterval = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleIntervalMs),
+ .CycleProcessingBudget = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleProcessingBudgetMs),
+ .InstanceCheckThrottle = std::chrono::milliseconds(ServerConfig.WatchdogConfig.InstanceCheckThrottleMs),
+ .ProvisionedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.ProvisionedInactivityTimeoutSeconds),
+ .HibernatedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.HibernatedInactivityTimeoutSeconds),
+ .InactivityCheckMargin = std::chrono::seconds(ServerConfig.WatchdogConfig.InactivityCheckMarginSeconds),
+ .ActivityCheckConnectTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckConnectTimeoutMs),
+ .ActivityCheckRequestTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckRequestTimeoutMs),
+ }},
ZenServerEnvironment(ZenServerEnvironment::Hub,
ServerConfig.DataDir / "hub",
ServerConfig.DataDir / "servers",
ServerConfig.HubInstanceHttpClass),
+ &GetMediumWorkerPool(EWorkloadType::Background),
m_ConsulClient ? Hub::AsyncModuleStateChangeCallbackFunc{[this, HubInstanceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId)](
std::string_view ModuleId,
const HubProvisionedInstanceInfo& Info,
@@ -328,10 +415,10 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig)
m_ApiService = std::make_unique<zen::HttpApiService>(*m_Http);
ZEN_INFO("instantiating hub service");
- m_HubService = std::make_unique<HttpHubService>(*m_Hub);
+ m_HubService = std::make_unique<HttpHubService>(*m_Hub, m_StatsService, m_StatusService);
m_HubService->SetNotificationEndpoint(ServerConfig.UpstreamNotificationEndpoint, ServerConfig.InstanceId);
- m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService);
+ m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService);
}
void
@@ -383,7 +470,9 @@ ZenHubServer::InitializeConsulRegistration(const ZenHubServerConfig& ServerConfi
try
{
- m_ConsulClient = std::make_unique<consul::ConsulClient>(ServerConfig.ConsulEndpoint, ConsulAccessToken);
+ m_ConsulClient = std::make_unique<consul::ConsulClient>(ServerConfig.ConsulEndpoint, ConsulAccessToken);
+ m_ConsulHealthIntervalSeconds = ServerConfig.ConsulHealthIntervalSeconds;
+ m_ConsulDeregisterAfterSeconds = ServerConfig.ConsulDeregisterAfterSeconds;
consul::ServiceRegistrationInfo Info;
Info.ServiceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId);
@@ -397,6 +486,8 @@ ZenHubServer::InitializeConsulRegistration(const ZenHubServerConfig& ServerConfi
std::make_pair("base-port-number", fmt::format("{}", ServerConfig.HubBasePortNumber)),
std::make_pair("instance-limit", fmt::format("{}", ServerConfig.HubInstanceLimit)),
std::make_pair("use-job-object", fmt::format("{}", ServerConfig.HubUseJobObject))};
+ Info.HealthIntervalSeconds = ServerConfig.ConsulHealthIntervalSeconds;
+ Info.DeregisterAfterSeconds = ServerConfig.ConsulDeregisterAfterSeconds;
m_ConsulRegistration = std::make_unique<consul::ServiceRegistration>(m_ConsulClient.get(), Info);
diff --git a/src/zenserver/hub/zenhubserver.h b/src/zenserver/hub/zenhubserver.h
index 0fb192b9f..77df3eaa3 100644
--- a/src/zenserver/hub/zenhubserver.h
+++ b/src/zenserver/hub/zenhubserver.h
@@ -20,20 +20,35 @@ class HttpApiService;
class HttpFrontendService;
class HttpHubService;
+struct ZenHubWatchdogConfig
+{
+ uint32_t CycleIntervalMs = 3000;
+ uint32_t CycleProcessingBudgetMs = 500;
+ uint32_t InstanceCheckThrottleMs = 5;
+ uint32_t ProvisionedInactivityTimeoutSeconds = 600;
+ uint32_t HibernatedInactivityTimeoutSeconds = 1800;
+ uint32_t InactivityCheckMarginSeconds = 60; // Activity check is triggered this far before the inactivity timeout
+ uint32_t ActivityCheckConnectTimeoutMs = 100;
+ uint32_t ActivityCheckRequestTimeoutMs = 200;
+};
+
struct ZenHubServerConfig : public ZenServerConfig
{
std::string UpstreamNotificationEndpoint;
std::string InstanceId; // For use in notifications
std::string ConsulEndpoint; // If set, enables Consul service registration
std::string ConsulTokenEnv; // Environment variable name to read a Consul token from; defaults to CONSUL_HTTP_TOKEN if empty
- uint16_t HubBasePortNumber = 21000;
- int HubInstanceLimit = 1000;
- bool HubUseJobObject = true;
- std::string HubInstanceHttpClass = "asio";
- uint32_t HubInstanceHttpThreadCount = 0; // Automatic
- int HubInstanceCoreLimit = 0; // Automatic
- std::filesystem::path HubInstanceConfigPath; // Path to Lua config file
- std::string HydrationTargetSpecification; // hydration/dehydration target specification
+ uint32_t ConsulHealthIntervalSeconds = 10; // Interval in seconds between Consul health checks
+ uint32_t ConsulDeregisterAfterSeconds = 30; // Seconds before Consul deregisters an unhealthy service
+ uint16_t HubBasePortNumber = 21000;
+ int HubInstanceLimit = 1000;
+ bool HubUseJobObject = true;
+ std::string HubInstanceHttpClass = "asio";
+ uint32_t HubInstanceHttpThreadCount = 0; // Automatic
+ int HubInstanceCoreLimit = 0; // Automatic
+ std::filesystem::path HubInstanceConfigPath; // Path to Lua config file
+ std::string HydrationTargetSpecification; // hydration/dehydration target specification
+ ZenHubWatchdogConfig WatchdogConfig;
};
class Hub;
@@ -108,6 +123,8 @@ private:
std::unique_ptr<consul::ConsulClient> m_ConsulClient;
std::unique_ptr<consul::ServiceRegistration> m_ConsulRegistration;
+ uint32_t m_ConsulHealthIntervalSeconds = 10;
+ uint32_t m_ConsulDeregisterAfterSeconds = 30;
void InitializeState(const ZenHubServerConfig& ServerConfig);
void InitializeServices(const ZenHubServerConfig& ServerConfig);
diff --git a/src/zenserver/proxy/httpproxystats.cpp b/src/zenserver/proxy/httpproxystats.cpp
index 6aa3e5c9b..337be2417 100644
--- a/src/zenserver/proxy/httpproxystats.cpp
+++ b/src/zenserver/proxy/httpproxystats.cpp
@@ -140,6 +140,12 @@ HttpProxyStatsService::HandleRecordStatus(HttpServerRequest& Request)
Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
}
+void
+HttpProxyStatsService::HandleStatsRequest(HttpServerRequest& Request)
+{
+ Request.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
CbObject
HttpProxyStatsService::CollectStats()
{
@@ -225,10 +231,4 @@ HttpProxyStatsService::CollectStats()
return Cbo.Save();
}
-void
-HttpProxyStatsService::HandleStatsRequest(HttpServerRequest& Request)
-{
- Request.WriteResponse(HttpResponseCode::OK, CollectStats());
-}
-
} // namespace zen
diff --git a/src/zenserver/proxy/zenproxyserver.cpp b/src/zenserver/proxy/zenproxyserver.cpp
index cf84c159a..7e59a7b7e 100644
--- a/src/zenserver/proxy/zenproxyserver.cpp
+++ b/src/zenserver/proxy/zenproxyserver.cpp
@@ -324,7 +324,7 @@ ZenProxyServer::Initialize(const ZenProxyServerConfig& ServerConfig, ZenServerSt
m_ApiService = std::make_unique<HttpApiService>(*m_Http);
m_Http->RegisterService(*m_ApiService);
- m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService);
+ m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService);
m_Http->RegisterService(*m_FrontendService);
std::string DefaultRecordDir = (m_DataRoot / "recordings").string();
diff --git a/src/zenserver/sessions/httpsessions.cpp b/src/zenserver/sessions/httpsessions.cpp
index 429ba98cf..fdf2e1f21 100644
--- a/src/zenserver/sessions/httpsessions.cpp
+++ b/src/zenserver/sessions/httpsessions.cpp
@@ -49,6 +49,21 @@ HttpSessionsService::HandleRequest(HttpServerRequest& Request)
}
}
+void
+HttpSessionsService::HandleStatusRequest(HttpServerRequest& Request)
+{
+ ZEN_TRACE_CPU("HttpSessionsService::Status");
+ CbObjectWriter Cbo;
+ Cbo << "ok" << true;
+ Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+}
+
+void
+HttpSessionsService::HandleStatsRequest(HttpServerRequest& HttpReq)
+{
+ HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
CbObject
HttpSessionsService::CollectStats()
{
@@ -72,19 +87,10 @@ HttpSessionsService::CollectStats()
return Cbo.Save();
}
-void
-HttpSessionsService::HandleStatsRequest(HttpServerRequest& HttpReq)
+uint64_t
+HttpSessionsService::GetActivityCounter()
{
- HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats());
-}
-
-void
-HttpSessionsService::HandleStatusRequest(HttpServerRequest& Request)
-{
- ZEN_TRACE_CPU("HttpSessionsService::Status");
- CbObjectWriter Cbo;
- Cbo << "ok" << true;
- Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+ return m_HttpRequests.Count();
}
void
diff --git a/src/zenserver/sessions/httpsessions.h b/src/zenserver/sessions/httpsessions.h
index a5783a46b..86a23f835 100644
--- a/src/zenserver/sessions/httpsessions.h
+++ b/src/zenserver/sessions/httpsessions.h
@@ -29,9 +29,10 @@ public:
virtual const char* BaseUri() const override;
virtual void HandleRequest(HttpServerRequest& Request) override;
- virtual CbObject CollectStats() override;
- virtual void HandleStatsRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
void SetSelfSessionId(const Oid& Id) { m_SelfSessionId = Id; }
diff --git a/src/zenserver/storage/admin/admin.h b/src/zenserver/storage/admin/admin.h
index ee3da4579..361153e42 100644
--- a/src/zenserver/storage/admin/admin.h
+++ b/src/zenserver/storage/admin/admin.h
@@ -13,7 +13,7 @@ class JobQueue;
class ZenCacheStore;
struct ZenServerConfig;
-class HttpAdminService : public zen::HttpService
+class HttpAdminService : public HttpService
{
public:
struct LogPaths
@@ -31,7 +31,7 @@ public:
~HttpAdminService();
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
private:
HttpRequestRouter m_Router;
diff --git a/src/zenserver/storage/buildstore/httpbuildstore.cpp b/src/zenserver/storage/buildstore/httpbuildstore.cpp
index de9589078..bbbb0c37b 100644
--- a/src/zenserver/storage/buildstore/httpbuildstore.cpp
+++ b/src/zenserver/storage/buildstore/httpbuildstore.cpp
@@ -605,6 +605,26 @@ HttpBuildStoreService::BlobsExistsRequest(HttpRouterRequest& Req)
return ServerRequest.WriteResponse(HttpResponseCode::OK, ResponseObject);
}
+void
+HttpBuildStoreService::HandleStatusRequest(HttpServerRequest& Request)
+{
+ ZEN_TRACE_CPU("HttpBuildStoreService::Status");
+ CbObjectWriter Cbo;
+ Cbo << "ok" << true;
+ Cbo.BeginObject("capabilities");
+ {
+ Cbo << "maxrangecountperrequest" << MaxRangeCountPerRequestSupported;
+ }
+ Cbo.EndObject(); // capabilities
+ Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+}
+
+void
+HttpBuildStoreService::HandleStatsRequest(HttpServerRequest& Request)
+{
+ Request.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
CbObject
HttpBuildStoreService::CollectStats()
{
@@ -663,24 +683,10 @@ HttpBuildStoreService::CollectStats()
return Cbo.Save();
}
-void
-HttpBuildStoreService::HandleStatsRequest(HttpServerRequest& Request)
-{
- Request.WriteResponse(HttpResponseCode::OK, CollectStats());
-}
-
-void
-HttpBuildStoreService::HandleStatusRequest(HttpServerRequest& Request)
+uint64_t
+HttpBuildStoreService::GetActivityCounter()
{
- ZEN_TRACE_CPU("HttpBuildStoreService::Status");
- CbObjectWriter Cbo;
- Cbo << "ok" << true;
- Cbo.BeginObject("capabilities");
- {
- Cbo << "maxrangecountperrequest" << MaxRangeCountPerRequestSupported;
- }
- Cbo.EndObject(); // capabilities
- Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+ return m_HttpRequests.Count();
}
} // namespace zen
diff --git a/src/zenserver/storage/buildstore/httpbuildstore.h b/src/zenserver/storage/buildstore/httpbuildstore.h
index 2a09b71cf..864d12edc 100644
--- a/src/zenserver/storage/buildstore/httpbuildstore.h
+++ b/src/zenserver/storage/buildstore/httpbuildstore.h
@@ -13,18 +13,19 @@ namespace zen {
class BuildStore;
-class HttpBuildStoreService final : public zen::HttpService, public IHttpStatusProvider, public IHttpStatsProvider
+class HttpBuildStoreService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider
{
public:
HttpBuildStoreService(HttpStatusService& StatusService, HttpStatsService& StatsService, BuildStore& Store);
virtual ~HttpBuildStoreService();
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
- virtual CbObject CollectStats() override;
- virtual void HandleStatsRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
private:
struct BuildStoreStats
diff --git a/src/zenserver/storage/cache/httpstructuredcache.cpp b/src/zenserver/storage/cache/httpstructuredcache.cpp
index bbdb03ba4..c1727270c 100644
--- a/src/zenserver/storage/cache/httpstructuredcache.cpp
+++ b/src/zenserver/storage/cache/httpstructuredcache.cpp
@@ -1827,113 +1827,12 @@ HttpStructuredCacheService::HandleRpcRequest(HttpServerRequest& Request, std::st
}
}
-CbObject
-HttpStructuredCacheService::CollectStats()
+void
+HttpStructuredCacheService::HandleStatusRequest(HttpServerRequest& Request)
{
- ZEN_MEMSCOPE(GetCacheHttpTag());
-
CbObjectWriter Cbo;
-
- EmitSnapshot("requests", m_HttpRequests, Cbo);
-
- const uint64_t HitCount = m_CacheStats.HitCount;
- const uint64_t UpstreamHitCount = m_CacheStats.UpstreamHitCount;
- const uint64_t MissCount = m_CacheStats.MissCount;
- const uint64_t WriteCount = m_CacheStats.WriteCount;
- const uint64_t BadRequestCount = m_CacheStats.BadRequestCount;
- struct CidStoreStats StoreStats = m_CidStore.Stats();
- const uint64_t ChunkHitCount = StoreStats.HitCount;
- const uint64_t ChunkMissCount = StoreStats.MissCount;
- const uint64_t ChunkWriteCount = StoreStats.WriteCount;
- const uint64_t TotalCount = HitCount + MissCount;
-
- const uint64_t RpcRequests = m_CacheStats.RpcRequests;
- const uint64_t RpcRecordRequests = m_CacheStats.RpcRecordRequests;
- const uint64_t RpcRecordBatchRequests = m_CacheStats.RpcRecordBatchRequests;
- const uint64_t RpcValueRequests = m_CacheStats.RpcValueRequests;
- const uint64_t RpcValueBatchRequests = m_CacheStats.RpcValueBatchRequests;
- const uint64_t RpcChunkRequests = m_CacheStats.RpcChunkRequests;
- const uint64_t RpcChunkBatchRequests = m_CacheStats.RpcChunkBatchRequests;
-
- const CidStoreSize CidSize = m_CidStore.TotalSize();
- const CacheStoreSize CacheSize = m_CacheStore.TotalSize();
-
- Cbo.BeginObject("cache");
- {
- Cbo << "badrequestcount" << BadRequestCount;
- Cbo.BeginObject("rpc");
- Cbo << "count" << RpcRequests;
- Cbo << "ops" << RpcRecordBatchRequests + RpcValueBatchRequests + RpcChunkBatchRequests;
- Cbo.BeginObject("records");
- Cbo << "count" << RpcRecordRequests;
- Cbo << "ops" << RpcRecordBatchRequests;
- Cbo.EndObject();
- Cbo.BeginObject("values");
- Cbo << "count" << RpcValueRequests;
- Cbo << "ops" << RpcValueBatchRequests;
- Cbo.EndObject();
- Cbo.BeginObject("chunks");
- Cbo << "count" << RpcChunkRequests;
- Cbo << "ops" << RpcChunkBatchRequests;
- Cbo.EndObject();
- Cbo.EndObject();
-
- Cbo.BeginObject("size");
- {
- Cbo << "disk" << CacheSize.DiskSize;
- Cbo << "memory" << CacheSize.MemorySize;
- }
- Cbo.EndObject();
-
- Cbo << "hits" << HitCount << "misses" << MissCount << "writes" << WriteCount;
- Cbo << "hit_ratio" << (TotalCount > 0 ? (double(HitCount) / double(TotalCount)) : 0.0);
-
- if (m_UpstreamCache.IsActive())
- {
- Cbo << "upstream_ratio" << (HitCount > 0 ? (double(UpstreamHitCount) / double(HitCount)) : 0.0);
- Cbo << "upstream_hits" << m_CacheStats.UpstreamHitCount;
- }
-
- Cbo << "cidhits" << ChunkHitCount << "cidmisses" << ChunkMissCount << "cidwrites" << ChunkWriteCount;
-
- {
- ZenCacheStore::CacheStoreStats StoreStatsData = m_CacheStore.Stats();
- Cbo.BeginObject("store");
- Cbo << "hits" << StoreStatsData.HitCount << "misses" << StoreStatsData.MissCount << "writes" << StoreStatsData.WriteCount
- << "rejected_writes" << StoreStatsData.RejectedWriteCount << "rejected_reads" << StoreStatsData.RejectedReadCount;
- const uint64_t StoreTotal = StoreStatsData.HitCount + StoreStatsData.MissCount;
- Cbo << "hit_ratio" << (StoreTotal > 0 ? (double(StoreStatsData.HitCount) / double(StoreTotal)) : 0.0);
- EmitSnapshot("read", StoreStatsData.GetOps, Cbo);
- EmitSnapshot("write", StoreStatsData.PutOps, Cbo);
- Cbo.EndObject();
- }
- }
- Cbo.EndObject();
-
- if (m_UpstreamCache.IsActive())
- {
- EmitSnapshot("upstream_gets", m_UpstreamGetRequestTiming, Cbo);
- Cbo.BeginObject("upstream");
- {
- m_UpstreamCache.GetStatus(Cbo);
- }
- Cbo.EndObject();
- }
-
- Cbo.BeginObject("cid");
- {
- Cbo.BeginObject("size");
- {
- Cbo << "tiny" << CidSize.TinySize;
- Cbo << "small" << CidSize.SmallSize;
- Cbo << "large" << CidSize.LargeSize;
- Cbo << "total" << CidSize.TotalSize;
- }
- Cbo.EndObject();
- }
- Cbo.EndObject();
-
- return Cbo.Save();
+ Cbo << "ok" << true;
+ Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
}
void
@@ -1944,12 +1843,6 @@ HttpStructuredCacheService::HandleStatsRequest(HttpServerRequest& Request)
bool ShowCidStoreStats = Request.GetQueryParams().GetValue("cidstorestats") == "true";
bool ShowCacheStoreStats = Request.GetQueryParams().GetValue("cachestorestats") == "true";
- if (!ShowCidStoreStats && !ShowCacheStoreStats)
- {
- Request.WriteResponse(HttpResponseCode::OK, CollectStats());
- return;
- }
-
// Full stats with optional detailed store/cid breakdowns
CbObjectWriter Cbo;
@@ -2156,12 +2049,38 @@ HttpStructuredCacheService::HandleStatsRequest(HttpServerRequest& Request)
Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
}
-void
-HttpStructuredCacheService::HandleStatusRequest(HttpServerRequest& Request)
+CbObject
+HttpStructuredCacheService::CollectStats()
{
+ ZEN_TRACE_CPU("HttpStructuredCacheService::Stats");
+ ZEN_MEMSCOPE(GetCacheHttpTag());
+
CbObjectWriter Cbo;
- Cbo << "ok" << true;
- Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+
+ EmitSnapshot("requests", m_HttpRequests, Cbo);
+
+ const CacheStoreSize CacheSize = m_CacheStore.TotalSize();
+
+ Cbo.BeginObject("cache");
+ {
+ Cbo.BeginObject("size");
+ {
+ Cbo << "disk" << CacheSize.DiskSize;
+ Cbo << "memory" << CacheSize.MemorySize;
+ }
+ Cbo.EndObject();
+
+ Cbo << "hits" << m_CacheStats.HitCount << "misses" << m_CacheStats.MissCount;
+ }
+ Cbo.EndObject();
+
+ return Cbo.Save();
+}
+
+uint64_t
+HttpStructuredCacheService::GetActivityCounter()
+{
+ return m_HttpRequests.Count();
}
bool
diff --git a/src/zenserver/storage/cache/httpstructuredcache.h b/src/zenserver/storage/cache/httpstructuredcache.h
index d462415d4..fc80b449e 100644
--- a/src/zenserver/storage/cache/httpstructuredcache.h
+++ b/src/zenserver/storage/cache/httpstructuredcache.h
@@ -105,9 +105,10 @@ private:
void HandleCacheRequest(HttpServerRequest& Request);
void HandleCacheNamespaceRequest(HttpServerRequest& Request, std::string_view Namespace);
void HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket);
- virtual CbObject CollectStats() override;
- virtual void HandleStatsRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
bool AreDiskWritesAllowed() const;
diff --git a/src/zenserver/storage/objectstore/objectstore.cpp b/src/zenserver/storage/objectstore/objectstore.cpp
index 493326a32..d6516fa1a 100644
--- a/src/zenserver/storage/objectstore/objectstore.cpp
+++ b/src/zenserver/storage/objectstore/objectstore.cpp
@@ -14,6 +14,7 @@
#include "zencore/compactbinarybuilder.h"
#include "zenhttp/httpcommon.h"
#include "zenhttp/httpserver.h"
+#include "zenhttp/httpstats.h"
#include <filesystem>
#include <thread>
@@ -220,17 +221,20 @@ private:
StringBuilderBase& Builder;
};
-HttpObjectStoreService::HttpObjectStoreService(HttpStatusService& StatusService, ObjectStoreConfig Cfg)
-: m_StatusService(StatusService)
+HttpObjectStoreService::HttpObjectStoreService(HttpStatsService& StatsService, HttpStatusService& StatusService, ObjectStoreConfig Cfg)
+: m_StatsService(StatsService)
+, m_StatusService(StatusService)
, m_Cfg(std::move(Cfg))
{
- Inititalize();
+ Initialize();
+ m_StatsService.RegisterHandler("obj", *this);
m_StatusService.RegisterHandler("obj", *this);
}
HttpObjectStoreService::~HttpObjectStoreService()
{
m_StatusService.UnregisterHandler("obj", *this);
+ m_StatsService.UnregisterHandler("obj", *this);
}
const char*
@@ -240,8 +244,10 @@ HttpObjectStoreService::BaseUri() const
}
void
-HttpObjectStoreService::HandleRequest(zen::HttpServerRequest& Request)
+HttpObjectStoreService::HandleRequest(HttpServerRequest& Request)
{
+ metrics::OperationTiming::Scope $(m_HttpRequests);
+
if (m_Router.HandleRequest(Request) == false)
{
ZEN_LOG_WARN(LogObj, "No route found for {0}", Request.RelativeUri());
@@ -258,12 +264,36 @@ HttpObjectStoreService::HandleStatusRequest(HttpServerRequest& Request)
}
void
-HttpObjectStoreService::Inititalize()
+HttpObjectStoreService::HandleStatsRequest(HttpServerRequest& Request)
+{
+ Request.WriteResponse(HttpResponseCode::OK, CollectStats());
+}
+
+// Builds the stats object for the object store service: a "requests" snapshot of
+// m_HttpRequests followed by the current value of the m_TotalBytesServed counter.
+CbObject
+HttpObjectStoreService::CollectStats()
+{
+ ZEN_TRACE_CPU("HttpObjectStoreService::Stats");
+ CbObjectWriter Cbo;
+
+ EmitSnapshot("requests", m_HttpRequests, Cbo);
+ // Same key that ListBuckets writes into its response.
+ Cbo << "total_bytes_served" << m_TotalBytesServed.load();
+
+ return Cbo.Save();
+}
+
+// Returns Count() of m_HttpRequests, the OperationTiming that HandleRequest opens a
+// Scope on for every incoming request.
+uint64_t
+HttpObjectStoreService::GetActivityCounter()
+{
+ return m_HttpRequests.Count();
+}
+
+void
+HttpObjectStoreService::Initialize()
{
- ZEN_TRACE_CPU("HttpObjectStoreService::Inititalize");
+ ZEN_TRACE_CPU("HttpObjectStoreService::Initialize");
namespace fs = std::filesystem;
- ZEN_LOG_INFO(LogObj, "Initialzing Object Store in '{}'", m_Cfg.RootDirectory);
+ ZEN_LOG_INFO(LogObj, "Initializing Object Store in '{}'", m_Cfg.RootDirectory);
const fs::path BucketsPath = m_Cfg.RootDirectory / "buckets";
if (!IsDir(BucketsPath))
@@ -281,27 +311,27 @@ HttpObjectStoreService::Inititalize()
m_Router.RegisterRoute(
"",
- [this](zen::HttpRouterRequest& Request) { ListBuckets(Request); },
+ [this](HttpRouterRequest& Request) { ListBuckets(Request); },
HttpVerb::kGet);
m_Router.RegisterRoute(
"bucket",
- [this](zen::HttpRouterRequest& Request) { ListBuckets(Request); },
+ [this](HttpRouterRequest& Request) { ListBuckets(Request); },
HttpVerb::kGet);
m_Router.RegisterRoute(
"bucket",
- [this](zen::HttpRouterRequest& Request) { CreateBucket(Request); },
+ [this](HttpRouterRequest& Request) { CreateBucket(Request); },
HttpVerb::kPost | HttpVerb::kPut);
m_Router.RegisterRoute(
"bucket",
- [this](zen::HttpRouterRequest& Request) { DeleteBucket(Request); },
+ [this](HttpRouterRequest& Request) { DeleteBucket(Request); },
HttpVerb::kDelete);
m_Router.RegisterRoute(
"bucket/{path}",
- [this](zen::HttpRouterRequest& Request) {
+ [this](HttpRouterRequest& Request) {
const std::string_view Path = Request.GetCapture(1);
const auto Sep = Path.find_last_of('.');
const bool IsObject = Sep != std::string_view::npos && Path.size() - Sep > 0;
@@ -319,7 +349,7 @@ HttpObjectStoreService::Inititalize()
m_Router.RegisterRoute(
"bucket/{bucket}/{path}",
- [this](zen::HttpRouterRequest& Request) { PutObject(Request); },
+ [this](HttpRouterRequest& Request) { PutObject(Request); },
HttpVerb::kPost | HttpVerb::kPut);
}
@@ -327,7 +357,7 @@ std::filesystem::path
HttpObjectStoreService::GetBucketDirectory(std::string_view BucketName)
{
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
if (const auto It = std::find_if(std::begin(m_Cfg.Buckets),
std::end(m_Cfg.Buckets),
@@ -342,7 +372,7 @@ HttpObjectStoreService::GetBucketDirectory(std::string_view BucketName)
}
void
-HttpObjectStoreService::ListBuckets(zen::HttpRouterRequest& Request)
+HttpObjectStoreService::ListBuckets(HttpRouterRequest& Request)
{
namespace fs = std::filesystem;
@@ -351,7 +381,7 @@ HttpObjectStoreService::ListBuckets(zen::HttpRouterRequest& Request)
CbObjectWriter Response;
Response.BeginArray("buckets");
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
// Configured buckets
for (const ObjectStoreConfig::BucketConfig& Bucket : m_Cfg.Buckets)
@@ -428,13 +458,13 @@ HttpObjectStoreService::ListBuckets(zen::HttpRouterRequest& Request)
}
Response.EndArray();
- Response << "total_bytes_served" << TotalBytesServed.load();
+ Response << "total_bytes_served" << m_TotalBytesServed.load();
return Request.ServerRequest().WriteResponse(HttpResponseCode::OK, Response.Save());
}
void
-HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request)
+HttpObjectStoreService::CreateBucket(HttpRouterRequest& Request)
{
namespace fs = std::filesystem;
@@ -448,7 +478,7 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request)
const fs::path BucketPath = m_Cfg.RootDirectory / "buckets" / BucketName;
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
if (!IsDir(BucketPath))
{
CreateDirectories(BucketPath);
@@ -462,7 +492,7 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request)
}
void
-HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path)
+HttpObjectStoreService::ListBucket(HttpRouterRequest& Request, const std::string_view Path)
{
namespace fs = std::filesystem;
@@ -533,7 +563,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s
if (IsDir(FullPath))
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
Traversal.TraverseFileSystem(FullPath, FileVisitor);
}
CbObject Result = FileVisitor.GetResult();
@@ -552,7 +582,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s
}
void
-HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request)
+HttpObjectStoreService::DeleteBucket(HttpRouterRequest& Request)
{
namespace fs = std::filesystem;
@@ -566,7 +596,7 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request)
const fs::path BucketPath = m_Cfg.RootDirectory / "buckets" / BucketName;
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
DeleteDirectories(BucketPath);
}
@@ -575,7 +605,7 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request)
}
void
-HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string_view Path)
+HttpObjectStoreService::GetObject(HttpRouterRequest& Request, const std::string_view Path)
{
namespace fs = std::filesystem;
@@ -606,7 +636,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st
return Request.ServerRequest().WriteResponse(HttpResponseCode::NotFound);
}
- zen::HttpRanges Ranges;
+ HttpRanges Ranges;
if (Request.ServerRequest().TryGetRanges(Ranges); Ranges.size() > 1)
{
// Only a single range is supported
@@ -615,7 +645,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st
FileContents File;
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
File = ReadFile(FilePath);
}
@@ -635,7 +665,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st
if (Ranges.empty())
{
- const uint64_t TotalServed = TotalBytesServed.fetch_add(FileBuf.Size()) + FileBuf.Size();
+ const uint64_t TotalServed = m_TotalBytesServed.fetch_add(FileBuf.Size()) + FileBuf.Size();
ZEN_LOG_DEBUG(LogObj,
"GET - '{}/{}' ({}) [OK] (Served: {})",
@@ -650,7 +680,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st
{
const auto Range = Ranges[0];
const uint64_t RangeSize = 1 + (Range.End - Range.Start);
- const uint64_t TotalServed = TotalBytesServed.fetch_add(RangeSize) + RangeSize;
+ const uint64_t TotalServed = m_TotalBytesServed.fetch_add(RangeSize) + RangeSize;
ZEN_LOG_DEBUG(LogObj,
"GET - '{}/{}' (Range: {}-{}) ({}/{}) [OK] (Served: {})",
@@ -674,7 +704,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st
}
void
-HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request)
+HttpObjectStoreService::PutObject(HttpRouterRequest& Request)
{
namespace fs = std::filesystem;
@@ -699,7 +729,7 @@ HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request)
const fs::path FileDirectory = FilePath.parent_path();
{
- std::lock_guard _(BucketsMutex);
+ std::lock_guard _(m_BucketsMutex);
if (!IsDir(FileDirectory))
{
diff --git a/src/zenserver/storage/objectstore/objectstore.h b/src/zenserver/storage/objectstore/objectstore.h
index cc47b50c4..f51254357 100644
--- a/src/zenserver/storage/objectstore/objectstore.h
+++ b/src/zenserver/storage/objectstore/objectstore.h
@@ -11,6 +11,7 @@
namespace zen {
class HttpRouterRequest;
+class HttpStatsService;
struct ObjectStoreConfig
{
@@ -24,31 +25,36 @@ struct ObjectStoreConfig
std::vector<BucketConfig> Buckets;
};
-class HttpObjectStoreService final : public zen::HttpService, public IHttpStatusProvider
+class HttpObjectStoreService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider
{
public:
- HttpObjectStoreService(HttpStatusService& StatusService, ObjectStoreConfig Cfg);
+ HttpObjectStoreService(HttpStatsService& StatsService, HttpStatusService& StatusService, ObjectStoreConfig Cfg);
virtual ~HttpObjectStoreService();
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
private:
- void Inititalize();
+ void Initialize();
std::filesystem::path GetBucketDirectory(std::string_view BucketName);
- void ListBuckets(zen::HttpRouterRequest& Request);
- void CreateBucket(zen::HttpRouterRequest& Request);
- void ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path);
- void DeleteBucket(zen::HttpRouterRequest& Request);
- void GetObject(zen::HttpRouterRequest& Request, const std::string_view Path);
- void PutObject(zen::HttpRouterRequest& Request);
-
- HttpStatusService& m_StatusService;
- ObjectStoreConfig m_Cfg;
- std::mutex BucketsMutex;
- HttpRequestRouter m_Router;
- std::atomic_uint64_t TotalBytesServed{0};
+ void ListBuckets(HttpRouterRequest& Request);
+ void CreateBucket(HttpRouterRequest& Request);
+ void ListBucket(HttpRouterRequest& Request, const std::string_view Path);
+ void DeleteBucket(HttpRouterRequest& Request);
+ void GetObject(HttpRouterRequest& Request, const std::string_view Path);
+ void PutObject(HttpRouterRequest& Request);
+
+ HttpStatsService& m_StatsService;
+ HttpStatusService& m_StatusService;
+ ObjectStoreConfig m_Cfg;
+ std::mutex m_BucketsMutex;
+ HttpRequestRouter m_Router;
+ std::atomic_uint64_t m_TotalBytesServed{0};
+ metrics::OperationTiming m_HttpRequests;
};
} // namespace zen
diff --git a/src/zenserver/storage/projectstore/httpprojectstore.cpp b/src/zenserver/storage/projectstore/httpprojectstore.cpp
index 03b8aa382..a7c8c66b6 100644
--- a/src/zenserver/storage/projectstore/httpprojectstore.cpp
+++ b/src/zenserver/storage/projectstore/httpprojectstore.cpp
@@ -836,8 +836,17 @@ HttpProjectService::HandleRequest(HttpServerRequest& Request)
}
}
-CbObject
-HttpProjectService::CollectStats()
+// Status endpoint: always answers HttpResponseCode::OK with a compact-binary object
+// containing the single field "ok" = true.
+void
+HttpProjectService::HandleStatusRequest(HttpServerRequest& Request)
+{
+ ZEN_TRACE_CPU("HttpProjectService::Status");
+ CbObjectWriter Cbo;
+ Cbo << "ok" << true;
+ Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+}
+
+void
+HttpProjectService::HandleStatsRequest(HttpServerRequest& HttpReq)
{
ZEN_TRACE_CPU("ProjectService::Stats");
@@ -848,6 +857,8 @@ HttpProjectService::CollectStats()
EmitSnapshot("requests", m_HttpRequests, Cbo);
+ Cbo << "project_count" << (uint64_t)m_ProjectStore->ProjectCount();
+
Cbo.BeginObject("store");
{
Cbo.BeginObject("size");
@@ -903,22 +914,25 @@ HttpProjectService::CollectStats()
}
Cbo.EndObject();
- return Cbo.Save();
+ HttpReq.WriteResponse(HttpResponseCode::OK, Cbo.Save());
}
-void
-HttpProjectService::HandleStatsRequest(HttpServerRequest& HttpReq)
+// Lightweight stats snapshot: request timing plus the current project count.
+//
+// This deliberately does not share the HandleStatsRequest implementation, because
+// HandleStatsRequest performs heavy operations such as m_ProjectStore->StorageSize().
+CbObject
+HttpProjectService::CollectStats()
{
- HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats());
+ // Trace scope added for parity with the other services' CollectStats implementations.
+ ZEN_TRACE_CPU("HttpProjectService::Stats");
+ CbObjectWriter Cbo;
+
+ EmitSnapshot("requests", m_HttpRequests, Cbo);
+ Cbo << "project_count" << (uint64_t)m_ProjectStore->ProjectCount();
+
+ return Cbo.Save();
}
-void
-HttpProjectService::HandleStatusRequest(HttpServerRequest& Request)
+uint64_t
+HttpProjectService::GetActivityCounter()
{
- ZEN_TRACE_CPU("HttpProjectService::Status");
- CbObjectWriter Cbo;
- Cbo << "ok" << true;
- Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+ return m_HttpRequests.Count();
}
void
diff --git a/src/zenserver/storage/projectstore/httpprojectstore.h b/src/zenserver/storage/projectstore/httpprojectstore.h
index 917337324..e3ed02f26 100644
--- a/src/zenserver/storage/projectstore/httpprojectstore.h
+++ b/src/zenserver/storage/projectstore/httpprojectstore.h
@@ -53,9 +53,10 @@ public:
virtual const char* BaseUri() const override;
virtual void HandleRequest(HttpServerRequest& Request) override;
- virtual CbObject CollectStats() override;
- virtual void HandleStatsRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
private:
struct ProjectStats
diff --git a/src/zenserver/storage/upstream/upstreamservice.h b/src/zenserver/storage/upstream/upstreamservice.h
index f1da03c8c..c0063c055 100644
--- a/src/zenserver/storage/upstream/upstreamservice.h
+++ b/src/zenserver/storage/upstream/upstreamservice.h
@@ -9,14 +9,14 @@ namespace zen {
class AuthMgr;
class UpstreamCache;
-class HttpUpstreamService final : public zen::HttpService
+class HttpUpstreamService final : public HttpService
{
public:
HttpUpstreamService(UpstreamCache& Upstream, AuthMgr& Mgr);
virtual ~HttpUpstreamService();
virtual const char* BaseUri() const override;
- virtual void HandleRequest(zen::HttpServerRequest& Request) override;
+ virtual void HandleRequest(HttpServerRequest& Request) override;
private:
UpstreamCache& m_Upstream;
diff --git a/src/zenserver/storage/workspaces/httpworkspaces.cpp b/src/zenserver/storage/workspaces/httpworkspaces.cpp
index 785dd62f0..12e7bae73 100644
--- a/src/zenserver/storage/workspaces/httpworkspaces.cpp
+++ b/src/zenserver/storage/workspaces/httpworkspaces.cpp
@@ -110,10 +110,18 @@ HttpWorkspacesService::HandleRequest(HttpServerRequest& Request)
}
}
-CbObject
-HttpWorkspacesService::CollectStats()
+// Status endpoint: always answers HttpResponseCode::OK with a compact-binary object
+// containing the single field "ok" = true.
+void
+HttpWorkspacesService::HandleStatusRequest(HttpServerRequest& Request)
+{
+ ZEN_TRACE_CPU("HttpWorkspacesService::Status");
+ CbObjectWriter Cbo;
+ Cbo << "ok" << true;
+ Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+}
+
+void
+HttpWorkspacesService::HandleStatsRequest(HttpServerRequest& HttpReq)
{
- ZEN_TRACE_CPU("WorkspacesService::Stats");
CbObjectWriter Cbo;
EmitSnapshot("requests", m_HttpRequests, Cbo);
@@ -150,22 +158,26 @@ HttpWorkspacesService::CollectStats()
}
Cbo.EndObject();
- return Cbo.Save();
+ HttpReq.WriteResponse(HttpResponseCode::OK, Cbo.Save());
}
-void
-HttpWorkspacesService::HandleStatsRequest(HttpServerRequest& HttpReq)
+// Lightweight stats snapshot: request timing plus the current number of workspaces.
+CbObject
+HttpWorkspacesService::CollectStats()
{
- HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats());
+ ZEN_TRACE_CPU("HttpWorkspacesService::Stats");
+ CbObjectWriter Cbo;
+
+ EmitSnapshot("requests", m_HttpRequests, Cbo);
+
+ // Write the count with an explicit 64-bit width (as the project service does for
+ // "project_count") so the emitted field type does not depend on the platform's size type.
+ Cbo << "workspaces" << static_cast<uint64_t>(m_Workspaces.GetWorkspaces().size());
+
+ return Cbo.Save();
}
-void
-HttpWorkspacesService::HandleStatusRequest(HttpServerRequest& Request)
+uint64_t
+HttpWorkspacesService::GetActivityCounter()
{
- ZEN_TRACE_CPU("HttpWorkspacesService::Status");
- CbObjectWriter Cbo;
- Cbo << "ok" << true;
- Request.WriteResponse(HttpResponseCode::OK, Cbo.Save());
+ return m_HttpRequests.Count();
}
void
diff --git a/src/zenserver/storage/workspaces/httpworkspaces.h b/src/zenserver/storage/workspaces/httpworkspaces.h
index 7c5ddeff1..4af1316f8 100644
--- a/src/zenserver/storage/workspaces/httpworkspaces.h
+++ b/src/zenserver/storage/workspaces/httpworkspaces.h
@@ -29,9 +29,10 @@ public:
virtual const char* BaseUri() const override;
virtual void HandleRequest(HttpServerRequest& Request) override;
- virtual CbObject CollectStats() override;
- virtual void HandleStatsRequest(HttpServerRequest& Request) override;
virtual void HandleStatusRequest(HttpServerRequest& Request) override;
+ virtual void HandleStatsRequest(HttpServerRequest& Request) override;
+ virtual CbObject CollectStats() override;
+ virtual uint64_t GetActivityCounter() override;
private:
struct WorkspacesStats
diff --git a/src/zenserver/storage/zenstorageserver.cpp b/src/zenserver/storage/zenstorageserver.cpp
index de00eb1c2..bc0a8f4ac 100644
--- a/src/zenserver/storage/zenstorageserver.cpp
+++ b/src/zenserver/storage/zenstorageserver.cpp
@@ -170,7 +170,7 @@ ZenStorageServer::RegisterServices()
m_Http->RegisterService(*m_HttpSessionsService);
}
- m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService);
+ m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService);
if (m_FrontendService)
{
@@ -307,7 +307,7 @@ ZenStorageServer::InitializeServices(const ZenStorageServerConfig& ServerOptions
ObjCfg.Buckets.push_back(std::move(NewBucket));
}
- m_ObjStoreService = std::make_unique<HttpObjectStoreService>(m_StatusService, std::move(ObjCfg));
+ m_ObjStoreService = std::make_unique<HttpObjectStoreService>(m_StatsService, m_StatusService, std::move(ObjCfg));
}
if (ServerOptions.BuildStoreConfig.Enabled)
diff --git a/src/zenstore/include/zenstore/projectstore.h b/src/zenstore/include/zenstore/projectstore.h
index 6f49cd024..100a82907 100644
--- a/src/zenstore/include/zenstore/projectstore.h
+++ b/src/zenstore/include/zenstore/projectstore.h
@@ -456,6 +456,7 @@ public:
bool DeleteProject(std::string_view ProjectId);
bool Exists(std::string_view ProjectId);
void Flush();
+ size_t ProjectCount() const;
void DiscoverProjects();
void IterateProjects(std::function<void(Project& Prj)>&& Fn);
diff --git a/src/zenstore/projectstore.cpp b/src/zenstore/projectstore.cpp
index 56d0f7d2b..13674da4d 100644
--- a/src/zenstore/projectstore.cpp
+++ b/src/zenstore/projectstore.cpp
@@ -4406,6 +4406,13 @@ ProjectStore::DiscoverProjects()
}
}
+// Returns m_Projects.size(), read while a RwLock::SharedLockScope on m_ProjectsLock is held.
+size_t
+ProjectStore::ProjectCount() const
+{
+ RwLock::SharedLockScope _(m_ProjectsLock);
+ return m_Projects.size();
+}
+
void
ProjectStore::IterateProjects(std::function<void(Project& Prj)>&& Fn)
{
diff --git a/src/zentelemetry/include/zentelemetry/hyperloglog.h b/src/zentelemetry/include/zentelemetry/hyperloglog.h
index 2daf75a43..502e2aee5 100644
--- a/src/zentelemetry/include/zentelemetry/hyperloglog.h
+++ b/src/zentelemetry/include/zentelemetry/hyperloglog.h
@@ -9,6 +9,7 @@
#include <array>
#include <atomic>
+#include <cmath>
#include <cstdint>
#include <string_view>
diff --git a/src/zenutil/consul/consul.cpp b/src/zenutil/consul/consul.cpp
index d463c0938..c9144e589 100644
--- a/src/zenutil/consul/consul.cpp
+++ b/src/zenutil/consul/consul.cpp
@@ -167,6 +167,8 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
ApplyCommonHeaders(AdditionalHeaders);
AdditionalHeaders.Entries.emplace(HttpClient::Accept(HttpContentType::kJSON));
+ HttpClient::KeyValueMap AdditionalParameters(std::make_pair<std::string, std::string>("replace-existing-checks", "true"));
+
CbObjectWriter Writer;
{
Writer.AddString("ID"sv, Info.ServiceId);
@@ -185,13 +187,21 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
}
Writer.EndArray(); // Tags
}
- Writer.BeginObject("Check"sv);
+ if (Info.HealthIntervalSeconds != 0)
{
- Writer.AddString("HTTP"sv, fmt::format("http://{}:{}/{}", Info.Address, Info.Port, Info.HealthEndpoint));
- Writer.AddString("Interval"sv, fmt::format("{}s", Info.HealthIntervalSeconds));
- Writer.AddString("DeregisterCriticalServiceAfter"sv, fmt::format("{}s", Info.DeregisterAfterSeconds));
+ // Consul requires Interval whenever HTTP is specified; omit the Check block entirely
+ // when no interval is configured (e.g. during Provisioning).
+ Writer.BeginObject("Check"sv);
+ {
+ Writer.AddString("HTTP"sv, fmt::format("http://{}:{}/{}", Info.Address, Info.Port, Info.HealthEndpoint));
+ Writer.AddString("Interval"sv, fmt::format("{}s", Info.HealthIntervalSeconds));
+ if (Info.DeregisterAfterSeconds != 0)
+ {
+ Writer.AddString("DeregisterCriticalServiceAfter"sv, fmt::format("{}s", Info.DeregisterAfterSeconds));
+ }
+ }
+ Writer.EndObject(); // Check
}
- Writer.EndObject(); // Check
}
ExtendableStringBuilder<512> SB;
@@ -199,7 +209,7 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
IoBuffer PayloadBuffer(IoBuffer::Wrap, SB.Data(), SB.Size());
PayloadBuffer.SetContentType(HttpContentType::kJSON);
- HttpClient::Response Result = m_HttpClient.Put("v1/agent/service/register", PayloadBuffer, AdditionalHeaders);
+ HttpClient::Response Result = m_HttpClient.Put("v1/agent/service/register", PayloadBuffer, AdditionalHeaders, AdditionalParameters);
if (!Result)
{
@@ -321,6 +331,20 @@ ConsulClient::GetAgentServicesJson()
return Result.ToText();
}
+// Issues a GET on "v1/agent/checks" with the common headers applied and returns
+// Result.ToText(). If the response evaluates to false, the literal "{}" is returned
+// instead, so the caller cannot tell a failed request from an empty result.
+std::string
+ConsulClient::GetAgentChecksJson()
+{
+ HttpClient::KeyValueMap AdditionalHeaders;
+ ApplyCommonHeaders(AdditionalHeaders);
+
+ HttpClient::Response Result = m_HttpClient.Get("v1/agent/checks", AdditionalHeaders);
+ if (!Result)
+ {
+ return "{}";
+ }
+ return Result.ToText();
+}
+
//////////////////////////////////////////////////////////////////////////
ServiceRegistration::ServiceRegistration(ConsulClient* Client, const ServiceRegistrationInfo& Info) : m_Client(Client), m_Info(Info)
diff --git a/src/zenutil/include/zenutil/consul.h b/src/zenutil/include/zenutil/consul.h
index 7bf2ce437..4002d5d23 100644
--- a/src/zenutil/include/zenutil/consul.h
+++ b/src/zenutil/include/zenutil/consul.h
@@ -21,8 +21,8 @@ struct ServiceRegistrationInfo
uint16_t Port = 0;
std::string HealthEndpoint;
std::vector<std::pair<std::string, std::string>> Tags;
- int HealthIntervalSeconds = 10;
- int DeregisterAfterSeconds = 30;
+ uint32_t HealthIntervalSeconds = 10;
+ uint32_t DeregisterAfterSeconds = 30;
};
class ConsulClient
@@ -44,6 +44,7 @@ public:
// Query methods for testing
bool HasService(std::string_view ServiceId);
std::string GetAgentServicesJson();
+ std::string GetAgentChecksJson();
// Blocking query on v1/agent/services. Blocks until the service list changes or
// the wait period expires. InOutIndex must be 0 for the first call; it is updated
diff --git a/src/zenutil/xmake.lua b/src/zenutil/xmake.lua
index 1e19f7b2f..83a6b7f93 100644
--- a/src/zenutil/xmake.lua
+++ b/src/zenutil/xmake.lua
@@ -11,6 +11,10 @@ target('zenutil')
add_deps("robin-map")
add_packages("json11")
+ if is_plat("linux", "macosx") then
+ add_packages("openssl3")
+ end
+
if is_plat("linux") then
add_includedirs("$(projectdir)/thirdparty/systemd/include")
add_linkdirs("$(projectdir)/thirdparty/systemd/lib")