diff options
Diffstat (limited to 'src')
72 files changed, 3564 insertions, 2981 deletions
diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index cbaf64e31..3277eb856 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -799,7 +799,7 @@ main(int argc, char** argv) Options.add_options()("help", "Show command line help"); Options.add_options()("c, command", "Sub command", cxxopts::value<std::string>(SubCommand)); Options.add_options()("httpclient", - "Select HTTP client implementation (e.g. 'curl', 'cpr')", + "Select HTTP client implementation", cxxopts::value<std::string>(GlobalOptions.HttpClientBackend)->default_value("curl")); int CoreLimit = 0; @@ -949,6 +949,7 @@ main(int argc, char** argv) .IsTest = false, .NoConsoleOutput = GlobalOptions.LoggingConfig.NoConsoleOutput, .QuietConsole = GlobalOptions.LoggingConfig.QuietConsole, + .ForceColor = GlobalOptions.LoggingConfig.ForceColor, .AbsLogFile = GlobalOptions.LoggingConfig.AbsLogFile, .LogId = GlobalOptions.LoggingConfig.LogId}; zen::InitializeLogging(LogOptions); diff --git a/src/zen/zen.h b/src/zen/zen.h index 97cc9af6f..64d9390a3 100644 --- a/src/zen/zen.h +++ b/src/zen/zen.h @@ -18,7 +18,7 @@ struct ZenCliOptions ZenLoggingConfig LoggingConfig; - std::string HttpClientBackend; // Choice of HTTP client implementation (e.g. "curl", "cpr") + std::string HttpClientBackend; // Choice of HTTP client implementation // Arguments after " -- " on command line are passed through and not parsed std::string PassthroughCommandLine; diff --git a/src/zencore/include/zencore/fmtutils.h b/src/zencore/include/zencore/fmtutils.h index 404e570fd..4ec05f901 100644 --- a/src/zencore/include/zencore/fmtutils.h +++ b/src/zencore/include/zencore/fmtutils.h @@ -15,6 +15,29 @@ ZEN_THIRD_PARTY_INCLUDES_END #include <chrono> #include <string_view> +// Generic formatter for any type with a free ToString(T) function returning a +// string-like type. This covers enum-to-string conversions (HttpResponseCode, +// SessionState, etc.) without needing per-type fmt::formatter specializations. +// ADL is used to find ToString, so it works across namespaces. + +template<typename T> +concept HasFreeToString = requires(const T& v) +{ + { + ToString(v) + } -> std::convertible_to<std::string_view>; +}; + +template<HasFreeToString T> +struct fmt::formatter<T> : fmt::formatter<std::string_view> +{ + template<typename FormatContext> + auto format(const T& Value, FormatContext& Ctx) const + { + return fmt::formatter<std::string_view>::format(ToString(Value), Ctx); + } +}; + // Custom formatting for some zencore types template<typename T> diff --git a/src/zencore/include/zencore/thread.h b/src/zencore/include/zencore/thread.h index d7262324f..56ce5904b 100644 --- a/src/zencore/include/zencore/thread.h +++ b/src/zencore/include/zencore/thread.h @@ -190,6 +190,13 @@ class Latch public: Latch(std::ptrdiff_t Count) : Counter(Count) {} + void Reset(std::ptrdiff_t Count) + { + ZEN_ASSERT(Counter.load() == 0); + Complete.Reset(); + Counter.store(Count); + } + void CountDown() { std::ptrdiff_t Old = Counter.fetch_sub(1); diff --git a/src/zenhorde/xmake.lua b/src/zenhorde/xmake.lua index 48d028e86..0e69e9c5f 100644 --- a/src/zenhorde/xmake.lua +++ b/src/zenhorde/xmake.lua @@ -14,7 +14,7 @@ target('zenhorde') end if is_plat("linux") or is_plat("macosx") then - add_packages("openssl") + add_packages("openssl3") end if is_os("macosx") then diff --git a/src/zenhttp/clients/httpclientcommon.h b/src/zenhttp/clients/httpclientcommon.h index e8d969cc8..078d4a52f 100644 --- a/src/zenhttp/clients/httpclientcommon.h +++ b/src/zenhttp/clients/httpclientcommon.h @@ -21,7 +21,10 @@ public: using Response = HttpClient::Response; using KeyValueMap = HttpClient::KeyValueMap; - [[nodiscard]] virtual Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) = 0; + [[nodiscard]] virtual Response Put(std::string_view Url, + const IoBuffer& Payload, + const KeyValueMap& AdditionalHeader = {}, + const KeyValueMap& Parameters = {}) = 0; [[nodiscard]] virtual Response Put(std::string_view Url, const KeyValueMap& Parameters = {}) = 0; [[nodiscard]] virtual Response Get(std::string_view Url, const KeyValueMap& AdditionalHeader = {}, @@ -59,6 +62,7 @@ public: LoggerRef Log() { return m_Log; } std::string_view GetBaseUri() const { return m_BaseUri; } + void SetBaseUri(std::string_view NewBaseUri) { m_BaseUri = NewBaseUri; } std::string_view GetSessionId() const { return m_SessionId; } bool Authenticate(); diff --git a/src/zenhttp/clients/httpclientcpr.cpp b/src/zenhttp/clients/httpclientcpr.cpp deleted file mode 100644 index bd6de3ff7..000000000 --- a/src/zenhttp/clients/httpclientcpr.cpp +++ /dev/null @@ -1,1285 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include "httpclientcpr.h" - -#include <zencore/compactbinary.h> -#include <zencore/compactbinarybuilder.h> -#include <zencore/compactbinarypackage.h> -#include <zencore/compactbinaryutil.h> -#include <zencore/compress.h> -#include <zencore/filesystem.h> -#include <zencore/iobuffer.h> -#include <zencore/iohash.h> -#include <zencore/session.h> -#include <zencore/stream.h> -#include <zenhttp/packageformat.h> -#include <algorithm> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <cpr/ssl_options.h> -#include <cpr/unix_socket.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -HttpClientBase* -CreateCprHttpClient(std::string_view BaseUri, const HttpClientSettings& ConnectionSettings, std::function<bool()>&& CheckIfAbortFunction) -{ - return new CprHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction)); -} - -static std::atomic<uint32_t> HttpClientRequestIdCounter{0}; - -////////////////////////////////////////////////////////////////////////// - -static HttpClientErrorCode -MapCprError(cpr::ErrorCode Code) -{ - switch (Code) - { - case cpr::ErrorCode::OK: - return HttpClientErrorCode::kOK; - case cpr::ErrorCode::CONNECTION_FAILURE: - return HttpClientErrorCode::kConnectionFailure; - case cpr::ErrorCode::HOST_RESOLUTION_FAILURE: - return HttpClientErrorCode::kHostResolutionFailure; - case cpr::ErrorCode::PROXY_RESOLUTION_FAILURE: - return HttpClientErrorCode::kProxyResolutionFailure; - case cpr::ErrorCode::INTERNAL_ERROR: - return HttpClientErrorCode::kInternalError; - case cpr::ErrorCode::NETWORK_RECEIVE_ERROR: - return HttpClientErrorCode::kNetworkReceiveError; - case cpr::ErrorCode::NETWORK_SEND_FAILURE: - return HttpClientErrorCode::kNetworkSendFailure; - case cpr::ErrorCode::OPERATION_TIMEDOUT: - return HttpClientErrorCode::kOperationTimedOut; - case cpr::ErrorCode::SSL_CONNECT_ERROR: - return HttpClientErrorCode::kSSLConnectError; - case cpr::ErrorCode::SSL_LOCAL_CERTIFICATE_ERROR: - case cpr::ErrorCode::SSL_REMOTE_CERTIFICATE_ERROR: - return HttpClientErrorCode::kSSLCertificateError; - case cpr::ErrorCode::SSL_CACERT_ERROR: - return HttpClientErrorCode::kSSLCACertError; - case cpr::ErrorCode::GENERIC_SSL_ERROR: - return HttpClientErrorCode::kGenericSSLError; - case cpr::ErrorCode::REQUEST_CANCELLED: - return HttpClientErrorCode::kRequestCancelled; - default: - return HttpClientErrorCode::kOtherError; - } -} - -////////////////////////////////////////////////////////////////////////// -// -// CPR helpers - -static cpr::Body -AsCprBody(const CbObject& Obj) -{ - return cpr::Body((const char*)Obj.GetBuffer().GetData(), Obj.GetBuffer().GetSize()); -} - -static cpr::Body -AsCprBody(const IoBuffer& Obj) -{ - return cpr::Body((const char*)Obj.GetData(), Obj.GetSize()); -} - -static bool -ShouldRetry(const cpr::Response& Response) -{ - switch (Response.error.code) - { - case cpr::ErrorCode::OK: - break; - case cpr::ErrorCode::INTERNAL_ERROR: - case cpr::ErrorCode::NETWORK_RECEIVE_ERROR: - case cpr::ErrorCode::NETWORK_SEND_FAILURE: - case cpr::ErrorCode::OPERATION_TIMEDOUT: - return true; - default: - return false; - } - switch ((HttpResponseCode)Response.status_code) - { - case HttpResponseCode::RequestTimeout: - case HttpResponseCode::TooManyRequests: - case HttpResponseCode::InternalServerError: - case HttpResponseCode::BadGateway: - case HttpResponseCode::ServiceUnavailable: - case HttpResponseCode::GatewayTimeout: - return true; - default: - return false; - } -}; - -static std::pair<std::string, std::string> -HeaderContentType(ZenContentType ContentType) -{ - return std::make_pair("Content-Type", std::string(MapContentTypeToString(ContentType))); -} - -////////////////////////////////////////////////////////////////////////// - -CprHttpClient::CprHttpClient(std::string_view BaseUri, - const HttpClientSettings& Connectionsettings, - std::function<bool()>&& CheckIfAbortFunction) -: HttpClientBase(BaseUri, Connectionsettings, std::move(CheckIfAbortFunction)) -{ -} - -bool -CprHttpClient::ShouldLogErrorCode(HttpResponseCode ResponseCode) const -{ - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - // Quiet - return false; - } - const auto& Expected = m_ConnectionSettings.ExpectedErrorCodes; - return std::find(Expected.begin(), Expected.end(), ResponseCode) == Expected.end(); -} - -CprHttpClient::~CprHttpClient() -{ - ZEN_TRACE_CPU("CprHttpClient::~CprHttpClient"); - m_SessionLock.WithExclusiveLock([&] { - for (auto CprSession : m_Sessions) - { - delete CprSession; - } - m_Sessions.clear(); - }); -} - -HttpClient::Response -CprHttpClient::ResponseWithPayload(std::string_view SessionId, - cpr::Response&& HttpResponse, - const HttpResponseCode WorkResponseCode, - IoBuffer&& Payload, - std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions) -{ - // This ends up doing a memcpy, would be good to get rid of it by streaming results - // into buffer directly - IoBuffer ResponseBuffer = Payload ? std::move(Payload) : IoBuffer(IoBuffer::Clone, HttpResponse.text.data(), HttpResponse.text.size()); - - if (auto It = HttpResponse.header.find("Content-Type"); It != HttpResponse.header.end()) - { - const HttpContentType ContentType = ParseContentType(It->second); - ResponseBuffer.SetContentType(ContentType); - } - - if (!IsHttpSuccessCode(WorkResponseCode) && WorkResponseCode != HttpResponseCode::NotFound) - { - if (ShouldLogErrorCode(WorkResponseCode)) - { - ZEN_WARN("HttpClient request failed (session: {}): {}", SessionId, HttpResponse); - } - } - - std::sort(BoundaryPositions.begin(), - BoundaryPositions.end(), - [](const HttpClient::Response::MultipartBoundary& Lhs, const HttpClient::Response::MultipartBoundary& Rhs) { - return Lhs.RangeOffset < Rhs.RangeOffset; - }); - - return HttpClient::Response{.StatusCode = WorkResponseCode, - .ResponsePayload = std::move(ResponseBuffer), - .Header = HttpClient::KeyValueMap(HttpResponse.header.begin(), HttpResponse.header.end()), - .UploadedBytes = gsl::narrow<int64_t>(HttpResponse.uploaded_bytes), - .DownloadedBytes = gsl::narrow<int64_t>(HttpResponse.downloaded_bytes), - .ElapsedSeconds = HttpResponse.elapsed, - .Ranges = std::move(BoundaryPositions)}; -} - -HttpClient::Response -CprHttpClient::CommonResponse(std::string_view SessionId, - cpr::Response&& HttpResponse, - IoBuffer&& Payload, - std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions) -{ - const HttpResponseCode WorkResponseCode = HttpResponseCode(HttpResponse.status_code); - if (HttpResponse.error) - { - const bool Quiet = m_CheckIfAbortFunction && m_CheckIfAbortFunction(); - if (!Quiet) - { - if (HttpResponse.error.code != cpr::ErrorCode::OPERATION_TIMEDOUT && - HttpResponse.error.code != cpr::ErrorCode::CONNECTION_FAILURE && - HttpResponse.error.code != cpr::ErrorCode::REQUEST_CANCELLED) - { - ZEN_WARN("HttpClient client failure (session: {}): {}", SessionId, HttpResponse); - } - } - - // Client side failure code - return HttpClient::Response{ - .StatusCode = WorkResponseCode, - .ResponsePayload = IoBufferBuilder::MakeCloneFromMemory(HttpResponse.text.data(), HttpResponse.text.size()), - .Header = HttpClient::KeyValueMap(HttpResponse.header.begin(), HttpResponse.header.end()), - .UploadedBytes = gsl::narrow<int64_t>(HttpResponse.uploaded_bytes), - .DownloadedBytes = gsl::narrow<int64_t>(HttpResponse.downloaded_bytes), - .ElapsedSeconds = HttpResponse.elapsed, - .Error = - HttpClient::ErrorContext{.ErrorCode = MapCprError(HttpResponse.error.code), .ErrorMessage = HttpResponse.error.message}}; - } - - if (WorkResponseCode == HttpResponseCode::NoContent || (HttpResponse.text.empty() && !Payload)) - { - return HttpClient::Response{.StatusCode = WorkResponseCode, - .Header = HttpClient::KeyValueMap(HttpResponse.header.begin(), HttpResponse.header.end()), - .UploadedBytes = gsl::narrow<int64_t>(HttpResponse.uploaded_bytes), - .DownloadedBytes = gsl::narrow<int64_t>(HttpResponse.downloaded_bytes), - .ElapsedSeconds = HttpResponse.elapsed}; - } - else - { - return ResponseWithPayload(SessionId, std::move(HttpResponse), WorkResponseCode, std::move(Payload), std::move(BoundaryPositions)); - } -} - -bool -CprHttpClient::ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile>& PayloadFile) -{ - ZEN_TRACE_CPU("ValidatePayload"); - IoBuffer ResponseBuffer = (Response.text.empty() && PayloadFile) ? PayloadFile->BorrowIoBuffer() - : IoBuffer(IoBuffer::Wrap, Response.text.data(), Response.text.size()); - - if (auto ContentLength = Response.header.find("Content-Length"); ContentLength != Response.header.end()) - { - std::optional<uint64_t> ExpectedContentSize = ParseInt<uint64_t>(ContentLength->second); - if (!ExpectedContentSize.has_value()) - { - Response.error = - cpr::Error(/*CURLE_READ_ERROR*/ 26, fmt::format("Can not parse Content-Length header. Value: '{}'", ContentLength->second)); - return false; - } - if (ExpectedContentSize.value() != ResponseBuffer.GetSize()) - { - Response.error = cpr::Error( - /*CURLE_READ_ERROR*/ 26, - fmt::format("Payload size {} does not match Content-Length {}", ResponseBuffer.GetSize(), ContentLength->second)); - return false; - } - } - - if (Response.status_code == (long)HttpResponseCode::PartialContent) - { - return true; - } - - if (auto JupiterHash = Response.header.find("X-Jupiter-IoHash"); JupiterHash != Response.header.end()) - { - IoHash ExpectedPayloadHash; - if (IoHash::TryParse(JupiterHash->second, ExpectedPayloadHash)) - { - IoHash PayloadHash = IoHash::HashBuffer(ResponseBuffer); - if (PayloadHash != ExpectedPayloadHash) - { - Response.error = cpr::Error(/*CURLE_READ_ERROR*/ 26, - fmt::format("Payload hash {} does not match X-Jupiter-IoHash {}", - PayloadHash.ToHexString(), - ExpectedPayloadHash.ToHexString())); - return false; - } - } - } - - if (auto ContentType = Response.header.find("Content-Type"); ContentType != Response.header.end()) - { - if (ContentType->second == "application/x-ue-comp") - { - IoHash RawHash; - uint64_t RawSize; - if (CompressedBuffer::ValidateCompressedHeader(ResponseBuffer, RawHash, RawSize, /*OutOptionalTotalCompressedSize*/ nullptr)) - { - return true; - } - else - { - Response.error = cpr::Error(/*CURLE_READ_ERROR*/ 26, "Compressed binary failed validation"); - return false; - } - } - if (ContentType->second == "application/x-ue-cb") - { - if (CbValidateError Error = ValidateCompactBinary(ResponseBuffer.GetView(), CbValidateMode::Default); - Error == CbValidateError::None) - { - return true; - } - else - { - Response.error = cpr::Error(/*CURLE_READ_ERROR*/ 26, fmt::format("Compact binary failed validation: {}", ToString(Error))); - return false; - } - } - } - - return true; -} - -cpr::Response -CprHttpClient::DoWithRetry(std::string_view SessionId, - std::function<cpr::Response()>&& Func, - std::function<bool(cpr::Response& Result)>&& Validate) -{ - uint8_t Attempt = 0; - cpr::Response Result = Func(); - while (Attempt < m_ConnectionSettings.RetryCount) - { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return Result; - } - if (!ShouldRetry(Result)) - { - if (Result.error || !IsHttpSuccessCode(Result.status_code)) - { - break; - } - if (Validate(Result)) - { - break; - } - } - Sleep(100 * (Attempt + 1)); - Attempt++; - if (ShouldLogErrorCode(HttpResponseCode(Result.status_code))) - { - ZEN_INFO("{} Attempt {}/{}", - CommonResponse(SessionId, std::move(Result), {}).ErrorMessage("Retry"), - Attempt, - m_ConnectionSettings.RetryCount + 1); - } - Result = Func(); - } - return Result; -} - -cpr::Response -CprHttpClient::DoWithRetry(std::string_view SessionId, - std::function<cpr::Response()>&& Func, - std::unique_ptr<detail::TempPayloadFile>& PayloadFile) -{ - uint8_t Attempt = 0; - cpr::Response Result = Func(); - while (Attempt < m_ConnectionSettings.RetryCount) - { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return Result; - } - if (!ShouldRetry(Result)) - { - if (Result.error || !IsHttpSuccessCode(Result.status_code)) - { - break; - } - if (ValidatePayload(Result, PayloadFile)) - { - break; - } - } - Sleep(100 * (Attempt + 1)); - Attempt++; - if (ShouldLogErrorCode(HttpResponseCode(Result.status_code))) - { - ZEN_INFO("{} Attempt {}/{}", - CommonResponse(SessionId, std::move(Result), {}).ErrorMessage("Retry"), - Attempt, - m_ConnectionSettings.RetryCount + 1); - } - Result = Func(); - } - return Result; -} - -////////////////////////////////////////////////////////////////////////// - -CprHttpClient::Session -CprHttpClient::AllocSession(const std::string_view BaseUrl, - const std::string_view ResourcePath, - const HttpClientSettings& ConnectionSettings, - const KeyValueMap& AdditionalHeader, - const KeyValueMap& Parameters, - const std::string_view SessionId, - std::optional<std::string> AccessToken) -{ - ZEN_TRACE_CPU("CprHttpClient::AllocSession"); - cpr::Session* CprSession = nullptr; - m_SessionLock.WithExclusiveLock([&] { - if (!m_Sessions.empty()) - { - CprSession = m_Sessions.back(); - m_Sessions.pop_back(); - } - }); - - if (CprSession == nullptr) - { - CprSession = new cpr::Session(); - CprSession->SetConnectTimeout(ConnectionSettings.ConnectTimeout); - CprSession->SetTimeout(ConnectionSettings.Timeout); - if (ConnectionSettings.AssumeHttp2) - { - CprSession->SetHttpVersion(cpr::HttpVersion{cpr::HttpVersionCode::VERSION_2_0_PRIOR_KNOWLEDGE}); - } - if (ConnectionSettings.Verbose) - { - // CprSession->SetVerbose(cpr::Verbose{ true }); - CprSession->SetDebugCallback(cpr::DebugCallback{ - [this](cpr::DebugCallback::InfoType type, std::string data, intptr_t userdata) { - cpr::Session* CprSession = (cpr::Session*)userdata; - ZEN_UNUSED(CprSession); - switch (type) - { - case cpr::DebugCallback::InfoType::TEXT: - if (data.find("need more data"sv) == std::string::npos) - { - ZEN_INFO("TEXT: {}", data); - } - break; - case cpr::DebugCallback::InfoType::HEADER_IN: - ZEN_INFO("HIN : {}", data); - break; - case cpr::DebugCallback::InfoType::HEADER_OUT: - if (std::string::size_type TokenPos = data.find("Authorization: Bearer "sv); TokenPos != std::string::npos) - { - TokenPos += 22; - std::string::size_type TokenEndPos = data.find_first_of("\r\n", TokenPos); - if (TokenEndPos == std::string::npos) - { - TokenEndPos = data.length(); - } - std::string Copy = data; - Copy.replace(Copy.begin() + TokenPos, - Copy.begin() + TokenEndPos, - fmt::format("[{} char token]", TokenEndPos - TokenPos)); - ZEN_INFO("HOUT: {}", Copy); - } - else - { - ZEN_INFO("HOUT: {}", data); - } - break; - case cpr::DebugCallback::InfoType::DATA_IN: - // ZEN_INFO("DATA_IN: {}", data); - break; - case cpr::DebugCallback::InfoType::DATA_OUT: - // ZEN_INFO("DATA_OUT: {}", data); - break; - case cpr::DebugCallback::InfoType::SSL_DATA_IN: - // ZEN_INFO("SSL_DATA_IN: {}", data); - break; - case cpr::DebugCallback::InfoType::SSL_DATA_OUT: - // ZEN_INFO("SSL_DATA_OUT: {}", data); - break; - } - }, - (intptr_t)CprSession}); - } - } - - if (!AdditionalHeader->empty()) - { - CprSession->SetHeader(cpr::Header(AdditionalHeader->begin(), AdditionalHeader->end())); - } - if (!SessionId.empty()) - { - CprSession->UpdateHeader({{"UE-Session", std::string(SessionId)}}); - } - if (ConnectionSettings.ForbidReuseConnection) - { - CprSession->UpdateHeader({{"Connection", "close"}}); - } - - if (AccessToken.has_value()) - { - CprSession->UpdateHeader({{"Authorization", AccessToken.value()}}); - } - if (!Parameters->empty()) - { - cpr::Parameters Tmp; - for (auto It = Parameters->begin(); It != Parameters->end(); It++) - { - Tmp.Add({It->first, It->second}); - } - CprSession->SetParameters(Tmp); - } - else - { - CprSession->SetParameters({}); - } - - if (!ConnectionSettings.UnixSocketPath.empty()) - { - CprSession->SetUnixSocket(cpr::UnixSocket(PathToUtf8(ConnectionSettings.UnixSocketPath))); - } - - if (ConnectionSettings.InsecureSsl || !ConnectionSettings.CaBundlePath.empty()) - { - cpr::SslOptions SslOpts; - if (ConnectionSettings.InsecureSsl) - { - SslOpts.SetOption(cpr::ssl::VerifyHost{false}); - SslOpts.SetOption(cpr::ssl::VerifyPeer{false}); - } - if (!ConnectionSettings.CaBundlePath.empty()) - { - SslOpts.SetOption(cpr::ssl::CaInfo{ConnectionSettings.CaBundlePath}); - } - CprSession->SetSslOptions(SslOpts); - } - - ExtendableStringBuilder<128> UrlBuffer; - UrlBuffer << BaseUrl << ResourcePath; - CprSession->SetUrl(UrlBuffer.c_str()); - - return Session(this, CprSession); -} - -void -CprHttpClient::ReleaseSession(cpr::Session* CprSession) -{ - ZEN_TRACE_CPU("CprHttpClient::ReleaseSession"); - CprSession->SetUrl({}); - CprSession->SetHeader({}); - CprSession->SetBody({}); - m_SessionLock.WithExclusiveLock([&] { m_Sessions.push_back(CprSession); }); -} - -CprHttpClient::Response -CprHttpClient::TransactPackage(std::string_view Url, CbPackage Package, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::TransactPackage"); - - Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - // First, list of offered chunks for filtering on the server end - - std::vector<IoHash> AttachmentsToSend; - std::span<const CbAttachment> Attachments = Package.GetAttachments(); - - const uint32_t RequestId = ++HttpClientRequestIdCounter; - auto RequestIdString = fmt::to_string(RequestId); - - if (Attachments.empty() == false) - { - CbObjectWriter Writer; - Writer.BeginArray("offer"); - - for (const CbAttachment& Attachment : Attachments) - { - Writer.AddHash(Attachment.GetHash()); - } - - Writer.EndArray(); - - BinaryWriter MemWriter; - Writer.Save(MemWriter); - - Sess->UpdateHeader({HeaderContentType(HttpContentType::kCbPackageOffer), {"UE-Request", RequestIdString}}); - Sess->SetBody(cpr::Body{(const char*)MemWriter.Data(), MemWriter.Size()}); - - cpr::Response FilterResponse = Sess.Post(); - - if (FilterResponse.status_code == 200) - { - IoBuffer ResponseBuffer(IoBuffer::Wrap, FilterResponse.text.data(), FilterResponse.text.size()); - CbValidateError ValidationError = CbValidateError::None; - if (CbObject ResponseObject = ValidateAndReadCompactBinaryObject(std::move(ResponseBuffer), ValidationError); - ValidationError == CbValidateError::None) - { - for (CbFieldView& Entry : ResponseObject["need"]) - { - ZEN_ASSERT(Entry.IsHash()); - AttachmentsToSend.push_back(Entry.AsHash()); - } - } - } - } - - // Prepare package for send - - CbPackage SendPackage; - SendPackage.SetObject(Package.GetObject(), Package.GetObjectHash()); - - for (const IoHash& AttachmentCid : AttachmentsToSend) - { - const CbAttachment* Attachment = Package.FindAttachment(AttachmentCid); - - if (Attachment) - { - SendPackage.AddAttachment(*Attachment); - } - else - { - // This should be an error -- server asked to have something we can't find - } - } - - // Transmit package payload - - CompositeBuffer Message = FormatPackageMessageBuffer(SendPackage); - SharedBuffer FlatMessage = Message.Flatten(); - - Sess->UpdateHeader({HeaderContentType(HttpContentType::kCbPackage), {"UE-Request", RequestIdString}}); - Sess->SetBody(cpr::Body{(const char*)FlatMessage.GetData(), FlatMessage.GetSize()}); - - cpr::Response FilterResponse = Sess.Post(); - - if (!IsHttpSuccessCode(FilterResponse.status_code)) - { - return {.StatusCode = HttpResponseCode(FilterResponse.status_code)}; - } - - IoBuffer ResponseBuffer(IoBuffer::Clone, FilterResponse.text.data(), FilterResponse.text.size()); - - if (auto It = FilterResponse.header.find("Content-Type"); It != FilterResponse.header.end()) - { - HttpContentType ContentType = ParseContentType(It->second); - - ResponseBuffer.SetContentType(ContentType); - } - - return {.StatusCode = HttpResponseCode(FilterResponse.status_code), .ResponsePayload = std::move(ResponseBuffer)}; -} - -////////////////////////////////////////////////////////////////////////// -// -// Standard HTTP verbs -// - -CprHttpClient::Response -CprHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Put"); - - return CommonResponse( - m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - return Sess.Put(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Put(std::string_view Url, const KeyValueMap& Parameters) -{ - ZEN_TRACE_CPU("CprHttpClient::Put"); - - return CommonResponse(m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - {{"Content-Length", "0"}}, - Parameters, - m_SessionId, - GetAccessToken()); - return Sess.Put(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Get(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters) -{ - ZEN_TRACE_CPU("CprHttpClient::Get"); - return CommonResponse( - m_SessionId, - DoWithRetry( - m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Get(); - }, - [this](cpr::Response& Result) { - std::unique_ptr<detail::TempPayloadFile> NoTempFile; - return ValidatePayload(Result, NoTempFile); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Head(std::string_view Url, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Head"); - - return CommonResponse( - m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Head(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Delete(std::string_view Url, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Delete"); - - return CommonResponse( - m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Delete(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Post(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters) -{ - ZEN_TRACE_CPU("CprHttpClient::PostNoPayload"); - - return CommonResponse( - m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Post(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Post(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader) -{ - return Post(Url, Payload, Payload.GetContentType(), AdditionalHeader); -} - -CprHttpClient::Response -CprHttpClient::Post(std::string_view Url, const IoBuffer& Payload, ZenContentType ContentType, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::PostWithPayload"); - - return CommonResponse( - m_SessionId, - DoWithRetry( - m_SessionId, - [&]() { - Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - IoBufferFileReference FileRef = {nullptr, 0, 0}; - if (Payload.GetFileReference(FileRef)) - { - uint64_t Offset = 0; - detail::BufferedReadFileStream Buffer(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, 512u * 1024u); - auto ReadCallback = [&Payload, &Offset, &Buffer](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, Payload.GetSize() - Offset); - Buffer.Read(buffer, size); - Offset += size; - return true; - }; - return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - } - Sess->SetBody(AsCprBody(Payload)); - return Sess.Post(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Post(std::string_view Url, - CbObject Payload, - const KeyValueMap& AdditionalHeader, - const std::filesystem::path& TempFolderPath) -{ - ZEN_TRACE_CPU("CprHttpClient::PostObjectPayload"); - - std::string PayloadString; - std::unique_ptr<detail::TempPayloadFile> PayloadFile; - - cpr::Response Response = DoWithRetry( - m_SessionId, - [&]() { - PayloadString.clear(); - PayloadFile.reset(); - - Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); - - std::vector<std::pair<std::string, std::string>> ReceivedHeaders; - auto HeaderCallback = [&](std::string header, intptr_t) { - const std::pair<std::string_view, std::string_view> Header = detail::GetHeaderKeyAndValue(header); - if (StrCaseCompare(std::string(Header.first).c_str(), "Content-Length") == 0) - { - std::optional<size_t> ContentLength = ParseInt<size_t>(Header.second); - if (ContentLength.has_value()) - { - if (!TempFolderPath.empty() && ContentLength.value() > m_ConnectionSettings.MaximumInMemoryDownloadSize) - { - PayloadFile = std::make_unique<detail::TempPayloadFile>(); - std::error_code Ec = PayloadFile->Open(TempFolderPath, ContentLength.value()); - if (Ec) - { - ZEN_WARN("Failed to create temp file in '{}' for HttpClient::Post. Reason: {}", - TempFolderPath.string(), - Ec.message()); - PayloadFile.reset(); - } - } - else - { - PayloadString.reserve(ContentLength.value()); - } - } - } - if (!Header.first.empty()) - { - ReceivedHeaders.emplace_back(std::move(Header)); - } - return 1; - }; - - auto DownloadCallback = [&](std::string data, intptr_t) { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return false; - } - - if (PayloadFile) - { - ZEN_ASSERT(PayloadString.empty()); - std::error_code Ec = PayloadFile->Write(data); - if (Ec) - { - ZEN_WARN("Failed to write to temp file in '{}' for HttpClient::Post. Reason: {}", - TempFolderPath.string(), - Ec.message()); - return false; - } - } - else - { - PayloadString.append(data); - } - return true; - }; - cpr::Response Response = Sess.Post({}, cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); - for (const std::pair<std::string, std::string>& H : ReceivedHeaders) - { - Response.header.insert_or_assign(H.first, H.second); - } - if (!PayloadString.empty()) - { - Response.text = std::move(PayloadString); - } - return Response; - }, - PayloadFile); - return CommonResponse(m_SessionId, std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}); -} - -CprHttpClient::Response -CprHttpClient::Post(std::string_view Url, CbPackage Pkg, const KeyValueMap& AdditionalHeader) -{ - return Post(Url, zen::FormatPackageMessageBuffer(Pkg), ZenContentType::kCbPackage, AdditionalHeader); -} - -CprHttpClient::Response -CprHttpClient::Post(std::string_view Url, const CompositeBuffer& Payload, ZenContentType ContentType, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Post"); - - return CommonResponse( - m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - detail::CompositeBufferReadStream Reader(Payload, 512u * 1024u); - auto ReadCallback = [this, &Reader](char* buffer, size_t& size, intptr_t) { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return false; - } - size = Reader.Read(buffer, size); - return true; - }; - return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Upload(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Upload"); - - return CommonResponse( - m_SessionId, - DoWithRetry( - m_SessionId, - [&]() { - Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - - IoBufferFileReference FileRef = {nullptr, 0, 0}; - if (Payload.GetFileReference(FileRef)) - { - uint64_t Offset = 0; - detail::BufferedReadFileStream Buffer(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, 512u * 1024u); - auto ReadCallback = [this, &Payload, &Offset, &Buffer](char* buffer, size_t& size, intptr_t) { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return false; - } - - size = Min<size_t>(size, Payload.GetSize() - Offset); - Buffer.Read(buffer, size); - Offset += size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - } - Sess->SetBody(AsCprBody(Payload)); - return Sess.Put(); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Upload(std::string_view Url, const CompositeBuffer& Payload, ZenContentType ContentType, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Upload"); - - return CommonResponse( - m_SessionId, - DoWithRetry(m_SessionId, - [&]() { - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - detail::CompositeBufferReadStream Reader(Payload, 512u * 1024u); - auto ReadCallback = [this, &Reader](char* buffer, size_t& size, intptr_t) { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return false; - } - size = Reader.Read(buffer, size); - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - }), - {}); -} - -CprHttpClient::Response -CprHttpClient::Download(std::string_view Url, const std::filesystem::path& TempFolderPath, const KeyValueMap& AdditionalHeader) -{ - ZEN_TRACE_CPU("CprHttpClient::Download"); - - std::string PayloadString; - std::unique_ptr<detail::TempPayloadFile> PayloadFile; - - HttpContentType ContentType = HttpContentType::kUnknownContentType; - detail::MultipartBoundaryParser BoundaryParser; - bool IsMultiRangeResponse = false; - - cpr::Response Response = DoWithRetry( - m_SessionId, - [&]() { - // Reset state from any previous attempt - PayloadString.clear(); - PayloadFile.reset(); - BoundaryParser.Boundaries.clear(); - ContentType = HttpContentType::kUnknownContentType; - IsMultiRangeResponse = false; - - auto DownloadCallback = [&](std::string data, intptr_t) { - if (m_CheckIfAbortFunction && m_CheckIfAbortFunction()) - { - return false; - } - - if (IsMultiRangeResponse) - { - BoundaryParser.ParseInput(data); - } - - if (PayloadFile) - { - ZEN_ASSERT(PayloadString.empty()); - std::error_code Ec = PayloadFile->Write(data); - if (Ec) - { - ZEN_WARN("Failed to write to temp file in '{}' for HttpClient::Download. Reason: {}", - TempFolderPath.string(), - Ec.message()); - return false; - } - } - else - { - PayloadString.append(data); - } - return true; - }; - - uint64_t RequestedContentLength = (uint64_t)-1; - if (auto RangeIt = AdditionalHeader.Entries.find("Range"); RangeIt != AdditionalHeader.Entries.end()) - { - if (RangeIt->second.starts_with("bytes")) - { - std::string_view RangeValue(RangeIt->second); - size_t RangeStartPos = RangeValue.find('=', 5); - if (RangeStartPos != std::string::npos) - { - RangeStartPos++; - while (RangeStartPos < RangeValue.length() && RangeValue[RangeStartPos] == ' ') - { - RangeStartPos++; - } - RequestedContentLength = 0; - - while (RangeStartPos < RangeValue.length()) - { - size_t RangeEnd = RangeValue.find_first_of(", \r\n", RangeStartPos); - if (RangeEnd == std::string::npos) - { - RangeEnd = RangeValue.length(); - } - - std::string_view RangeString = RangeValue.substr(RangeStartPos, RangeEnd - RangeStartPos); - size_t RangeSplitPos = RangeString.find('-'); - if (RangeSplitPos != std::string::npos) - { - std::optional<size_t> RequestedRangeStart = ParseInt<size_t>(RangeString.substr(0, RangeSplitPos)); - std::optional<size_t> RequestedRangeEnd = ParseInt<size_t>(RangeString.substr(RangeSplitPos + 1)); - if (RequestedRangeStart.has_value() && RequestedRangeEnd.has_value()) - { - RequestedContentLength += RequestedRangeEnd.value() - RequestedRangeStart.value() + 1; - } - } - RangeStartPos = RangeEnd; - while (RangeStartPos != RangeValue.length() && - (RangeValue[RangeStartPos] == ',' || RangeValue[RangeStartPos] == ' ')) - { - RangeStartPos++; - } - } - } - } - } - - cpr::Response Response; - { - std::vector<std::pair<std::string, std::string>> ReceivedHeaders; - auto HeaderCallback = [&](std::string header, intptr_t) { - if (RequestedContentLength != (uint64_t)-1 && RequestedContentLength > m_ConnectionSettings.MaximumInMemoryDownloadSize) - { - ZEN_DEBUG("Multirange request"); - } - const std::pair<std::string_view, std::string_view> Header = detail::GetHeaderKeyAndValue(header); - const std::string Key(Header.first); - if (StrCaseCompare(Key.c_str(), "Content-Length") == 0) - { - std::optional<size_t> ContentLength = ParseInt<size_t>(Header.second); - if (ContentLength.has_value()) - { - if (!TempFolderPath.empty() && ContentLength.value() > m_ConnectionSettings.MaximumInMemoryDownloadSize) - { - PayloadFile = std::make_unique<detail::TempPayloadFile>(); - std::error_code Ec = PayloadFile->Open(TempFolderPath, ContentLength.value()); - if (Ec) - { - ZEN_WARN("Failed to create temp file in '{}' for HttpClient::Download. Reason: {}", - TempFolderPath.string(), - Ec.message()); - PayloadFile.reset(); - } - } - else - { - PayloadString.reserve(ContentLength.value()); - } - } - } - else if (StrCaseCompare(Key.c_str(), "Content-Type") == 0) - { - IsMultiRangeResponse = BoundaryParser.Init(Header.second); - if (!IsMultiRangeResponse) - { - ContentType = ParseContentType(Header.second); - } - } - else if (StrCaseCompare(Key.c_str(), "Content-Range") == 0) - { - if (!IsMultiRangeResponse) - { - std::pair<uint64_t, uint64_t> Range = detail::ParseContentRange(Header.second); - if (Range.second != 0) - { - BoundaryParser.Boundaries.push_back(HttpClient::Response::MultipartBoundary{.OffsetInPayload = 0, - .RangeOffset = Range.first, - .RangeLength = Range.second, - .ContentType = ContentType}); - } - } - } - if (!Header.first.empty()) - { - ReceivedHeaders.emplace_back(std::move(Header)); - } - return 1; - }; - - Session Sess = AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); - for (const std::pair<std::string, std::string>& H : ReceivedHeaders) - { - Response.header.insert_or_assign(H.first, H.second); - } - } - if (m_ConnectionSettings.AllowResume) - { - auto SupportsRanges = [](const cpr::Response& Response) -> bool { - if (Response.header.find("Content-Range") != Response.header.end()) - { - return true; - } - if (auto It = Response.header.find("Accept-Ranges"); It != Response.header.end()) - { - return It->second == "bytes"sv; - } - return false; - }; - - auto ShouldResume = [&SupportsRanges, &IsMultiRangeResponse](const cpr::Response& Response) -> bool { - if (IsMultiRangeResponse) - { - return false; - } - if (ShouldRetry(Response)) - { - return SupportsRanges(Response); - } - return false; - }; - - if (ShouldResume(Response)) - { - auto It = Response.header.find("Content-Length"); - if (It != Response.header.end()) - { - uint64_t ContentLength = RequestedContentLength; - if (ContentLength == uint64_t(-1)) - { - if (auto ParsedContentLength = ParseInt<int64_t>(It->second); ParsedContentLength.has_value()) - { - ContentLength = ParsedContentLength.value(); - } - } - - std::vector<std::pair<std::string, std::string>> ReceivedHeaders; - - auto HeaderCallback = [&](std::string header, intptr_t) { - const std::pair<std::string_view, std::string_view> Header = detail::GetHeaderKeyAndValue(header); - if (!Header.first.empty()) - { - ReceivedHeaders.emplace_back(std::move(Header)); - } - - if (StrCaseCompare(std::string(Header.first).c_str(), "Content-Range") == 0) - { - if (Header.second.starts_with("bytes "sv)) - { - size_t RangeStartEnd = Header.second.find('-', 6); - if (RangeStartEnd != std::string::npos) - { - const auto Start = ParseInt<uint64_t>(Header.second.substr(6, RangeStartEnd - 6)); - if (Start) - { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - if (Start.value() == DownloadedSize) - { - return 1; - } - else if (Start.value() > DownloadedSize) - { - return 0; - } - if (PayloadFile) - { - PayloadFile->ResetWritePos(Start.value()); - } - else - { - PayloadString = PayloadString.substr(0, Start.value()); - } - return 1; - } - } - } - return 0; - } - return 1; - }; - - KeyValueMap HeadersWithRange(AdditionalHeader); - do - { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - - std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength - 1); - if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) - { - if (RangeIt->second == Range) - { - // If we didn't make any progress, abort - break; - } - } - HeadersWithRange.Entries.insert_or_assign("Range", Range); - - Session Sess = - AllocSession(m_BaseUri, Url, m_ConnectionSettings, HeadersWithRange, {}, m_SessionId, GetAccessToken()); - Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); - for (const std::pair<std::string, std::string>& H : ReceivedHeaders) - { - Response.header.insert_or_assign(H.first, H.second); - } - ReceivedHeaders.clear(); - } while (ShouldResume(Response)); - } - } - } - - if (!PayloadString.empty()) - { - Response.text = std::move(PayloadString); - } - return Response; - }, - PayloadFile); - - return CommonResponse(m_SessionId, - std::move(Response), - PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}, - std::move(BoundaryParser.Boundaries)); -} - -} // namespace zen diff --git a/src/zenhttp/clients/httpclientcpr.h b/src/zenhttp/clients/httpclientcpr.h deleted file mode 100644 index 509ca5ae2..000000000 --- a/src/zenhttp/clients/httpclientcpr.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include "httpclientcommon.h" - -#include <zencore/logging.h> -#include <zenhttp/cprutils.h> -#include <zenhttp/httpclient.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <cpr/body.h> -#include <cpr/session.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -class CprHttpClient : public HttpClientBase -{ -public: - CprHttpClient(std::string_view BaseUri, const HttpClientSettings& Connectionsettings, std::function<bool()>&& CheckIfAbortFunction); - ~CprHttpClient(); - - // HttpClientBase - - [[nodiscard]] virtual Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Put(std::string_view Url, const KeyValueMap& Parameters = {}) override; - [[nodiscard]] virtual Response Get(std::string_view Url, - const KeyValueMap& AdditionalHeader = {}, - const KeyValueMap& Parameters = {}) override; - [[nodiscard]] virtual Response Head(std::string_view Url, const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Delete(std::string_view Url, const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Post(std::string_view Url, - const KeyValueMap& AdditionalHeader = {}, - const KeyValueMap& Parameters = {}) override; - [[nodiscard]] virtual Response Post(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Post(std::string_view Url, - const IoBuffer& Payload, - ZenContentType ContentType, - const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Post(std::string_view Url, - CbObject Payload, - const KeyValueMap& AdditionalHeader = {}, - const std::filesystem::path& TempFolderPath = {}) override; - [[nodiscard]] virtual Response Post(std::string_view Url, CbPackage Payload, const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Post(std::string_view Url, - const CompositeBuffer& Payload, - ZenContentType ContentType, - const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Upload(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override; - [[nodiscard]] virtual Response Upload(std::string_view Url, - const CompositeBuffer& Payload, - ZenContentType ContentType, - const KeyValueMap& AdditionalHeader = {}) override; - - [[nodiscard]] virtual Response Download(std::string_view Url, - const std::filesystem::path& TempFolderPath, - const KeyValueMap& AdditionalHeader = {}) override; - - [[nodiscard]] virtual Response TransactPackage(std::string_view Url, - CbPackage Package, - const KeyValueMap& AdditionalHeader = {}) override; - -private: - struct Session - { - Session(CprHttpClient* InOuter, cpr::Session* InSession) : Outer(InOuter), CprSession(InSession) {} - ~Session() { Outer->ReleaseSession(CprSession); } - - inline cpr::Session* operator->() const { return CprSession; } - inline cpr::Response Get() - { - ZEN_TRACE_CPU("HttpClient::Impl::Get"); - cpr::Response Result = CprSession->Get(); - ZEN_TRACE("GET {}", Result); - return Result; - } - inline cpr::Response Download(cpr::WriteCallback&& Write, std::optional<cpr::HeaderCallback>&& Header = {}) - { - ZEN_TRACE_CPU("HttpClient::Impl::Download"); - if (Header) - { - CprSession->SetHeaderCallback(std::move(Header.value())); - } - cpr::Response Result = CprSession->Download(Write); - ZEN_TRACE("GET {}", Result); - CprSession->SetHeaderCallback({}); - CprSession->SetWriteCallback({}); - return Result; - } - inline cpr::Response Head() - { - ZEN_TRACE_CPU("HttpClient::Impl::Head"); - cpr::Response Result = CprSession->Head(); - ZEN_TRACE("HEAD {}", Result); - return Result; - } - inline cpr::Response Put(std::optional<cpr::ReadCallback>&& Read = {}) - { - ZEN_TRACE_CPU("HttpClient::Impl::Put"); - if (Read) - { - CprSession->SetReadCallback(std::move(Read.value())); - } - cpr::Response Result = CprSession->Put(); - ZEN_TRACE("PUT {}", Result); - CprSession->SetReadCallback({}); - return Result; - } - inline cpr::Response Post(std::optional<cpr::ReadCallback>&& Read = {}, - std::optional<cpr::WriteCallback>&& Write = {}, - std::optional<cpr::HeaderCallback>&& Header = {}) - { - ZEN_TRACE_CPU("HttpClient::Impl::Post"); - if (Read) - { - CprSession->SetReadCallback(std::move(Read.value())); - } - if (Write) - { - CprSession->SetWriteCallback(std::move(Write.value())); - } - if (Header) - { - CprSession->SetHeaderCallback(std::move(Header.value())); - } - cpr::Response Result = CprSession->Post(); - ZEN_TRACE("POST {}", Result); - CprSession->SetHeaderCallback({}); - CprSession->SetWriteCallback({}); - CprSession->SetReadCallback({}); - return Result; - } - inline cpr::Response Delete() - { - ZEN_TRACE_CPU("HttpClient::Impl::Delete"); - cpr::Response Result = CprSession->Delete(); - ZEN_TRACE("DELETE {}", Result); - return Result; - } - - LoggerRef Log() { return Outer->Log(); } - - private: - CprHttpClient* Outer; - cpr::Session* CprSession; - - Session(Session&&) = delete; - Session& operator=(Session&&) = delete; - }; - - Session AllocSession(const std::string_view BaseUrl, - const std::string_view Url, - const HttpClientSettings& ConnectionSettings, - const KeyValueMap& AdditionalHeader, - const KeyValueMap& Parameters, - const std::string_view SessionId, - std::optional<std::string> AccessToken); - - RwLock m_SessionLock; - std::vector<cpr::Session*> m_Sessions; - - void ReleaseSession(cpr::Session*); - - cpr::Response DoWithRetry(std::string_view SessionId, - std::function<cpr::Response()>&& Func, - std::unique_ptr<detail::TempPayloadFile>& PayloadFile); - cpr::Response DoWithRetry( - std::string_view SessionId, - std::function<cpr::Response()>&& Func, - std::function<bool(cpr::Response& Result)>&& Validate = [](cpr::Response&) { return true; }); - - bool ShouldLogErrorCode(HttpResponseCode ResponseCode) const; - bool ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile>& PayloadFile); - - HttpClient::Response CommonResponse(std::string_view SessionId, - cpr::Response&& HttpResponse, - IoBuffer&& Payload, - std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions = {}); - - HttpClient::Response ResponseWithPayload(std::string_view SessionId, - cpr::Response&& HttpResponse, - const HttpResponseCode WorkResponseCode, - IoBuffer&& Payload, - std::vector<HttpClient::Response::MultipartBoundary>&& BoundaryPositions); -}; - -} // namespace zen diff --git a/src/zenhttp/clients/httpclientcurl.cpp b/src/zenhttp/clients/httpclientcurl.cpp index e76157254..d150b44c6 100644 --- a/src/zenhttp/clients/httpclientcurl.cpp +++ b/src/zenhttp/clients/httpclientcurl.cpp @@ -980,7 +980,7 @@ CurlHttpClient::TransactPackage(std::string_view Url, CbPackage Package, const K // CurlHttpClient::Response -CurlHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader) +CurlHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters) { ZEN_TRACE_CPU("CurlHttpClient::Put"); @@ -989,7 +989,7 @@ CurlHttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValu DoWithRetry( m_SessionId, [&]() -> CurlResult { - Session Sess = AllocSession(Url, {}); + Session Sess = AllocSession(Url, Parameters); CURL* H = Sess.Get(); Sess.SetHeaders( diff --git a/src/zenhttp/clients/httpclientcurl.h b/src/zenhttp/clients/httpclientcurl.h index b7fa52e6c..bdeb46633 100644 --- a/src/zenhttp/clients/httpclientcurl.h +++ b/src/zenhttp/clients/httpclientcurl.h @@ -21,7 +21,10 @@ public: // HttpClientBase - [[nodiscard]] virtual Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}) override; + [[nodiscard]] virtual Response Put(std::string_view Url, + const IoBuffer& Payload, + const KeyValueMap& AdditionalHeader = {}, + const KeyValueMap& Parameters = {}) override; [[nodiscard]] virtual Response Put(std::string_view Url, const KeyValueMap& Parameters = {}) override; [[nodiscard]] virtual Response Get(std::string_view Url, const KeyValueMap& AdditionalHeader = {}, diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index 13c86e9ae..ace7a3c7f 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -36,12 +36,6 @@ namespace zen { -#if ZEN_WITH_CPR -extern HttpClientBase* CreateCprHttpClient(std::string_view BaseUri, - const HttpClientSettings& ConnectionSettings, - std::function<bool()>&& CheckIfAbortFunction); -#endif - extern HttpClientBase* CreateCurlHttpClient(std::string_view BaseUri, const HttpClientSettings& ConnectionSettings, std::function<bool()>&& CheckIfAbortFunction); @@ -57,14 +51,7 @@ SetDefaultHttpClientBackend(HttpClientBackend Backend) void SetDefaultHttpClientBackend(std::string_view Backend) { -#if ZEN_WITH_CPR - if (Backend == "cpr") - { - g_DefaultHttpClientBackend = HttpClientBackend::kCpr; - } - else -#endif - if (Backend == "curl") + if (Backend == "curl") { g_DefaultHttpClientBackend = HttpClientBackend::kCurl; } @@ -378,22 +365,7 @@ HttpClient::HttpClient(std::string_view BaseUri, const HttpClientSettings& Conne , m_ConnectionSettings(ConnectionSettings) { m_SessionId = GetSessionIdString(); - - HttpClientBackend EffectiveBackend = - ConnectionSettings.Backend != HttpClientBackend::kDefault ? ConnectionSettings.Backend : g_DefaultHttpClientBackend; - - switch (EffectiveBackend) - { -#if ZEN_WITH_CPR - case HttpClientBackend::kCpr: - m_Inner = CreateCprHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction)); - break; -#endif - case HttpClientBackend::kCurl: - default: - m_Inner = CreateCurlHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction)); - break; - } + m_Inner = CreateCurlHttpClient(BaseUri, ConnectionSettings, std::move(CheckIfAbortFunction)); } HttpClient::~HttpClient() @@ -402,6 +374,13 @@ HttpClient::~HttpClient() } void +HttpClient::SetBaseUri(std::string_view NewBaseUri) +{ + m_BaseUri = NewBaseUri; + m_Inner->SetBaseUri(NewBaseUri); +} + +void HttpClient::SetSessionId(const Oid& SessionId) { if (SessionId == Oid::Zero) @@ -415,9 +394,12 @@ HttpClient::SetSessionId(const Oid& SessionId) } HttpClient::Response -HttpClient::Put(std::string_view Url, const IoBuffer& Payload, const HttpClient::KeyValueMap& AdditionalHeader) +HttpClient::Put(std::string_view Url, + const IoBuffer& Payload, + const HttpClient::KeyValueMap& AdditionalHeader, + const HttpClient::KeyValueMap& Parameters) { - return m_Inner->Put(Url, Payload, AdditionalHeader); + return m_Inner->Put(Url, Payload, AdditionalHeader, Parameters); } HttpClient::Response @@ -977,6 +959,71 @@ TEST_CASE("httpclient.password") AsioServer->RequestExit(); } } +TEST_CASE("httpclient.setbaseuri") +{ + struct TestHttpService : public HttpService + { + explicit TestHttpService(std::string_view Identity) : m_Identity(Identity) {} + + virtual const char* BaseUri() const override { return "/test/"; } + virtual void HandleRequest(HttpServerRequest& Req) override + { + Req.WriteResponse(HttpResponseCode::OK, HttpContentType::kText, m_Identity); + } + + std::string m_Identity; + }; + + ScopedTemporaryDirectory TmpDir1; + ScopedTemporaryDirectory TmpDir2; + TestHttpService Service1("server-one"); + TestHttpService Service2("server-two"); + + Ref<HttpServer> Server1 = CreateHttpAsioServer(AsioConfig{}); + Ref<HttpServer> Server2 = CreateHttpAsioServer(AsioConfig{}); + + int Port1 = Server1->Initialize(0, TmpDir1.Path()); + int Port2 = Server2->Initialize(0, TmpDir2.Path()); + REQUIRE(Port1 != -1); + REQUIRE(Port2 != -1); + + Server1->RegisterService(Service1); + Server2->RegisterService(Service2); + + std::thread Thread1([&]() { Server1->Run(false); }); + std::thread Thread2([&]() { Server2->Run(false); }); + + auto _ = MakeGuard([&]() { + if (Thread1.joinable()) + { + Thread1.join(); + } + if (Thread2.joinable()) + { + Thread2.join(); + } + Server1->Close(); + Server2->Close(); + }); + + HttpClient Client(fmt::format("127.0.0.1:{}", Port1), HttpClientSettings{}, {}); + CHECK_EQ(Client.GetBaseUri(), fmt::format("127.0.0.1:{}", Port1)); + + HttpClient::Response Resp1 = Client.Get("/test/hello"); + CHECK(Resp1.IsSuccess()); + CHECK_EQ(Resp1.AsText(), "server-one"); + + Client.SetBaseUri(fmt::format("127.0.0.1:{}", Port2)); + CHECK_EQ(Client.GetBaseUri(), fmt::format("127.0.0.1:{}", Port2)); + + HttpClient::Response Resp2 = Client.Get("/test/hello"); + CHECK(Resp2.IsSuccess()); + CHECK_EQ(Resp2.AsText(), "server-two"); + + Server1->RequestExit(); + Server2->RequestExit(); +} + TEST_SUITE_END(); void diff --git a/src/zenhttp/httpclient_test.cpp b/src/zenhttp/httpclient_test.cpp index 7a657c464..af653cbb2 100644 --- a/src/zenhttp/httpclient_test.cpp +++ b/src/zenhttp/httpclient_test.cpp @@ -492,6 +492,17 @@ TEST_CASE("httpclient.put") CHECK_EQ(Resp.StatusCode, HttpResponseCode::Created); CHECK_EQ(Resp.AsText(), "resource created"); } + + SUBCASE("PUT with payload and query parameters") + { + const char* Payload = "put payload data"; + IoBuffer Buf(IoBuffer::Clone, Payload, strlen(Payload)); + Buf.SetContentType(ZenContentType::kText); + + HttpClient::Response Resp = Client.Put("/api/test/echo/uri", Buf, {}, {{"key", "value"}}); + CHECK(Resp.IsSuccess()); + CHECK_EQ(Resp.AsText(), "echo/uri\nkey=value"); + } } TEST_CASE("httpclient.upload") diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index a46c5b851..e05c9815f 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -329,6 +329,10 @@ ReasonStringForHttpResultCode(int HttpCode) return "Continue"sv; case 101: return "Switching Protocols"sv; + case 102: + return "Processing"sv; + case 103: + return "Early Hints"sv; // 2xx Success @@ -338,12 +342,20 @@ ReasonStringForHttpResultCode(int HttpCode) return "Created"sv; case 202: return "Accepted"sv; + case 203: + return "Non-Authoritative Information"sv; case 204: return "No Content"sv; case 205: return "Reset Content"sv; case 206: return "Partial Content"sv; + case 207: + return "Multi-Status"sv; + case 208: + return "Already Reported"sv; + case 226: + return "IM Used"sv; // 3xx Redirection @@ -424,6 +436,8 @@ ReasonStringForHttpResultCode(int HttpCode) return "Too Many Requests"sv; case 431: return "Request Header Fields Too Large"sv; + case 451: + return "Unavailable For Legal Reasons"sv; // 5xx Server errors @@ -798,7 +812,18 @@ HttpRequestRouter::HandleRequest(zen::HttpServerRequest& Request) const HttpVerb Verb = Request.RequestVerb(); - std::string_view Uri = Request.RelativeUri(); + std::string_view Uri = Request.RelativeUri(); + + // Strip the separator slash left over after the service prefix is removed. + // When a service has BaseUri "/foo", the prefix length is set to len("/foo") = 4. + // Stripping 4 chars from "/foo/bar" yields "/bar" — the path separator becomes + // the first character of the relative URI. Remove it so patterns like "bar" or + // "{id}" match without needing to account for the leading slash. + if (!Uri.empty() && Uri.front() == '/') + { + Uri.remove_prefix(1); + } + HttpRouterRequest RouterRequest(Request); for (const MatcherEndpoint& Handler : m_MatcherEndpoints) @@ -974,6 +999,12 @@ HttpServer::SetHttpRequestFilter(IHttpRequestFilter* RequestFilter) OnSetHttpRequestFilter(RequestFilter); } +void +HttpServer::HandleStatsRequest(HttpServerRequest& Request) +{ + Request.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + CbObject HttpServer::CollectStats() { @@ -1004,12 +1035,6 @@ HttpServer::CollectStats() return Cbo.Save(); } -void -HttpServer::HandleStatsRequest(HttpServerRequest& Request) -{ - Request.WriteResponse(HttpResponseCode::OK, CollectStats()); -} - ////////////////////////////////////////////////////////////////////////// HttpRpcHandler::HttpRpcHandler() @@ -1446,6 +1471,78 @@ TEST_CASE("http.common") } } + SUBCASE("router-leading-slash") + { + // Verify that HandleRequest strips the leading slash that server implementations + // leave in RelativeUri() when the service base URI has no trailing slash. + // e.g. BaseUri "/stats" + prefix-strip of "/stats/foo" yields "/foo", not "foo". + + bool HandledLiteral = false; + bool HandledPattern = false; + bool HandledTwoSeg = false; + std::vector<std::string> Captures; + auto Reset = [&] { + HandledLiteral = HandledPattern = HandledTwoSeg = false; + Captures.clear(); + }; + + TestHttpService Service; + HttpRequestRouter r; + + r.AddMatcher("seg", [](std::string_view In) -> bool { return !In.empty() && In.find('/') == std::string_view::npos; }); + + r.RegisterRoute( + "activity_counters", + [&](auto& /*Req*/) { HandledLiteral = true; }, + HttpVerb::kGet); + + r.RegisterRoute( + "{seg}", + [&](auto& Req) { + HandledPattern = true; + Captures = {std::string(Req.GetCapture(1))}; + }, + HttpVerb::kGet); + + r.RegisterRoute( + "prefix/{seg}", + [&](auto& Req) { + HandledTwoSeg = true; + Captures = {std::string(Req.GetCapture(1))}; + }, + HttpVerb::kGet); + + // Single-segment literal with leading slash — simulates real server RelativeUri + { + Reset(); + TestHttpServerRequest req{Service, "/activity_counters"sv}; + r.HandleRequest(req); + CHECK(HandledLiteral); + CHECK(!HandledPattern); + } + + // Single-segment pattern with leading slash + { + Reset(); + TestHttpServerRequest req{Service, "/hello"sv}; + r.HandleRequest(req); + CHECK(!HandledLiteral); + CHECK(HandledPattern); + REQUIRE_EQ(Captures.size(), 1); + CHECK_EQ(Captures[0], "hello"sv); + } + + // Two-segment route with leading slash — first literal segment + { + Reset(); + TestHttpServerRequest req{Service, "/prefix/world"sv}; + r.HandleRequest(req); + CHECK(HandledTwoSeg); + REQUIRE_EQ(Captures.size(), 1); + CHECK_EQ(Captures[0], "world"sv); + } + } + SUBCASE("content-type") { for (uint8_t i = 0; i < uint8_t(HttpContentType::kCOUNT); ++i) diff --git a/src/zenhttp/include/zenhttp/auth/authservice.h b/src/zenhttp/include/zenhttp/auth/authservice.h index 64b86e21f..ee67c0f5b 100644 --- a/src/zenhttp/include/zenhttp/auth/authservice.h +++ b/src/zenhttp/include/zenhttp/auth/authservice.h @@ -8,14 +8,14 @@ namespace zen { class AuthMgr; -class HttpAuthService final : public zen::HttpService +class HttpAuthService final : public HttpService { public: HttpAuthService(AuthMgr& AuthMgr); virtual ~HttpAuthService(); virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; private: AuthMgr& m_AuthMgr; diff --git a/src/zenhttp/include/zenhttp/cprutils.h b/src/zenhttp/include/zenhttp/cprutils.h deleted file mode 100644 index 3cfe652c5..000000000 --- a/src/zenhttp/include/zenhttp/cprutils.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#if ZEN_WITH_CPR - -# include <zencore/compactbinary.h> -# include <zencore/compactbinaryvalidation.h> -# include <zencore/iobuffer.h> -# include <zencore/string.h> -# include <zenhttp/formatters.h> -# include <zenhttp/httpclient.h> -# include <zenhttp/httpcommon.h> - -ZEN_THIRD_PARTY_INCLUDES_START -# include <cpr/response.h> -# include <fmt/format.h> -ZEN_THIRD_PARTY_INCLUDES_END - -template<> -struct fmt::formatter<cpr::Response> -{ - constexpr auto parse(format_parse_context& Ctx) -> decltype(Ctx.begin()) { return Ctx.end(); } - - template<typename FormatContext> - auto format(const cpr::Response& Response, FormatContext& Ctx) const -> decltype(Ctx.out()) - { - using namespace std::literals; - - if (Response.error) - { - return fmt::format_to(Ctx.out(), - "Failed: Url: {}, Reason: ({}) '{}'", - Response.url.str(), - int(Response.error.code), - Response.error.message); - } - else - { - const zen::NiceTimeSpanMs NiceResponseTime(uint64_t(Response.elapsed * 1000)); - - if (zen::IsHttpSuccessCode(Response.status_code)) - { - return fmt::format_to(Ctx.out(), - "OK: Url: {}, Status: ({}) '{}', Bytes: {}/{} (Up/Down), Elapsed: {}", - Response.url.str(), - Response.status_code, - zen::ToString(zen::HttpResponseCode(Response.status_code)), - Response.uploaded_bytes, - Response.downloaded_bytes, - NiceResponseTime.c_str()); - } - else - { - const auto It = Response.header.find("Content-Type"); - const std::string_view ContentType = It != Response.header.end() ? It->second : "<None>"sv; - - if (ContentType == "application/x-ue-cb"sv) - { - zen::IoBuffer Body(zen::IoBuffer::Wrap, Response.text.data(), Response.text.size()); - zen::CbObjectView Obj(Body.Data()); - zen::ExtendableStringBuilder<256> Sb; - std::string_view Json = Obj.ToJson(Sb).ToView(); - - return fmt::format_to( - Ctx.out(), - "Failed: Url: {}, Status: ({}) '{}', Reason: '{}'. Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}'", - Response.url.str(), - Response.status_code, - zen::ToString(zen::HttpResponseCode(Response.status_code)), - Response.reason, - Response.uploaded_bytes, - Response.downloaded_bytes, - NiceResponseTime.c_str(), - Json); - } - else - { - zen::BodyLogFormatter Body(Response.text); - - return fmt::format_to( - Ctx.out(), - "Failed: Url: {}, Status: ({}) '{}', Reason: '{}'. Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}'", - Response.url.str(), - Response.status_code, - zen::ToString(zen::HttpResponseCode(Response.status_code)), - Response.reason, - Response.uploaded_bytes, - Response.downloaded_bytes, - NiceResponseTime.c_str(), - Body.GetText()); - } - } - } - } -}; - -#endif // ZEN_WITH_CPR diff --git a/src/zenhttp/include/zenhttp/httpclient.h b/src/zenhttp/include/zenhttp/httpclient.h index 9531b9366..e199b700f 100644 --- a/src/zenhttp/include/zenhttp/httpclient.h +++ b/src/zenhttp/include/zenhttp/httpclient.h @@ -52,9 +52,6 @@ enum class HttpClientErrorCode : int enum class HttpClientBackend : uint8_t { kDefault, -#if ZEN_WITH_CPR - kCpr, -#endif kCurl, }; @@ -326,7 +323,10 @@ public: return std::make_pair("Accept", MapContentTypeToString(ContentType)); } - [[nodiscard]] Response Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap& AdditionalHeader = {}); + [[nodiscard]] Response Put(std::string_view Url, + const IoBuffer& Payload, + const KeyValueMap& AdditionalHeader = {}, + const KeyValueMap& Parameters = {}); [[nodiscard]] Response Put(std::string_view Url, const KeyValueMap& Parameters = {}); [[nodiscard]] Response Get(std::string_view Url, const KeyValueMap& AdditionalHeader = {}, const KeyValueMap& Parameters = {}); [[nodiscard]] Response Head(std::string_view Url, const KeyValueMap& AdditionalHeader = {}); @@ -361,6 +361,7 @@ public: LoggerRef Log() { return m_Log; } std::string_view GetBaseUri() const { return m_BaseUri; } std::string_view GetSessionId() const { return m_SessionId; } + void SetBaseUri(std::string_view NewBaseUri); void SetSessionId(const Oid& SessionId); bool Authenticate(); diff --git a/src/zenhttp/include/zenhttp/httpcommon.h b/src/zenhttp/include/zenhttp/httpcommon.h index 8fca35ac5..f9a99f3cc 100644 --- a/src/zenhttp/include/zenhttp/httpcommon.h +++ b/src/zenhttp/include/zenhttp/httpcommon.h @@ -91,6 +91,7 @@ enum class HttpResponseCode //!< were not for the fact that the condition has evaluated to false. UseProxy = 305, //!< \deprecated \parblock Due to security concerns regarding in-band configuration of a proxy. \endparblock //!< The requested resource MUST be accessed through the proxy given by the Location field. + SwitchProxy = 306, //!< \deprecated No longer used. Originally meant subsequent requests should use the specified proxy. TemporaryRedirect = 307, //!< Indicates that the target resource resides temporarily under a different URI and the user agent MUST NOT //!< change the request method if it performs an automatic redirection to that URI. PermanentRedirect = 308, //!< The target resource has been assigned a new permanent URI and any future references to this resource @@ -133,12 +134,14 @@ enum class HttpResponseCode ExpectationFailed = 417, //!< Indicates that the expectation given in the request's Expect header field could not be met by at least //!< one of the inbound servers. ImATeapot = 418, //!< Any attempt to brew coffee with a teapot should result in the error code 418 I'm a teapot. + MisdirectedRequest = 421, //!< Indicates that the request was directed at a server that is not able to produce a response. UnprocessableEntity = 422, //!< Means the server understands the content type of the request entity (hence a 415(Unsupported Media //!< Type) status code is inappropriate), and the syntax of the request entity is correct (thus a 400 (Bad //!< Request) status code is inappropriate) but was unable to process the contained instructions. Locked = 423, //!< Means the source or destination resource of a method is locked. FailedDependency = 424, //!< Means that the method could not be performed on the resource because the requested action depended on //!< another action and that action failed. + TooEarly = 425, //!< Indicates that the server is unwilling to risk processing a request that might be replayed. UpgradeRequired = 426, //!< Indicates that the server refuses to perform the request using the current protocol but might be willing to //!< do so after the client upgrades to a different protocol. PreconditionRequired = 428, //!< Indicates that the origin server requires the request to be conditional. diff --git a/src/zenhttp/include/zenhttp/httpserver.h b/src/zenhttp/include/zenhttp/httpserver.h index 633eb06be..5eaed6004 100644 --- a/src/zenhttp/include/zenhttp/httpserver.h +++ b/src/zenhttp/include/zenhttp/httpserver.h @@ -220,6 +220,12 @@ struct IHttpStatsProvider * not override this will be skipped in WebSocket broadcasts. */ virtual CbObject CollectStats() { return {}; } + + /** Return a number indicating activity. Increase the number + * when activity is detected. Example would be to return the + * number of received requests + */ + virtual uint64_t GetActivityCounter() { return 0; } }; struct IHttpStatsService @@ -302,8 +308,8 @@ public: } // IHttpStatsProvider - virtual CbObject CollectStats() override; virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; private: std::vector<HttpService*> m_KnownServices; diff --git a/src/zenhttp/include/zenhttp/httpstats.h b/src/zenhttp/include/zenhttp/httpstats.h index 460315faf..bce771c75 100644 --- a/src/zenhttp/include/zenhttp/httpstats.h +++ b/src/zenhttp/include/zenhttp/httpstats.h @@ -62,6 +62,7 @@ private: std::atomic<bool> m_PushEnabled{false}; void BroadcastStats(); + void Initialize(); // Thread-based push (when no io_context is provided) std::thread m_PushThread; diff --git a/src/zenhttp/monitoring/httpstats.cpp b/src/zenhttp/monitoring/httpstats.cpp index 283cedca7..7e6207e56 100644 --- a/src/zenhttp/monitoring/httpstats.cpp +++ b/src/zenhttp/monitoring/httpstats.cpp @@ -16,6 +16,7 @@ HttpStatsService::HttpStatsService(bool EnableWebSockets) : m_Log(logging::Get(" m_PushEnabled.store(true); m_PushThread = std::thread([this] { PushThreadFunction(); }); } + Initialize(); } HttpStatsService::HttpStatsService(asio::io_context& IoContext, bool EnableWebSockets) : m_Log(logging::Get("stats")) @@ -26,6 +27,110 @@ HttpStatsService::HttpStatsService(asio::io_context& IoContext, bool EnableWebSo m_PushTimer = std::make_unique<asio::steady_timer>(IoContext); EnqueuePushTimer(); } + Initialize(); +} + +void +HttpStatsService::Initialize() +{ + m_Router.AddMatcher("handler_id", [](std::string_view Str) -> bool { + if (Str.empty()) + { + return false; + } + for (const auto C : Str) + { + if (std::isalnum(C) || C == '$') + { + // fine + } + else + { + // not fine + return false; + } + } + return true; + }); + + m_Router.RegisterRoute( + "activity_counters", + [this](HttpRouterRequest& Request) { + CbObjectWriter Obj; + + std::uint64_t SumActivity = 0; + + std::vector<std::pair<std::string, uint64_t>> Activities; + { + RwLock::SharedLockScope _(m_Lock); + Activities.reserve(m_Providers.size()); + for (const auto& It : m_Providers) + { + const std::string& HandlerName = It.first; + IHttpStatsProvider* Provider = It.second; + ZEN_ASSERT(Provider != nullptr); + uint64_t ProviderActivityCounter = Provider->GetActivityCounter(); + if (ProviderActivityCounter != 0) + { + Activities.push_back(std::make_pair(HandlerName, ProviderActivityCounter)); + } + SumActivity += ProviderActivityCounter; + } + } + + Obj.BeginArray("providers"); + for (const std::pair<std::string, uint64_t>& Activity : Activities) + { + const std::string& HandlerName = Activity.first; + uint64_t ProviderActivityCounter = Activity.second; + Obj.BeginObject(); + { + Obj.AddString("provider", HandlerName); + Obj.AddInteger("activity_counter", ProviderActivityCounter); + } + Obj.EndObject(); + } + Obj.EndArray(); + + Obj.AddInteger("sum", SumActivity); + + Request.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + }, + HttpVerb::kGet); + + m_Router.RegisterRoute( + "{handler_id}", + [this](HttpRouterRequest& Request) { + std::string_view Handler = Request.GetCapture(1); + RwLock::SharedLockScope _(m_Lock); + if (auto It = m_Providers.find(std::string{Handler}); It != end(m_Providers)) + { + return It->second->HandleStatsRequest(Request.ServerRequest()); + } + Request.ServerRequest().WriteResponse(HttpResponseCode::NotFound); + }, + HttpVerb::kHead | HttpVerb::kGet); + + m_Router.RegisterRoute( + "", + [this](HttpRouterRequest& Request) { + CbObjectWriter Cbo; + + Cbo.BeginArray("providers"); + + { + RwLock::SharedLockScope _(m_Lock); + for (auto& Kv : m_Providers) + { + Cbo << Kv.first; + } + } + + Cbo.EndArray(); + + Request.ServerRequest().WriteResponse(HttpResponseCode::OK, Cbo.Save()); + }, + HttpVerb::kHead | HttpVerb::kGet); } HttpStatsService::~HttpStatsService() @@ -82,54 +187,7 @@ void HttpStatsService::HandleRequest(HttpServerRequest& Request) { ZEN_TRACE_CPU("HttpStatsService::HandleRequest"); - using namespace std::literals; - - std::string_view Key = Request.RelativeUri(); - - switch (Request.RequestVerb()) - { - case HttpVerb::kHead: - case HttpVerb::kGet: - { - if (Key.empty()) - { - CbObjectWriter Cbo; - - Cbo.BeginArray("providers"); - - { - RwLock::SharedLockScope _(m_Lock); - for (auto& Kv : m_Providers) - { - Cbo << Kv.first; - } - } - - Cbo.EndArray(); - - Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); - } - else if (Key[0] == '/') - { - Key.remove_prefix(1); - size_t SlashPos = Key.find_first_of("/?"); - if (SlashPos != std::string::npos) - { - Key = Key.substr(0, SlashPos); - } - - RwLock::SharedLockScope _(m_Lock); - if (auto It = m_Providers.find(std::string{Key}); It != end(m_Providers)) - { - return It->second->HandleStatsRequest(Request); - } - } - } - - [[fallthrough]]; - default: - return; - } + m_Router.HandleRequest(Request); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zenhttp/xmake.lua b/src/zenhttp/xmake.lua index b4c65ea96..7b050ae35 100644 --- a/src/zenhttp/xmake.lua +++ b/src/zenhttp/xmake.lua @@ -9,12 +9,7 @@ target('zenhttp') add_files("servers/wshttpsys.cpp", {unity_ignored=true}) add_includedirs("include", {public=true}) add_deps("zencore", "zentelemetry", "transport-sdk", "asio") - if has_config("zencpr") then - add_deps("cpr") - else - remove_files("clients/httpclientcpr.cpp") - end - add_packages("http_parser", "json11") + add_packages("http_parser", "json11", "libcurl") add_options("httpsys") if is_plat("linux", "macosx") then diff --git a/src/zenremotestore/builds/jupiterbuildstorage.cpp b/src/zenremotestore/builds/jupiterbuildstorage.cpp index c3f7b9e71..ad4c4bc89 100644 --- a/src/zenremotestore/builds/jupiterbuildstorage.cpp +++ b/src/zenremotestore/builds/jupiterbuildstorage.cpp @@ -14,7 +14,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END -#include <regex> +#include <string_view> namespace zen { @@ -572,35 +572,135 @@ ParseBuildStorageUrl(std::string_view InUrl, Url.erase(ApiString, ExtendedApiString.length()); } - const std::string ArtifactURLRegExString = R"((http[s]?:\/\/.*?)\/(.*?)\/(.*?)\/(.*))"; - const std::regex ArtifactURLRegEx(ArtifactURLRegExString, std::regex::ECMAScript | std::regex::icase); - std::match_results<std::string_view::const_iterator> MatchResults; - std::string_view UrlToParse(Url); - if (regex_match(begin(UrlToParse), end(UrlToParse), MatchResults, ArtifactURLRegEx) && MatchResults.size() == 5) - { - auto GetMatch = [&MatchResults](uint32_t Index) -> std::string_view { - ZEN_ASSERT(Index < MatchResults.size()); + // Parse URL of the form: http[s]://host/namespace/bucket/buildid + std::string_view Remaining(Url); - const auto& Match = MatchResults[Index]; + // Find the end of the scheme (e.g. "http://" or "https://") + size_t SchemeEnd = Remaining.find("://"); + if (SchemeEnd == std::string_view::npos) + { + return false; + } + SchemeEnd += 3; // skip past "://" - return std::string_view(&*Match.first, Match.second - Match.first); - }; + // Find the first '/' after the host + size_t HostEnd = Remaining.find('/', SchemeEnd); + if (HostEnd == std::string_view::npos) + { + return false; + } - const std::string_view Host = GetMatch(1); - const std::string_view Namespace = GetMatch(2); - const std::string_view Bucket = GetMatch(3); - const std::string_view BuildId = GetMatch(4); + // Find the '/' after namespace + size_t NamespaceEnd = Remaining.find('/', HostEnd + 1); + if (NamespaceEnd == std::string_view::npos) + { + return false; + } - OutHost = Host; - OutNamespace = Namespace; - OutBucket = Bucket; - OutBuildId = BuildId; - return true; + // Find the '/' after bucket + size_t BucketEnd = Remaining.find('/', NamespaceEnd + 1); + if (BucketEnd == std::string_view::npos) + { + return false; } - else + + // BuildId must be non-empty + if (BucketEnd + 1 >= Remaining.size()) { return false; } + + OutHost = Remaining.substr(0, HostEnd); + OutNamespace = Remaining.substr(HostEnd + 1, NamespaceEnd - HostEnd - 1); + OutBucket = Remaining.substr(NamespaceEnd + 1, BucketEnd - NamespaceEnd - 1); + OutBuildId = Remaining.substr(BucketEnd + 1); + return true; +} + +} // namespace zen + +#if ZEN_WITH_TESTS + +# include <zencore/testing.h> + +namespace zen { + +void +jupiterbuildstorage_forcelink() +{ } } // namespace zen + +TEST_SUITE_BEGIN("remotestore.jupiterbuildstorage"); + +TEST_CASE("ParseBuildStorageUrl.ValidUrl") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = + zen::ParseBuildStorageUrl("https://horde.devtools.epicgames.com/mynamespace/mybucket/mybuildid", Host, Namespace, Bucket, BuildId); + CHECK(Result); + CHECK(Host == "https://horde.devtools.epicgames.com"); + CHECK(Namespace == "mynamespace"); + CHECK(Bucket == "mybucket"); + CHECK(BuildId == "mybuildid"); +} + +TEST_CASE("ParseBuildStorageUrl.ValidUrlWithApiPrefix") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = zen::ParseBuildStorageUrl("https://horde.devtools.epicgames.com/api/v2/builds/mynamespace/mybucket/mybuildid", + Host, + Namespace, + Bucket, + BuildId); + CHECK(Result); + CHECK(Host == "https://horde.devtools.epicgames.com"); + CHECK(Namespace == "mynamespace"); + CHECK(Bucket == "mybucket"); + CHECK(BuildId == "mybuildid"); +} + +TEST_CASE("ParseBuildStorageUrl.HttpScheme") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = zen::ParseBuildStorageUrl("http://localhost/ns/bucket/build123", Host, Namespace, Bucket, BuildId); + CHECK(Result); + CHECK(Host == "http://localhost"); + CHECK(Namespace == "ns"); + CHECK(Bucket == "bucket"); + CHECK(BuildId == "build123"); +} + +TEST_CASE("ParseBuildStorageUrl.BuildIdWithSlashes") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = zen::ParseBuildStorageUrl("https://host/ns/bucket/build/with/slashes", Host, Namespace, Bucket, BuildId); + CHECK(Result); + CHECK(Host == "https://host"); + CHECK(Namespace == "ns"); + CHECK(Bucket == "bucket"); + CHECK(BuildId == "build/with/slashes"); +} + +TEST_CASE("ParseBuildStorageUrl.MissingBuildId") +{ + std::string Host, Namespace, Bucket, BuildId; + CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns/bucket/", Host, Namespace, Bucket, BuildId)); +} + +TEST_CASE("ParseBuildStorageUrl.MissingBucket") +{ + std::string Host, Namespace, Bucket, BuildId; + CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns", Host, Namespace, Bucket, BuildId)); +} + +TEST_CASE("ParseBuildStorageUrl.NoScheme") +{ + std::string Host, Namespace, Bucket, BuildId; + CHECK_FALSE(zen::ParseBuildStorageUrl("host/ns/bucket/buildid", Host, Namespace, Bucket, BuildId)); +} + +TEST_SUITE_END(); + +#endif // ZEN_WITH_TESTS diff --git a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h index 888ec8ead..270835521 100644 --- a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h +++ b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h @@ -22,4 +22,6 @@ bool ParseBuildStorageUrl(std::string_view InUrl, std::string& OutBucket, std::string& OutBuildId); +void jupiterbuildstorage_forcelink(); + } // namespace zen diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp index a0bb17260..0b205b296 100644 --- a/src/zenremotestore/zenremotestore.cpp +++ b/src/zenremotestore/zenremotestore.cpp @@ -5,6 +5,7 @@ #include <zenremotestore/builds/buildmanifest.h> #include <zenremotestore/builds/buildsavedstate.h> #include <zenremotestore/builds/buildstorageoperations.h> +#include <zenremotestore/builds/jupiterbuildstorage.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> #include <zenremotestore/chunking/chunkingcache.h> @@ -20,6 +21,7 @@ zenremotestore_forcelinktests() { buildmanifest_forcelink(); buildsavedstate_forcelink(); + jupiterbuildstorage_forcelink(); buildstorageoperations_forcelink(); chunkblock_forcelink(); chunkedcontent_forcelink(); diff --git a/src/zenserver-test/cache-tests.cpp b/src/zenserver-test/cache-tests.cpp index 334dd04ab..14748e214 100644 --- a/src/zenserver-test/cache-tests.cpp +++ b/src/zenserver-test/cache-tests.cpp @@ -1193,14 +1193,10 @@ TEST_CASE("zcache.rpc") // CbPackage Package; // CHECK(Request.Format(Package)); - // IoBuffer Body = FormatPackageMessageBuffer(Package).Flatten().AsIoBuffer(); - // cpr::Response Result = cpr::Post(cpr::Url{fmt::format("{}/$rpc", LocalCfg.BaseUri)}, - // cpr::Header{{"Content-Type", "application/x-ue-cbpkg"}, {"Accept", "application/x-ue-cbpkg"}}, - // cpr::Body{(const char*)Body.GetData(), Body.GetSize()}); + // IoBuffer Body = FormatPackageMessageBuffer(Package).Flatten().AsIoBuffer(); + // // TODO: rewrite using HttpClient instead of removed CPR dependency - // CHECK(Result.status_code == 200); // cacherequests::PutCacheRecordsResult ParsedResult; - // CbPackage Response = ParsePackageMessage(zen::IoBuffer(zen::IoBuffer::Wrap, Result.text.data(), Result.text.size())); // CHECK(!Response.IsNull()); // CHECK(ParsedResult.Parse(Response)); // for (bool ResponseSuccess : ParsedResult.Success) diff --git a/src/zenserver-test/hub-tests.cpp b/src/zenserver-test/hub-tests.cpp index dbe6fa785..b2da552fc 100644 --- a/src/zenserver-test/hub-tests.cpp +++ b/src/zenserver-test/hub-tests.cpp @@ -33,6 +33,77 @@ using namespace std::literals; static const HttpClientSettings kFastTimeout{.ConnectTimeout = std::chrono::milliseconds(200)}; +static bool +WaitForModuleState(HttpClient& Client, std::string_view ModuleId, std::string_view ExpectedState, int TimeoutMs = 10000) +{ + Stopwatch Timer; + while (Timer.GetElapsedTimeMs() < static_cast<uint64_t>(TimeoutMs)) + { + HttpClient::Response R = Client.Get(fmt::format("modules/{}", ModuleId)); + if (R && R.AsObject()["state"].AsString() == ExpectedState) + { + return true; + } + Sleep(100); + } + HttpClient::Response R = Client.Get(fmt::format("modules/{}", ModuleId)); + return R && R.AsObject()["state"].AsString() == ExpectedState; +} + +// Provision a module, retrying on 409 Conflict to handle the window where an async +// deprovision has removed the module from InstanceLookup but not yet from +// DeprovisioningModules (which CanProvisionInstance checks). +static HttpClient::Response +ProvisionModule(HttpClient& Client, std::string_view ModuleId, int TimeoutMs = 10000) +{ + Stopwatch Timer; + HttpClient::Response Result; + do + { + Result = Client.Post(fmt::format("modules/{}/provision", ModuleId)); + if (Result || Result.StatusCode != HttpResponseCode::Conflict) + { + return Result; + } + Sleep(100); + } while (Timer.GetElapsedTimeMs() < static_cast<uint64_t>(TimeoutMs)); + return Result; +} + +// Wait for a port to stop accepting connections (i.e. the process has terminated). +// Needed after async deprovision: WaitForModuleGone returns as soon as the module +// leaves m_InstanceLookup (synchronous), but the background worker that kills the +// process may not have run yet. +static bool +WaitForPortUnreachable(HttpClient& Client, std::string_view Path = "/health/", int TimeoutMs = 10000) +{ + Stopwatch Timer; + while (Timer.GetElapsedTimeMs() < static_cast<uint64_t>(TimeoutMs)) + { + if (!Client.Get(Path)) + { + return true; + } + Sleep(100); + } + return !Client.Get(Path); +} + +static bool +WaitForModuleGone(HttpClient& Client, std::string_view ModuleId, int TimeoutMs = 10000) +{ + Stopwatch Timer; + while (Timer.GetElapsedTimeMs() < static_cast<uint64_t>(TimeoutMs)) + { + if (Client.Get(fmt::format("modules/{}", ModuleId)).StatusCode == HttpResponseCode::NotFound) + { + return true; + } + Sleep(100); + } + return Client.Get(fmt::format("modules/{}", ModuleId)).StatusCode == HttpResponseCode::NotFound; +} + TEST_SUITE_BEGIN("server.hub"); TEST_CASE("hub.lifecycle.children") @@ -65,9 +136,7 @@ TEST_CASE("hub.lifecycle.children") AbcPort = AbcResult["port"].AsUInt16(0); CHECK_NE(AbcPort, 0); - Result = Client.Get("modules/abc"); - REQUIRE(Result); - CHECK_EQ(Result.AsObject()["state"].AsString(), "provisioned"sv); + REQUIRE(WaitForModuleState(Client, "abc", "provisioned")); // This should be a fresh instance with no contents @@ -91,6 +160,8 @@ TEST_CASE("hub.lifecycle.children") DefPort = DefResult["port"].AsUInt16(0); REQUIRE_NE(DefPort, 0); + REQUIRE(WaitForModuleState(Client, "def", "provisioned")); + // This should be a fresh instance with no contents HttpClient DefClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout); @@ -110,21 +181,24 @@ TEST_CASE("hub.lifecycle.children") Result = Client.Post("modules/ghi/provision"); REQUIRE(Result); + REQUIRE(WaitForModuleState(Client, "ghi", "provisioned")); // Tear down instances Result = Client.Post("modules/abc/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "abc")); { HttpClient ModClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } Result = Client.Post("modules/def/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "def")); { HttpClient ModClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } Result = Client.Post("modules/ghi/deprovision"); @@ -132,7 +206,7 @@ TEST_CASE("hub.lifecycle.children") // re-provision to verify that (de)hydration preserved state { - Result = Client.Post("modules/abc/provision"); + Result = ProvisionModule(Client, "abc"); REQUIRE(Result); CbObject AbcResult = Result.AsObject(); @@ -140,6 +214,8 @@ TEST_CASE("hub.lifecycle.children") AbcPort = AbcResult["port"].AsUInt16(0); REQUIRE_NE(AbcPort, 0); + REQUIRE(WaitForModuleState(Client, "abc", "provisioned")); + // This should contain the content from the previous run HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout); @@ -156,7 +232,7 @@ TEST_CASE("hub.lifecycle.children") } { - Result = Client.Post("modules/def/provision"); + Result = ProvisionModule(Client, "def"); REQUIRE(Result); CbObject DefResult = Result.AsObject(); @@ -164,6 +240,8 @@ TEST_CASE("hub.lifecycle.children") DefPort = DefResult["port"].AsUInt16(0); REQUIRE_NE(DefPort, 0); + REQUIRE(WaitForModuleState(Client, "def", "provisioned")); + // This should contain the content from the previous run HttpClient DefClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout); @@ -181,22 +259,24 @@ TEST_CASE("hub.lifecycle.children") Result = Client.Post("modules/abc/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "abc")); { HttpClient ModClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } Result = Client.Post("modules/def/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "def")); { HttpClient ModClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } // re-provision to verify that (de)hydration preserved state, including // state which was generated after the very first dehydration { - Result = Client.Post("modules/abc/provision"); + Result = ProvisionModule(Client, "abc"); REQUIRE(Result); CbObject AbcResult = Result.AsObject(); @@ -204,6 +284,8 @@ TEST_CASE("hub.lifecycle.children") AbcPort = AbcResult["port"].AsUInt16(0); REQUIRE_NE(AbcPort, 0); + REQUIRE(WaitForModuleState(Client, "abc", "provisioned")); + // This should contain the content from the previous two runs HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout); @@ -221,7 +303,7 @@ TEST_CASE("hub.lifecycle.children") } { - Result = Client.Post("modules/def/provision"); + Result = ProvisionModule(Client, "def"); REQUIRE(Result); CbObject DefResult = Result.AsObject(); @@ -229,6 +311,8 @@ TEST_CASE("hub.lifecycle.children") DefPort = DefResult["port"].AsUInt16(0); REQUIRE_NE(DefPort, 0); + REQUIRE(WaitForModuleState(Client, "def", "provisioned")); + // This should contain the content from the previous two runs HttpClient DefClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout); @@ -247,16 +331,18 @@ TEST_CASE("hub.lifecycle.children") Result = Client.Post("modules/abc/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "abc")); { HttpClient ModClient(fmt::format("http://localhost:{}", AbcPort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } Result = Client.Post("modules/def/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "def")); { HttpClient ModClient(fmt::format("http://localhost:{}", DefPort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } // final sanity check that the hub is still responsive and all modules are gone @@ -308,14 +394,45 @@ TEST_CASE("hub.consul.hub.registration") ConsulProc.SpawnConsulAgent(); ZenServerInstance Instance(TestEnv, ZenServerInstance::ServerMode::kHubServer); - const uint16_t PortNumber = - Instance.SpawnServerAndWaitUntilReady("--consul-endpoint=http://localhost:8500/ --instance-id=test-instance"); + const uint16_t PortNumber = Instance.SpawnServerAndWaitUntilReady( + "--consul-endpoint=http://localhost:8500/ --instance-id=test-instance " + "--consul-health-interval-seconds=5 --consul-deregister-after-seconds=60"); REQUIRE(PortNumber != 0); consul::ConsulClient Client("http://localhost:8500/"); - REQUIRE(WaitForConsulService(Client, "zen-hub-test-instance", true, 5000)); + // Verify custom intervals flowed through to the registered check + { + std::string JsonError; + CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), JsonError); + REQUIRE(JsonError.empty()); + + CbObjectView HubCheck; + for (CbFieldView F : ChecksRoot) + { + if (!F.IsObject()) + { + continue; + } + for (CbFieldView C : F.AsObjectView()) + { + CbObjectView Check = C.AsObjectView(); + if (Check["ServiceID"sv].AsString() == "zen-hub-test-instance"sv) + { + HubCheck = Check; + break; + } + } + } + REQUIRE(HubCheck); + // Consul does not reflect DeregisterCriticalServiceAfter back in /v1/agent/checks for + // service-embedded checks; Definition is always an empty object. Only Type and Interval + // are accessible at the top level. + CHECK_EQ(HubCheck["Type"sv].AsString(), "http"sv); + CHECK_EQ(HubCheck["Interval"sv].AsString(), "5s"sv); + } + Instance.Shutdown(); CHECK(!Client.HasService("zen-hub-test-instance")); @@ -393,16 +510,15 @@ TEST_CASE("hub.consul.provision.registration") HttpClient::Response Result = HubClient.Post("modules/testmod/provision"); REQUIRE(Result); - CHECK(Client.HasService("testmod")); - { - const uint16_t ModulePort = Result.AsObject()["port"].AsUInt16(0); - REQUIRE(ModulePort != 0); + // Service is registered in Consul during Provisioning (before the child process starts), + // so this returns as soon as the state transition fires, not when the server is ready. + REQUIRE(WaitForConsulService(Client, "testmod", true, 10000)); - { - HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); - CHECK(ModClient.Get("/health/")); - } + const uint16_t ModulePort = Result.AsObject()["port"].AsUInt16(0); + REQUIRE(ModulePort != 0); + // Consul fields are set during Provisioning and can be verified before the server is ready. + { std::string JsonError; CbFieldIterator ServicesRoot = LoadCompactBinaryFromJson(Client.GetAgentServicesJson(), JsonError); REQUIRE(JsonError.empty()); @@ -417,7 +533,7 @@ TEST_CASE("hub.consul.provision.registration") } REQUIRE(ServicesMap); - // Verify fields registered by OnProvisioned + // Verify fields registered by OnModuleStateChanged { CbObjectView ModService = ServicesMap["testmod"].AsObjectView(); CHECK_EQ(ModService["ID"sv].AsString(), "testmod"sv); @@ -455,8 +571,75 @@ TEST_CASE("hub.consul.provision.registration") CHECK_EQ(HubService["Port"sv].AsDouble(0), double(PortNumber)); } + // Verify hub health check endpoint URL (registered from startup with an active interval) + { + std::string ChecksJsonError; + CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), ChecksJsonError); + REQUIRE(ChecksJsonError.empty()); + + CbObjectView HubCheck; + for (CbFieldView F : ChecksRoot) + { + if (!F.IsObject()) + { + continue; + } + for (CbFieldView C : F.AsObjectView()) + { + CbObjectView Check = C.AsObjectView(); + if (Check["ServiceID"sv].AsString() == "zen-hub-test-instance"sv) + { + HubCheck = Check; + } + } + } + REQUIRE(HubCheck); + // Consul does not reflect HTTP URL back in /v1/agent/checks for service-embedded checks. + CHECK_EQ(HubCheck["Type"sv].AsString(), "http"sv); + } + } + + // Wait for Provisioned before touching the module's HTTP endpoint. + REQUIRE(WaitForModuleState(HubClient, "testmod", "provisioned")); + + // Verify module health check endpoint URL. No health check is registered during Provisioning + // (to avoid Consul marking the service critical before the child process is ready); it is added + // on transition to Provisioned. + { + std::string ChecksJsonError; + CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), ChecksJsonError); + REQUIRE(ChecksJsonError.empty()); + + CbObjectView ModCheck; + for (CbFieldView F : ChecksRoot) + { + if (!F.IsObject()) + { + continue; + } + for (CbFieldView C : F.AsObjectView()) + { + CbObjectView Check = C.AsObjectView(); + if (Check["ServiceID"sv].AsString() == "testmod"sv) + { + ModCheck = Check; + } + } + } + REQUIRE(ModCheck); + // Consul does not reflect HTTP URL back in /v1/agent/checks for service-embedded checks. + CHECK_EQ(ModCheck["Type"sv].AsString(), "http"sv); + } + + { + HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); + CHECK(ModClient.Get("/health/")); + } + + { Result = HubClient.Post("modules/testmod/deprovision"); REQUIRE(Result); + REQUIRE(WaitForConsulService(Client, "testmod", false, 10000)); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); @@ -482,13 +665,12 @@ TEST_CASE("hub.hibernate.lifecycle") // Provision HttpClient::Response Result = Client.Post("modules/testmod/provision"); REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv); const uint16_t ModulePort = Result.AsObject()["port"].AsUInt16(0); REQUIRE_NE(ModulePort, 0); - Result = Client.Get("modules/testmod"); - REQUIRE(Result); - CHECK_EQ(Result.AsObject()["state"].AsString(), "provisioned"sv); + REQUIRE(WaitForModuleState(Client, "testmod", "provisioned")); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); CHECK(ModClient.Get("/health/")); @@ -502,11 +684,10 @@ TEST_CASE("hub.hibernate.lifecycle") // Hibernate - state should become "hibernated", server should be unreachable Result = Client.Post("modules/testmod/hibernate"); REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv); - Result = Client.Get("modules/testmod"); - REQUIRE(Result); - CHECK_EQ(Result.AsObject()["state"].AsString(), "hibernated"sv); + REQUIRE(WaitForModuleState(Client, "testmod", "hibernated")); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); CHECK(!ModClient.Get("/health/")); @@ -515,11 +696,10 @@ TEST_CASE("hub.hibernate.lifecycle") // Wake - state should return to "provisioned", server should be reachable, data should be intact Result = Client.Post("modules/testmod/wake"); REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv); - Result = Client.Get("modules/testmod"); - REQUIRE(Result); - CHECK_EQ(Result.AsObject()["state"].AsString(), "provisioned"sv); + REQUIRE(WaitForModuleState(Client, "testmod", "provisioned")); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); CHECK(ModClient.Get("/health/")); @@ -532,17 +712,20 @@ TEST_CASE("hub.hibernate.lifecycle") // Deprovision - server should become unreachable Result = Client.Post("modules/testmod/deprovision"); REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + REQUIRE(WaitForModuleGone(Client, "testmod")); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } // Re-provision - server should be reachable on its (potentially new) port - Result = Client.Post("modules/testmod/provision"); + Result = ProvisionModule(Client, "testmod"); REQUIRE(Result); CHECK_EQ(Result.AsObject()["moduleId"].AsString(), "testmod"sv); const uint16_t ModulePort2 = Result.AsObject()["port"].AsUInt16(0); REQUIRE_NE(ModulePort2, 0); + REQUIRE(WaitForModuleState(Client, "testmod", "provisioned")); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort2), kFastTimeout); CHECK(ModClient.Get("/health/")); @@ -551,9 +734,10 @@ TEST_CASE("hub.hibernate.lifecycle") // Final deprovision - server should become unreachable Result = Client.Post("modules/testmod/deprovision"); REQUIRE(Result); + REQUIRE(WaitForModuleGone(Client, "testmod")); { HttpClient ModClient(fmt::format("http://localhost:{}", ModulePort2), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + CHECK(WaitForPortUnreachable(ModClient)); } } @@ -574,24 +758,76 @@ TEST_CASE("hub.hibernate.errors") CHECK(!Result); CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound); - // Double-hibernate: first call succeeds, second returns 400 (wrong state) + Result = Client.Post("modules/unknown/deprovision"); + CHECK(!Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound); + + Result = Client.Delete("modules/unknown"); + CHECK(!Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound); + + // Double-provision: second call while first is in-flight returns 202 Accepted with the same port. Result = Client.Post("modules/errmod/provision"); REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + const uint16_t ErrmodPort = Result.AsObject()["port"].AsUInt16(0); + REQUIRE_NE(ErrmodPort, 0); + // Provisioning the same module while in-flight returns 202 Accepted with the allocated port. + // Evaluated synchronously before WorkerPool dispatch, so safe regardless of timing. + Result = Client.Post("modules/errmod/provision"); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + CHECK_EQ(Result.AsObject()["port"].AsUInt16(0), ErrmodPort); + + REQUIRE(WaitForModuleState(Client, "errmod", "provisioned")); + + // Already provisioned: provision and wake both return 200 Completed. + Result = Client.Post("modules/errmod/provision"); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + Result = Client.Post("modules/errmod/wake"); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + // Double-hibernate: second call while first is in-flight returns 202 Accepted. Result = Client.Post("modules/errmod/hibernate"); REQUIRE(Result); Result = Client.Post("modules/errmod/hibernate"); - CHECK(!Result); - CHECK_EQ(Result.StatusCode, HttpResponseCode::BadRequest); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + + REQUIRE(WaitForModuleState(Client, "errmod", "hibernated")); - // Wake on provisioned: succeeds (state restored), then waking again returns 400 + // Already hibernated: hibernate returns 200 Completed. + Result = Client.Post("modules/errmod/hibernate"); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + // Double-wake: second call while first is in-flight returns 202 Accepted. Result = Client.Post("modules/errmod/wake"); REQUIRE(Result); Result = Client.Post("modules/errmod/wake"); - CHECK(!Result); - CHECK_EQ(Result.StatusCode, HttpResponseCode::BadRequest); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + + // Double-deprovision: second call while first is in-flight returns 202 Accepted. + // errmod2 is a fresh module to avoid waiting on the still-waking errmod. + Result = Client.Post("modules/errmod2/provision"); + REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + REQUIRE(WaitForModuleState(Client, "errmod2", "provisioned")); + + Result = Client.Post("modules/errmod2/deprovision"); + REQUIRE(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); + + Result = Client.Post("modules/errmod2/deprovision"); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); } TEST_SUITE_END(); diff --git a/src/zenserver/compute/computeserver.cpp b/src/zenserver/compute/computeserver.cpp index d1875f41a..1673cea6c 100644 --- a/src/zenserver/compute/computeserver.cpp +++ b/src/zenserver/compute/computeserver.cpp @@ -481,7 +481,7 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig) ServerConfig.DataDir / "functions", ServerConfig.MaxConcurrentActions); - m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService); + m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); # if ZEN_WITH_NOMAD // Nomad provisioner diff --git a/src/zenserver/config/config.cpp b/src/zenserver/config/config.cpp index 15f6f79f3..daad154bc 100644 --- a/src/zenserver/config/config.cpp +++ b/src/zenserver/config/config.cpp @@ -417,7 +417,7 @@ ZenServerCmdLineOptions::AddCliOptions(cxxopts::Options& options, ZenServerConfi options.add_option("network", "", "httpclient", - "Select HTTP client implementation (e.g. 'curl', 'cpr')", + "Select HTTP client implementation", cxxopts::value<std::string>(ServerOptions.HttpClient.Backend)->default_value("curl"), "<http client>"); diff --git a/src/zenserver/config/config.h b/src/zenserver/config/config.h index 5078fe71a..d35a1a8c7 100644 --- a/src/zenserver/config/config.h +++ b/src/zenserver/config/config.h @@ -40,7 +40,7 @@ struct ZenSentryConfig struct HttpClientConfig { - std::string Backend = "cpr"; // Choice of HTTP client implementation (e.g. "curl", "cpr") + std::string Backend = "curl"; // Choice of HTTP client implementation }; struct ZenServerConfig diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp index 7513e56f7..f3d8dbfe3 100644 --- a/src/zenserver/diag/logging.cpp +++ b/src/zenserver/diag/logging.cpp @@ -111,8 +111,8 @@ InitializeServerLogging(const ZenServerConfig& InOptions, bool WithCacheService) const zen::Oid ServerSessionId = zen::GetSessionId(); - static constinit logging::LogPoint SessionIdPoint{{}, logging::Info, "server session id: {}"}; logging::Registry::Instance().ApplyAll([&](auto Logger) { + static constinit logging::LogPoint SessionIdPoint{{}, logging::Info, "server session id: {}"}; ZEN_MEMSCOPE(ELLMTag::Logging); Logger->Log(SessionIdPoint, fmt::make_format_args(ServerSessionId)); }); diff --git a/src/zenserver/frontend/frontend.cpp b/src/zenserver/frontend/frontend.cpp index 697cc014e..52ec5b8b3 100644 --- a/src/zenserver/frontend/frontend.cpp +++ b/src/zenserver/frontend/frontend.cpp @@ -9,6 +9,7 @@ #include <zencore/logging.h> #include <zencore/string.h> #include <zencore/trace.h> +#include <zenhttp/httpstats.h> ZEN_THIRD_PARTY_INCLUDES_START #if ZEN_PLATFORM_WINDOWS @@ -28,8 +29,9 @@ static unsigned char gHtmlZipData[] = { namespace zen { //////////////////////////////////////////////////////////////////////////////// -HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpStatusService& StatusService) +HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpStatsService& StatsService, HttpStatusService& StatusService) : m_Directory(Directory) +, m_StatsService(StatsService) , m_StatusService(StatusService) { ZEN_TRACE_CPU("HttpFrontendService::HttpFrontendService"); @@ -94,12 +96,14 @@ HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpSt { ZEN_INFO("front-end is NOT AVAILABLE"); } + m_StatsService.RegisterHandler("dashboard", *this); m_StatusService.RegisterHandler("dashboard", *this); } HttpFrontendService::~HttpFrontendService() { m_StatusService.UnregisterHandler("dashboard", *this); + m_StatsService.UnregisterHandler("dashboard", *this); } const char* @@ -122,6 +126,8 @@ HttpFrontendService::HandleRequest(zen::HttpServerRequest& Request) { using namespace std::literals; + metrics::OperationTiming::Scope $(m_HttpRequests); + ExtendableStringBuilder<256> UriBuilder; std::string_view Uri = Request.RelativeUriWithExtension(); @@ -230,4 +236,26 @@ HttpFrontendService::HandleRequest(zen::HttpServerRequest& Request) } } +void +HttpFrontendService::HandleStatsRequest(HttpServerRequest& Request) +{ + Request.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + +CbObject +HttpFrontendService::CollectStats() +{ + ZEN_TRACE_CPU("HttpFrontendService::Stats"); + CbObjectWriter Cbo; + + EmitSnapshot("requests", m_HttpRequests, Cbo); + return Cbo.Save(); +} + +uint64_t +HttpFrontendService::GetActivityCounter() +{ + return m_HttpRequests.Count(); +} + } // namespace zen diff --git a/src/zenserver/frontend/frontend.h b/src/zenserver/frontend/frontend.h index 0ae3170ad..e0b86f1de 100644 --- a/src/zenserver/frontend/frontend.h +++ b/src/zenserver/frontend/frontend.h @@ -11,20 +11,27 @@ namespace zen { -class HttpFrontendService final : public zen::HttpService, public IHttpStatusProvider +class HttpStatsService; + +class HttpFrontendService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider { public: - HttpFrontendService(std::filesystem::path Directory, HttpStatusService& StatusService); + HttpFrontendService(std::filesystem::path Directory, HttpStatsService& StatsService, HttpStatusService& StatusService); virtual ~HttpFrontendService(); virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; private: - std::unique_ptr<ZipFs> m_ZipFs; - std::filesystem::path m_Directory; - std::filesystem::path m_DocsDirectory; - HttpStatusService& m_StatusService; + std::unique_ptr<ZipFs> m_ZipFs; + std::filesystem::path m_Directory; + std::filesystem::path m_DocsDirectory; + HttpStatsService& m_StatsService; + HttpStatusService& m_StatusService; + metrics::OperationTiming m_HttpRequests; }; } // namespace zen diff --git a/src/zenserver/frontend/html/pages/builds.js b/src/zenserver/frontend/html/pages/builds.js new file mode 100644 index 000000000..095f0bf29 --- /dev/null +++ b/src/zenserver/frontend/html/pages/builds.js @@ -0,0 +1,88 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +"use strict"; + +import { ZenPage } from "./page.js" +import { Fetcher } from "../util/fetcher.js" +import { Friendly } from "../util/friendly.js" + +//////////////////////////////////////////////////////////////////////////////// +export class Page extends ZenPage +{ + generate_crumbs() {} + + async main() + { + this.set_title("build store"); + + // Build Store Stats + const stats_section = this.add_section("Build Store Stats"); + stats_section.tag().classify("dropall").text("raw yaml \u2192").on_click(() => { + window.open("/stats/builds.yaml", "_blank"); + }); + this._stats_grid = stats_section.tag().classify("grid").classify("stats-tiles"); + + const stats = await new Fetcher().resource("stats", "builds").json(); + if (stats) + { + this._render_stats(stats); + } + + this.connect_stats_ws((all_stats) => { + const s = all_stats["builds"]; + if (s) + { + this._render_stats(s); + } + }); + } + + _render_stats(stats) + { + const grid = this._stats_grid; + const safe = (obj, path) => path.split(".").reduce((a, b) => a && a[b], obj); + + grid.inner().innerHTML = ""; + + // HTTP Requests tile + this._render_http_requests_tile(grid, safe(stats, "requests"), safe(stats, "store.badrequestcount") || 0); + + // Build Store tile + { + const blobs = safe(stats, "store.blobs"); + const metadata = safe(stats, "store.metadata"); + if (blobs || metadata) + { + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("Build Store"); + const columns = tile.tag().classify("tile-columns"); + + const left = columns.tag().classify("tile-metrics"); + this._metric(left, Friendly.bytes(safe(stats, "store.size.disk") || 0), "disk", true); + if (blobs) + { + this._metric(left, Friendly.sep(blobs.count || 0), "blobs"); + this._metric(left, Friendly.sep(blobs.readcount || 0), "blob reads"); + this._metric(left, Friendly.sep(blobs.writecount || 0), "blob writes"); + const blobHitRatio = (blobs.readcount || 0) > 0 + ? (((blobs.hitcount || 0) / blobs.readcount) * 100).toFixed(1) + "%" + : "-"; + this._metric(left, blobHitRatio, "blob hit ratio"); + } + + const right = columns.tag().classify("tile-metrics"); + if (metadata) + { + this._metric(right, Friendly.sep(metadata.count || 0), "metadata entries", true); + this._metric(right, Friendly.sep(metadata.readcount || 0), "meta reads"); + this._metric(right, Friendly.sep(metadata.writecount || 0), "meta writes"); + const metaHitRatio = (metadata.readcount || 0) > 0 + ? (((metadata.hitcount || 0) / metadata.readcount) * 100).toFixed(1) + "%" + : "-"; + this._metric(right, metaHitRatio, "meta hit ratio"); + } + } + } + } + +} diff --git a/src/zenserver/frontend/html/pages/hub.js b/src/zenserver/frontend/html/pages/hub.js index 149a5c79c..78e3a090c 100644 --- a/src/zenserver/frontend/html/pages/hub.js +++ b/src/zenserver/frontend/html/pages/hub.js @@ -178,7 +178,7 @@ export class Page extends ZenPage try { const [stats, status] = await Promise.all([ - new Fetcher().resource("/hub/stats").json(), + new Fetcher().resource("stats", "hub").json(), new Fetcher().resource("/hub/status").json(), ]); @@ -198,6 +198,9 @@ export class Page extends ZenPage const max = data.maxInstanceCount || 0; const limit = data.instanceLimit || 0; + // HTTP Requests tile + this._render_http_requests_tile(grid, data.requests); + { const tile = grid.tag().classify("card").classify("stats-tile"); tile.tag().classify("card-title").text("Active Modules"); @@ -611,14 +614,4 @@ export class Page extends ZenPage await fetch(`/hub/modules/${moduleId}/${action}`, { method: "POST" }); } - _metric(parent, value, label, hero = false) - { - const m = parent.tag().classify("tile-metric"); - if (hero) - { - m.classify("tile-metric-hero"); - } - m.tag().classify("metric-value").text(value); - m.tag().classify("metric-label").text(label); - } } diff --git a/src/zenserver/frontend/html/pages/objectstore.js b/src/zenserver/frontend/html/pages/objectstore.js index 69e0a91b3..6b4890614 100644 --- a/src/zenserver/frontend/html/pages/objectstore.js +++ b/src/zenserver/frontend/html/pages/objectstore.js @@ -30,13 +30,16 @@ export class Page extends ZenPage { try { - const data = await new Fetcher().resource("/obj/").json(); - this._render(data); + const [data, stats] = await Promise.all([ + new Fetcher().resource("/obj/").json(), + new Fetcher().resource("stats", "obj").json().catch(() => null), + ]); + this._render(data, stats); } catch (e) { /* service unavailable */ } } - _render(data) + _render(data, stats) { const buckets = data.buckets || []; @@ -53,32 +56,17 @@ export class Page extends ZenPage const total_objects = buckets.reduce((sum, b) => sum + (b.object_count || 0), 0); const total_size = buckets.reduce((sum, b) => sum + (b.size || 0), 0); - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Buckets"); - const body = tile.tag().classify("tile-metrics"); - this._metric(body, Friendly.sep(buckets.length), "total", true); - } - - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Objects"); - const body = tile.tag().classify("tile-metrics"); - this._metric(body, Friendly.sep(total_objects), "total", true); - } + // HTTP Requests tile + this._render_http_requests_tile(grid, stats && stats.requests); { const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Storage"); + tile.tag().classify("card-title").text("Object Store"); const body = tile.tag().classify("tile-metrics"); - this._metric(body, Friendly.bytes(total_size), "total size", true); - } - - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Served"); - const body = tile.tag().classify("tile-metrics"); - this._metric(body, Friendly.bytes(data.total_bytes_served || 0), "total bytes served", true); + this._metric(body, Friendly.sep(buckets.length), "buckets", true); + this._metric(body, Friendly.sep(total_objects), "objects"); + this._metric(body, Friendly.bytes(total_size), "storage"); + this._metric(body, Friendly.bytes(data.total_bytes_served || 0), "bytes served"); } } @@ -219,14 +207,4 @@ export class Page extends ZenPage } } - _metric(parent, value, label, hero = false) - { - const m = parent.tag().classify("tile-metric"); - if (hero) - { - m.classify("tile-metric-hero"); - } - m.tag().classify("metric-value").text(value); - m.tag().classify("metric-label").text(label); - } } diff --git a/src/zenserver/frontend/html/pages/page.js b/src/zenserver/frontend/html/pages/page.js index d969d651d..cf8d3e3dd 100644 --- a/src/zenserver/frontend/html/pages/page.js +++ b/src/zenserver/frontend/html/pages/page.js @@ -4,6 +4,7 @@ import { WidgetHost } from "../util/widgets.js" import { Fetcher } from "../util/fetcher.js" +import { Friendly } from "../util/friendly.js" //////////////////////////////////////////////////////////////////////////////// export class PageBase extends WidgetHost @@ -148,8 +149,10 @@ export class ZenPage extends PageBase const service_dashboards = [ { base_uri: "/sessions/", label: "Sessions", href: "/dashboard/?page=sessions" }, { base_uri: "/z$/", label: "Cache", href: "/dashboard/?page=cache" }, + { base_uri: "/builds/", label: "Build Store", href: "/dashboard/?page=builds" }, { base_uri: "/prj/", label: "Projects", href: "/dashboard/?page=projects" }, { base_uri: "/obj/", label: "Object Store", href: "/dashboard/?page=objectstore" }, + { base_uri: "/ws/", label: "Workspaces", href: "/dashboard/?page=workspaces" }, { base_uri: "/compute/", label: "Compute", href: "/dashboard/?page=compute" }, { base_uri: "/orch/", label: "Orchestrator", href: "/dashboard/?page=orchestrator" }, { base_uri: "/hub/", label: "Hub", href: "/dashboard/?page=hub" }, @@ -265,4 +268,73 @@ export class ZenPage extends PageBase new_crumb(auto_name); } + + _metric(parent, value, label, hero = false) + { + const m = parent.tag().classify("tile-metric"); + if (hero) + { + m.classify("tile-metric-hero"); + } + m.tag().classify("metric-value").text(value); + m.tag().classify("metric-label").text(label); + } + + _render_http_requests_tile(grid, req, bad_requests = undefined) + { + if (!req) + { + return; + } + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("HTTP Requests"); + const columns = tile.tag().classify("tile-columns"); + + const left = columns.tag().classify("tile-metrics"); + const reqData = req.requests || req; + this._metric(left, Friendly.sep(reqData.count || 0), "total requests", true); + if (reqData.rate_mean > 0) + { + this._metric(left, Friendly.sep(reqData.rate_mean, 1) + "/s", "req/sec (mean)"); + } + if (reqData.rate_1 > 0) + { + this._metric(left, Friendly.sep(reqData.rate_1, 1) + "/s", "req/sec (1m)"); + } + if (reqData.rate_5 > 0) + { + this._metric(left, Friendly.sep(reqData.rate_5, 1) + "/s", "req/sec (5m)"); + } + if (reqData.rate_15 > 0) + { + this._metric(left, Friendly.sep(reqData.rate_15, 1) + "/s", "req/sec (15m)"); + } + if (bad_requests !== undefined) + { + this._metric(left, Friendly.sep(bad_requests), "bad requests"); + } + + const right = columns.tag().classify("tile-metrics"); + this._metric(right, Friendly.duration(reqData.t_avg || 0), "avg latency", true); + if (reqData.t_p75) + { + this._metric(right, Friendly.duration(reqData.t_p75), "p75"); + } + if (reqData.t_p95) + { + this._metric(right, Friendly.duration(reqData.t_p95), "p95"); + } + if (reqData.t_p99) + { + this._metric(right, Friendly.duration(reqData.t_p99), "p99"); + } + if (reqData.t_p999) + { + this._metric(right, Friendly.duration(reqData.t_p999), "p999"); + } + if (reqData.t_max) + { + this._metric(right, Friendly.duration(reqData.t_max), "max"); + } + } } diff --git a/src/zenserver/frontend/html/pages/projects.js b/src/zenserver/frontend/html/pages/projects.js index a3c0d1555..2469bf70b 100644 --- a/src/zenserver/frontend/html/pages/projects.js +++ b/src/zenserver/frontend/html/pages/projects.js @@ -159,44 +159,7 @@ export class Page extends ZenPage grid.inner().innerHTML = ""; // HTTP Requests tile - { - const req = safe(stats, "requests"); - if (req) - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("HTTP Requests"); - const columns = tile.tag().classify("tile-columns"); - - const left = columns.tag().classify("tile-metrics"); - const reqData = req.requests || req; - this._metric(left, Friendly.sep(safe(stats, "store.requestcount") || 0), "total requests", true); - if (reqData.rate_mean > 0) - { - this._metric(left, Friendly.sep(reqData.rate_mean, 1) + "/s", "req/sec (mean)"); - } - if (reqData.rate_1 > 0) - { - this._metric(left, Friendly.sep(reqData.rate_1, 1) + "/s", "req/sec (1m)"); - } - const badRequests = safe(stats, "store.badrequestcount") || 0; - this._metric(left, Friendly.sep(badRequests), "bad requests"); - - const right = columns.tag().classify("tile-metrics"); - this._metric(right, Friendly.duration(reqData.t_avg || 0), "avg latency", true); - if (reqData.t_p75) - { - this._metric(right, Friendly.duration(reqData.t_p75), "p75"); - } - if (reqData.t_p95) - { - this._metric(right, Friendly.duration(reqData.t_p95), "p95"); - } - if (reqData.t_p99) - { - this._metric(right, Friendly.duration(reqData.t_p99), "p99"); - } - } - } + this._render_http_requests_tile(grid, safe(stats, "requests"), safe(stats, "store.badrequestcount") || 0); // Store Operations tile { @@ -268,17 +231,6 @@ export class Page extends ZenPage } } - _metric(parent, value, label, hero = false) - { - const m = parent.tag().classify("tile-metric"); - if (hero) - { - m.classify("tile-metric-hero"); - } - m.tag().classify("metric-value").text(value); - m.tag().classify("metric-label").text(label); - } - async view_project(project_id) { // Toggle off if already selected diff --git a/src/zenserver/frontend/html/pages/start.js b/src/zenserver/frontend/html/pages/start.js index df70ea2f4..e5b4d14f1 100644 --- a/src/zenserver/frontend/html/pages/start.js +++ b/src/zenserver/frontend/html/pages/start.js @@ -36,6 +36,15 @@ export class Page extends ZenPage all_stats[provider] = await new Fetcher().resource("stats", provider).json(); })); + this._http_panel = section.tag().classify("card").classify("stats-tile").classify("stats-http-panel"); + this._http_panel.inner().addEventListener("click", () => { window.location = "?page=metrics"; }); + this._http_panel.tag().classify("http-title").text("HTTP"); + const req_section = this._http_panel.tag().classify("http-section"); + req_section.tag().classify("http-section-label").text("Requests"); + this._http_req_metrics = req_section.tag().classify("tile-metrics"); + const ws_section = this._http_panel.tag().classify("http-section"); + ws_section.tag().classify("http-section-label").text("Websockets"); + this._http_ws_metrics = ws_section.tag().classify("tile-metrics"); this._stats_grid = section.tag().classify("grid").classify("stats-tiles"); this._safe_lookup = safe_lookup; this._render_stats(all_stats); @@ -113,7 +122,6 @@ export class Page extends ZenPage ); var cell = row.get_cell(0); cell.tag().text(namespace).on_click(() => this.view_zcache(namespace)); - row.get_cell(1).tag().text(namespace); cell = row.get_cell(-1); const action_tb = new Toolbar(cell, true); @@ -143,44 +151,43 @@ export class Page extends ZenPage const grid = this._stats_grid; const safe_lookup = this._safe_lookup; - // Clear existing tiles + // Clear and repopulate service tiles grid grid.inner().innerHTML = ""; - // HTTP tile — aggregate request stats across all providers - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("HTTP"); - const columns = tile.tag().classify("tile-columns"); - - // Left column: request stats - const left = columns.tag().classify("tile-metrics"); - - let total_requests = 0; - let total_rate = 0; - for (const p in all_stats) - { - total_requests += (safe_lookup(all_stats[p], "requests.count") || 0); - total_rate += (safe_lookup(all_stats[p], "requests.rate_1") || 0); - } + // HTTP panel — update metrics containers built once in main() + const left = this._http_req_metrics; + left.inner().innerHTML = ""; - this._add_tile_metric(left, Friendly.sep(total_requests), "total requests", true); - if (total_rate > 0) - this._add_tile_metric(left, Friendly.sep(total_rate, 1) + "/s", "req/sec (1m)"); + let total_requests = 0; + let total_rate = 0; + for (const p in all_stats) + { + total_requests += (safe_lookup(all_stats[p], "requests.count") || 0); + total_rate += (safe_lookup(all_stats[p], "requests.rate_1") || 0); + } - // Right column: websocket stats - const ws = all_stats["http"] ? (all_stats["http"]["websockets"] || {}) : {}; - const right = columns.tag().classify("tile-metrics"); + this._add_tile_metric(left, Friendly.sep(total_requests), "total requests", true); + if (total_rate > 0) + { + this._add_tile_metric(left, Friendly.sep(total_rate, 1) + "/s", "req/sec (1m)"); + } - this._add_tile_metric(right, Friendly.sep(ws.active_connections || 0), "ws connections", true); - const ws_frames = (ws.frames_received || 0) + (ws.frames_sent || 0); - if (ws_frames > 0) - this._add_tile_metric(right, Friendly.sep(ws_frames), "ws frames"); - const ws_bytes = (ws.bytes_received || 0) + (ws.bytes_sent || 0); - if (ws_bytes > 0) - this._add_tile_metric(right, Friendly.bytes(ws_bytes), "ws traffic"); + const right = this._http_ws_metrics; + right.inner().innerHTML = ""; - tile.on_click(() => { window.location = "?page=metrics"; }); + const ws = all_stats["http"] ? (all_stats["http"]["websockets"] || {}) : {}; + this._add_tile_metric(right, Friendly.sep(ws.active_connections || 0), "ws connections", true); + const ws_frames = (ws.frames_received || 0) + (ws.frames_sent || 0); + if (ws_frames > 0) + { + this._add_tile_metric(right, Friendly.sep(ws_frames), "ws frames"); } + const ws_bytes = (ws.bytes_received || 0) + (ws.bytes_sent || 0); + if (ws_bytes > 0) + { + this._add_tile_metric(right, Friendly.bytes(ws_bytes), "ws traffic"); + } + // Cache tile (z$) if (all_stats["z$"]) @@ -198,7 +205,7 @@ export class Page extends ZenPage this._add_tile_metric(body, safe_lookup(s, "cache.size.disk", Friendly.bytes) || "-", "disk"); this._add_tile_metric(body, safe_lookup(s, "cache.size.memory", Friendly.bytes) || "-", "memory"); - tile.on_click(() => { window.location = "?page=stat&provider=z$"; }); + tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=z$"; }); } // Project Store tile (prj) @@ -210,9 +217,9 @@ export class Page extends ZenPage const body = tile.tag().classify("tile-metrics"); this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true); - this._add_tile_metric(body, safe_lookup(s, "store.size.disk", Friendly.bytes) || "-", "disk"); + this._add_tile_metric(body, safe_lookup(s, "project_count", Friendly.sep) || "-", "projects"); - tile.on_click(() => { window.location = "?page=stat&provider=prj"; }); + tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=prj"; }); } // Build Store tile (builds) @@ -226,7 +233,7 @@ export class Page extends ZenPage this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true); this._add_tile_metric(body, safe_lookup(s, "store.size.disk", Friendly.bytes) || "-", "disk"); - tile.on_click(() => { window.location = "?page=stat&provider=builds"; }); + tile.inner().addEventListener("click", () => { window.location = "?page=builds"; }); } // Proxy tile @@ -250,7 +257,37 @@ export class Page extends ZenPage this._add_tile_metric(body, Friendly.sep(mappings.length), "mappings"); this._add_tile_metric(body, Friendly.bytes(totalBytes), "traffic"); - tile.on_click(() => { window.location = "?page=proxy"; }); + tile.inner().addEventListener("click", () => { window.location = "?page=proxy"; }); + } + + // Hub tile + if (all_stats["hub"]) + { + const s = all_stats["hub"]; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("Hub"); + const body = tile.tag().classify("tile-metrics"); + + const current = safe_lookup(s, "currentInstanceCount") || 0; + const limit = safe_lookup(s, "instanceLimit") || safe_lookup(s, "maxInstanceCount") || 0; + this._add_tile_metric(body, `${current} / ${limit}`, "instances", true); + this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests"); + + tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=hub"; }); + } + + // Object Store tile (obj) + if (all_stats["obj"]) + { + const s = all_stats["obj"]; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("Object Store"); + const body = tile.tag().classify("tile-metrics"); + + this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true); + this._add_tile_metric(body, safe_lookup(s, "total_bytes_served", Friendly.bytes) || "-", "bytes served"); + + tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=obj"; }); } // Workspace tile (ws) @@ -262,9 +299,9 @@ export class Page extends ZenPage const body = tile.tag().classify("tile-metrics"); this._add_tile_metric(body, safe_lookup(s, "requests.count", Friendly.sep) || "-", "requests", true); - this._add_tile_metric(body, safe_lookup(s, "workspaces.filescount", Friendly.sep) || "-", "files"); + this._add_tile_metric(body, safe_lookup(s, "workspaces", Friendly.sep) || "-", "workspaces"); - tile.on_click(() => { window.location = "?page=stat&provider=ws"; }); + tile.inner().addEventListener("click", () => { window.location = "?page=stat&provider=ws"; }); } } diff --git a/src/zenserver/frontend/html/pages/workspaces.js b/src/zenserver/frontend/html/pages/workspaces.js new file mode 100644 index 000000000..d31fd7373 --- /dev/null +++ b/src/zenserver/frontend/html/pages/workspaces.js @@ -0,0 +1,236 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +"use strict"; + +import { ZenPage } from "./page.js" +import { Fetcher } from "../util/fetcher.js" + +//////////////////////////////////////////////////////////////////////////////// +export class Page extends ZenPage +{ + async main() + { + this.set_title("workspaces"); + + // Workspace Service Stats + const stats_section = this.add_section("Workspace Service Stats"); + this._stats_grid = stats_section.tag().classify("grid").classify("stats-tiles"); + + const stats = await new Fetcher().resource("stats", "ws").json().catch(() => null); + if (stats) { this._render_stats(stats); } + + this.connect_stats_ws((all_stats) => { + const s = all_stats["ws"]; + if (s) { this._render_stats(s); } + }); + + const section = this.add_section("Workspaces"); + const host = section.tag(); + + // Toolbar: refresh button + const toolbar = host.tag().classify("module-bulk-bar"); + this._btn_refresh = toolbar.tag("button").classify("module-bulk-btn").inner(); + this._btn_refresh.textContent = "\u21BB Refresh"; + this._btn_refresh.addEventListener("click", () => this._do_refresh()); + + // Workspace table (raw DOM — in-place row updates require stable element refs) + const table = document.createElement("table"); + table.className = "module-table"; + const thead = document.createElement("thead"); + const hrow = document.createElement("tr"); + for (const label of ["WORKSPACE ID", "ROOT PATH"]) + { + const th = document.createElement("th"); + th.textContent = label; + hrow.appendChild(th); + } + thead.appendChild(hrow); + table.appendChild(thead); + this._tbody = document.createElement("tbody"); + table.appendChild(this._tbody); + host.inner().appendChild(table); + + // State + this._expanded = new Set(); // workspace ids with shares panel open + this._row_cache = new Map(); // workspace id -> row refs, for in-place DOM updates + this._loading = false; + + await this._load(); + } + + async _load() + { + if (this._loading) { return; } + this._loading = true; + this._btn_refresh.disabled = true; + try + { + const data = await new Fetcher().resource("/ws/").json(); + const workspaces = data.workspaces || []; + this._render(workspaces); + } + catch (e) { /* service unavailable */ } + finally + { + this._loading = false; + this._btn_refresh.disabled = false; + } + } + + async _do_refresh() + { + if (this._loading) { return; } + this._btn_refresh.disabled = true; + try + { + await new Fetcher().resource("/ws/refresh").text(); + } + catch (e) { /* ignore */ } + await this._load(); + } + + _render(workspaces) + { + const ws_map = new Map(workspaces.map(w => [w.id, w])); + + // Remove rows for workspaces no longer present + for (const [id, row] of this._row_cache) + { + if (!ws_map.has(id)) + { + row.tr.remove(); + row.detail_tr.remove(); + this._row_cache.delete(id); + this._expanded.delete(id); + } + } + + // Create or update rows, then reorder tbody to match response order. + // appendChild on an existing node moves it, so iterating in response order + // achieves correct ordering without touching rows already in the right position. + for (const ws of workspaces) + { + const id = ws.id || ""; + const shares = ws.shares || []; + + let row = this._row_cache.get(id); + if (row) + { + // Update in-place — preserves DOM node identity so expanded state is kept + row.root_path_node.nodeValue = ws.root_path || ""; + row.detail_tr.style.display = this._expanded.has(id) ? "" : "none"; + row.btn_expand.textContent = this._expanded.has(id) ? "\u25BE" : "\u25B8"; + const shares_json = JSON.stringify(shares); + if (shares_json !== row.shares_json) + { + row.shares_json = shares_json; + this._render_shares(row.sh_tbody, shares); + } + } + else + { + // Create new workspace row + const tr = document.createElement("tr"); + const detail_tr = document.createElement("tr"); + detail_tr.className = "module-metrics-row"; + detail_tr.style.display = this._expanded.has(id) ? "" : "none"; + + const btn_expand = document.createElement("button"); + btn_expand.className = "module-expand-btn"; + btn_expand.textContent = this._expanded.has(id) ? "\u25BE" : "\u25B8"; + btn_expand.addEventListener("click", () => { + if (this._expanded.has(id)) + { + this._expanded.delete(id); + detail_tr.style.display = "none"; + btn_expand.textContent = "\u25B8"; + } + else + { + this._expanded.add(id); + detail_tr.style.display = ""; + btn_expand.textContent = "\u25BE"; + } + }); + + const id_wrap = document.createElement("span"); + id_wrap.className = "ws-id-wrap"; + id_wrap.appendChild(btn_expand); + id_wrap.appendChild(document.createTextNode("\u00A0" + id)); + const td_id = document.createElement("td"); + td_id.appendChild(id_wrap); + tr.appendChild(td_id); + + const root_path_node = document.createTextNode(ws.root_path || ""); + const td_root = document.createElement("td"); + td_root.appendChild(root_path_node); + tr.appendChild(td_root); + + // Detail row: nested shares table + const sh_table = document.createElement("table"); + sh_table.className = "module-table ws-share-table"; + const sh_thead = document.createElement("thead"); + const sh_hrow = document.createElement("tr"); + for (const label of ["SHARE ID", "SHARE PATH", "ALIAS"]) + { + const th = document.createElement("th"); + th.textContent = label; + sh_hrow.appendChild(th); + } + sh_thead.appendChild(sh_hrow); + sh_table.appendChild(sh_thead); + const sh_tbody = document.createElement("tbody"); + sh_table.appendChild(sh_tbody); + const detail_td = document.createElement("td"); + detail_td.colSpan = 2; + detail_td.className = "ws-detail-cell"; + detail_td.appendChild(sh_table); + detail_tr.appendChild(detail_td); + + this._render_shares(sh_tbody, shares); + + row = { tr, detail_tr, root_path_node, sh_tbody, btn_expand, shares_json: JSON.stringify(shares) }; + this._row_cache.set(id, row); + } + + this._tbody.appendChild(row.tr); + this._tbody.appendChild(row.detail_tr); + } + } + + _render_stats(stats) + { + const grid = this._stats_grid; + grid.inner().innerHTML = ""; + + // HTTP Requests tile + this._render_http_requests_tile(grid, stats.requests); + } + + _render_shares(sh_tbody, shares) + { + sh_tbody.innerHTML = ""; + if (shares.length === 0) + { + const tr = document.createElement("tr"); + const td = document.createElement("td"); + td.colSpan = 3; + td.className = "ws-no-shares-cell"; + td.textContent = "No shares"; + tr.appendChild(td); + sh_tbody.appendChild(tr); + return; + } + for (const share of shares) + { + const tr = document.createElement("tr"); + for (const text of [share.id || "", share.share_path || "", share.alias || ""]) + { + const td = document.createElement("td"); + td.textContent = text; + tr.appendChild(td); + } + sh_tbody.appendChild(tr); + } + } +} diff --git a/src/zenserver/frontend/html/zen.css b/src/zenserver/frontend/html/zen.css index b4f7270fc..d9f7491ea 100644 --- a/src/zenserver/frontend/html/zen.css +++ b/src/zenserver/frontend/html/zen.css @@ -803,18 +803,17 @@ zen-banner + zen-nav::part(nav-bar) { /* stats tiles -------------------------------------------------------------- */ -.stats-tiles { - grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); +.grid.stats-tiles { + grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); } .stats-tile { cursor: pointer; - transition: border-color 0.15s, background 0.15s; + transition: border-color 0.15s; } .stats-tile:hover { border-color: var(--theme_p0); - background: var(--theme_p4); } .stats-tile-detailed { @@ -873,6 +872,81 @@ zen-banner + zen-nav::part(nav-bar) { font-size: 28px; } +/* HTTP summary panel ------------------------------------------------------- */ + +.stats-http-panel { + display: grid; + grid-template-columns: 20% 1fr 1fr; + align-items: center; + margin-bottom: 16px; +} + +.http-title { + font-size: 22px; + font-weight: 700; + color: var(--theme_bright); + text-transform: uppercase; + letter-spacing: 1px; + line-height: 1; +} + +.http-section { + display: flex; + flex-direction: column; + gap: 8px; + padding: 0 24px; + border-left: 1px solid var(--theme_g2); +} + +.http-section-label { + font-size: 11px; + font-weight: 600; + color: var(--theme_g1); + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.stats-http-panel .tile-metrics { + flex-direction: row; + align-items: center; + gap: 20px; +} + +/* workspaces page ---------------------------------------------------------- */ + +.ws-id-wrap { + display: inline-flex; + align-items: center; + font-family: 'SF Mono', 'Cascadia Mono', Consolas, 'DejaVu Sans Mono', monospace; + font-size: 14px; +} + +.ws-share-table { + width: 100%; + margin: 4px 0; +} + +.ws-share-table th { + padding: 4px; +} + +.ws-share-table td { + font-family: 'SF Mono', 'Cascadia Mono', Consolas, 'DejaVu Sans Mono', monospace; + font-size: 13px; + padding: 4px; +} + +.ws-share-table td.ws-no-shares-cell { + color: var(--theme_g1); + font-style: italic; + font-family: inherit; + padding: 4px 8px; +} + +.module-metrics-row td.ws-detail-cell { + padding-left: 24px; +} + /* start -------------------------------------------------------------------- */ #start { @@ -1030,7 +1104,7 @@ html:has(#map) { .card-title { font-size: 14px; font-weight: 600; - color: var(--theme_g1); + color: var(--theme_g0); margin-bottom: 12px; text-transform: uppercase; letter-spacing: 0.5px; diff --git a/src/zenserver/hub/httphubservice.cpp b/src/zenserver/hub/httphubservice.cpp index 34f4294e4..ebefcf2e3 100644 --- a/src/zenserver/hub/httphubservice.cpp +++ b/src/zenserver/hub/httphubservice.cpp @@ -8,10 +8,45 @@ #include <zencore/compactbinarybuilder.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zenhttp/httpstats.h> namespace zen { -HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub) +namespace { + bool HandleFailureResults(HttpServerRequest& Request, const Hub::Response& Resp) + { + if (Resp.ResponseCode == Hub::EResponseCode::Rejected) + { + if (Resp.Message.empty()) + { + Request.WriteResponse(HttpResponseCode::Conflict); + } + else + { + Request.WriteResponse(HttpResponseCode::Conflict, HttpContentType::kText, Resp.Message); + } + return true; + } + if (Resp.ResponseCode == Hub::EResponseCode::NotFound) + { + if (Resp.Message.empty()) + { + Request.WriteResponse(HttpResponseCode::NotFound); + } + else + { + Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, Resp.Message); + } + return true; + } + return false; + } +} // namespace + +HttpHubService::HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpStatusService& StatusService) +: m_Hub(Hub) +, m_StatsService(StatsService) +, m_StatusService(StatusService) { using namespace std::literals; @@ -83,144 +118,113 @@ HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub) [this](HttpRouterRequest& Req) { std::string_view ModuleId = Req.GetCapture(1); - std::string FailureReason = "unknown"; - HttpResponseCode ResponseCode = HttpResponseCode::OK; - try { HubProvisionedInstanceInfo Info; - if (m_Hub.Provision(ModuleId, /* out */ Info, /* out */ FailureReason)) - { - CbObjectWriter Obj; - Obj << "moduleId" << ModuleId; - Obj << "baseUri" << Info.BaseUri; - Obj << "port" << Info.Port; - Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + Hub::Response Resp = m_Hub.Provision(ModuleId, Info); - return; - } - else + if (HandleFailureResults(Req.ServerRequest(), Resp)) { - ResponseCode = HttpResponseCode::BadRequest; + return; } + + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; + CbObjectWriter Obj; + Obj << "moduleId" << ModuleId; + Obj << "baseUri" << Info.BaseUri; + Obj << "port" << Info.Port; + return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save()); } catch (const std::exception& Ex) { ZEN_ERROR("Exception while provisioning module '{}': {}", ModuleId, Ex.what()); - - FailureReason = Ex.what(); - ResponseCode = HttpResponseCode::InternalServerError; + throw; } - - Req.ServerRequest().WriteResponse(ResponseCode, HttpContentType::kText, FailureReason); }, HttpVerb::kPost); m_Router.RegisterRoute( "modules/{moduleid}/deprovision", [this](HttpRouterRequest& Req) { - std::string_view ModuleId = Req.GetCapture(1); - std::string FailureReason = "unknown"; + std::string_view ModuleId = Req.GetCapture(1); try { - if (!m_Hub.Deprovision(std::string(ModuleId), /* out */ FailureReason)) + Hub::Response Resp = m_Hub.Deprovision(std::string(ModuleId)); + + if (HandleFailureResults(Req.ServerRequest(), Resp)) { - if (FailureReason.empty()) - { - return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound); - } - else - { - return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason); - } + return; } + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; CbObjectWriter Obj; Obj << "moduleId" << ModuleId; - - return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save()); } catch (const std::exception& Ex) { ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what()); - - FailureReason = Ex.what(); + throw; } - - Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason); }, HttpVerb::kPost); m_Router.RegisterRoute( "modules/{moduleid}/hibernate", [this](HttpRouterRequest& Req) { - std::string_view ModuleId = Req.GetCapture(1); - std::string FailureReason = "unknown"; + std::string_view ModuleId = Req.GetCapture(1); try { - if (!m_Hub.Hibernate(std::string(ModuleId), /* out */ FailureReason)) + Hub::Response Resp = m_Hub.Hibernate(std::string(ModuleId)); + + if (HandleFailureResults(Req.ServerRequest(), Resp)) { - if (FailureReason.empty()) - { - return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound); - } - else - { - return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason); - } + return; } + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; CbObjectWriter Obj; Obj << "moduleId" << ModuleId; - - return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save()); } catch (const std::exception& Ex) { ZEN_ERROR("Exception while hibernating module '{}': {}", ModuleId, Ex.what()); - - FailureReason = Ex.what(); + throw; } - - Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason); }, HttpVerb::kPost); m_Router.RegisterRoute( "modules/{moduleid}/wake", [this](HttpRouterRequest& Req) { - std::string_view ModuleId = Req.GetCapture(1); - std::string FailureReason = "unknown"; + std::string_view ModuleId = Req.GetCapture(1); try { - if (!m_Hub.Wake(std::string(ModuleId), /* out */ FailureReason)) + Hub::Response Resp = m_Hub.Wake(std::string(ModuleId)); + + if (HandleFailureResults(Req.ServerRequest(), Resp)) { - if (FailureReason.empty()) - { - return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound); - } - else - { - return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason); - } + return; } + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; CbObjectWriter Obj; Obj << "moduleId" << ModuleId; - - return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + return Req.ServerRequest().WriteResponse(HttpCode, Obj.Save()); } catch (const std::exception& Ex) { ZEN_ERROR("Exception while waking module '{}': {}", ModuleId, Ex.what()); - - FailureReason = Ex.what(); + throw; } - - Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason); }, HttpVerb::kPost); @@ -234,10 +238,15 @@ HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub) Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); }, HttpVerb::kGet); + + m_StatsService.RegisterHandler("hub", *this); + m_StatusService.RegisterHandler("hub", *this); } HttpHubService::~HttpHubService() { + m_StatusService.UnregisterHandler("hub", *this); + m_StatsService.UnregisterHandler("hub", *this); } const char* @@ -254,9 +263,50 @@ HttpHubService::SetNotificationEndpoint(std::string_view UpstreamNotificationEnd } void -HttpHubService::HandleRequest(zen::HttpServerRequest& Request) +HttpHubService::HandleRequest(HttpServerRequest& Request) +{ + using namespace std::literals; + + metrics::OperationTiming::Scope $(m_HttpRequests); + if (m_Router.HandleRequest(Request) == false) + { + ZEN_WARN("No route found for {0}", Request.RelativeUri()); + return Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, "Not found"sv); + } +} + +void +HttpHubService::HandleStatusRequest(HttpServerRequest& Request) { - m_Router.HandleRequest(Request); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void +HttpHubService::HandleStatsRequest(HttpServerRequest& Request) +{ + Request.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + +CbObject +HttpHubService::CollectStats() +{ + CbObjectWriter Cbo; + + EmitSnapshot("requests", m_HttpRequests, Cbo); + + Cbo << "currentInstanceCount" << m_Hub.GetInstanceCount(); + Cbo << "maxInstanceCount" << m_Hub.GetMaxInstanceCount(); + Cbo << "instanceLimit" << m_Hub.GetConfig().InstanceLimit; + + return Cbo.Save(); +} + +uint64_t +HttpHubService::GetActivityCounter() +{ + return m_HttpRequests.Count(); } void @@ -288,27 +338,27 @@ HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view if (InstanceInfo.State == HubInstanceState::Provisioned || InstanceInfo.State == HubInstanceState::Hibernated || InstanceInfo.State == HubInstanceState::Crashed) { - std::string FailureReason; try { - if (!m_Hub.Deprovision(std::string(ModuleId), FailureReason)) + Hub::Response Resp = m_Hub.Deprovision(std::string(ModuleId)); + + if (HandleFailureResults(Request, Resp)) { - if (FailureReason.empty()) - { - Request.WriteResponse(HttpResponseCode::NotFound); - } - else - { - Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason); - } return; } + + // TODO: nuke all related storage + + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; + CbObjectWriter Obj; + Obj << "moduleId" << ModuleId; + return Request.WriteResponse(HttpCode, Obj.Save()); } catch (const std::exception& Ex) { ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what()); - Request.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, Ex.what()); - return; + throw; } } @@ -316,7 +366,6 @@ HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view CbObjectWriter Obj; Obj << "moduleId" << ModuleId; - Obj << "state" << ToString(InstanceInfo.State); Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); } diff --git a/src/zenserver/hub/httphubservice.h b/src/zenserver/hub/httphubservice.h index d08eeea2a..1bb1c303e 100644 --- a/src/zenserver/hub/httphubservice.h +++ b/src/zenserver/hub/httphubservice.h @@ -3,9 +3,11 @@ #pragma once #include <zenhttp/httpserver.h> +#include <zenhttp/httpstatus.h> namespace zen { +class HttpStatsService; class Hub; /** ZenServer Hub Service @@ -14,25 +16,33 @@ class Hub; * use in UEFN content worker style scenarios. * */ -class HttpHubService : public zen::HttpService +class HttpHubService : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider { public: - HttpHubService(Hub& Hub); + HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpStatusService& StatusService); ~HttpHubService(); HttpHubService(const HttpHubService&) = delete; HttpHubService& operator=(const HttpHubService&) = delete; virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; void SetNotificationEndpoint(std::string_view UpstreamNotificationEndpoint, std::string_view InstanceId); private: - HttpRequestRouter m_Router; - Hub& m_Hub; + HttpRequestRouter m_Router; + metrics::OperationTiming m_HttpRequests; + + HttpStatsService& m_StatsService; + HttpStatusService& m_StatusService; + void HandleModuleGet(HttpServerRequest& Request, std::string_view ModuleId); void HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId); }; diff --git a/src/zenserver/hub/hub.cpp b/src/zenserver/hub/hub.cpp index 6a2609443..6c44e2333 100644 --- a/src/zenserver/hub/hub.cpp +++ b/src/zenserver/hub/hub.cpp @@ -10,6 +10,8 @@ #include <zencore/logging.h> #include <zencore/scopeguard.h> #include <zencore/timer.h> +#include <zencore/workthreadpool.h> +#include <zenhttp/httpclient.h> ZEN_THIRD_PARTY_INCLUDES_START #include <EASTL/fixed_vector.h> @@ -20,8 +22,6 @@ ZEN_THIRD_PARTY_INCLUDES_END # include <zencore/filesystem.h> # include <zencore/testing.h> # include <zencore/testutils.h> -# include <zencore/workthreadpool.h> -# include <zenhttp/httpclient.h> #endif #include <numeric> @@ -122,10 +122,17 @@ private: ////////////////////////////////////////////////////////////////////////// -Hub::Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback) +Hub::Hub(const Configuration& Config, + ZenServerEnvironment&& RunEnvironment, + WorkerThreadPool* OptionalWorkerPool, + AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback) : m_Config(Config) , m_RunEnvironment(std::move(RunEnvironment)) +, m_WorkerPool(OptionalWorkerPool) +, m_BackgroundWorkLatch(1) , m_ModuleStateChangeCallback(std::move(ModuleStateChangeCallback)) +, m_ActiveInstances(Config.InstanceLimit) +, m_FreeActiveInstanceIndexes(Config.InstanceLimit) { m_HostMetrics = GetSystemMetrics(); m_ResourceLimits.DiskUsageBytes = 1000ull * 1024 * 1024 * 1024; @@ -148,10 +155,7 @@ Hub::Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, Asy ZEN_ASSERT(uint64_t(Config.BasePortNumber) + Config.InstanceLimit <= std::numeric_limits<uint16_t>::max()); m_InstanceLookup.reserve(Config.InstanceLimit); - m_ActiveInstances.reserve(Config.InstanceLimit); - - m_FreePorts.resize(Config.InstanceLimit); - std::iota(m_FreePorts.begin(), m_FreePorts.end(), Config.BasePortNumber); + std::iota(m_FreeActiveInstanceIndexes.begin(), m_FreeActiveInstanceIndexes.end(), 0); #if ZEN_PLATFORM_WINDOWS if (m_Config.UseJobObject) @@ -175,7 +179,10 @@ Hub::~Hub() try { // Safety call - should normally be properly Shutdown by owner - Shutdown(); + if (!m_ShutdownFlag.load()) + { + Shutdown(); + } } catch (const std::exception& e) { @@ -196,54 +203,53 @@ Hub::Shutdown() m_WatchDog = {}; - m_Lock.WithExclusiveLock([this] { - for (auto& [ModuleId, ActiveInstanceIndex] : m_InstanceLookup) - { - std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex]; - { - StorageServerInstance::ExclusiveLockedPtr Instance(InstanceRaw->LockExclusive(/*Wait*/ true)); + bool Expected = false; + bool WaitForBackgroundWork = m_ShutdownFlag.compare_exchange_strong(Expected, true); + if (WaitForBackgroundWork && m_WorkerPool) + { + m_BackgroundWorkLatch.CountDown(); + m_BackgroundWorkLatch.Wait(); + // Shutdown flag is set and all background work is drained, safe to shut down remaining instances - uint16_t BasePort = Instance.GetBasePort(); - std::string BaseUri; // TODO? - HubInstanceState OldState = Instance.GetState(); - HubInstanceState NewState = OldState; - InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri); + m_BackgroundWorkLatch.Reset(1); + } - try - { - (void)Instance.Deprovision(); - } - catch (const std::exception& Ex) - { - ZEN_WARN("Failed to deprovision instance for module '{}' during hub shutdown: {}", ModuleId, Ex.what()); - } - // Instance is being destroyed; always report Unprovisioned so callbacks (e.g. Consul) fire. - NewState = HubInstanceState::Unprovisioned; - Instance = {}; + EnumerateModules([&](std::string_view ModuleId, const InstanceInfo& Info) { + ZEN_UNUSED(Info); + try + { + const Response DepResp = InternalDeprovision(std::string(ModuleId), [](ActiveInstance& Instance) { + ZEN_UNUSED(Instance); + return true; + }); + if (DepResp.ResponseCode != EResponseCode::Completed && DepResp.ResponseCode != EResponseCode::Accepted) + { + ZEN_WARN("Deprovision instance for module '{}' during hub shutdown rejected: {}", ModuleId, DepResp.Message); } - InstanceRaw.reset(); } - m_InstanceLookup.clear(); - m_ActiveInstances.clear(); - m_FreeActiveInstanceIndexes.clear(); + catch (const std::exception& Ex) + { + ZEN_WARN("Failed to deprovision instance for module '{}' during hub shutdown: {}", ModuleId, Ex.what()); + } }); + + if (WaitForBackgroundWork && m_WorkerPool) + { + m_BackgroundWorkLatch.CountDown(); + m_BackgroundWorkLatch.Wait(); + } } -bool -Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, std::string& OutReason) +Hub::Response +Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo) { + ZEN_ASSERT(!m_ShutdownFlag.load()); StorageServerInstance::ExclusiveLockedPtr Instance; - bool IsNewInstance = false; - uint16_t AllocatedPort = 0; + bool IsNewInstance = false; + size_t ActiveInstanceIndex = (size_t)-1; + HubInstanceState OldState = HubInstanceState::Unprovisioned; { RwLock::ExclusiveLockScope _(m_Lock); - auto RestoreAllocatedPort = MakeGuard([this, ModuleId, &IsNewInstance, &AllocatedPort]() { - if (IsNewInstance && AllocatedPort != 0 && !m_InstanceLookup.contains(std::string(ModuleId))) - { - m_FreePorts.push_back(AllocatedPort); - AllocatedPort = 0; - } - }); if (auto It = m_InstanceLookup.find(std::string(ModuleId)); It == m_InstanceLookup.end()) { @@ -252,51 +258,53 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, s { ZEN_WARN("Cannot provision new storage server instance for module '{}': {}", ModuleId, Reason); - OutReason = Reason; - - return false; + return Response{EResponseCode::Rejected, Reason}; } IsNewInstance = true; - AllocatedPort = m_FreePorts.front(); - ZEN_ASSERT(AllocatedPort != 0); - m_FreePorts.pop_front(); + ActiveInstanceIndex = m_FreeActiveInstanceIndexes.front(); + m_FreeActiveInstanceIndexes.pop_front(); + ZEN_ASSERT(m_ActiveInstances.size() > ActiveInstanceIndex); - auto NewInstance = std::make_unique<StorageServerInstance>( - m_RunEnvironment, - StorageServerInstance::Configuration{.BasePort = AllocatedPort, - .HydrationTempPath = m_HydrationTempPath, - .HydrationTargetSpecification = m_HydrationTargetSpecification, - .HttpThreadCount = m_Config.InstanceHttpThreadCount, - .CoreLimit = m_Config.InstanceCoreLimit, - .ConfigPath = m_Config.InstanceConfigPath}, - ModuleId); + try + { + auto NewInstance = std::make_unique<StorageServerInstance>( + m_RunEnvironment, + StorageServerInstance::Configuration{.BasePort = GetInstanceIndexAssignedPort(ActiveInstanceIndex), + .HydrationTempPath = m_HydrationTempPath, + .HydrationTargetSpecification = m_HydrationTargetSpecification, + .HttpThreadCount = m_Config.InstanceHttpThreadCount, + .CoreLimit = m_Config.InstanceCoreLimit, + .ConfigPath = m_Config.InstanceConfigPath}, + ModuleId); #if ZEN_PLATFORM_WINDOWS - if (m_JobObject.IsValid()) - { - NewInstance->SetJobObject(&m_JobObject); - } + if (m_JobObject.IsValid()) + { + NewInstance->SetJobObject(&m_JobObject); + } #endif - Instance = NewInstance->LockExclusive(/*Wait*/ true); + Instance = NewInstance->LockExclusive(/*Wait*/ true); - size_t ActiveInstanceIndex = (size_t)-1; - if (!m_FreeActiveInstanceIndexes.empty()) - { - ActiveInstanceIndex = m_FreeActiveInstanceIndexes.back(); - m_FreeActiveInstanceIndexes.pop_back(); - ZEN_ASSERT(m_ActiveInstances.size() > ActiveInstanceIndex); - m_ActiveInstances[ActiveInstanceIndex] = std::move(NewInstance); + m_ActiveInstances[ActiveInstanceIndex].Instance = std::move(NewInstance); + m_InstanceLookup.insert_or_assign(std::string(ModuleId), ActiveInstanceIndex); + // Set Provisioning while both hub lock and instance lock are held so that any + // concurrent Deprovision sees the in-flight state, not Unprovisioned. + OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioning); } - else + catch (const std::exception&) { - ActiveInstanceIndex = m_ActiveInstances.size(); - m_ActiveInstances.emplace_back(std::move(NewInstance)); + Instance = {}; + m_ActiveInstances[ActiveInstanceIndex].Instance.reset(); + m_ActiveInstances[ActiveInstanceIndex].State.store(HubInstanceState::Unprovisioned); + m_InstanceLookup.erase(std::string(ModuleId)); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + throw; } - ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1); - m_InstanceLookup.insert_or_assign(std::string(ModuleId), ActiveInstanceIndex); + + OutInfo.Port = GetInstanceIndexAssignedPort(ActiveInstanceIndex); ZEN_INFO("Created new storage server instance for module '{}'", ModuleId); @@ -308,317 +316,623 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, s } else { - const size_t ActiveInstanceIndex = It->second; + ActiveInstanceIndex = It->second; ZEN_ASSERT(m_ActiveInstances.size() > ActiveInstanceIndex); - if (m_RecoveringModules.contains(std::string(ModuleId))) + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + + std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance; + ZEN_ASSERT(InstanceRaw); + + OutInfo.Port = InstanceRaw->GetBasePort(); + + switch (CurrentState) { - OutReason = fmt::format("Module '{}' is currently recovering from a crash", ModuleId); - ZEN_WARN("Attempted to provision module '{}' which is currently recovering", ModuleId); - return false; + case HubInstanceState::Provisioning: + return Response{EResponseCode::Accepted}; + case HubInstanceState::Crashed: + case HubInstanceState::Unprovisioned: + break; + case HubInstanceState::Provisioned: + return Response{EResponseCode::Completed}; + case HubInstanceState::Hibernated: + _.ReleaseNow(); + return Wake(std::string(ModuleId)); + default: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))}; } - std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex]; - Instance = InstanceRaw->LockExclusive(/*Wait*/ true); - AllocatedPort = InstanceRaw->GetBasePort(); - } + Instance = InstanceRaw->LockExclusive(/*Wait*/ true); - m_ProvisioningModules.emplace(std::string(ModuleId)); + // Re-validate state after acquiring the instance lock: a concurrent Provision may have + // completed between our hub-lock read and LockExclusive, transitioning the state away + // from Crashed/Unprovisioned. + HubInstanceState ActualState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + if (ActualState != HubInstanceState::Crashed && ActualState != HubInstanceState::Unprovisioned) + { + Instance = {}; + if (ActualState == HubInstanceState::Provisioned) + { + return Response{EResponseCode::Completed}; + } + if (ActualState == HubInstanceState::Provisioning) + { + return Response{EResponseCode::Accepted}; + } + return Response{ + EResponseCode::Rejected, + fmt::format("Module '{}' state changed to '{}' before provision could proceed", ModuleId, ToString(ActualState))}; + } + // Set Provisioning while both hub lock and instance lock are held so that any + // concurrent Deprovision sees the in-flight state, not Crashed/Unprovisioned. + OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioning); + } } + // NOTE: done while not holding the hub lock, to avoid blocking other operations. + // Both hub-lock paths above set OldState and updated the state to Provisioning before + // releasing the hub lock, so concurrent operations already see the in-flight state. + ZEN_ASSERT(Instance); + ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1); - uint16_t BasePort = Instance.GetBasePort(); - std::string BaseUri; // TODO? - HubInstanceState OldState = Instance.GetState(); - HubInstanceState NewState = OldState; - InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri); + NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Provisioning, OutInfo.Port, {}); - auto RemoveProvisioningModule = MakeGuard([&] { - RwLock::ExclusiveLockScope _(m_Lock); - m_ProvisioningModules.erase(std::string(ModuleId)); - if (IsNewInstance && AllocatedPort != 0 && !m_InstanceLookup.contains(std::string(ModuleId))) + if (m_WorkerPool) + { + m_BackgroundWorkLatch.AddCount(1); + try { - m_FreePorts.push_back(AllocatedPort); - AllocatedPort = 0; + m_WorkerPool->ScheduleWork( + [this, + ModuleId = std::string(ModuleId), + ActiveInstanceIndex, + OldState, + IsNewInstance, + Instance = std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance))]() { + auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); }); + try + { + CompleteProvision(*Instance, ActiveInstanceIndex, OldState, IsNewInstance); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async provision of module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); } - }); - - // NOTE: this is done while not holding the hub lock, as provisioning may take time - // and we don't want to block other operations. We track which modules are being - // provisioned using m_ProvisioningModules, and reject attempts to provision/deprovision - // those modules while in this state. + catch (const std::exception& DispatchEx) + { + // Dispatch failed: undo latch increment and roll back state. + ZEN_ERROR("Failed async dispatch provision of module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); - try - { - (void)Instance.Provision(); // false = already in target state (idempotent); not an error - NewState = Instance.GetState(); - Instance = {}; - } - catch (const std::exception& Ex) - { - ZEN_ERROR("Failed to provision storage server instance for module '{}': {}", ModuleId, Ex.what()); - NewState = Instance.GetState(); - Instance = {}; + // dispatch failed before the lambda ran, so ActiveInstance::State is still Provisioning + NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, OldState, OutInfo.Port, {}); - if (IsNewInstance) - { - // Clean up failed instance provisioning std::unique_ptr<StorageServerInstance> DestroyInstance; { - RwLock::ExclusiveLockScope _(m_Lock); - if (auto It = m_InstanceLookup.find(std::string(ModuleId)); It != m_InstanceLookup.end()) + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + if (IsNewInstance) { - const size_t ActiveInstanceIndex = It->second; - ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex]); - ZEN_ASSERT(DestroyInstance); - ZEN_ASSERT(!m_ActiveInstances[ActiveInstanceIndex]); + DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); - m_InstanceLookup.erase(It); + m_InstanceLookup.erase(std::string(ModuleId)); } + UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState); } - try - { - DestroyInstance.reset(); - NewState = HubInstanceState::Unprovisioned; - } - catch (const std::exception& DestroyEx) - { - ZEN_ERROR("Failed to destroy instance for failed provision module '{}': {}", ModuleId, DestroyEx.what()); - } + DestroyInstance.reset(); + + throw; } - throw; + } + else + { + CompleteProvision(Instance, ActiveInstanceIndex, OldState, IsNewInstance); } - OutReason.clear(); - OutInfo.Port = AllocatedPort; - // TODO: base URI? Would need to know what host name / IP to use - - return true; + return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; } -bool -Hub::Deprovision(const std::string& ModuleId, std::string& OutReason) +void +Hub::CompleteProvision(StorageServerInstance::ExclusiveLockedPtr& Instance, + size_t ActiveInstanceIndex, + HubInstanceState OldState, + bool IsNewInstance) { - std::unique_ptr<StorageServerInstance> RawInstance; - StorageServerInstance::ExclusiveLockedPtr Instance; + const std::string ModuleId(Instance.GetModuleId()); + const uint16_t Port = Instance.GetBasePort(); + std::string BaseUri; // TODO? + if (m_ShutdownFlag.load() == false) { - RwLock::ExclusiveLockScope _(m_Lock); - - if (m_ProvisioningModules.contains(ModuleId)) + try { - OutReason = fmt::format("Module '{}' is currently being provisioned", ModuleId); - - ZEN_WARN("Attempted to deprovision module '{}' which is currently being provisioned", ModuleId); - - return false; + switch (OldState) + { + case HubInstanceState::Crashed: + case HubInstanceState::Unprovisioned: + Instance.Provision(); + break; + case HubInstanceState::Hibernated: + ZEN_ASSERT(false); // unreachable: Provision redirects Hibernated->Wake before setting Provisioning + break; + default: + ZEN_ASSERT(false); + } + UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioned); + NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, HubInstanceState::Provisioned, Port, BaseUri); + Instance = {}; + return; } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to provision storage server instance for module '{}': {}", ModuleId, Ex.what()); + // Instance will be notified and removed below. + } + } - if (m_RecoveringModules.contains(ModuleId)) + if (IsNewInstance) + { + NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, HubInstanceState::Unprovisioned, Port, {}); + Instance = {}; + std::unique_ptr<StorageServerInstance> DestroyInstance; { - OutReason = fmt::format("Module '{}' is currently recovering from a crash", ModuleId); - ZEN_WARN("Attempted to deprovision module '{}' which is currently recovering", ModuleId); - return false; + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + m_InstanceLookup.erase(std::string(ModuleId)); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); } + DestroyInstance.reset(); + } + else + { + // OldState = Crashed: restore without cleanup (instance stays in lookup) + NotifyStateUpdate(ModuleId, HubInstanceState::Provisioning, OldState, Port, {}); + UpdateInstanceState(Instance, ActiveInstanceIndex, OldState); + Instance = {}; + } +} + +Hub::Response +Hub::Deprovision(const std::string& ModuleId) +{ + ZEN_ASSERT(!m_ShutdownFlag.load()); + return InternalDeprovision(ModuleId, [](ActiveInstance& Instance) { + ZEN_UNUSED(Instance); + return true; + }); +} + +Hub::Response +Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveInstance& Instance)>&& DeprovisionGate) +{ + StorageServerInstance::ExclusiveLockedPtr Instance; + size_t ActiveInstanceIndex = (size_t)-1; + { + RwLock::ExclusiveLockScope _(m_Lock); if (auto It = m_InstanceLookup.find(ModuleId); It == m_InstanceLookup.end()) { ZEN_WARN("Attempted to deprovision non-existent module '{}'", ModuleId); - OutReason.clear(); // empty = not found (-> 404) - return false; + return Response{EResponseCode::NotFound}; } else { - const size_t ActiveInstanceIndex = It->second; + ActiveInstanceIndex = It->second; ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - RawInstance = std::move(m_ActiveInstances[ActiveInstanceIndex]); + + if (!DeprovisionGate(m_ActiveInstances[ActiveInstanceIndex])) + { + return Response{EResponseCode::Rejected, fmt::format("Module '{}' deprovision denied by gate", ModuleId)}; + } + + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + + switch (CurrentState) + { + case HubInstanceState::Deprovisioning: + return Response{EResponseCode::Accepted}; + case HubInstanceState::Crashed: + case HubInstanceState::Hibernated: + case HubInstanceState::Provisioned: + break; + case HubInstanceState::Unprovisioned: + return Response{EResponseCode::Completed}; + case HubInstanceState::Recovering: + // Recovering is watchdog-managed; reject to avoid interfering with the in-progress + // recovery. The watchdog will transition to Provisioned or Unprovisioned, after + // which deprovision can be retried. + return Response{EResponseCode::Rejected, fmt::format("Module '{}' is currently recovering from a crash", ModuleId)}; + default: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))}; + } + + std::unique_ptr<StorageServerInstance>& RawInstance = m_ActiveInstances[ActiveInstanceIndex].Instance; ZEN_ASSERT(RawInstance != nullptr); - m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); - m_InstanceLookup.erase(It); - m_DeprovisioningModules.emplace(ModuleId); Instance = RawInstance->LockExclusive(/*Wait*/ true); } } - ZEN_ASSERT(RawInstance); + // NOTE: done while not holding the hub lock, to avoid blocking other operations. + // The exclusive instance lock acquired above prevents concurrent LockExclusive callers + // from modifying instance state. The state transition to Deprovisioning happens below, + // after the hub lock is released. + ZEN_ASSERT(Instance); + ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1); - uint16_t BasePort = Instance.GetBasePort(); - std::string BaseUri; // TODO? - HubInstanceState OldState = Instance.GetState(); - HubInstanceState NewState = OldState; - InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri); + HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Deprovisioning); + const uint16_t Port = Instance.GetBasePort(); + NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Deprovisioning, Port, {}); - // The module is deprovisioned outside the hub lock to avoid blocking other operations. - // - // To ensure that no new provisioning can occur while we're deprovisioning, - // we add the module ID to m_DeprovisioningModules and remove it once - // deprovisioning is complete. + if (m_WorkerPool) + { + std::shared_ptr<StorageServerInstance::ExclusiveLockedPtr> SharedInstancePtr = + std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance)); - auto _ = MakeGuard([&] { + m_BackgroundWorkLatch.AddCount(1); + try { - RwLock::ExclusiveLockScope _(m_Lock); - m_DeprovisioningModules.erase(ModuleId); - m_FreePorts.push_back(BasePort); + m_WorkerPool->ScheduleWork( + [this, ModuleId = std::string(ModuleId), ActiveInstanceIndex, Instance = std::move(SharedInstancePtr)]() mutable { + auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); }); + try + { + CompleteDeprovision(*Instance, ActiveInstanceIndex); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async deprovision of module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); } - }); + catch (const std::exception& DispatchEx) + { + // Dispatch failed: undo latch increment and roll back state. + ZEN_ERROR("Failed async dispatch deprovision of module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + + NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, OldState, Port, {}); + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState); + } + + throw; + } + } + else + { + CompleteDeprovision(Instance, ActiveInstanceIndex); + } + + return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; +} + +void +Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex) +{ + const std::string ModuleId(Instance.GetModuleId()); + const uint16_t Port = Instance.GetBasePort(); try { - (void)Instance.Deprovision(); + Instance.Deprovision(); } catch (const std::exception& Ex) { ZEN_ERROR("Failed to deprovision storage server instance for module '{}': {}", ModuleId, Ex.what()); - // The module is already removed from m_InstanceLookup; treat as gone so callbacks fire correctly. - NewState = HubInstanceState::Unprovisioned; + // Effectively unreachable: Shutdown() never throws and Dehydrate() failures are swallowed + // by DeprovisionLocked. Kept as a safety net; if somehow reached, transition to Crashed + // so the watchdog can attempt recovery. Instance = {}; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Crashed); + } + NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, HubInstanceState::Crashed, Port, {}); throw; } - NewState = Instance.GetState(); + + NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, HubInstanceState::Unprovisioned, Port, {}); Instance = {}; - OutReason.clear(); - return true; + std::unique_ptr<StorageServerInstance> DeleteInstance; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + auto It = m_InstanceLookup.find(std::string(ModuleId)); + ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(It->second == ActiveInstanceIndex); + DeleteInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + m_InstanceLookup.erase(It); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); + } + DeleteInstance.reset(); } -bool -Hub::Hibernate(const std::string& ModuleId, std::string& OutReason) +Hub::Response +Hub::Hibernate(const std::string& ModuleId) { + ZEN_ASSERT(!m_ShutdownFlag.load()); + StorageServerInstance::ExclusiveLockedPtr Instance; + size_t ActiveInstanceIndex = (size_t)-1; { RwLock::ExclusiveLockScope _(m_Lock); - if (m_ProvisioningModules.contains(ModuleId) || m_DeprovisioningModules.contains(ModuleId) || - m_HibernatingModules.contains(ModuleId) || m_WakingModules.contains(ModuleId) || m_RecoveringModules.contains(ModuleId)) - { - OutReason = fmt::format("Module '{}' is currently changing state", ModuleId); - return false; - } - auto It = m_InstanceLookup.find(ModuleId); if (It == m_InstanceLookup.end()) { - OutReason.clear(); // empty = not found (-> 404) - return false; + return Response{EResponseCode::NotFound}; } - const size_t ActiveInstanceIndex = It->second; + ActiveInstanceIndex = It->second; ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - Instance = m_ActiveInstances[ActiveInstanceIndex]->LockExclusive(/*Wait*/ true); - m_HibernatingModules.emplace(ModuleId); + + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + + switch (CurrentState) + { + case HubInstanceState::Hibernating: + return Response{EResponseCode::Accepted}; + case HubInstanceState::Provisioned: + break; + case HubInstanceState::Hibernated: + return Response{EResponseCode::Completed}; + default: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))}; + } + + std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance; + ZEN_ASSERT(InstanceRaw); + + Instance = InstanceRaw->LockExclusive(/*Wait*/ true); + + // Re-validate state after acquiring the instance lock: WatchDog may have transitioned + // Provisioned -> Crashed between our hub-lock read and the LockExclusive call above. + + HubInstanceState ActualState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + if (ActualState != HubInstanceState::Provisioned) + { + Instance = {}; + return Response{ + EResponseCode::Rejected, + fmt::format("Module '{}' state changed to '{}' before hibernate could proceed", ModuleId, ToString(ActualState))}; + } } + // NOTE: done while not holding the hub lock, to avoid blocking other operations. + // Any concurrent caller that acquired the hub lock and saw Provisioned will now block on + // LockExclusive(Wait=true); by the time it acquires the lock, UpdateInstanceState below + // will have already changed the state and the re-validate above will reject it. + ZEN_ASSERT(Instance); - uint16_t BasePort = Instance.GetBasePort(); - std::string BaseUri; // TODO? - HubInstanceState OldState = Instance.GetState(); - HubInstanceState NewState = OldState; - InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri); + ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1); - auto RemoveHibernatingModule = MakeGuard([&] { - RwLock::ExclusiveLockScope _(m_Lock); - m_HibernatingModules.erase(ModuleId); - }); + HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Hibernating); + const uint16_t Port = Instance.GetBasePort(); + NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Hibernating, Port, {}); - // NOTE: done while not holding the hub lock, as hibernation may take time. - // m_HibernatingModules tracks which modules are being hibernated, blocking - // concurrent Hibernate/Wake/Provision/Deprovision attempts on the same module. - try + if (m_WorkerPool) { - if (!Instance.Hibernate()) + m_BackgroundWorkLatch.AddCount(1); + try { - OutReason = fmt::format("Module '{}' cannot be hibernated from state '{}'", ModuleId, ToString(Instance.GetState())); - NewState = Instance.GetState(); - return false; + m_WorkerPool->ScheduleWork( + [this, + ModuleId = std::string(ModuleId), + ActiveInstanceIndex, + OldState, + Instance = std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance))]() { + auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); }); + try + { + CompleteHibernate(*Instance, ActiveInstanceIndex, OldState); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async hibernate of module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); } - NewState = Instance.GetState(); + catch (const std::exception& DispatchEx) + { + // Dispatch failed: undo latch increment and roll back state. + ZEN_ERROR("Failed async dispatch hibernate of module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + + NotifyStateUpdate(ModuleId, HubInstanceState::Hibernating, OldState, Port, {}); + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState); + } + + throw; + } + } + else + { + CompleteHibernate(Instance, ActiveInstanceIndex, OldState); + } + + return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; +} + +void +Hub::CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState) +{ + const std::string ModuleId(Instance.GetModuleId()); + const uint16_t Port = Instance.GetBasePort(); + + try + { + Instance.Hibernate(); + UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Hibernated); + NotifyStateUpdate(ModuleId, HubInstanceState::Hibernating, HubInstanceState::Hibernated, Port, {}); Instance = {}; } catch (const std::exception& Ex) { ZEN_ERROR("Failed to hibernate storage server instance for module '{}': {}", ModuleId, Ex.what()); - NewState = Instance.GetState(); + UpdateInstanceState(Instance, ActiveInstanceIndex, OldState); + NotifyStateUpdate(ModuleId, HubInstanceState::Hibernating, OldState, Port, {}); Instance = {}; throw; } - - OutReason.clear(); - - return true; } -bool -Hub::Wake(const std::string& ModuleId, std::string& OutReason) +Hub::Response +Hub::Wake(const std::string& ModuleId) { + ZEN_ASSERT(!m_ShutdownFlag.load()); + StorageServerInstance::ExclusiveLockedPtr Instance; + size_t ActiveInstanceIndex = (size_t)-1; { RwLock::ExclusiveLockScope _(m_Lock); - if (m_ProvisioningModules.contains(ModuleId) || m_DeprovisioningModules.contains(ModuleId) || - m_HibernatingModules.contains(ModuleId) || m_WakingModules.contains(ModuleId) || m_RecoveringModules.contains(ModuleId)) - { - OutReason = fmt::format("Module '{}' is currently changing state", ModuleId); - return false; - } - auto It = m_InstanceLookup.find(ModuleId); if (It == m_InstanceLookup.end()) { - OutReason.clear(); // empty = not found (-> 404) - return false; + return Response{EResponseCode::NotFound}; } - const size_t ActiveInstanceIndex = It->second; + ActiveInstanceIndex = It->second; ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - Instance = m_ActiveInstances[ActiveInstanceIndex]->LockExclusive(/*Wait*/ true); - m_WakingModules.emplace(ModuleId); + + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + + switch (CurrentState) + { + case HubInstanceState::Waking: + return Response{EResponseCode::Accepted}; + case HubInstanceState::Hibernated: + break; + case HubInstanceState::Provisioned: + return Response{EResponseCode::Completed}; + default: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))}; + } + + std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance; + ZEN_ASSERT(InstanceRaw); + + Instance = InstanceRaw->LockExclusive(/*Wait*/ true); + + // Re-validate state after acquiring the instance lock: a concurrent Wake or Deprovision may + // have transitioned Hibernated -> something else between our hub-lock read and LockExclusive. + HubInstanceState ActualState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + if (ActualState != HubInstanceState::Hibernated) + { + Instance = {}; + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' state changed to '{}' before wake could proceed", ModuleId, ToString(ActualState))}; + } } + // NOTE: done while not holding the hub lock, to avoid blocking other operations. + // Any concurrent caller that acquired the hub lock and saw Hibernated will now block on + // LockExclusive(Wait=true); by the time it acquires the lock, UpdateInstanceState below + // will have already changed the state and the re-validate above will reject it. + ZEN_ASSERT(Instance); + ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1); - uint16_t BasePort = Instance.GetBasePort(); - std::string BaseUri; // TODO? - HubInstanceState OldState = Instance.GetState(); - HubInstanceState NewState = OldState; - InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri); + HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Waking); + const uint16_t Port = Instance.GetBasePort(); + NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Waking, Port, {}); - auto RemoveWakingModule = MakeGuard([&] { - RwLock::ExclusiveLockScope _(m_Lock); - m_WakingModules.erase(ModuleId); - }); - - // NOTE: done while not holding the hub lock, as waking may take time. - // m_WakingModules tracks which modules are being woken, blocking - // concurrent Hibernate/Wake/Provision/Deprovision attempts on the same module. - try + if (m_WorkerPool) { - if (!Instance.Wake()) + m_BackgroundWorkLatch.AddCount(1); + try { - OutReason = fmt::format("Module '{}' cannot be woken from state '{}'", ModuleId, ToString(Instance.GetState())); - NewState = Instance.GetState(); - return false; + m_WorkerPool->ScheduleWork( + [this, + ModuleId = std::string(ModuleId), + ActiveInstanceIndex, + OldState, + Instance = std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance))]() { + auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); }); + try + { + CompleteWake(*Instance, ActiveInstanceIndex, OldState); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async wake of module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + catch (const std::exception& DispatchEx) + { + // Dispatch failed: undo latch increment and roll back state. + ZEN_ERROR("Failed async dispatch wake of module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + + NotifyStateUpdate(ModuleId, HubInstanceState::Waking, OldState, Port, {}); + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState); + } + + throw; } - NewState = Instance.GetState(); + } + else + { + CompleteWake(Instance, ActiveInstanceIndex, OldState); + } + + return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; +} + +void +Hub::CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState) +{ + const std::string ModuleId(Instance.GetModuleId()); + const uint16_t Port = Instance.GetBasePort(); + + try + { + Instance.Wake(); + UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioned); + NotifyStateUpdate(ModuleId, HubInstanceState::Waking, HubInstanceState::Provisioned, Port, {}); Instance = {}; } catch (const std::exception& Ex) { ZEN_ERROR("Failed to wake storage server instance for module '{}': {}", ModuleId, Ex.what()); - NewState = Instance.GetState(); + UpdateInstanceState(Instance, ActiveInstanceIndex, OldState); + NotifyStateUpdate(ModuleId, HubInstanceState::Waking, OldState, Port, {}); Instance = {}; throw; } - - OutReason.clear(); - - return true; } bool @@ -631,10 +945,10 @@ Hub::Find(std::string_view ModuleId, InstanceInfo* OutInstanceInfo) { const size_t ActiveInstanceIndex = It->second; ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex]; + const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex].Instance; ZEN_ASSERT(Instance); InstanceInfo Info{ - Instance->GetState(), + m_ActiveInstances[ActiveInstanceIndex].State.load(), std::chrono::system_clock::now() // TODO }; Instance->GetProcessMetrics(Info.Metrics); @@ -655,10 +969,10 @@ Hub::EnumerateModules(std::function<void(std::string_view ModuleId, const Instan RwLock::SharedLockScope _(m_Lock); for (auto& [ModuleId, ActiveInstanceIndex] : m_InstanceLookup) { - const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex]; + const std::unique_ptr<StorageServerInstance>& Instance = m_ActiveInstances[ActiveInstanceIndex].Instance; ZEN_ASSERT(Instance); InstanceInfo Info{ - Instance->GetState(), + m_ActiveInstances[ActiveInstanceIndex].State.load(), std::chrono::system_clock::now() // TODO }; Instance->GetProcessMetrics(Info.Metrics); @@ -703,144 +1017,316 @@ Hub::UpdateStats() bool Hub::CanProvisionInstance(std::string_view ModuleId, std::string& OutReason) { - if (m_DeprovisioningModules.contains(std::string(ModuleId))) - { - OutReason = fmt::format("module '{}' is currently being deprovisioned", ModuleId); - - return false; - } - - if (m_ProvisioningModules.contains(std::string(ModuleId))) - { - OutReason = fmt::format("module '{}' is currently being provisioned", ModuleId); - - return false; - } - - if (gsl::narrow_cast<int>(m_InstanceLookup.size()) >= m_Config.InstanceLimit) + ZEN_UNUSED(ModuleId); + if (m_FreeActiveInstanceIndexes.empty()) { OutReason = fmt::format("instance limit ({}) exceeded", m_Config.InstanceLimit); return false; } - // Since deprovisioning happens outside the lock and we don't return the port until the instance is fully shut down, we might be below - // the instance count limit but with no free ports available - if (m_FreePorts.empty()) - { - OutReason = fmt::format("no free ports available, deprovisioning of instances might be in flight ({})", - m_Config.InstanceLimit - m_InstanceLookup.size()); - - return false; - } - // TODO: handle additional resource metrics return true; } +uint16_t +Hub::GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const +{ + return gsl::narrow<uint16_t>(m_Config.BasePortNumber + ActiveInstanceIndex); +} + +HubInstanceState +Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState) +{ + ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); + ZEN_ASSERT_SLOW([](HubInstanceState From, HubInstanceState To) { + switch (From) + { + case HubInstanceState::Unprovisioned: + return To == HubInstanceState::Provisioning; + case HubInstanceState::Provisioned: + return To == HubInstanceState::Hibernating || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Crashed; + case HubInstanceState::Hibernated: + return To == HubInstanceState::Waking || To == HubInstanceState::Deprovisioning; + case HubInstanceState::Crashed: + return To == HubInstanceState::Provisioning || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Recovering; + case HubInstanceState::Provisioning: + return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned || To == HubInstanceState::Crashed; + case HubInstanceState::Hibernating: + return To == HubInstanceState::Hibernated || To == HubInstanceState::Provisioned; + case HubInstanceState::Waking: + return To == HubInstanceState::Provisioned || To == HubInstanceState::Hibernated; + case HubInstanceState::Deprovisioning: + return To == HubInstanceState::Unprovisioned || To == HubInstanceState::Provisioned || To == HubInstanceState::Hibernated || + To == HubInstanceState::Crashed; + case HubInstanceState::Recovering: + return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned; + } + return false; + }(m_ActiveInstances[ActiveInstanceIndex].State.load(), NewState)); + m_ActiveInstances[ActiveInstanceIndex].LastKnownActivitySum.store(0); + m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.store(std::chrono::system_clock::now()); + return m_ActiveInstances[ActiveInstanceIndex].State.exchange(NewState); +} + void Hub::AttemptRecoverInstance(std::string_view ModuleId) { StorageServerInstance::ExclusiveLockedPtr Instance; - StorageServerInstance* RawInstance = nullptr; + size_t ActiveInstanceIndex = (size_t)-1; { RwLock::ExclusiveLockScope _(m_Lock); - if (m_RecoveringModules.contains(std::string(ModuleId)) || m_ProvisioningModules.contains(std::string(ModuleId)) || - m_DeprovisioningModules.contains(std::string(ModuleId)) || m_HibernatingModules.contains(std::string(ModuleId)) || - m_WakingModules.contains(std::string(ModuleId))) + auto It = m_InstanceLookup.find(std::string(ModuleId)); + if (It == m_InstanceLookup.end()) { return; } - auto It = m_InstanceLookup.find(std::string(ModuleId)); - if (It == m_InstanceLookup.end()) + ActiveInstanceIndex = It->second; + ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); + std::unique_ptr<StorageServerInstance>& InstanceRaw = m_ActiveInstances[ActiveInstanceIndex].Instance; + ZEN_ASSERT(InstanceRaw); + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + if (CurrentState != HubInstanceState::Crashed) { return; } - const size_t ActiveInstanceIndex = It->second; - ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - RawInstance = m_ActiveInstances[ActiveInstanceIndex].get(); - Instance = RawInstance->LockExclusive(/*Wait*/ true); - m_RecoveringModules.emplace(std::string(ModuleId)); + Instance = m_ActiveInstances[ActiveInstanceIndex].Instance->LockExclusive(/*Wait*/ false); + if (!Instance) + { + // Instance lock is held by another operation; the watchdog will retry on the next cycle if the state is still Crashed. + return; + } + + ZEN_ASSERT(!Instance.IsRunning()); + + (void)UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Recovering); } ZEN_ASSERT(Instance); + ZEN_ASSERT(ActiveInstanceIndex != (size_t)-1); + ZEN_ASSERT_SLOW(m_ActiveInstances[ActiveInstanceIndex].State.load() == HubInstanceState::Recovering); - uint16_t BasePort = Instance.GetBasePort(); - std::string BaseUri; // TODO? - HubInstanceState OldState = Instance.GetState(); - HubInstanceState NewState = OldState; - InstanceStateUpdateGuard StateGuard(*this, ModuleId, OldState, NewState, BasePort, BaseUri); - - auto RemoveRecoveringModule = MakeGuard([&] { - RwLock::ExclusiveLockScope _(m_Lock); - m_RecoveringModules.erase(std::string(ModuleId)); - }); + NotifyStateUpdate(ModuleId, HubInstanceState::Crashed, HubInstanceState::Recovering, Instance.GetBasePort(), /*BaseUri*/ {}); - // Re-validate: state may have changed between releasing shared lock and acquiring exclusive lock - if (Instance.GetState() != HubInstanceState::Provisioned || Instance.IsRunning()) + // Dehydrate before trying to recover so any salvageable data is preserved. + try { - return; + Instance.Deprovision(); } - - if (Instance.RecoverFromCrash()) + catch (const std::exception& Ex) { - NewState = Instance.GetState(); + ZEN_ERROR("Failed to deprovision instance for module '{}' during crash recovery cleanup: {}", ModuleId, Ex.what()); + NotifyStateUpdate(ModuleId, HubInstanceState::Recovering, HubInstanceState::Unprovisioned, Instance.GetBasePort(), /*BaseUri*/ {}); Instance = {}; + std::unique_ptr<StorageServerInstance> DestroyInstance; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + auto It = m_InstanceLookup.find(std::string(ModuleId)); + ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(ActiveInstanceIndex == It->second); + + DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + m_InstanceLookup.erase(It); + (void)UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); + } + DestroyInstance.reset(); return; } - // Restart threw but data dir is intact - run Dehydrate via Deprovision before cleanup. try { - (void)Instance.Deprovision(); + Instance.Provision(); + UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Provisioned); + NotifyStateUpdate(ModuleId, HubInstanceState::Recovering, HubInstanceState::Provisioned, Instance.GetBasePort(), /*BaseUri*/ {}); + Instance = {}; } catch (const std::exception& Ex) { - ZEN_ERROR("Failed to deprovision instance for module '{}' during crash recovery cleanup: {}", ModuleId, Ex.what()); + ZEN_ERROR("Failed to reprovision instance for module '{}' during crash recovery reprovision: {}", ModuleId, Ex.what()); + NotifyStateUpdate(ModuleId, HubInstanceState::Recovering, HubInstanceState::Unprovisioned, Instance.GetBasePort(), /*BaseUri*/ {}); + Instance = {}; + std::unique_ptr<StorageServerInstance> DestroyInstance; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + auto It = m_InstanceLookup.find(std::string(ModuleId)); + ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(ActiveInstanceIndex == It->second); + + DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + m_InstanceLookup.erase(It); + (void)UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); + } + DestroyInstance.reset(); + return; } - NewState = Instance.GetState(); - Instance = {}; +} - std::unique_ptr<StorageServerInstance> DestroyInstance; +bool +Hub::CheckInstanceStatus(HttpClient& ActivityCheckClient, + StorageServerInstance::SharedLockedPtr&& LockedInstance, + size_t ActiveInstanceIndex) +{ + HubInstanceState InstanceState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + if (LockedInstance.IsRunning()) { - RwLock::ExclusiveLockScope _(m_Lock); - if (auto It = m_InstanceLookup.find(std::string(ModuleId)); It != m_InstanceLookup.end()) + LockedInstance.UpdateMetrics(); + if (InstanceState == HubInstanceState::Provisioned) { - const size_t ActiveInstanceIndex = It->second; - ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); - DestroyInstance = std::move(m_ActiveInstances[ActiveInstanceIndex]); - m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); - m_InstanceLookup.erase(It); + const std::string ModuleId(LockedInstance.GetModuleId()); + + const uint16_t Port = LockedInstance.GetBasePort(); + const uint64_t PreviousActivitySum = m_ActiveInstances[ActiveInstanceIndex].LastKnownActivitySum.load(); + const std::chrono::system_clock::time_point LastActivityTime = m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.load(); + + const std::chrono::system_clock::time_point Now = std::chrono::system_clock::now(); + + // We do the activity check without holding a lock to the instance + LockedInstance = {}; + + uint64_t ActivitySum = PreviousActivitySum; + + std::chrono::system_clock::time_point NextCheckTime = + LastActivityTime + m_Config.WatchDog.ProvisionedInactivityTimeout - m_Config.WatchDog.InactivityCheckMargin; + if (Now >= NextCheckTime) + { + ActivityCheckClient.SetBaseUri(fmt::format("http://localhost:{}", Port)); + HttpClient::Response Result = + ActivityCheckClient.Get("/stats/activity_counters", HttpClient::Accept(HttpContentType::kCbObject)); + if (Result.IsSuccess()) + { + CbObject Response = Result.AsObject(); + if (Response) + { + ActivitySum = Response["sum"].AsUInt64(); + } + } + } + + if (ActivitySum != PreviousActivitySum) + { + m_Lock.WithSharedLock([this, InstanceState, PreviousActivitySum, &LastActivityTime, ActivitySum, Now, ModuleId]() { + if (auto It = m_InstanceLookup.find(ModuleId); It != m_InstanceLookup.end()) + { + const uint64_t ActiveInstanceIndex = It->second; + ActiveInstance& Instance = m_ActiveInstances[ActiveInstanceIndex]; + + HubInstanceState CurrentState = Instance.State.load(); + if (CurrentState == InstanceState) + { + if (Instance.LastActivityTime.load() == LastActivityTime && + Instance.LastKnownActivitySum.load() == PreviousActivitySum) + { + Instance.LastActivityTime.store(Now); + Instance.LastKnownActivitySum.store(ActivitySum); + } + } + } + }); + } + else if (LastActivityTime + m_Config.WatchDog.ProvisionedInactivityTimeout < Now) + { + ZEN_INFO("Instance {} has not been active for {}, attempting deprovision...", + ModuleId, + NiceTimeSpanMs(std::chrono::duration_cast<std::chrono::milliseconds>(Now - LastActivityTime).count())); + (void)InternalDeprovision( + ModuleId, + [ModuleId, InstanceState, LastActivityTime, PreviousActivitySum](ActiveInstance& Instance) -> bool { + HubInstanceState CurrentState = Instance.State.load(); + if (CurrentState != InstanceState) + { + ZEN_INFO("Instance {} idle deprovision aborted - state changed to {}", ModuleId, ToString(CurrentState)); + return false; + } + if (Instance.LastActivityTime.load() != LastActivityTime || + Instance.LastKnownActivitySum.load() != PreviousActivitySum) + { + ZEN_INFO("Instance {} idle deprovision aborted due to activity", ModuleId); + return false; + } + return true; + }); + } } - m_FreePorts.push_back(BasePort); - m_RecoveringModules.erase(std::string(ModuleId)); + + return true; } - RemoveRecoveringModule.Dismiss(); + else if (InstanceState == HubInstanceState::Provisioned) + { + // Process is not running but state says it should be - instance died unexpectedly. + const std::string ModuleId(LockedInstance.GetModuleId()); + const uint16_t Port = LockedInstance.GetBasePort(); + UpdateInstanceState(LockedInstance, ActiveInstanceIndex, HubInstanceState::Crashed); + NotifyStateUpdate(ModuleId, HubInstanceState::Provisioned, HubInstanceState::Crashed, Port, {}); + LockedInstance = {}; - try + return false; + } + else if (InstanceState == HubInstanceState::Hibernated) { - DestroyInstance.reset(); - NewState = HubInstanceState::Unprovisioned; + // Process is not running - no HTTP activity check is possible. + // Use a pure time-based check; the margin window does not apply here. + const std::string ModuleId = std::string(LockedInstance.GetModuleId()); + const std::chrono::system_clock::time_point LastActivityTime = m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.load(); + const uint64_t PreviousActivitySum = m_ActiveInstances[ActiveInstanceIndex].LastKnownActivitySum.load(); + const std::chrono::system_clock::time_point Now = std::chrono::system_clock::now(); + LockedInstance = {}; + + if (LastActivityTime + m_Config.WatchDog.HibernatedInactivityTimeout < Now) + { + ZEN_INFO("Hibernated instance {} has not been active for {}, attempting deprovision...", + ModuleId, + NiceTimeSpanMs(std::chrono::duration_cast<std::chrono::milliseconds>(Now - LastActivityTime).count())); + (void)InternalDeprovision( + ModuleId, + [ModuleId, InstanceState, LastActivityTime, PreviousActivitySum](ActiveInstance& Instance) -> bool { + HubInstanceState CurrentState = Instance.State.load(); + if (CurrentState != InstanceState) + { + ZEN_INFO("Hibernated instance {} idle deprovision aborted - state changed to {}", ModuleId, ToString(CurrentState)); + return false; + } + if (Instance.LastActivityTime.load() != LastActivityTime || Instance.LastKnownActivitySum.load() != PreviousActivitySum) + { + ZEN_INFO("Hibernated instance {} idle deprovision aborted due to activity", ModuleId); + return false; + } + return true; + }); + } + return true; } - catch (const std::exception& Ex) + else { - ZEN_ERROR("Failed to destroy recovered instance for module '{}': {}", ModuleId, Ex.what()); + // transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering) - expected, skip. + // Crashed is handled above via AttemptRecoverInstance; it appears here only when the instance + // lock was busy on a previous cycle and recovery is already pending. + return true; } } void Hub::WatchDog() { - constexpr uint64_t WatchDogWakeupTimeMs = 5000; - constexpr uint64_t WatchDogProcessingTimeMs = 500; + const uint64_t CycleIntervalMs = std::chrono::duration_cast<std::chrono::milliseconds>(m_Config.WatchDog.CycleInterval).count(); + const uint64_t CycleProcessingBudgetMs = + std::chrono::duration_cast<std::chrono::milliseconds>(m_Config.WatchDog.CycleProcessingBudget).count(); + const uint64_t InstanceCheckThrottleMs = + std::chrono::duration_cast<std::chrono::milliseconds>(m_Config.WatchDog.InstanceCheckThrottle).count(); + + HttpClient ActivityCheckClient("http://localhost", + HttpClientSettings{.ConnectTimeout = m_Config.WatchDog.ActivityCheckConnectTimeout, + .Timeout = m_Config.WatchDog.ActivityCheckRequestTimeout}, + [&]() -> bool { return m_WatchDogEvent.Wait(0); }); size_t CheckInstanceIndex = SIZE_MAX; // first increment wraps to 0 - while (!m_WatchDogEvent.Wait(WatchDogWakeupTimeMs)) + while (!m_WatchDogEvent.Wait(gsl::narrow<int>(CycleIntervalMs))) { try { @@ -850,7 +1336,7 @@ Hub::WatchDog() Stopwatch Timer; bool ShuttingDown = false; - while (SlotsRemaining > 0 && Timer.GetElapsedTimeMs() < WatchDogProcessingTimeMs && !ShuttingDown) + while (SlotsRemaining > 0 && Timer.GetElapsedTimeMs() < CycleProcessingBudgetMs && !ShuttingDown) { StorageServerInstance::SharedLockedPtr LockedInstance; m_Lock.WithSharedLock([this, &CheckInstanceIndex, &LockedInstance, &SlotsRemaining]() { @@ -863,7 +1349,7 @@ Hub::WatchDog() { CheckInstanceIndex = 0; } - StorageServerInstance* Instance = m_ActiveInstances[CheckInstanceIndex].get(); + StorageServerInstance* Instance = m_ActiveInstances[CheckInstanceIndex].Instance.get(); if (Instance) { LockedInstance = Instance->LockShared(/*Wait*/ false); @@ -878,22 +1364,18 @@ Hub::WatchDog() continue; } - if (LockedInstance.IsRunning()) + std::string ModuleId(LockedInstance.GetModuleId()); + + bool InstanceIsOk = CheckInstanceStatus(ActivityCheckClient, std::move(LockedInstance), CheckInstanceIndex); + if (InstanceIsOk) { - LockedInstance.UpdateMetrics(); + ShuttingDown = m_WatchDogEvent.Wait(gsl::narrow<int>(InstanceCheckThrottleMs)); } - else if (LockedInstance.GetState() == HubInstanceState::Provisioned) + else { - // Process is not running but state says it should be - instance died unexpectedly. - const std::string ModuleId(LockedInstance.GetModuleId()); - LockedInstance = {}; + ZEN_WARN("Instance for module '{}' is not running, attempting recovery", ModuleId); AttemptRecoverInstance(ModuleId); } - // else: transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering, Crashed) - expected, skip. - LockedInstance = {}; - - // Rate-limit: pause briefly between live-instance checks and respond to shutdown. - ShuttingDown = m_WatchDogEvent.Wait(5); } } catch (const std::exception& Ex) @@ -905,11 +1387,11 @@ Hub::WatchDog() } void -Hub::OnStateUpdate(std::string_view ModuleId, - HubInstanceState OldState, - HubInstanceState& NewState, - uint16_t BasePort, - std::string_view BaseUri) +Hub::NotifyStateUpdate(std::string_view ModuleId, + HubInstanceState OldState, + HubInstanceState NewState, + uint16_t BasePort, + std::string_view BaseUri) { if (m_ModuleStateChangeCallback && OldState != NewState) { @@ -942,9 +1424,10 @@ namespace hub_testutils { std::unique_ptr<Hub> MakeHub(const std::filesystem::path& BaseDir, Hub::Configuration Config = {}, - Hub::AsyncModuleStateChangeCallbackFunc StateChangeCallback = {}) + Hub::AsyncModuleStateChangeCallbackFunc StateChangeCallback = {}, + WorkerThreadPool* WorkerPool = nullptr) { - return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), std::move(StateChangeCallback)); + return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), WorkerPool, std::move(StateChangeCallback)); } struct CallbackRecord @@ -978,6 +1461,42 @@ namespace hub_testutils { } }; + // Poll until Find() returns false for the given module (i.e. async deprovision completes). + static bool WaitForInstanceGone(Hub& HubInstance, + std::string_view ModuleId, + std::chrono::milliseconds PollInterval = std::chrono::milliseconds(50), + std::chrono::seconds Timeout = std::chrono::seconds(30)) + { + const auto Deadline = std::chrono::steady_clock::now() + Timeout; + while (std::chrono::steady_clock::now() < Deadline) + { + if (!HubInstance.Find(ModuleId)) + { + return true; + } + std::this_thread::sleep_for(PollInterval); + } + return !HubInstance.Find(ModuleId); + } + + // Poll until GetInstanceCount() reaches ExpectedCount (i.e. all async deprovisions complete). + static bool WaitForInstanceCount(Hub& HubInstance, + int ExpectedCount, + std::chrono::milliseconds PollInterval = std::chrono::milliseconds(50), + std::chrono::seconds Timeout = std::chrono::seconds(30)) + { + const auto Deadline = std::chrono::steady_clock::now() + Timeout; + while (std::chrono::steady_clock::now() < Deadline) + { + if (HubInstance.GetInstanceCount() == ExpectedCount) + { + return true; + } + std::this_thread::sleep_for(PollInterval); + } + return HubInstance.GetInstanceCount() == ExpectedCount; + } + } // namespace hub_testutils TEST_CASE("hub.provision_basic") @@ -989,10 +1508,8 @@ TEST_CASE("hub.provision_basic") CHECK_FALSE(HubInstance->Find("module_a")); HubProvisionedInstanceInfo Info; - std::string Reason; - const bool ProvisionResult = HubInstance->Provision("module_a", Info, Reason); - REQUIRE_MESSAGE(ProvisionResult, Reason); - CHECK(Reason.empty()); + const Hub::Response ProvisionResult = HubInstance->Provision("module_a", Info); + REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); CHECK_NE(Info.Port, 0); CHECK_EQ(HubInstance->GetInstanceCount(), 1); Hub::InstanceInfo InstanceInfo; @@ -1004,9 +1521,8 @@ TEST_CASE("hub.provision_basic") CHECK(ModClient.Get("/health/")); } - const bool DeprovisionResult = HubInstance->Deprovision("module_a", Reason); - CHECK(DeprovisionResult); - CHECK(Reason.empty()); + const Hub::Response DeprovisionResult = HubInstance->Deprovision("module_a"); + CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); CHECK_EQ(HubInstance->GetInstanceCount(), 0); CHECK_FALSE(HubInstance->Find("module_a")); @@ -1037,9 +1553,8 @@ TEST_CASE("hub.provision_config") CHECK_FALSE(HubInstance->Find("module_a")); HubProvisionedInstanceInfo Info; - std::string Reason; - const bool ProvisionResult = HubInstance->Provision("module_a", Info, Reason); - REQUIRE_MESSAGE(ProvisionResult, Reason); + const Hub::Response ProvisionResult = HubInstance->Provision("module_a", Info); + REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); CHECK_NE(Info.Port, 0); CHECK_EQ(HubInstance->GetInstanceCount(), 1); Hub::InstanceInfo InstanceInfo; @@ -1056,8 +1571,8 @@ TEST_CASE("hub.provision_config") CHECK(ModClient.Get("/health/")); } - const bool DeprovisionResult = HubInstance->Deprovision("module_a", Reason); - CHECK(DeprovisionResult); + const Hub::Response DeprovisionResult = HubInstance->Deprovision("module_a"); + CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); CHECK_EQ(HubInstance->GetInstanceCount(), 0); CHECK_FALSE(HubInstance->Find("module_a")); @@ -1076,10 +1591,9 @@ TEST_CASE("hub.provision_callbacks") std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, CaptureInstance.CaptureFunc()); HubProvisionedInstanceInfo Info; - std::string Reason; - const bool ProvisionResult = HubInstance->Provision("cb_module", Info, Reason); - REQUIRE_MESSAGE(ProvisionResult, Reason); + const Hub::Response ProvisionResult = HubInstance->Provision("cb_module", Info); + REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); { RwLock::SharedLockScope _(CaptureInstance.CallbackMutex); @@ -1094,8 +1608,8 @@ TEST_CASE("hub.provision_callbacks") CHECK(ModClient.Get("/health/")); } - const bool DeprovisionResult = HubInstance->Deprovision("cb_module", Reason); - CHECK(DeprovisionResult); + const Hub::Response DeprovisionResult = HubInstance->Deprovision("cb_module"); + CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); { HttpClient ModClient(fmt::format("http://localhost:{}", Info.Port), kFastTimeout); @@ -1111,6 +1625,49 @@ TEST_CASE("hub.provision_callbacks") } } +TEST_CASE("hub.provision_callback_sequence") +{ + ScopedTemporaryDirectory TempDir; + + struct TransitionRecord + { + HubInstanceState OldState; + HubInstanceState NewState; + }; + RwLock CaptureMutex; + std::vector<TransitionRecord> Transitions; + + auto CaptureFunc = + [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info, HubInstanceState OldState, HubInstanceState NewState) { + ZEN_UNUSED(ModuleId); + ZEN_UNUSED(Info); + CaptureMutex.WithExclusiveLock([&]() { Transitions.push_back({OldState, NewState}); }); + }; + + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, std::move(CaptureFunc)); + + HubProvisionedInstanceInfo Info; + { + const Hub::Response R = HubInstance->Provision("seq_module", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + { + const Hub::Response R = HubInstance->Deprovision("seq_module"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + + RwLock::SharedLockScope _(CaptureMutex); + REQUIRE_EQ(Transitions.size(), 4u); + CHECK_EQ(Transitions[0].OldState, HubInstanceState::Unprovisioned); + CHECK_EQ(Transitions[0].NewState, HubInstanceState::Provisioning); + CHECK_EQ(Transitions[1].OldState, HubInstanceState::Provisioning); + CHECK_EQ(Transitions[1].NewState, HubInstanceState::Provisioned); + CHECK_EQ(Transitions[2].OldState, HubInstanceState::Provisioned); + CHECK_EQ(Transitions[2].NewState, HubInstanceState::Deprovisioning); + CHECK_EQ(Transitions[3].OldState, HubInstanceState::Deprovisioning); + CHECK_EQ(Transitions[3].NewState, HubInstanceState::Unprovisioned); +} + TEST_CASE("hub.instance_limit") { ScopedTemporaryDirectory TempDir; @@ -1121,27 +1678,24 @@ TEST_CASE("hub.instance_limit") std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); HubProvisionedInstanceInfo Info; - std::string Reason; - const bool FirstResult = HubInstance->Provision("limit_a", Info, Reason); - REQUIRE_MESSAGE(FirstResult, Reason); + const Hub::Response FirstResult = HubInstance->Provision("limit_a", Info); + REQUIRE_MESSAGE(FirstResult.ResponseCode == Hub::EResponseCode::Completed, FirstResult.Message); - const bool SecondResult = HubInstance->Provision("limit_b", Info, Reason); - REQUIRE_MESSAGE(SecondResult, Reason); + const Hub::Response SecondResult = HubInstance->Provision("limit_b", Info); + REQUIRE_MESSAGE(SecondResult.ResponseCode == Hub::EResponseCode::Completed, SecondResult.Message); CHECK_EQ(HubInstance->GetInstanceCount(), 2); - Reason.clear(); - const bool ThirdResult = HubInstance->Provision("limit_c", Info, Reason); - CHECK_FALSE(ThirdResult); + const Hub::Response ThirdResult = HubInstance->Provision("limit_c", Info); + CHECK(ThirdResult.ResponseCode == Hub::EResponseCode::Rejected); CHECK_EQ(HubInstance->GetInstanceCount(), 2); - CHECK_NE(Reason.find("instance limit"), std::string::npos); + CHECK_NE(ThirdResult.Message.find("instance limit"), std::string::npos); - HubInstance->Deprovision("limit_a", Reason); + HubInstance->Deprovision("limit_a"); CHECK_EQ(HubInstance->GetInstanceCount(), 1); - Reason.clear(); - const bool FourthResult = HubInstance->Provision("limit_d", Info, Reason); - CHECK_MESSAGE(FourthResult, Reason); + const Hub::Response FourthResult = HubInstance->Provision("limit_d", Info); + CHECK_MESSAGE(FourthResult.ResponseCode == Hub::EResponseCode::Completed, FourthResult.Message); CHECK_EQ(HubInstance->GetInstanceCount(), 2); } @@ -1151,10 +1705,15 @@ TEST_CASE("hub.enumerate_modules") std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path()); HubProvisionedInstanceInfo Info; - std::string Reason; - REQUIRE_MESSAGE(HubInstance->Provision("enum_a", Info, Reason), Reason); - REQUIRE_MESSAGE(HubInstance->Provision("enum_b", Info, Reason), Reason); + { + const Hub::Response R = HubInstance->Provision("enum_a", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + { + const Hub::Response R = HubInstance->Provision("enum_b", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } std::vector<std::string> Ids; int ProvisionedCount = 0; @@ -1172,7 +1731,7 @@ TEST_CASE("hub.enumerate_modules") CHECK(FoundA); CHECK(FoundB); - HubInstance->Deprovision("enum_a", Reason); + HubInstance->Deprovision("enum_a"); Ids.clear(); ProvisionedCount = 0; HubInstance->EnumerateModules([&](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) { @@ -1195,17 +1754,22 @@ TEST_CASE("hub.max_instance_count") CHECK_EQ(HubInstance->GetMaxInstanceCount(), 0); HubProvisionedInstanceInfo Info; - std::string Reason; - REQUIRE_MESSAGE(HubInstance->Provision("max_a", Info, Reason), Reason); + { + const Hub::Response R = HubInstance->Provision("max_a", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } CHECK_GE(HubInstance->GetMaxInstanceCount(), 1); - REQUIRE_MESSAGE(HubInstance->Provision("max_b", Info, Reason), Reason); + { + const Hub::Response R = HubInstance->Provision("max_b", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } CHECK_GE(HubInstance->GetMaxInstanceCount(), 2); const int MaxAfterTwo = HubInstance->GetMaxInstanceCount(); - HubInstance->Deprovision("max_a", Reason); + HubInstance->Deprovision("max_a"); CHECK_EQ(HubInstance->GetInstanceCount(), 1); CHECK_EQ(HubInstance->GetMaxInstanceCount(), MaxAfterTwo); } @@ -1228,8 +1792,8 @@ TEST_CASE("hub.concurrent_callbacks") for (int I = 0; I < kHalf; ++I) { HubProvisionedInstanceInfo Info; - std::string Reason; - REQUIRE_MESSAGE(HubInstance->Provision(fmt::format("pre_{}", I), Info, Reason), Reason); + const Hub::Response ProvR = HubInstance->Provision(fmt::format("pre_{}", I), Info); + REQUIRE_MESSAGE(ProvR.ResponseCode == Hub::EResponseCode::Completed, ProvR.Message); } CHECK_EQ(HubInstance->GetInstanceCount(), kHalf); @@ -1253,23 +1817,21 @@ TEST_CASE("hub.concurrent_callbacks") for (int I = 0; I < kHalf; ++I) { - ProvisionFutures[I] = Provisioners.EnqueueTask(std::packaged_task<void()>([&, I] { - HubProvisionedInstanceInfo Info; - std::string Reason; - const bool Result = - HubInstance->Provision(fmt::format("new_{}", I), Info, Reason); - ProvisionResults[I] = Result ? 1 : 0; - ProvisionReasons[I] = Reason; - }), - WorkerThreadPool::EMode::EnableBacklog); - - DeprovisionFutures[I] = Deprovisioneers.EnqueueTask(std::packaged_task<void()>([&, I] { - std::string Reason; - const bool Result = - HubInstance->Deprovision(fmt::format("pre_{}", I), Reason); - DeprovisionResults[I] = Result ? 1 : 0; - }), - WorkerThreadPool::EMode::EnableBacklog); + ProvisionFutures[I] = + Provisioners.EnqueueTask(std::packaged_task<void()>([&, I] { + HubProvisionedInstanceInfo Info; + const Hub::Response Result = HubInstance->Provision(fmt::format("new_{}", I), Info); + ProvisionResults[I] = (Result.ResponseCode == Hub::EResponseCode::Completed) ? 1 : 0; + ProvisionReasons[I] = Result.Message; + }), + WorkerThreadPool::EMode::EnableBacklog); + + DeprovisionFutures[I] = + Deprovisioneers.EnqueueTask(std::packaged_task<void()>([&, I] { + const Hub::Response Result = HubInstance->Deprovision(fmt::format("pre_{}", I)); + DeprovisionResults[I] = (Result.ResponseCode == Hub::EResponseCode::Completed) ? 1 : 0; + }), + WorkerThreadPool::EMode::EnableBacklog); } for (std::future<void>& F : ProvisionFutures) @@ -1324,14 +1886,13 @@ TEST_CASE("hub.job_object") std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); HubProvisionedInstanceInfo Info; - std::string Reason; - const bool ProvisionResult = HubInstance->Provision("jobobj_a", Info, Reason); - REQUIRE_MESSAGE(ProvisionResult, Reason); + const Hub::Response ProvisionResult = HubInstance->Provision("jobobj_a", Info); + REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); CHECK_NE(Info.Port, 0); - const bool DeprovisionResult = HubInstance->Deprovision("jobobj_a", Reason); - CHECK(DeprovisionResult); + const Hub::Response DeprovisionResult = HubInstance->Deprovision("jobobj_a"); + CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); CHECK_EQ(HubInstance->GetInstanceCount(), 0); } @@ -1344,14 +1905,13 @@ TEST_CASE("hub.job_object") std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); HubProvisionedInstanceInfo Info; - std::string Reason; - const bool ProvisionResult = HubInstance->Provision("nojobobj_a", Info, Reason); - REQUIRE_MESSAGE(ProvisionResult, Reason); + const Hub::Response ProvisionResult = HubInstance->Provision("nojobobj_a", Info); + REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); CHECK_NE(Info.Port, 0); - const bool DeprovisionResult = HubInstance->Deprovision("nojobobj_a", Reason); - CHECK(DeprovisionResult); + const Hub::Response DeprovisionResult = HubInstance->Deprovision("nojobobj_a"); + CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); CHECK_EQ(HubInstance->GetInstanceCount(), 0); } } @@ -1366,11 +1926,12 @@ TEST_CASE("hub.hibernate_wake") HubProvisionedInstanceInfo ProvInfo; Hub::InstanceInfo Info; - std::string Reason; // Provision - REQUIRE_MESSAGE(HubInstance->Provision("hib_a", ProvInfo, Reason), Reason); - CHECK(Reason.empty()); + { + const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } REQUIRE(HubInstance->Find("hib_a", &Info)); CHECK_EQ(Info.State, HubInstanceState::Provisioned); { @@ -1379,9 +1940,8 @@ TEST_CASE("hub.hibernate_wake") } // Hibernate - const bool HibernateResult = HubInstance->Hibernate("hib_a", Reason); - REQUIRE_MESSAGE(HibernateResult, Reason); - CHECK(Reason.empty()); + const Hub::Response HibernateResult = HubInstance->Hibernate("hib_a"); + REQUIRE_MESSAGE(HibernateResult.ResponseCode == Hub::EResponseCode::Completed, HibernateResult.Message); REQUIRE(HubInstance->Find("hib_a", &Info)); CHECK_EQ(Info.State, HubInstanceState::Hibernated); { @@ -1390,9 +1950,8 @@ TEST_CASE("hub.hibernate_wake") } // Wake - const bool WakeResult = HubInstance->Wake("hib_a", Reason); - REQUIRE_MESSAGE(WakeResult, Reason); - CHECK(Reason.empty()); + const Hub::Response WakeResult = HubInstance->Wake("hib_a"); + REQUIRE_MESSAGE(WakeResult.ResponseCode == Hub::EResponseCode::Completed, WakeResult.Message); REQUIRE(HubInstance->Find("hib_a", &Info)); CHECK_EQ(Info.State, HubInstanceState::Provisioned); { @@ -1401,9 +1960,8 @@ TEST_CASE("hub.hibernate_wake") } // Deprovision - const bool DeprovisionResult = HubInstance->Deprovision("hib_a", Reason); - CHECK(DeprovisionResult); - CHECK(Reason.empty()); + const Hub::Response DeprovisionResult = HubInstance->Deprovision("hib_a"); + CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); CHECK_FALSE(HubInstance->Find("hib_a")); { HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); @@ -1419,53 +1977,167 @@ TEST_CASE("hub.hibernate_wake_errors") std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); HubProvisionedInstanceInfo ProvInfo; - std::string Reason; - // Hibernate/wake on a non-existent module - should return false with empty reason (-> 404) - CHECK_FALSE(HubInstance->Hibernate("never_provisioned", Reason)); - CHECK(Reason.empty()); + // Hibernate/wake on a non-existent module - returns NotFound (-> 404) + CHECK(HubInstance->Hibernate("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); + CHECK(HubInstance->Wake("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); - CHECK_FALSE(HubInstance->Wake("never_provisioned", Reason)); - CHECK(Reason.empty()); + // Double-hibernate: second hibernate on already-hibernated module returns Completed (idempotent) + { + const Hub::Response R = HubInstance->Provision("err_b", ProvInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + { + const Hub::Response R = HubInstance->Hibernate("err_b"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + + { + const Hub::Response HibResp = HubInstance->Hibernate("err_b"); + CHECK(HibResp.ResponseCode == Hub::EResponseCode::Completed); + } + + // Wake on provisioned: succeeds (-> Provisioned), then wake again returns Completed (idempotent) + { + const Hub::Response R = HubInstance->Wake("err_b"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + + { + const Hub::Response WakeResp = HubInstance->Wake("err_b"); + CHECK(WakeResp.ResponseCode == Hub::EResponseCode::Completed); + } + + // Deprovision not-found - returns NotFound (-> 404) + CHECK(HubInstance->Deprovision("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); +} + +TEST_CASE("hub.async_hibernate_wake") +{ + ScopedTemporaryDirectory TempDir; - // Double-hibernate: first hibernate succeeds, second returns false with non-empty reason (-> 400) - REQUIRE_MESSAGE(HubInstance->Provision("err_b", ProvInfo, Reason), Reason); - CHECK(Reason.empty()); - REQUIRE_MESSAGE(HubInstance->Hibernate("err_b", Reason), Reason); - CHECK(Reason.empty()); + Hub::Configuration Config; + Config.BasePortNumber = 23000; - Reason.clear(); - CHECK_FALSE(HubInstance->Hibernate("err_b", Reason)); - CHECK_FALSE(Reason.empty()); + WorkerThreadPool WorkerPool(2, "hub_async_hib_wake"); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); - // Wake on provisioned: succeeds (-> Provisioned), then wake again returns false (-> 400) - REQUIRE_MESSAGE(HubInstance->Wake("err_b", Reason), Reason); - CHECK(Reason.empty()); + HubProvisionedInstanceInfo ProvInfo; + Hub::InstanceInfo Info; + + constexpr auto kPollInterval = std::chrono::milliseconds(50); + constexpr auto kTimeout = std::chrono::seconds(30); + + // Provision and wait until Provisioned + { + const Hub::Response R = HubInstance->Provision("async_hib_a", ProvInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message); + } + { + const auto Deadline = std::chrono::steady_clock::now() + kTimeout; + bool Ready = false; + while (std::chrono::steady_clock::now() < Deadline) + { + if (HubInstance->Find("async_hib_a", &Info) && Info.State == HubInstanceState::Provisioned) + { + Ready = true; + break; + } + std::this_thread::sleep_for(kPollInterval); + } + REQUIRE_MESSAGE(Ready, "Instance did not reach Provisioned state within timeout"); + } + + // Hibernate asynchronously and poll until Hibernated + { + const Hub::Response R = HubInstance->Hibernate("async_hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message); + } + { + const auto Deadline = std::chrono::steady_clock::now() + kTimeout; + bool Hibernated = false; + while (std::chrono::steady_clock::now() < Deadline) + { + if (HubInstance->Find("async_hib_a", &Info) && Info.State == HubInstanceState::Hibernated) + { + Hibernated = true; + break; + } + std::this_thread::sleep_for(kPollInterval); + } + REQUIRE_MESSAGE(Hibernated, "Instance did not reach Hibernated state within timeout"); + } + { + HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); + CHECK(!ModClient.Get("/health/")); + } - Reason.clear(); - CHECK_FALSE(HubInstance->Wake("err_b", Reason)); - CHECK_FALSE(Reason.empty()); + // Wake asynchronously and poll until Provisioned + { + const Hub::Response R = HubInstance->Wake("async_hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message); + } + { + const auto Deadline = std::chrono::steady_clock::now() + kTimeout; + bool Woken = false; + while (std::chrono::steady_clock::now() < Deadline) + { + if (HubInstance->Find("async_hib_a", &Info) && Info.State == HubInstanceState::Provisioned) + { + Woken = true; + break; + } + std::this_thread::sleep_for(kPollInterval); + } + REQUIRE_MESSAGE(Woken, "Instance did not reach Provisioned state after wake within timeout"); + } + { + HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); + CHECK(ModClient.Get("/health/")); + } - // Deprovision not-found - should return false with empty reason (-> 404) - CHECK_FALSE(HubInstance->Deprovision("never_provisioned", Reason)); - CHECK(Reason.empty()); + // Deprovision asynchronously and poll until the instance is gone + { + const Hub::Response R = HubInstance->Deprovision("async_hib_a"); + CHECK_MESSAGE(R.ResponseCode == Hub::EResponseCode::Accepted, R.Message); + } + REQUIRE_MESSAGE(hub_testutils::WaitForInstanceGone(*HubInstance, "async_hib_a"), "Instance did not deprovision within timeout"); } TEST_CASE("hub.recover_process_crash") { ScopedTemporaryDirectory TempDir; - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path()); + + struct TransitionRecord + { + HubInstanceState OldState; + HubInstanceState NewState; + }; + RwLock CaptureMutex; + std::vector<TransitionRecord> Transitions; + auto CaptureFunc = [&](std::string_view, const HubProvisionedInstanceInfo&, HubInstanceState OldState, HubInstanceState NewState) { + CaptureMutex.WithExclusiveLock([&]() { Transitions.push_back({OldState, NewState}); }); + }; + + // Fast watchdog cycle so crash detection is near-instant instead of waiting up to the 3s default. + Hub::Configuration Config; + Config.WatchDog.CycleInterval = std::chrono::milliseconds(10); + Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1); + + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, std::move(CaptureFunc)); HubProvisionedInstanceInfo Info; - std::string Reason; - REQUIRE_MESSAGE(HubInstance->Provision("module_a", Info, Reason), Reason); + { + const Hub::Response R = HubInstance->Provision("module_a", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } // Kill the child process to simulate a crash, then poll until the watchdog detects it, // recovers the instance, and the new process is serving requests. HubInstance->TerminateModuleForTesting("module_a"); - constexpr auto kPollIntervalMs = std::chrono::milliseconds(200); - constexpr auto kTimeoutMs = std::chrono::seconds(20); + constexpr auto kPollIntervalMs = std::chrono::milliseconds(50); + constexpr auto kTimeoutMs = std::chrono::seconds(15); const auto Deadline = std::chrono::steady_clock::now() + kTimeoutMs; // A successful HTTP health check on the same port confirms the new process is up. @@ -1486,22 +2158,50 @@ TEST_CASE("hub.recover_process_crash") } } CHECK_MESSAGE(Recovered, "Instance did not recover within timeout"); + + // Verify the full crash/recovery callback sequence + { + RwLock::SharedLockScope _(CaptureMutex); + REQUIRE_GE(Transitions.size(), 3u); + // Find the Provisioned->Crashed transition + const auto CrashedIt = std::find_if(Transitions.begin(), Transitions.end(), [](const TransitionRecord& R) { + return R.OldState == HubInstanceState::Provisioned && R.NewState == HubInstanceState::Crashed; + }); + REQUIRE_NE(CrashedIt, Transitions.end()); + // Recovery sequence follows: Crashed->Recovering, Recovering->Provisioned + const auto RecoveringIt = CrashedIt + 1; + REQUIRE_NE(RecoveringIt, Transitions.end()); + CHECK_EQ(RecoveringIt->OldState, HubInstanceState::Crashed); + CHECK_EQ(RecoveringIt->NewState, HubInstanceState::Recovering); + const auto RecoveredIt = RecoveringIt + 1; + REQUIRE_NE(RecoveredIt, Transitions.end()); + CHECK_EQ(RecoveredIt->OldState, HubInstanceState::Recovering); + CHECK_EQ(RecoveredIt->NewState, HubInstanceState::Provisioned); + } } TEST_CASE("hub.recover_process_crash_then_deprovision") { ScopedTemporaryDirectory TempDir; - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path()); + + // Fast watchdog cycle so crash detection is near-instant instead of waiting up to the 3s default. + Hub::Configuration Config; + Config.WatchDog.CycleInterval = std::chrono::milliseconds(10); + Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1); + + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); HubProvisionedInstanceInfo Info; - std::string Reason; - REQUIRE_MESSAGE(HubInstance->Provision("module_a", Info, Reason), Reason); + { + const Hub::Response R = HubInstance->Provision("module_a", Info); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } // Kill the child process, wait for the watchdog to detect and recover the instance. HubInstance->TerminateModuleForTesting("module_a"); - constexpr auto kPollIntervalMs = std::chrono::milliseconds(200); - constexpr auto kTimeoutMs = std::chrono::seconds(20); + constexpr auto kPollIntervalMs = std::chrono::milliseconds(50); + constexpr auto kTimeoutMs = std::chrono::seconds(15); const auto Deadline = std::chrono::steady_clock::now() + kTimeoutMs; bool Recovered = false; @@ -1518,16 +2218,273 @@ TEST_CASE("hub.recover_process_crash_then_deprovision") REQUIRE_MESSAGE(Recovered, "Instance did not recover within timeout"); // After recovery, deprovision should succeed and a re-provision should work. - CHECK_MESSAGE(HubInstance->Deprovision("module_a", Reason), Reason); + { + const Hub::Response R = HubInstance->Deprovision("module_a"); + CHECK_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } CHECK_EQ(HubInstance->GetInstanceCount(), 0); HubProvisionedInstanceInfo NewInfo; - CHECK_MESSAGE(HubInstance->Provision("module_a", NewInfo, Reason), Reason); + { + const Hub::Response R = HubInstance->Provision("module_a", NewInfo); + CHECK_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } CHECK_NE(NewInfo.Port, 0); HttpClient NewClient(fmt::format("http://localhost:{}", NewInfo.Port), kFastTimeout); CHECK_MESSAGE(NewClient.Get("/health/"), "Re-provisioned instance is not serving requests"); } +TEST_CASE("hub.async_provision_concurrent") +{ + ScopedTemporaryDirectory TempDir; + + constexpr int kModuleCount = 8; + + Hub::Configuration Config; + Config.BasePortNumber = 22800; + Config.InstanceLimit = kModuleCount; + + WorkerThreadPool WorkerPool(4, "hub_async_concurrent"); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + + std::vector<HubProvisionedInstanceInfo> Infos(kModuleCount); + std::vector<std::string> Reasons(kModuleCount); + std::vector<int> Results(kModuleCount, 0); + + { + WorkerThreadPool Callers(kModuleCount, "hub_async_callers"); + std::vector<std::future<void>> Futures(kModuleCount); + + for (int I = 0; I < kModuleCount; ++I) + { + Futures[I] = Callers.EnqueueTask(std::packaged_task<void()>([&, I] { + const Hub::Response Resp = HubInstance->Provision(fmt::format("async_c{}", I), Infos[I]); + Results[I] = (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? 1 : 0; + Reasons[I] = Resp.Message; + }), + WorkerThreadPool::EMode::EnableBacklog); + } + for (std::future<void>& F : Futures) + { + F.get(); + } + } + + for (int I = 0; I < kModuleCount; ++I) + { + REQUIRE_MESSAGE(Results[I] != 0, Reasons[I]); + CHECK_NE(Infos[I].Port, 0); + } + + // Poll until all instances reach Provisioned state + constexpr auto kPollInterval = std::chrono::milliseconds(50); + constexpr auto kTimeout = std::chrono::seconds(30); + const auto Deadline = std::chrono::steady_clock::now() + kTimeout; + + bool AllProvisioned = false; + while (std::chrono::steady_clock::now() < Deadline) + { + int ProvisionedCount = 0; + for (int I = 0; I < kModuleCount; ++I) + { + Hub::InstanceInfo InstanceInfo; + if (HubInstance->Find(fmt::format("async_c{}", I), &InstanceInfo) && InstanceInfo.State == HubInstanceState::Provisioned) + { + ++ProvisionedCount; + } + } + if (ProvisionedCount == kModuleCount) + { + AllProvisioned = true; + break; + } + std::this_thread::sleep_for(kPollInterval); + } + CHECK_MESSAGE(AllProvisioned, "Not all instances reached Provisioned state within timeout"); + + for (int I = 0; I < kModuleCount; ++I) + { + HttpClient ModClient(fmt::format("http://localhost:{}", Infos[I].Port), kFastTimeout); + CHECK_MESSAGE(ModClient.Get("/health/"), fmt::format("async_c{} not serving requests", I)); + } + + for (int I = 0; I < kModuleCount; ++I) + { + const Hub::Response DepResp = HubInstance->Deprovision(fmt::format("async_c{}", I)); + CHECK_MESSAGE(DepResp.ResponseCode == Hub::EResponseCode::Accepted, DepResp.Message); + } + REQUIRE_MESSAGE(hub_testutils::WaitForInstanceCount(*HubInstance, 0), "Not all instances deprovisioned within timeout"); +} + +TEST_CASE("hub.async_provision_shutdown_waits") +{ + ScopedTemporaryDirectory TempDir; + + constexpr int kModuleCount = 8; + + Hub::Configuration Config; + Config.InstanceLimit = kModuleCount; + Config.BasePortNumber = 22900; + + WorkerThreadPool WorkerPool(2, "hub_async_shutdown"); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + + std::vector<HubProvisionedInstanceInfo> Infos(kModuleCount); + + for (int I = 0; I < kModuleCount; ++I) + { + const Hub::Response ProvResult = HubInstance->Provision(fmt::format("async_c{}", I), Infos[I]); + REQUIRE_MESSAGE(ProvResult.ResponseCode == Hub::EResponseCode::Accepted, ProvResult.Message); + REQUIRE_NE(Infos[I].Port, 0); + } + + // Shut down without polling for Provisioned; Shutdown() must drain the latch and clean up. + HubInstance->Shutdown(); + + CHECK_EQ(HubInstance->GetInstanceCount(), 0); + + for (int I = 0; I < kModuleCount; ++I) + { + HttpClient ModClient(fmt::format("http://localhost:{}", Infos[I].Port), kFastTimeout); + CHECK_FALSE(ModClient.Get("/health/")); + } +} + +TEST_CASE("hub.async_provision_rejected") +{ + // Rejection from CanProvisionInstance fires synchronously even when a WorkerPool is present. + ScopedTemporaryDirectory TempDir; + + Hub::Configuration Config; + Config.InstanceLimit = 1; + Config.BasePortNumber = 23100; + + WorkerThreadPool WorkerPool(2, "hub_async_rejected"); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + + HubProvisionedInstanceInfo Info; + + // First provision: dispatched to WorkerPool, returns Accepted + const Hub::Response FirstResult = HubInstance->Provision("async_r1", Info); + REQUIRE_MESSAGE(FirstResult.ResponseCode == Hub::EResponseCode::Accepted, FirstResult.Message); + REQUIRE_NE(Info.Port, 0); + + // Second provision: CanProvisionInstance rejects synchronously (limit reached), returns Rejected + HubProvisionedInstanceInfo Info2; + const Hub::Response SecondResult = HubInstance->Provision("async_r2", Info2); + CHECK(SecondResult.ResponseCode == Hub::EResponseCode::Rejected); + CHECK_FALSE(SecondResult.Message.empty()); + CHECK_NE(SecondResult.Message.find("instance limit"), std::string::npos); + CHECK_EQ(HubInstance->GetInstanceCount(), 1); +} + +TEST_CASE("hub.instance.inactivity.deprovision") +{ + ScopedTemporaryDirectory TempDir; + + // Aggressive watchdog settings to keep test duration short. + // Provisioned timeout (2s) > Hibernated timeout (1s) - this is the key invariant under test. + // Margin (1s) means the HTTP activity check fires at LastActivityTime+1s for Provisioned instances. + // The Hibernated branch ignores the margin and uses a direct time-based check. + Hub::Configuration Config; + Config.BasePortNumber = 23200; + Config.InstanceLimit = 3; + Config.WatchDog.CycleInterval = std::chrono::milliseconds(10); + Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1); + Config.WatchDog.ProvisionedInactivityTimeout = std::chrono::seconds(2); + Config.WatchDog.HibernatedInactivityTimeout = std::chrono::seconds(1); + Config.WatchDog.InactivityCheckMargin = std::chrono::seconds(1); + Config.WatchDog.ActivityCheckConnectTimeout = std::chrono::milliseconds(200); + Config.WatchDog.ActivityCheckRequestTimeout = std::chrono::milliseconds(500); + + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); + + // Provision in order: idle first, idle_hib second (then hibernate), persistent last. + // idle_hib uses the shorter Hibernated timeout (1s) and expires before idle (2s provisioned). + // persistent gets real HTTP PUTs so its activity timer is reset; it must still be alive + // after both idle instances are gone. + + HubProvisionedInstanceInfo IdleInfo; + { + const Hub::Response R = HubInstance->Provision("idle", IdleInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + + HubProvisionedInstanceInfo IdleHibInfo; + { + const Hub::Response R = HubInstance->Provision("idle_hib", IdleHibInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + const Hub::Response H = HubInstance->Hibernate("idle_hib"); + REQUIRE_MESSAGE(H.ResponseCode == Hub::EResponseCode::Completed, H.Message); + } + + HubProvisionedInstanceInfo PersistentInfo; + { + const Hub::Response R = HubInstance->Provision("persistent", PersistentInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + + auto PokeInstance = [&](uint16_t Port) { + // Make a real storage request to increment the instance's activity sum. + // The watchdog detects the changed sum on the next cycle and resets LastActivityTime. + { + HttpClient PersistentClient(fmt::format("http://localhost:{}", Port), + HttpClientSettings{.ConnectTimeout = std::chrono::milliseconds(200)}); + uint64_t Tick = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - + std::chrono::steady_clock::time_point::min()) + .count(); + IoHash Key = IoHash::HashBuffer(&Tick, sizeof(Tick)); + const HttpClient::Response PutResult = + PersistentClient.Put(fmt::format("/z$/ns1/b/{}", Key), + IoBufferBuilder::MakeFromMemory(MakeMemoryView(std::string_view("keepalive")))); + CHECK(PutResult); + } + }; + + PokeInstance(IdleInfo.Port); + PokeInstance(PersistentInfo.Port); + + Sleep(100); + + // Phase 1: immediately after setup all three instances must still be alive. + // No timeout has elapsed yet (only 100ms have passed). + CHECK_MESSAGE(HubInstance->Find("idle"), "idle was deprovisioned within 100ms - its 2s provisioned timeout has not elapsed"); + + CHECK_MESSAGE(HubInstance->Find("idle_hib"), "idle_hib was deprovisioned within 100ms - its 1s hibernated timeout has not elapsed"); + + CHECK_MESSAGE(HubInstance->Find("persistent"), + "persistent was deprovisioned within 100ms - its 2s provisioned timeout has not elapsed"); + + // Phase 2: idle_hib must be deprovisioned by the watchdog within its 1s hibernated timeout. + // idle must remain alive - its 2s provisioned timeout has not elapsed yet. + CHECK_MESSAGE(hub_testutils::WaitForInstanceGone(*HubInstance, "idle_hib", std::chrono::milliseconds(100), std::chrono::seconds(3)), + "idle_hib was not deprovisioned within its 1s hibernated timeout"); + + CHECK_MESSAGE(!HubInstance->Find("idle_hib"), "idle_hib should be gone after its 1s hibernated timeout elapsed"); + + CHECK_MESSAGE(HubInstance->Find("idle"), + "idle was deprovisioned before its 2s provisioned timeout - only idle_hib's 1s hibernated timeout has elapsed"); + + CHECK_MESSAGE(HubInstance->Find("persistent"), + "persistent was incorrectly deprovisioned - its activity timer was reset by PokeInstance"); + + PokeInstance(PersistentInfo.Port); + + // Phase 3: idle must be deprovisioned by the watchdog within its 2s provisioned timeout. + // persistent must remain alive - its activity timer was reset by PokeInstance. + CHECK_MESSAGE(hub_testutils::WaitForInstanceGone(*HubInstance, "idle", std::chrono::milliseconds(100), std::chrono::seconds(4)), + "idle was not deprovisioned within its 2s provisioned timeout"); + + CHECK_MESSAGE(!HubInstance->Find("idle_hib"), "idle_hib should still be gone - it was deprovisioned in phase 2"); + + CHECK_MESSAGE(!HubInstance->Find("idle"), "idle should be gone after its 3s provisioned timeout elapsed"); + + CHECK_MESSAGE(HubInstance->Find("persistent"), + "persistent was incorrectly deprovisioned - its activity timer was reset by PokeInstance"); + + HubInstance->Shutdown(); +} + TEST_SUITE_END(); void @@ -1539,7 +2496,7 @@ Hub::TerminateModuleForTesting(const std::string& ModuleId) { return; } - StorageServerInstance::SharedLockedPtr Locked = m_ActiveInstances[It->second]->LockShared(/*Wait*/ true); + StorageServerInstance::SharedLockedPtr Locked = m_ActiveInstances[It->second].Instance->LockShared(/*Wait*/ true); if (Locked) { Locked.TerminateForTesting(); diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h index 28e77e729..c343b19e2 100644 --- a/src/zenserver/hub/hub.h +++ b/src/zenserver/hub/hub.h @@ -4,21 +4,23 @@ #include "hubinstancestate.h" #include "resourcemetrics.h" +#include "storageserverinstance.h" #include <zencore/system.h> #include <zenutil/zenserverprocess.h> +#include <chrono> #include <deque> #include <filesystem> #include <functional> #include <memory> #include <thread> #include <unordered_map> -#include <unordered_set> namespace zen { -class StorageServerInstance; +class HttpClient; +class WorkerThreadPool; /** * Hub @@ -35,6 +37,19 @@ struct HubProvisionedInstanceInfo class Hub { public: + struct WatchDogConfiguration + { + std::chrono::milliseconds CycleInterval = std::chrono::seconds(3); + std::chrono::milliseconds CycleProcessingBudget = std::chrono::milliseconds(500); + std::chrono::milliseconds InstanceCheckThrottle = std::chrono::milliseconds(5); + std::chrono::seconds ProvisionedInactivityTimeout = std::chrono::minutes(10); + std::chrono::seconds HibernatedInactivityTimeout = std::chrono::minutes(30); + std::chrono::seconds InactivityCheckMargin = std::chrono::minutes(1); + + std::chrono::milliseconds ActivityCheckConnectTimeout = std::chrono::milliseconds(100); + std::chrono::milliseconds ActivityCheckRequestTimeout = std::chrono::milliseconds(200); + }; + struct Configuration { /** Enable or disable the use of a Windows Job Object for child process management. @@ -51,6 +66,8 @@ public: int InstanceCoreLimit = 0; // Automatic std::filesystem::path InstanceConfigPath; std::string HydrationTargetSpecification; + + WatchDogConfiguration WatchDog; }; typedef std::function< @@ -59,6 +76,7 @@ public: Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, + WorkerThreadPool* OptionalWorkerPool = nullptr, AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback = {}); ~Hub(); @@ -78,42 +96,49 @@ public: */ void Shutdown(); + enum class EResponseCode + { + NotFound, + Rejected, + Accepted, + Completed + }; + + struct Response + { + EResponseCode ResponseCode = EResponseCode::Rejected; + std::string Message; + }; + /** * Provision a storage server instance for the given module ID. * * @param ModuleId The ID of the module to provision. - * @param OutInfo If successful, information about the provisioned instance will be returned here. - * @param OutReason If unsuccessful, the reason will be returned here. + * @param OutInfo On success, information about the provisioned instance is returned here. */ - bool Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, std::string& OutReason); + Response Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo); /** * Deprovision a storage server instance for the given module ID. * * @param ModuleId The ID of the module to deprovision. - * @param OutReason If unsuccessful, the reason will be returned here. - * @return true if the instance was found and deprovisioned, false otherwise. */ - bool Deprovision(const std::string& ModuleId, std::string& OutReason); + Response Deprovision(const std::string& ModuleId); /** * Hibernate a storage server instance for the given module ID. * The instance is shut down but its data is preserved; it can be woken later. * * @param ModuleId The ID of the module to hibernate. - * @param OutReason If unsuccessful, the reason will be returned here (empty = not found). - * @return true if the instance was hibernated, false otherwise. */ - bool Hibernate(const std::string& ModuleId, std::string& OutReason); + Response Hibernate(const std::string& ModuleId); /** * Wake a hibernated storage server instance for the given module ID. * * @param ModuleId The ID of the module to wake. - * @param OutReason If unsuccessful, the reason will be returned here (empty = not found). - * @return true if the instance was woken, false otherwise. */ - bool Wake(const std::string& ModuleId, std::string& OutReason); + Response Wake(const std::string& ModuleId); /** * Find info about storage server instance for the given module ID. @@ -144,6 +169,9 @@ public: private: const Configuration m_Config; ZenServerEnvironment m_RunEnvironment; + WorkerThreadPool* m_WorkerPool = nullptr; + Latch m_BackgroundWorkLatch; + std::atomic<bool> m_ShutdownFlag = false; AsyncModuleStateChangeCallbackFunc m_ModuleStateChangeCallback; @@ -153,64 +181,86 @@ private: #if ZEN_PLATFORM_WINDOWS JobObject m_JobObject; #endif - RwLock m_Lock; - std::unordered_map<std::string, size_t> m_InstanceLookup; - std::unordered_set<std::string> m_DeprovisioningModules; - std::unordered_set<std::string> m_ProvisioningModules; - std::unordered_set<std::string> m_HibernatingModules; - std::unordered_set<std::string> m_WakingModules; - std::unordered_set<std::string> m_RecoveringModules; - std::vector<std::unique_ptr<StorageServerInstance>> m_ActiveInstances; - std::vector<size_t> m_FreeActiveInstanceIndexes; - ResourceMetrics m_ResourceLimits; - SystemMetrics m_HostMetrics; - std::atomic<int> m_MaxInstanceCount = 0; - std::deque<uint16_t> m_FreePorts; - std::thread m_WatchDog; + RwLock m_Lock; + std::unordered_map<std::string, size_t> m_InstanceLookup; - Event m_WatchDogEvent; - void WatchDog(); - void AttemptRecoverInstance(std::string_view ModuleId); + struct ActiveInstance + { + // Invariant: Instance == nullptr if and only if State == Unprovisioned. + // Both fields are only created/destroyed under the hub's exclusive lock. + // State is an atomic because the watchdog reads it under a shared instance lock + // without holding the hub lock. + std::unique_ptr<StorageServerInstance> Instance; + std::atomic<HubInstanceState> State = HubInstanceState::Unprovisioned; + // TODO: We should move current metrics here (from StorageServerInstance) + + // Read and updated by WatchDog, updates to State triggers a reset of both + std::atomic<uint64_t> LastKnownActivitySum = 0; + std::atomic<std::chrono::system_clock::time_point> LastActivityTime = std::chrono::system_clock::time_point::min(); + }; - void UpdateStats(); - void UpdateCapacityMetrics(); - bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason); + // UpdateInstanceState is overloaded to accept a locked instance pointer (exclusive or shared) or the hub exclusive + // lock scope as a proof token that the caller holds an appropriate lock before mutating ActiveInstance::State. + // State mutation and notification (NotifyStateUpdate) are intentionally decoupled - see NotifyStateUpdate below. - class InstanceStateUpdateGuard + HubInstanceState UpdateInstanceState(const StorageServerInstance::ExclusiveLockedPtr& Instance, + size_t ActiveInstanceIndex, + HubInstanceState NewState) { - public: - InstanceStateUpdateGuard(Hub& InHub, - std::string_view ModuleId, - HubInstanceState OldState, - HubInstanceState& NewState, - uint16_t BasePort, - const std::string& BaseUri) - : m_Hub(InHub) - , m_ModuleId(ModuleId) - , m_OldState(OldState) - , m_NewState(NewState) - , m_BasePort(BasePort) - , m_BaseUri(BaseUri) - { - } - ~InstanceStateUpdateGuard() { m_Hub.OnStateUpdate(m_ModuleId, m_OldState, m_NewState, m_BasePort, m_BaseUri); } - - private: - Hub& m_Hub; - const std::string m_ModuleId; - HubInstanceState m_OldState; - HubInstanceState& m_NewState; - uint16_t m_BasePort; - const std::string m_BaseUri; - }; + ZEN_ASSERT(Instance); + return UpdateInstanceStateLocked(ActiveInstanceIndex, NewState); + } + HubInstanceState UpdateInstanceState(const StorageServerInstance::SharedLockedPtr& Instance, + size_t ActiveInstanceIndex, + HubInstanceState NewState) + { + ZEN_ASSERT(Instance); + return UpdateInstanceStateLocked(ActiveInstanceIndex, NewState); + } + HubInstanceState UpdateInstanceState(const RwLock::ExclusiveLockScope& HubLock, size_t ActiveInstanceIndex, HubInstanceState NewState) + { + ZEN_UNUSED(HubLock); + return UpdateInstanceStateLocked(ActiveInstanceIndex, NewState); + } + HubInstanceState UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState); + + std::vector<ActiveInstance> m_ActiveInstances; + std::deque<size_t> m_FreeActiveInstanceIndexes; + ResourceMetrics m_ResourceLimits; + SystemMetrics m_HostMetrics; + std::atomic<int> m_MaxInstanceCount = 0; + std::thread m_WatchDog; - void OnStateUpdate(std::string_view ModuleId, - HubInstanceState OldState, - HubInstanceState& NewState, - uint16_t BasePort, - std::string_view BaseUri); + Event m_WatchDogEvent; + void WatchDog(); + bool CheckInstanceStatus(HttpClient& ActivityHttpClient, + StorageServerInstance::SharedLockedPtr&& LockedInstance, + size_t ActiveInstanceIndex); + void AttemptRecoverInstance(std::string_view ModuleId); - friend class InstanceStateUpdateGuard; + void UpdateStats(); + void UpdateCapacityMetrics(); + bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason); + uint16_t GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const; + + Response InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveInstance& Instance)>&& DeprovisionGate); + void CompleteProvision(StorageServerInstance::ExclusiveLockedPtr& Instance, + size_t ActiveInstanceIndex, + HubInstanceState OldState, + bool IsNewInstance); + void CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex); + void CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + + // Notifications may fire slightly out of sync with the Hub's internal State flag. + // The guarantee is that notifications are sent in the correct order, but the State + // flag may be updated either before or after the notification fires depending on the + // code path. Callers must not assume a specific ordering between the two. + void NotifyStateUpdate(std::string_view ModuleId, + HubInstanceState OldState, + HubInstanceState NewState, + uint16_t BasePort, + std::string_view BaseUri); }; #if ZEN_WITH_TESTS diff --git a/src/zenserver/hub/hubinstancestate.h b/src/zenserver/hub/hubinstancestate.h index 2dee89ff0..c895f75d1 100644 --- a/src/zenserver/hub/hubinstancestate.h +++ b/src/zenserver/hub/hubinstancestate.h @@ -9,15 +9,18 @@ namespace zen { enum class HubInstanceState : uint32_t { - Unprovisioned, // Initial state; process not running - Provisioning, // Hydrating and spawning process - Provisioned, // Process running and serving requests - Hibernating, // Shutting down process, preserving data on disk - Hibernated, // Process stopped, data preserved; can be woken - Waking, // Starting process from preserved data - Deprovisioning, // Shutting down process and cleaning up data - Crashed, // Process died unexpectedly while Provisioned; recovery pending - Recovering, // Attempting in-place restart after a crash + // Stable states - possible to initiate state change to a different stable state via the transitioning states + Unprovisioned, // Initial state; process not running + Provisioned, // Process running and serving requests + Hibernated, // Process stopped, data preserved; can be woken + Crashed, // Process died unexpectedly while Provisioned; recovery pending + + // Transitioning states - there is explicit ownership during this state and it may not be stolen + Provisioning, // Unprovisioned -> Provisioned (Hydrating and spawning process) + Hibernating, // Provisioned -> Hibernated (Shutting down process, preserving data on disk) + Waking, // Hibernated -> Provisioned (Starting process from preserved data) + Deprovisioning, // Provisioned/Hibernated/Crashed -> Unprovisioned (Shutting down process and cleaning up data) + Recovering, // Crashed -> Provisioned/Deprovisioned (Attempting in-place restart after a crash) }; std::string_view ToString(HubInstanceState State); diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index 99f0c29f3..6b139dbf1 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -69,177 +69,86 @@ StorageServerInstance::GetProcessMetrics(ProcessMetrics& OutMetrics) const OutMetrics.PeakPagefileUsage = m_PeakPagefileUsage.load(); } -bool +void StorageServerInstance::ProvisionLocked() { - if (m_State.load() == HubInstanceState::Provisioned) + if (m_ServerInstance.IsRunning()) { ZEN_WARN("Storage server instance for module '{}' is already provisioned", m_ModuleId); - return false; - } - - if (m_State.load() == HubInstanceState::Crashed) - { - ZEN_WARN("Storage server instance for module '{}' is in crashed state; re-provisioning from scratch", m_ModuleId); - m_State = HubInstanceState::Unprovisioned; - } - - if (m_State.load() == HubInstanceState::Hibernated) - { - return WakeLocked(); - } - - if (m_State.load() != HubInstanceState::Unprovisioned) - { - ZEN_WARN("Storage server instance for module '{}' is in unexpected state '{}', cannot provision", - m_ModuleId, - ToString(m_State.load())); - return false; + return; } ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_BaseDir); - - m_State = HubInstanceState::Provisioning; try { Hydrate(); SpawnServerProcess(); - m_State = HubInstanceState::Provisioned; - return true; } - catch (...) + catch (const std::exception& Ex) { - m_State = HubInstanceState::Unprovisioned; + ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during provisioning. Reason: {}", + m_ModuleId, + m_BaseDir, + Ex.what()); throw; } } -bool +void StorageServerInstance::DeprovisionLocked() { - const HubInstanceState CurrentState = m_State.load(); - if (CurrentState != HubInstanceState::Provisioned && CurrentState != HubInstanceState::Crashed && - CurrentState != HubInstanceState::Hibernated) + if (m_ServerInstance.IsRunning()) { - ZEN_WARN("Attempted to deprovision storage server instance for module '{}' which is not provisioned (state: '{}')", - m_ModuleId, - ToString(CurrentState)); - return false; - } - - ZEN_INFO("Deprovisioning storage server instance for module '{}'", m_ModuleId); - - m_State = HubInstanceState::Deprovisioning; - if (CurrentState == HubInstanceState::Provisioned) - { - try - { - m_ServerInstance.Shutdown(); - } - catch (...) - { - m_State = HubInstanceState::Provisioned; // Shutdown failed; process may still be running - throw; - } + // m_ServerInstance.Shutdown() never throws. + m_ServerInstance.Shutdown(); } - // Crashed or Hibernated: process already dead; skip Shutdown + // Crashed or Hibernated: process already dead; skip Shutdown. + // Dehydrate preserves instance state for future re-provisioning. Failure means saved state + // may be stale or absent, but the process is already dead so the slot can still be released. + // Swallow the exception and proceed with cleanup rather than leaving the module stuck. try { Dehydrate(); } - catch (...) + catch (const std::exception& Ex) { - m_State = HubInstanceState::Crashed; // Dehydrate failed; process is already dead - throw; + ZEN_WARN("Dehydration of module {} failed during deprovisioning, current state not saved. Reason: {}", m_ModuleId, Ex.what()); } - - m_State = HubInstanceState::Unprovisioned; - return true; } -bool +void StorageServerInstance::HibernateLocked() { // Signal server to shut down, but keep data around for later wake - if (m_State.load() != HubInstanceState::Provisioned) - { - ZEN_WARN("Attempted to hibernate storage server instance for module '{}' which is not provisioned (state: '{}')", - m_ModuleId, - ToString(m_State.load())); - return false; - } - if (!m_ServerInstance.IsRunning()) { - ZEN_WARN("Attempted to hibernate storage server instance for module '{}' which is not running", m_ModuleId); - return false; + return; } - m_State = HubInstanceState::Hibernating; - try - { - m_ServerInstance.Shutdown(); - m_State = HubInstanceState::Hibernated; - return true; - } - catch (...) - { - m_State = HubInstanceState::Provisioned; // Shutdown failed; instance is still running - throw; - } + // m_ServerInstance.Shutdown() never throws. + m_ServerInstance.Shutdown(); } -bool +void StorageServerInstance::WakeLocked() { // Start server in-place using existing data - if (m_State.load() != HubInstanceState::Hibernated) + if (m_ServerInstance.IsRunning()) { - ZEN_WARN("Attempted to wake storage server instance for module '{}' which is not hibernated (state: '{}')", - m_ModuleId, - ToString(m_State.load())); - return false; + return; } - ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); - - m_State = HubInstanceState::Waking; try { SpawnServerProcess(); - m_State = HubInstanceState::Provisioned; - return true; - } - catch (...) - { - m_State = HubInstanceState::Hibernated; - throw; - } -} - -bool -StorageServerInstance::RecoverCrashedLocked() -{ - ZEN_ASSERT(m_State.load() == HubInstanceState::Provisioned); - ZEN_ASSERT(!m_ServerInstance.IsRunning()); - - ZEN_WARN("Storage server instance for module '{}' has crashed; attempting in-place recovery", m_ModuleId); - m_State = HubInstanceState::Recovering; - try - { - SpawnServerProcess(); - m_State = HubInstanceState::Provisioned; - ZEN_INFO("Storage server instance for module '{}' recovered successfully", m_ModuleId); - return true; } catch (const std::exception& Ex) { - ZEN_ERROR("Failed to restart module '{}': {}", m_ModuleId, Ex.what()); - m_State = HubInstanceState::Crashed; - return false; + ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", m_ModuleId, m_BaseDir, Ex.what()); + throw; } } @@ -337,13 +246,13 @@ bool StorageServerInstance::SharedLockedPtr::IsRunning() const { ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->m_State.load() == HubInstanceState::Provisioned && m_Instance->m_ServerInstance.IsRunning(); + return m_Instance->m_ServerInstance.IsRunning(); } void StorageServerInstance::UpdateMetricsLocked() { - if (m_State.load() == HubInstanceState::Provisioned) + if (m_ServerInstance.IsRunning()) { ProcessMetrics Metrics; zen::GetProcessMetrics(m_ServerInstance.GetProcessHandle(), Metrics); @@ -436,42 +345,35 @@ bool StorageServerInstance::ExclusiveLockedPtr::IsRunning() const { ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->m_State.load() == HubInstanceState::Provisioned && m_Instance->m_ServerInstance.IsRunning(); + return m_Instance->m_ServerInstance.IsRunning(); } -bool +void StorageServerInstance::ExclusiveLockedPtr::Provision() { ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->ProvisionLocked(); + m_Instance->ProvisionLocked(); } -bool +void StorageServerInstance::ExclusiveLockedPtr::Deprovision() { ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->DeprovisionLocked(); + m_Instance->DeprovisionLocked(); } -bool +void StorageServerInstance::ExclusiveLockedPtr::Hibernate() { ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->HibernateLocked(); + m_Instance->HibernateLocked(); } -bool +void StorageServerInstance::ExclusiveLockedPtr::Wake() { ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->WakeLocked(); -} - -bool -StorageServerInstance::ExclusiveLockedPtr::RecoverFromCrash() -{ - ZEN_ASSERT(m_Instance != nullptr); - return m_Instance->RecoverCrashedLocked(); + m_Instance->WakeLocked(); } } // namespace zen diff --git a/src/zenserver/hub/storageserverinstance.h b/src/zenserver/hub/storageserverinstance.h index a0ca496dc..94c47630c 100644 --- a/src/zenserver/hub/storageserverinstance.h +++ b/src/zenserver/hub/storageserverinstance.h @@ -2,7 +2,6 @@ #pragma once -#include "hubinstancestate.h" #include "resourcemetrics.h" #include <zenutil/zenserverprocess.h> @@ -38,8 +37,7 @@ public: const ResourceMetrics& GetResourceMetrics() const { return m_ResourceMetrics; } inline std::string_view GetModuleId() const { return m_ModuleId; } - inline HubInstanceState GetState() const { return m_State.load(); } - inline uint16_t GetBasePort() const { return m_Config.BasePort; }; + inline uint16_t GetBasePort() const { return m_Config.BasePort; } void GetProcessMetrics(ProcessMetrics& OutMetrics) const; #if ZEN_PLATFORM_WINDOWS @@ -63,12 +61,7 @@ public: operator bool() const { return m_Instance != nullptr; } std::string_view GetModuleId() const; - HubInstanceState GetState() const - { - ZEN_ASSERT(m_Instance); - return m_Instance->m_State.load(); - } - uint16_t GetBasePort() const + uint16_t GetBasePort() const { ZEN_ASSERT(m_Instance); return m_Instance->GetBasePort(); @@ -114,12 +107,7 @@ public: operator bool() const { return m_Instance != nullptr; } std::string_view GetModuleId() const; - HubInstanceState GetState() const - { - ZEN_ASSERT(m_Instance); - return m_Instance->m_State.load(); - } - uint16_t GetBasePort() const + uint16_t GetBasePort() const { ZEN_ASSERT(m_Instance); return m_Instance->GetBasePort(); @@ -132,15 +120,10 @@ public: return m_Instance->m_ResourceMetrics; } - // For Provision, Deprovision, Hibernate, Wake: - // true = operation performed (state changed) - // false = precondition not met (wrong state), nothing attempted - // throws = operation attempted but failed; m_State corrected before throw - [[nodiscard]] bool Provision(); - [[nodiscard]] bool Deprovision(); - [[nodiscard]] bool Hibernate(); - [[nodiscard]] bool Wake(); - [[nodiscard]] bool RecoverFromCrash(); // true = recovered; false = spawn failed (Crashed), caller must Deprovision() + cleanup + void Provision(); + void Deprovision(); + void Hibernate(); + void Wake(); private: RwLock* m_Lock = nullptr; @@ -150,12 +133,11 @@ public: [[nodiscard]] ExclusiveLockedPtr LockExclusive(bool Wait) { return ExclusiveLockedPtr(m_Lock, this, Wait); } private: - [[nodiscard]] bool ProvisionLocked(); - [[nodiscard]] bool DeprovisionLocked(); + void ProvisionLocked(); + void DeprovisionLocked(); - [[nodiscard]] bool HibernateLocked(); - [[nodiscard]] bool WakeLocked(); - [[nodiscard]] bool RecoverCrashedLocked(); // true = recovered (Provisioned); false = spawn failed (Crashed) + void HibernateLocked(); + void WakeLocked(); void UpdateMetricsLocked(); @@ -164,8 +146,7 @@ private: std::string m_ModuleId; ZenServerInstance m_ServerInstance; - std::atomic<HubInstanceState> m_State{HubInstanceState::Unprovisioned}; - std::filesystem::path m_BaseDir; + std::filesystem::path m_BaseDir; std::filesystem::path m_TempDir; ResourceMetrics m_ResourceMetrics; diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp index f9ff655ec..314031246 100644 --- a/src/zenserver/hub/zenhubserver.cpp +++ b/src/zenserver/hub/zenhubserver.cpp @@ -16,6 +16,7 @@ #include <zencore/windows.h> #include <zenhttp/httpapiservice.h> #include <zenutil/service.h> +#include <zenutil/workerpools.h> ZEN_THIRD_PARTY_INCLUDES_START #include <cxxopts.hpp> @@ -73,6 +74,20 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) Options.add_option("hub", "", + "consul-health-interval-seconds", + "Interval in seconds between Consul health checks", + cxxopts::value<uint32_t>(m_ServerOptions.ConsulHealthIntervalSeconds)->default_value("10"), + "<seconds>"); + + Options.add_option("hub", + "", + "consul-deregister-after-seconds", + "Seconds after which Consul deregisters an unhealthy service", + cxxopts::value<uint32_t>(m_ServerOptions.ConsulDeregisterAfterSeconds)->default_value("30"), + "<seconds>"); + + Options.add_option("hub", + "", "hub-base-port-number", "Base port number for provisioned instances", cxxopts::value<uint16_t>(m_ServerOptions.HubBasePortNumber)->default_value("21000"), @@ -132,6 +147,62 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) cxxopts::value<bool>(m_ServerOptions.HubUseJobObject)->default_value("true"), ""); #endif // ZEN_PLATFORM_WINDOWS + + Options.add_option("hub", + "", + "hub-watchdog-cycle-interval-ms", + "Interval between watchdog cycles in milliseconds", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.CycleIntervalMs)->default_value("3000"), + "<ms>"); + + Options.add_option("hub", + "", + "hub-watchdog-cycle-processing-budget-ms", + "Maximum processing time budget per watchdog cycle in milliseconds", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.CycleProcessingBudgetMs)->default_value("500"), + "<ms>"); + + Options.add_option("hub", + "", + "hub-watchdog-instance-check-throttle-ms", + "Delay between checking successive instances per watchdog cycle in milliseconds", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.InstanceCheckThrottleMs)->default_value("5"), + "<ms>"); + + Options.add_option("hub", + "", + "hub-watchdog-provisioned-inactivity-timeout-seconds", + "Seconds of inactivity after which a provisioned instance is deprovisioned", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ProvisionedInactivityTimeoutSeconds)->default_value("600"), + "<seconds>"); + + Options.add_option("hub", + "", + "hub-watchdog-hibernated-inactivity-timeout-seconds", + "Seconds of inactivity after which a hibernated instance is deprovisioned", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.HibernatedInactivityTimeoutSeconds)->default_value("1800"), + "<seconds>"); + + Options.add_option("hub", + "", + "hub-watchdog-inactivity-check-margin-seconds", + "Margin in seconds subtracted from inactivity timeout before triggering an activity check", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.InactivityCheckMarginSeconds)->default_value("60"), + "<seconds>"); + + Options.add_option("hub", + "", + "hub-watchdog-activity-check-connect-timeout-ms", + "Connect timeout in milliseconds for instance activity check requests", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ActivityCheckConnectTimeoutMs)->default_value("100"), + "<ms>"); + + Options.add_option("hub", + "", + "hub-watchdog-activity-check-request-timeout-ms", + "Request timeout in milliseconds for instance activity check requests", + cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ActivityCheckRequestTimeoutMs)->default_value("200"), + "<ms>"); } void @@ -180,7 +251,8 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId, { return; } - if (NewState == HubInstanceState::Provisioned) + + if (NewState == HubInstanceState::Provisioning || NewState == HubInstanceState::Provisioned) { consul::ServiceRegistrationInfo ServiceInfo{ .ServiceId = std::string(ModuleId), @@ -190,8 +262,12 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId, .Tags = std::vector<std::pair<std::string, std::string>>{std::make_pair("module", std::string(ModuleId)), std::make_pair("zen-hub", std::string(HubInstanceId)), std::make_pair("version", std::string(ZEN_CFG_VERSION))}, - .HealthIntervalSeconds = 10, - .DeregisterAfterSeconds = 30}; + .HealthIntervalSeconds = NewState == HubInstanceState::Provisioning + ? 0u + : m_ConsulHealthIntervalSeconds, // Disable health checks while not finished provisioning + .DeregisterAfterSeconds = NewState == HubInstanceState::Provisioning + ? 0u + : m_ConsulDeregisterAfterSeconds}; // Disable health checks while not finished provisioning if (!m_ConsulClient->RegisterService(ServiceInfo)) { @@ -218,7 +294,7 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId, ZEN_INFO("Deregistered storage server instance for module '{}' at port {} from Consul", ModuleId, Info.Port); } } - // Transitional states (Provisioning, Deprovisioning, Hibernating, Waking, Recovering, Crashed) + // Transitional states (Deprovisioning, Hibernating, Waking, Recovering, Crashed) // and Hibernated are intentionally ignored. } @@ -300,21 +376,32 @@ ZenHubServer::InitializeState(const ZenHubServerConfig& ServerConfig) void ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) { - ZEN_UNUSED(ServerConfig); - ZEN_INFO("instantiating Hub"); m_Hub = std::make_unique<Hub>( - Hub::Configuration{.UseJobObject = ServerConfig.HubUseJobObject, - .BasePortNumber = ServerConfig.HubBasePortNumber, - .InstanceLimit = ServerConfig.HubInstanceLimit, - .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount, - .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit, - .InstanceConfigPath = ServerConfig.HubInstanceConfigPath, - .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification}, + Hub::Configuration{ + .UseJobObject = ServerConfig.HubUseJobObject, + .BasePortNumber = ServerConfig.HubBasePortNumber, + .InstanceLimit = ServerConfig.HubInstanceLimit, + .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount, + .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit, + .InstanceConfigPath = ServerConfig.HubInstanceConfigPath, + .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification, + .WatchDog = + { + .CycleInterval = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleIntervalMs), + .CycleProcessingBudget = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleProcessingBudgetMs), + .InstanceCheckThrottle = std::chrono::milliseconds(ServerConfig.WatchdogConfig.InstanceCheckThrottleMs), + .ProvisionedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.ProvisionedInactivityTimeoutSeconds), + .HibernatedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.HibernatedInactivityTimeoutSeconds), + .InactivityCheckMargin = std::chrono::seconds(ServerConfig.WatchdogConfig.InactivityCheckMarginSeconds), + .ActivityCheckConnectTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckConnectTimeoutMs), + .ActivityCheckRequestTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckRequestTimeoutMs), + }}, ZenServerEnvironment(ZenServerEnvironment::Hub, ServerConfig.DataDir / "hub", ServerConfig.DataDir / "servers", ServerConfig.HubInstanceHttpClass), + &GetMediumWorkerPool(EWorkloadType::Background), m_ConsulClient ? Hub::AsyncModuleStateChangeCallbackFunc{[this, HubInstanceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId)]( std::string_view ModuleId, const HubProvisionedInstanceInfo& Info, @@ -328,10 +415,10 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) m_ApiService = std::make_unique<zen::HttpApiService>(*m_Http); ZEN_INFO("instantiating hub service"); - m_HubService = std::make_unique<HttpHubService>(*m_Hub); + m_HubService = std::make_unique<HttpHubService>(*m_Hub, m_StatsService, m_StatusService); m_HubService->SetNotificationEndpoint(ServerConfig.UpstreamNotificationEndpoint, ServerConfig.InstanceId); - m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService); + m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); } void @@ -383,7 +470,9 @@ ZenHubServer::InitializeConsulRegistration(const ZenHubServerConfig& ServerConfi try { - m_ConsulClient = std::make_unique<consul::ConsulClient>(ServerConfig.ConsulEndpoint, ConsulAccessToken); + m_ConsulClient = std::make_unique<consul::ConsulClient>(ServerConfig.ConsulEndpoint, ConsulAccessToken); + m_ConsulHealthIntervalSeconds = ServerConfig.ConsulHealthIntervalSeconds; + m_ConsulDeregisterAfterSeconds = ServerConfig.ConsulDeregisterAfterSeconds; consul::ServiceRegistrationInfo Info; Info.ServiceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId); @@ -397,6 +486,8 @@ ZenHubServer::InitializeConsulRegistration(const ZenHubServerConfig& ServerConfi std::make_pair("base-port-number", fmt::format("{}", ServerConfig.HubBasePortNumber)), std::make_pair("instance-limit", fmt::format("{}", ServerConfig.HubInstanceLimit)), std::make_pair("use-job-object", fmt::format("{}", ServerConfig.HubUseJobObject))}; + Info.HealthIntervalSeconds = ServerConfig.ConsulHealthIntervalSeconds; + Info.DeregisterAfterSeconds = ServerConfig.ConsulDeregisterAfterSeconds; m_ConsulRegistration = std::make_unique<consul::ServiceRegistration>(m_ConsulClient.get(), Info); diff --git a/src/zenserver/hub/zenhubserver.h b/src/zenserver/hub/zenhubserver.h index 0fb192b9f..77df3eaa3 100644 --- a/src/zenserver/hub/zenhubserver.h +++ b/src/zenserver/hub/zenhubserver.h @@ -20,20 +20,35 @@ class HttpApiService; class HttpFrontendService; class HttpHubService; +struct ZenHubWatchdogConfig +{ + uint32_t CycleIntervalMs = 3000; + uint32_t CycleProcessingBudgetMs = 500; + uint32_t InstanceCheckThrottleMs = 5; + uint32_t ProvisionedInactivityTimeoutSeconds = 600; + uint32_t HibernatedInactivityTimeoutSeconds = 1800; + uint32_t InactivityCheckMarginSeconds = 60; // Activity check is triggered this far before the inactivity timeout + uint32_t ActivityCheckConnectTimeoutMs = 100; + uint32_t ActivityCheckRequestTimeoutMs = 200; +}; + struct ZenHubServerConfig : public ZenServerConfig { std::string UpstreamNotificationEndpoint; std::string InstanceId; // For use in notifications std::string ConsulEndpoint; // If set, enables Consul service registration std::string ConsulTokenEnv; // Environment variable name to read a Consul token from; defaults to CONSUL_HTTP_TOKEN if empty - uint16_t HubBasePortNumber = 21000; - int HubInstanceLimit = 1000; - bool HubUseJobObject = true; - std::string HubInstanceHttpClass = "asio"; - uint32_t HubInstanceHttpThreadCount = 0; // Automatic - int HubInstanceCoreLimit = 0; // Automatic - std::filesystem::path HubInstanceConfigPath; // Path to Lua config file - std::string HydrationTargetSpecification; // hydration/dehydration target specification + uint32_t ConsulHealthIntervalSeconds = 10; // Interval in seconds between Consul health checks + uint32_t ConsulDeregisterAfterSeconds = 30; // Seconds before Consul deregisters an unhealthy service + uint16_t HubBasePortNumber = 21000; + int HubInstanceLimit = 1000; + bool HubUseJobObject = true; + std::string HubInstanceHttpClass = "asio"; + uint32_t HubInstanceHttpThreadCount = 0; // Automatic + int HubInstanceCoreLimit = 0; // Automatic + std::filesystem::path HubInstanceConfigPath; // Path to Lua config file + std::string HydrationTargetSpecification; // hydration/dehydration target specification + ZenHubWatchdogConfig WatchdogConfig; }; class Hub; @@ -108,6 +123,8 @@ private: std::unique_ptr<consul::ConsulClient> m_ConsulClient; std::unique_ptr<consul::ServiceRegistration> m_ConsulRegistration; + uint32_t m_ConsulHealthIntervalSeconds = 10; + uint32_t m_ConsulDeregisterAfterSeconds = 30; void InitializeState(const ZenHubServerConfig& ServerConfig); void InitializeServices(const ZenHubServerConfig& ServerConfig); diff --git a/src/zenserver/proxy/httpproxystats.cpp b/src/zenserver/proxy/httpproxystats.cpp index 6aa3e5c9b..337be2417 100644 --- a/src/zenserver/proxy/httpproxystats.cpp +++ b/src/zenserver/proxy/httpproxystats.cpp @@ -140,6 +140,12 @@ HttpProxyStatsService::HandleRecordStatus(HttpServerRequest& Request) Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); } +void +HttpProxyStatsService::HandleStatsRequest(HttpServerRequest& Request) +{ + Request.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + CbObject HttpProxyStatsService::CollectStats() { @@ -225,10 +231,4 @@ HttpProxyStatsService::CollectStats() return Cbo.Save(); } -void -HttpProxyStatsService::HandleStatsRequest(HttpServerRequest& Request) -{ - Request.WriteResponse(HttpResponseCode::OK, CollectStats()); -} - } // namespace zen diff --git a/src/zenserver/proxy/zenproxyserver.cpp b/src/zenserver/proxy/zenproxyserver.cpp index cf84c159a..7e59a7b7e 100644 --- a/src/zenserver/proxy/zenproxyserver.cpp +++ b/src/zenserver/proxy/zenproxyserver.cpp @@ -324,7 +324,7 @@ ZenProxyServer::Initialize(const ZenProxyServerConfig& ServerConfig, ZenServerSt m_ApiService = std::make_unique<HttpApiService>(*m_Http); m_Http->RegisterService(*m_ApiService); - m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService); + m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); m_Http->RegisterService(*m_FrontendService); std::string DefaultRecordDir = (m_DataRoot / "recordings").string(); diff --git a/src/zenserver/sessions/httpsessions.cpp b/src/zenserver/sessions/httpsessions.cpp index 429ba98cf..fdf2e1f21 100644 --- a/src/zenserver/sessions/httpsessions.cpp +++ b/src/zenserver/sessions/httpsessions.cpp @@ -49,6 +49,21 @@ HttpSessionsService::HandleRequest(HttpServerRequest& Request) } } +void +HttpSessionsService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpSessionsService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void +HttpSessionsService::HandleStatsRequest(HttpServerRequest& HttpReq) +{ + HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + CbObject HttpSessionsService::CollectStats() { @@ -72,19 +87,10 @@ HttpSessionsService::CollectStats() return Cbo.Save(); } -void -HttpSessionsService::HandleStatsRequest(HttpServerRequest& HttpReq) +uint64_t +HttpSessionsService::GetActivityCounter() { - HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats()); -} - -void -HttpSessionsService::HandleStatusRequest(HttpServerRequest& Request) -{ - ZEN_TRACE_CPU("HttpSessionsService::Status"); - CbObjectWriter Cbo; - Cbo << "ok" << true; - Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); + return m_HttpRequests.Count(); } void diff --git a/src/zenserver/sessions/httpsessions.h b/src/zenserver/sessions/httpsessions.h index a5783a46b..86a23f835 100644 --- a/src/zenserver/sessions/httpsessions.h +++ b/src/zenserver/sessions/httpsessions.h @@ -29,9 +29,10 @@ public: virtual const char* BaseUri() const override; virtual void HandleRequest(HttpServerRequest& Request) override; - virtual CbObject CollectStats() override; - virtual void HandleStatsRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; void SetSelfSessionId(const Oid& Id) { m_SelfSessionId = Id; } diff --git a/src/zenserver/storage/admin/admin.h b/src/zenserver/storage/admin/admin.h index ee3da4579..361153e42 100644 --- a/src/zenserver/storage/admin/admin.h +++ b/src/zenserver/storage/admin/admin.h @@ -13,7 +13,7 @@ class JobQueue; class ZenCacheStore; struct ZenServerConfig; -class HttpAdminService : public zen::HttpService +class HttpAdminService : public HttpService { public: struct LogPaths @@ -31,7 +31,7 @@ public: ~HttpAdminService(); virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; private: HttpRequestRouter m_Router; diff --git a/src/zenserver/storage/buildstore/httpbuildstore.cpp b/src/zenserver/storage/buildstore/httpbuildstore.cpp index de9589078..bbbb0c37b 100644 --- a/src/zenserver/storage/buildstore/httpbuildstore.cpp +++ b/src/zenserver/storage/buildstore/httpbuildstore.cpp @@ -605,6 +605,26 @@ HttpBuildStoreService::BlobsExistsRequest(HttpRouterRequest& Req) return ServerRequest.WriteResponse(HttpResponseCode::OK, ResponseObject); } +void +HttpBuildStoreService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Cbo.BeginObject("capabilities"); + { + Cbo << "maxrangecountperrequest" << MaxRangeCountPerRequestSupported; + } + Cbo.EndObject(); // capabilities + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void +HttpBuildStoreService::HandleStatsRequest(HttpServerRequest& Request) +{ + Request.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + CbObject HttpBuildStoreService::CollectStats() { @@ -663,24 +683,10 @@ HttpBuildStoreService::CollectStats() return Cbo.Save(); } -void -HttpBuildStoreService::HandleStatsRequest(HttpServerRequest& Request) -{ - Request.WriteResponse(HttpResponseCode::OK, CollectStats()); -} - -void -HttpBuildStoreService::HandleStatusRequest(HttpServerRequest& Request) +uint64_t +HttpBuildStoreService::GetActivityCounter() { - ZEN_TRACE_CPU("HttpBuildStoreService::Status"); - CbObjectWriter Cbo; - Cbo << "ok" << true; - Cbo.BeginObject("capabilities"); - { - Cbo << "maxrangecountperrequest" << MaxRangeCountPerRequestSupported; - } - Cbo.EndObject(); // capabilities - Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); + return m_HttpRequests.Count(); } } // namespace zen diff --git a/src/zenserver/storage/buildstore/httpbuildstore.h b/src/zenserver/storage/buildstore/httpbuildstore.h index 2a09b71cf..864d12edc 100644 --- a/src/zenserver/storage/buildstore/httpbuildstore.h +++ b/src/zenserver/storage/buildstore/httpbuildstore.h @@ -13,18 +13,19 @@ namespace zen { class BuildStore; -class HttpBuildStoreService final : public zen::HttpService, public IHttpStatusProvider, public IHttpStatsProvider +class HttpBuildStoreService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider { public: HttpBuildStoreService(HttpStatusService& StatusService, HttpStatsService& StatsService, BuildStore& Store); virtual ~HttpBuildStoreService(); virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; - virtual CbObject CollectStats() override; - virtual void HandleStatsRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; private: struct BuildStoreStats diff --git a/src/zenserver/storage/cache/httpstructuredcache.cpp b/src/zenserver/storage/cache/httpstructuredcache.cpp index bbdb03ba4..c1727270c 100644 --- a/src/zenserver/storage/cache/httpstructuredcache.cpp +++ b/src/zenserver/storage/cache/httpstructuredcache.cpp @@ -1827,113 +1827,12 @@ HttpStructuredCacheService::HandleRpcRequest(HttpServerRequest& Request, std::st } } -CbObject -HttpStructuredCacheService::CollectStats() +void +HttpStructuredCacheService::HandleStatusRequest(HttpServerRequest& Request) { - ZEN_MEMSCOPE(GetCacheHttpTag()); - CbObjectWriter Cbo; - - EmitSnapshot("requests", m_HttpRequests, Cbo); - - const uint64_t HitCount = m_CacheStats.HitCount; - const uint64_t UpstreamHitCount = m_CacheStats.UpstreamHitCount; - const uint64_t MissCount = m_CacheStats.MissCount; - const uint64_t WriteCount = m_CacheStats.WriteCount; - const uint64_t BadRequestCount = m_CacheStats.BadRequestCount; - struct CidStoreStats StoreStats = m_CidStore.Stats(); - const uint64_t ChunkHitCount = StoreStats.HitCount; - const uint64_t ChunkMissCount = StoreStats.MissCount; - const uint64_t ChunkWriteCount = StoreStats.WriteCount; - const uint64_t TotalCount = HitCount + MissCount; - - const uint64_t RpcRequests = m_CacheStats.RpcRequests; - const uint64_t RpcRecordRequests = m_CacheStats.RpcRecordRequests; - const uint64_t RpcRecordBatchRequests = m_CacheStats.RpcRecordBatchRequests; - const uint64_t RpcValueRequests = m_CacheStats.RpcValueRequests; - const uint64_t RpcValueBatchRequests = m_CacheStats.RpcValueBatchRequests; - const uint64_t RpcChunkRequests = m_CacheStats.RpcChunkRequests; - const uint64_t RpcChunkBatchRequests = m_CacheStats.RpcChunkBatchRequests; - - const CidStoreSize CidSize = m_CidStore.TotalSize(); - const CacheStoreSize CacheSize = m_CacheStore.TotalSize(); - - Cbo.BeginObject("cache"); - { - Cbo << "badrequestcount" << BadRequestCount; - Cbo.BeginObject("rpc"); - Cbo << "count" << RpcRequests; - Cbo << "ops" << RpcRecordBatchRequests + RpcValueBatchRequests + RpcChunkBatchRequests; - Cbo.BeginObject("records"); - Cbo << "count" << RpcRecordRequests; - Cbo << "ops" << RpcRecordBatchRequests; - Cbo.EndObject(); - Cbo.BeginObject("values"); - Cbo << "count" << RpcValueRequests; - Cbo << "ops" << RpcValueBatchRequests; - Cbo.EndObject(); - Cbo.BeginObject("chunks"); - Cbo << "count" << RpcChunkRequests; - Cbo << "ops" << RpcChunkBatchRequests; - Cbo.EndObject(); - Cbo.EndObject(); - - Cbo.BeginObject("size"); - { - Cbo << "disk" << CacheSize.DiskSize; - Cbo << "memory" << CacheSize.MemorySize; - } - Cbo.EndObject(); - - Cbo << "hits" << HitCount << "misses" << MissCount << "writes" << WriteCount; - Cbo << "hit_ratio" << (TotalCount > 0 ? (double(HitCount) / double(TotalCount)) : 0.0); - - if (m_UpstreamCache.IsActive()) - { - Cbo << "upstream_ratio" << (HitCount > 0 ? (double(UpstreamHitCount) / double(HitCount)) : 0.0); - Cbo << "upstream_hits" << m_CacheStats.UpstreamHitCount; - } - - Cbo << "cidhits" << ChunkHitCount << "cidmisses" << ChunkMissCount << "cidwrites" << ChunkWriteCount; - - { - ZenCacheStore::CacheStoreStats StoreStatsData = m_CacheStore.Stats(); - Cbo.BeginObject("store"); - Cbo << "hits" << StoreStatsData.HitCount << "misses" << StoreStatsData.MissCount << "writes" << StoreStatsData.WriteCount - << "rejected_writes" << StoreStatsData.RejectedWriteCount << "rejected_reads" << StoreStatsData.RejectedReadCount; - const uint64_t StoreTotal = StoreStatsData.HitCount + StoreStatsData.MissCount; - Cbo << "hit_ratio" << (StoreTotal > 0 ? (double(StoreStatsData.HitCount) / double(StoreTotal)) : 0.0); - EmitSnapshot("read", StoreStatsData.GetOps, Cbo); - EmitSnapshot("write", StoreStatsData.PutOps, Cbo); - Cbo.EndObject(); - } - } - Cbo.EndObject(); - - if (m_UpstreamCache.IsActive()) - { - EmitSnapshot("upstream_gets", m_UpstreamGetRequestTiming, Cbo); - Cbo.BeginObject("upstream"); - { - m_UpstreamCache.GetStatus(Cbo); - } - Cbo.EndObject(); - } - - Cbo.BeginObject("cid"); - { - Cbo.BeginObject("size"); - { - Cbo << "tiny" << CidSize.TinySize; - Cbo << "small" << CidSize.SmallSize; - Cbo << "large" << CidSize.LargeSize; - Cbo << "total" << CidSize.TotalSize; - } - Cbo.EndObject(); - } - Cbo.EndObject(); - - return Cbo.Save(); + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); } void @@ -1944,12 +1843,6 @@ HttpStructuredCacheService::HandleStatsRequest(HttpServerRequest& Request) bool ShowCidStoreStats = Request.GetQueryParams().GetValue("cidstorestats") == "true"; bool ShowCacheStoreStats = Request.GetQueryParams().GetValue("cachestorestats") == "true"; - if (!ShowCidStoreStats && !ShowCacheStoreStats) - { - Request.WriteResponse(HttpResponseCode::OK, CollectStats()); - return; - } - // Full stats with optional detailed store/cid breakdowns CbObjectWriter Cbo; @@ -2156,12 +2049,38 @@ HttpStructuredCacheService::HandleStatsRequest(HttpServerRequest& Request) Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); } -void -HttpStructuredCacheService::HandleStatusRequest(HttpServerRequest& Request) +CbObject +HttpStructuredCacheService::CollectStats() { + ZEN_TRACE_CPU("HttpStructuredCacheService::Stats"); + ZEN_MEMSCOPE(GetCacheHttpTag()); + CbObjectWriter Cbo; - Cbo << "ok" << true; - Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); + + EmitSnapshot("requests", m_HttpRequests, Cbo); + + const CacheStoreSize CacheSize = m_CacheStore.TotalSize(); + + Cbo.BeginObject("cache"); + { + Cbo.BeginObject("size"); + { + Cbo << "disk" << CacheSize.DiskSize; + Cbo << "memory" << CacheSize.MemorySize; + } + Cbo.EndObject(); + + Cbo << "hits" << m_CacheStats.HitCount << "misses" << m_CacheStats.MissCount; + } + Cbo.EndObject(); + + return Cbo.Save(); +} + +uint64_t +HttpStructuredCacheService::GetActivityCounter() +{ + return m_HttpRequests.Count(); } bool diff --git a/src/zenserver/storage/cache/httpstructuredcache.h b/src/zenserver/storage/cache/httpstructuredcache.h index d462415d4..fc80b449e 100644 --- a/src/zenserver/storage/cache/httpstructuredcache.h +++ b/src/zenserver/storage/cache/httpstructuredcache.h @@ -105,9 +105,10 @@ private: void HandleCacheRequest(HttpServerRequest& Request); void HandleCacheNamespaceRequest(HttpServerRequest& Request, std::string_view Namespace); void HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket); - virtual CbObject CollectStats() override; - virtual void HandleStatsRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; bool AreDiskWritesAllowed() const; diff --git a/src/zenserver/storage/objectstore/objectstore.cpp b/src/zenserver/storage/objectstore/objectstore.cpp index 493326a32..d6516fa1a 100644 --- a/src/zenserver/storage/objectstore/objectstore.cpp +++ b/src/zenserver/storage/objectstore/objectstore.cpp @@ -14,6 +14,7 @@ #include "zencore/compactbinarybuilder.h" #include "zenhttp/httpcommon.h" #include "zenhttp/httpserver.h" +#include "zenhttp/httpstats.h" #include <filesystem> #include <thread> @@ -220,17 +221,20 @@ private: StringBuilderBase& Builder; }; -HttpObjectStoreService::HttpObjectStoreService(HttpStatusService& StatusService, ObjectStoreConfig Cfg) -: m_StatusService(StatusService) +HttpObjectStoreService::HttpObjectStoreService(HttpStatsService& StatsService, HttpStatusService& StatusService, ObjectStoreConfig Cfg) +: m_StatsService(StatsService) +, m_StatusService(StatusService) , m_Cfg(std::move(Cfg)) { - Inititalize(); + Initialize(); + m_StatsService.RegisterHandler("obj", *this); m_StatusService.RegisterHandler("obj", *this); } HttpObjectStoreService::~HttpObjectStoreService() { m_StatusService.UnregisterHandler("obj", *this); + m_StatsService.UnregisterHandler("obj", *this); } const char* @@ -240,8 +244,10 @@ HttpObjectStoreService::BaseUri() const } void -HttpObjectStoreService::HandleRequest(zen::HttpServerRequest& Request) +HttpObjectStoreService::HandleRequest(HttpServerRequest& Request) { + metrics::OperationTiming::Scope $(m_HttpRequests); + if (m_Router.HandleRequest(Request) == false) { ZEN_LOG_WARN(LogObj, "No route found for {0}", Request.RelativeUri()); @@ -258,12 +264,36 @@ HttpObjectStoreService::HandleStatusRequest(HttpServerRequest& Request) } void -HttpObjectStoreService::Inititalize() +HttpObjectStoreService::HandleStatsRequest(HttpServerRequest& Request) +{ + Request.WriteResponse(HttpResponseCode::OK, CollectStats()); +} + +CbObject +HttpObjectStoreService::CollectStats() +{ + ZEN_TRACE_CPU("HttpObjectStoreService::Stats"); + CbObjectWriter Cbo; + + EmitSnapshot("requests", m_HttpRequests, Cbo); + Cbo << "total_bytes_served" << m_TotalBytesServed.load(); + + return Cbo.Save(); +} + +uint64_t +HttpObjectStoreService::GetActivityCounter() +{ + return m_HttpRequests.Count(); +} + +void +HttpObjectStoreService::Initialize() { - ZEN_TRACE_CPU("HttpObjectStoreService::Inititalize"); + ZEN_TRACE_CPU("HttpObjectStoreService::Initialize"); namespace fs = std::filesystem; - ZEN_LOG_INFO(LogObj, "Initialzing Object Store in '{}'", m_Cfg.RootDirectory); + ZEN_LOG_INFO(LogObj, "Initializing Object Store in '{}'", m_Cfg.RootDirectory); const fs::path BucketsPath = m_Cfg.RootDirectory / "buckets"; if (!IsDir(BucketsPath)) @@ -281,27 +311,27 @@ HttpObjectStoreService::Inititalize() m_Router.RegisterRoute( "", - [this](zen::HttpRouterRequest& Request) { ListBuckets(Request); }, + [this](HttpRouterRequest& Request) { ListBuckets(Request); }, HttpVerb::kGet); m_Router.RegisterRoute( "bucket", - [this](zen::HttpRouterRequest& Request) { ListBuckets(Request); }, + [this](HttpRouterRequest& Request) { ListBuckets(Request); }, HttpVerb::kGet); m_Router.RegisterRoute( "bucket", - [this](zen::HttpRouterRequest& Request) { CreateBucket(Request); }, + [this](HttpRouterRequest& Request) { CreateBucket(Request); }, HttpVerb::kPost | HttpVerb::kPut); m_Router.RegisterRoute( "bucket", - [this](zen::HttpRouterRequest& Request) { DeleteBucket(Request); }, + [this](HttpRouterRequest& Request) { DeleteBucket(Request); }, HttpVerb::kDelete); m_Router.RegisterRoute( "bucket/{path}", - [this](zen::HttpRouterRequest& Request) { + [this](HttpRouterRequest& Request) { const std::string_view Path = Request.GetCapture(1); const auto Sep = Path.find_last_of('.'); const bool IsObject = Sep != std::string_view::npos && Path.size() - Sep > 0; @@ -319,7 +349,7 @@ HttpObjectStoreService::Inititalize() m_Router.RegisterRoute( "bucket/{bucket}/{path}", - [this](zen::HttpRouterRequest& Request) { PutObject(Request); }, + [this](HttpRouterRequest& Request) { PutObject(Request); }, HttpVerb::kPost | HttpVerb::kPut); } @@ -327,7 +357,7 @@ std::filesystem::path HttpObjectStoreService::GetBucketDirectory(std::string_view BucketName) { { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); if (const auto It = std::find_if(std::begin(m_Cfg.Buckets), std::end(m_Cfg.Buckets), @@ -342,7 +372,7 @@ HttpObjectStoreService::GetBucketDirectory(std::string_view BucketName) } void -HttpObjectStoreService::ListBuckets(zen::HttpRouterRequest& Request) +HttpObjectStoreService::ListBuckets(HttpRouterRequest& Request) { namespace fs = std::filesystem; @@ -351,7 +381,7 @@ HttpObjectStoreService::ListBuckets(zen::HttpRouterRequest& Request) CbObjectWriter Response; Response.BeginArray("buckets"); { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); // Configured buckets for (const ObjectStoreConfig::BucketConfig& Bucket : m_Cfg.Buckets) @@ -428,13 +458,13 @@ HttpObjectStoreService::ListBuckets(zen::HttpRouterRequest& Request) } Response.EndArray(); - Response << "total_bytes_served" << TotalBytesServed.load(); + Response << "total_bytes_served" << m_TotalBytesServed.load(); return Request.ServerRequest().WriteResponse(HttpResponseCode::OK, Response.Save()); } void -HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) +HttpObjectStoreService::CreateBucket(HttpRouterRequest& Request) { namespace fs = std::filesystem; @@ -448,7 +478,7 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) const fs::path BucketPath = m_Cfg.RootDirectory / "buckets" / BucketName; { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); if (!IsDir(BucketPath)) { CreateDirectories(BucketPath); @@ -462,7 +492,7 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path) +HttpObjectStoreService::ListBucket(HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; @@ -533,7 +563,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s if (IsDir(FullPath)) { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); Traversal.TraverseFileSystem(FullPath, FileVisitor); } CbObject Result = FileVisitor.GetResult(); @@ -552,7 +582,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s } void -HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request) +HttpObjectStoreService::DeleteBucket(HttpRouterRequest& Request) { namespace fs = std::filesystem; @@ -566,7 +596,7 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request) const fs::path BucketPath = m_Cfg.RootDirectory / "buckets" / BucketName; { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); DeleteDirectories(BucketPath); } @@ -575,7 +605,7 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string_view Path) +HttpObjectStoreService::GetObject(HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; @@ -606,7 +636,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st return Request.ServerRequest().WriteResponse(HttpResponseCode::NotFound); } - zen::HttpRanges Ranges; + HttpRanges Ranges; if (Request.ServerRequest().TryGetRanges(Ranges); Ranges.size() > 1) { // Only a single range is supported @@ -615,7 +645,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st FileContents File; { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); File = ReadFile(FilePath); } @@ -635,7 +665,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st if (Ranges.empty()) { - const uint64_t TotalServed = TotalBytesServed.fetch_add(FileBuf.Size()) + FileBuf.Size(); + const uint64_t TotalServed = m_TotalBytesServed.fetch_add(FileBuf.Size()) + FileBuf.Size(); ZEN_LOG_DEBUG(LogObj, "GET - '{}/{}' ({}) [OK] (Served: {})", @@ -650,7 +680,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st { const auto Range = Ranges[0]; const uint64_t RangeSize = 1 + (Range.End - Range.Start); - const uint64_t TotalServed = TotalBytesServed.fetch_add(RangeSize) + RangeSize; + const uint64_t TotalServed = m_TotalBytesServed.fetch_add(RangeSize) + RangeSize; ZEN_LOG_DEBUG(LogObj, "GET - '{}/{}' (Range: {}-{}) ({}/{}) [OK] (Served: {})", @@ -674,7 +704,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st } void -HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request) +HttpObjectStoreService::PutObject(HttpRouterRequest& Request) { namespace fs = std::filesystem; @@ -699,7 +729,7 @@ HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request) const fs::path FileDirectory = FilePath.parent_path(); { - std::lock_guard _(BucketsMutex); + std::lock_guard _(m_BucketsMutex); if (!IsDir(FileDirectory)) { diff --git a/src/zenserver/storage/objectstore/objectstore.h b/src/zenserver/storage/objectstore/objectstore.h index cc47b50c4..f51254357 100644 --- a/src/zenserver/storage/objectstore/objectstore.h +++ b/src/zenserver/storage/objectstore/objectstore.h @@ -11,6 +11,7 @@ namespace zen { class HttpRouterRequest; +class HttpStatsService; struct ObjectStoreConfig { @@ -24,31 +25,36 @@ struct ObjectStoreConfig std::vector<BucketConfig> Buckets; }; -class HttpObjectStoreService final : public zen::HttpService, public IHttpStatusProvider +class HttpObjectStoreService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider { public: - HttpObjectStoreService(HttpStatusService& StatusService, ObjectStoreConfig Cfg); + HttpObjectStoreService(HttpStatsService& StatsService, HttpStatusService& StatusService, ObjectStoreConfig Cfg); virtual ~HttpObjectStoreService(); virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; private: - void Inititalize(); + void Initialize(); std::filesystem::path GetBucketDirectory(std::string_view BucketName); - void ListBuckets(zen::HttpRouterRequest& Request); - void CreateBucket(zen::HttpRouterRequest& Request); - void ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path); - void DeleteBucket(zen::HttpRouterRequest& Request); - void GetObject(zen::HttpRouterRequest& Request, const std::string_view Path); - void PutObject(zen::HttpRouterRequest& Request); - - HttpStatusService& m_StatusService; - ObjectStoreConfig m_Cfg; - std::mutex BucketsMutex; - HttpRequestRouter m_Router; - std::atomic_uint64_t TotalBytesServed{0}; + void ListBuckets(HttpRouterRequest& Request); + void CreateBucket(HttpRouterRequest& Request); + void ListBucket(HttpRouterRequest& Request, const std::string_view Path); + void DeleteBucket(HttpRouterRequest& Request); + void GetObject(HttpRouterRequest& Request, const std::string_view Path); + void PutObject(HttpRouterRequest& Request); + + HttpStatsService& m_StatsService; + HttpStatusService& m_StatusService; + ObjectStoreConfig m_Cfg; + std::mutex m_BucketsMutex; + HttpRequestRouter m_Router; + std::atomic_uint64_t m_TotalBytesServed{0}; + metrics::OperationTiming m_HttpRequests; }; } // namespace zen diff --git a/src/zenserver/storage/projectstore/httpprojectstore.cpp b/src/zenserver/storage/projectstore/httpprojectstore.cpp index 03b8aa382..a7c8c66b6 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.cpp +++ b/src/zenserver/storage/projectstore/httpprojectstore.cpp @@ -836,8 +836,17 @@ HttpProjectService::HandleRequest(HttpServerRequest& Request) } } -CbObject -HttpProjectService::CollectStats() +void +HttpProjectService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpProjectService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void +HttpProjectService::HandleStatsRequest(HttpServerRequest& HttpReq) { ZEN_TRACE_CPU("ProjectService::Stats"); @@ -848,6 +857,8 @@ HttpProjectService::CollectStats() EmitSnapshot("requests", m_HttpRequests, Cbo); + Cbo << "project_count" << (uint64_t)m_ProjectStore->ProjectCount(); + Cbo.BeginObject("store"); { Cbo.BeginObject("size"); @@ -903,22 +914,25 @@ HttpProjectService::CollectStats() } Cbo.EndObject(); - return Cbo.Save(); + HttpReq.WriteResponse(HttpResponseCode::OK, Cbo.Save()); } -void -HttpProjectService::HandleStatsRequest(HttpServerRequest& HttpReq) +CbObject +HttpProjectService::CollectStats() { - HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats()); + CbObjectWriter Cbo; + // CollectStats does not use the HandleStatsRequest implementation to get stats since it uses some heavy operations such as + // m_ProjectStore->StorageSize(); + EmitSnapshot("requests", m_HttpRequests, Cbo); + Cbo << "project_count" << (uint64_t)m_ProjectStore->ProjectCount(); + + return Cbo.Save(); } -void -HttpProjectService::HandleStatusRequest(HttpServerRequest& Request) +uint64_t +HttpProjectService::GetActivityCounter() { - ZEN_TRACE_CPU("HttpProjectService::Status"); - CbObjectWriter Cbo; - Cbo << "ok" << true; - Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); + return m_HttpRequests.Count(); } void diff --git a/src/zenserver/storage/projectstore/httpprojectstore.h b/src/zenserver/storage/projectstore/httpprojectstore.h index 917337324..e3ed02f26 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.h +++ b/src/zenserver/storage/projectstore/httpprojectstore.h @@ -53,9 +53,10 @@ public: virtual const char* BaseUri() const override; virtual void HandleRequest(HttpServerRequest& Request) override; - virtual CbObject CollectStats() override; - virtual void HandleStatsRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; private: struct ProjectStats diff --git a/src/zenserver/storage/upstream/upstreamservice.h b/src/zenserver/storage/upstream/upstreamservice.h index f1da03c8c..c0063c055 100644 --- a/src/zenserver/storage/upstream/upstreamservice.h +++ b/src/zenserver/storage/upstream/upstreamservice.h @@ -9,14 +9,14 @@ namespace zen { class AuthMgr; class UpstreamCache; -class HttpUpstreamService final : public zen::HttpService +class HttpUpstreamService final : public HttpService { public: HttpUpstreamService(UpstreamCache& Upstream, AuthMgr& Mgr); virtual ~HttpUpstreamService(); virtual const char* BaseUri() const override; - virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleRequest(HttpServerRequest& Request) override; private: UpstreamCache& m_Upstream; diff --git a/src/zenserver/storage/workspaces/httpworkspaces.cpp b/src/zenserver/storage/workspaces/httpworkspaces.cpp index 785dd62f0..12e7bae73 100644 --- a/src/zenserver/storage/workspaces/httpworkspaces.cpp +++ b/src/zenserver/storage/workspaces/httpworkspaces.cpp @@ -110,10 +110,18 @@ HttpWorkspacesService::HandleRequest(HttpServerRequest& Request) } } -CbObject -HttpWorkspacesService::CollectStats() +void +HttpWorkspacesService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpWorkspacesService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void +HttpWorkspacesService::HandleStatsRequest(HttpServerRequest& HttpReq) { - ZEN_TRACE_CPU("WorkspacesService::Stats"); CbObjectWriter Cbo; EmitSnapshot("requests", m_HttpRequests, Cbo); @@ -150,22 +158,26 @@ HttpWorkspacesService::CollectStats() } Cbo.EndObject(); - return Cbo.Save(); + HttpReq.WriteResponse(HttpResponseCode::OK, Cbo.Save()); } -void -HttpWorkspacesService::HandleStatsRequest(HttpServerRequest& HttpReq) +CbObject +HttpWorkspacesService::CollectStats() { - HttpReq.WriteResponse(HttpResponseCode::OK, CollectStats()); + ZEN_TRACE_CPU("HttpWorkspacesService::Stats"); + CbObjectWriter Cbo; + + EmitSnapshot("requests", m_HttpRequests, Cbo); + + Cbo << "workspaces" << m_Workspaces.GetWorkspaces().size(); + + return Cbo.Save(); } -void -HttpWorkspacesService::HandleStatusRequest(HttpServerRequest& Request) +uint64_t +HttpWorkspacesService::GetActivityCounter() { - ZEN_TRACE_CPU("HttpWorkspacesService::Status"); - CbObjectWriter Cbo; - Cbo << "ok" << true; - Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); + return m_HttpRequests.Count(); } void diff --git a/src/zenserver/storage/workspaces/httpworkspaces.h b/src/zenserver/storage/workspaces/httpworkspaces.h index 7c5ddeff1..4af1316f8 100644 --- a/src/zenserver/storage/workspaces/httpworkspaces.h +++ b/src/zenserver/storage/workspaces/httpworkspaces.h @@ -29,9 +29,10 @@ public: virtual const char* BaseUri() const override; virtual void HandleRequest(HttpServerRequest& Request) override; - virtual CbObject CollectStats() override; - virtual void HandleStatsRequest(HttpServerRequest& Request) override; virtual void HandleStatusRequest(HttpServerRequest& Request) override; + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual CbObject CollectStats() override; + virtual uint64_t GetActivityCounter() override; private: struct WorkspacesStats diff --git a/src/zenserver/storage/zenstorageserver.cpp b/src/zenserver/storage/zenstorageserver.cpp index de00eb1c2..bc0a8f4ac 100644 --- a/src/zenserver/storage/zenstorageserver.cpp +++ b/src/zenserver/storage/zenstorageserver.cpp @@ -170,7 +170,7 @@ ZenStorageServer::RegisterServices() m_Http->RegisterService(*m_HttpSessionsService); } - m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService); + m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); if (m_FrontendService) { @@ -307,7 +307,7 @@ ZenStorageServer::InitializeServices(const ZenStorageServerConfig& ServerOptions ObjCfg.Buckets.push_back(std::move(NewBucket)); } - m_ObjStoreService = std::make_unique<HttpObjectStoreService>(m_StatusService, std::move(ObjCfg)); + m_ObjStoreService = std::make_unique<HttpObjectStoreService>(m_StatsService, m_StatusService, std::move(ObjCfg)); } if (ServerOptions.BuildStoreConfig.Enabled) diff --git a/src/zenstore/include/zenstore/projectstore.h b/src/zenstore/include/zenstore/projectstore.h index 6f49cd024..100a82907 100644 --- a/src/zenstore/include/zenstore/projectstore.h +++ b/src/zenstore/include/zenstore/projectstore.h @@ -456,6 +456,7 @@ public: bool DeleteProject(std::string_view ProjectId); bool Exists(std::string_view ProjectId); void Flush(); + size_t ProjectCount() const; void DiscoverProjects(); void IterateProjects(std::function<void(Project& Prj)>&& Fn); diff --git a/src/zenstore/projectstore.cpp b/src/zenstore/projectstore.cpp index 56d0f7d2b..13674da4d 100644 --- a/src/zenstore/projectstore.cpp +++ b/src/zenstore/projectstore.cpp @@ -4406,6 +4406,13 @@ ProjectStore::DiscoverProjects() } } +size_t +ProjectStore::ProjectCount() const +{ + RwLock::SharedLockScope _(m_ProjectsLock); + return m_Projects.size(); +} + void ProjectStore::IterateProjects(std::function<void(Project& Prj)>&& Fn) { diff --git a/src/zentelemetry/include/zentelemetry/hyperloglog.h b/src/zentelemetry/include/zentelemetry/hyperloglog.h index 2daf75a43..502e2aee5 100644 --- a/src/zentelemetry/include/zentelemetry/hyperloglog.h +++ b/src/zentelemetry/include/zentelemetry/hyperloglog.h @@ -9,6 +9,7 @@ #include <array> #include <atomic> +#include <cmath> #include <cstdint> #include <string_view> diff --git a/src/zenutil/consul/consul.cpp b/src/zenutil/consul/consul.cpp index d463c0938..c9144e589 100644 --- a/src/zenutil/consul/consul.cpp +++ b/src/zenutil/consul/consul.cpp @@ -167,6 +167,8 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info) ApplyCommonHeaders(AdditionalHeaders); AdditionalHeaders.Entries.emplace(HttpClient::Accept(HttpContentType::kJSON)); + HttpClient::KeyValueMap AdditionalParameters(std::make_pair<std::string, std::string>("replace-existing-checks", "true")); + CbObjectWriter Writer; { Writer.AddString("ID"sv, Info.ServiceId); @@ -185,13 +187,21 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info) } Writer.EndArray(); // Tags } - Writer.BeginObject("Check"sv); + if (Info.HealthIntervalSeconds != 0) { - Writer.AddString("HTTP"sv, fmt::format("http://{}:{}/{}", Info.Address, Info.Port, Info.HealthEndpoint)); - Writer.AddString("Interval"sv, fmt::format("{}s", Info.HealthIntervalSeconds)); - Writer.AddString("DeregisterCriticalServiceAfter"sv, fmt::format("{}s", Info.DeregisterAfterSeconds)); + // Consul requires Interval whenever HTTP is specified; omit the Check block entirely + // when no interval is configured (e.g. during Provisioning). + Writer.BeginObject("Check"sv); + { + Writer.AddString("HTTP"sv, fmt::format("http://{}:{}/{}", Info.Address, Info.Port, Info.HealthEndpoint)); + Writer.AddString("Interval"sv, fmt::format("{}s", Info.HealthIntervalSeconds)); + if (Info.DeregisterAfterSeconds != 0) + { + Writer.AddString("DeregisterCriticalServiceAfter"sv, fmt::format("{}s", Info.DeregisterAfterSeconds)); + } + } + Writer.EndObject(); // Check } - Writer.EndObject(); // Check } ExtendableStringBuilder<512> SB; @@ -199,7 +209,7 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info) IoBuffer PayloadBuffer(IoBuffer::Wrap, SB.Data(), SB.Size()); PayloadBuffer.SetContentType(HttpContentType::kJSON); - HttpClient::Response Result = m_HttpClient.Put("v1/agent/service/register", PayloadBuffer, AdditionalHeaders); + HttpClient::Response Result = m_HttpClient.Put("v1/agent/service/register", PayloadBuffer, AdditionalHeaders, AdditionalParameters); if (!Result) { @@ -321,6 +331,20 @@ ConsulClient::GetAgentServicesJson() return Result.ToText(); } +std::string +ConsulClient::GetAgentChecksJson() +{ + HttpClient::KeyValueMap AdditionalHeaders; + ApplyCommonHeaders(AdditionalHeaders); + + HttpClient::Response Result = m_HttpClient.Get("v1/agent/checks", AdditionalHeaders); + if (!Result) + { + return "{}"; + } + return Result.ToText(); +} + ////////////////////////////////////////////////////////////////////////// ServiceRegistration::ServiceRegistration(ConsulClient* Client, const ServiceRegistrationInfo& Info) : m_Client(Client), m_Info(Info) diff --git a/src/zenutil/include/zenutil/consul.h b/src/zenutil/include/zenutil/consul.h index 7bf2ce437..4002d5d23 100644 --- a/src/zenutil/include/zenutil/consul.h +++ b/src/zenutil/include/zenutil/consul.h @@ -21,8 +21,8 @@ struct ServiceRegistrationInfo uint16_t Port = 0; std::string HealthEndpoint; std::vector<std::pair<std::string, std::string>> Tags; - int HealthIntervalSeconds = 10; - int DeregisterAfterSeconds = 30; + uint32_t HealthIntervalSeconds = 10; + uint32_t DeregisterAfterSeconds = 30; }; class ConsulClient @@ -44,6 +44,7 @@ public: // Query methods for testing bool HasService(std::string_view ServiceId); std::string GetAgentServicesJson(); + std::string GetAgentChecksJson(); // Blocking query on v1/agent/services. Blocks until the service list changes or // the wait period expires. InOutIndex must be 0 for the first call; it is updated diff --git a/src/zenutil/xmake.lua b/src/zenutil/xmake.lua index 1e19f7b2f..83a6b7f93 100644 --- a/src/zenutil/xmake.lua +++ b/src/zenutil/xmake.lua @@ -11,6 +11,10 @@ target('zenutil') add_deps("robin-map") add_packages("json11") + if is_plat("linux", "macosx") then + add_packages("openssl3") + end + if is_plat("linux") then add_includedirs("$(projectdir)/thirdparty/systemd/include") add_linkdirs("$(projectdir)/thirdparty/systemd/lib") |