diff options
Diffstat (limited to 'src/zenserver')
63 files changed, 5173 insertions, 3975 deletions
diff --git a/src/zenserver/compute/computeserver.cpp b/src/zenserver/compute/computeserver.cpp index 1673cea6c..b110f7538 100644 --- a/src/zenserver/compute/computeserver.cpp +++ b/src/zenserver/compute/computeserver.cpp @@ -22,6 +22,8 @@ # if ZEN_WITH_HORDE # include <zenhorde/hordeconfig.h> # include <zenhorde/hordeprovisioner.h> +# include <zenhttp/httpclientauth.h> +# include <zenutil/authutils.h> # endif # if ZEN_WITH_NOMAD # include <zennomad/nomadconfig.h> @@ -67,6 +69,20 @@ ZenComputeServerConfigurator::AddCliOptions(cxxopts::Options& Options) Options.add_option("compute", "", + "coordinator-session", + "Session ID of the orchestrator (for stale-instance rejection)", + cxxopts::value<std::string>(m_ServerOptions.CoordinatorSession)->default_value(""), + ""); + + Options.add_option("compute", + "", + "announce-url", + "Override URL announced to the coordinator (e.g. relay-visible endpoint)", + cxxopts::value<std::string>(m_ServerOptions.AnnounceUrl)->default_value(""), + ""); + + Options.add_option("compute", + "", "idms", "Enable IDMS cloud detection; optionally specify a custom probe endpoint", cxxopts::value<std::string>(m_ServerOptions.IdmsEndpoint)->default_value("")->implicit_value("auto"), @@ -79,6 +95,20 @@ ZenComputeServerConfigurator::AddCliOptions(cxxopts::Options& Options) cxxopts::value<bool>(m_ServerOptions.EnableWorkerWebSocket)->default_value("false"), ""); + Options.add_option("compute", + "", + "provision-clean", + "Pass --clean to provisioned worker instances so they wipe state on startup", + cxxopts::value<bool>(m_ServerOptions.ProvisionClean)->default_value("false"), + ""); + + Options.add_option("compute", + "", + "provision-tracehost", + "Pass --tracehost to provisioned worker instances for remote trace collection", + cxxopts::value<std::string>(m_ServerOptions.ProvisionTraceHost)->default_value(""), + ""); + # if ZEN_WITH_HORDE // Horde provisioning options Options.add_option("horde", @@ -139,6 +169,13 @@ 
ZenComputeServerConfigurator::AddCliOptions(cxxopts::Options& Options) Options.add_option("horde", "", + "horde-drain-grace-period", + "Grace period in seconds for draining agents before force-kill", + cxxopts::value<int>(m_ServerOptions.HordeConfig.DrainGracePeriodSeconds)->default_value("300"), + ""); + + Options.add_option("horde", + "", "horde-host", "Host address for Horde agents to connect back to", cxxopts::value<std::string>(m_ServerOptions.HordeConfig.HostAddress)->default_value(""), @@ -164,6 +201,13 @@ ZenComputeServerConfigurator::AddCliOptions(cxxopts::Options& Options) "Port number for Zen service communication", cxxopts::value<uint16_t>(m_ServerOptions.HordeConfig.ZenServicePort)->default_value("8558"), ""); + + Options.add_option("horde", + "", + "horde-oidctoken-exe-path", + "Path to OidcToken executable for automatic Horde authentication", + cxxopts::value<std::string>(m_HordeOidcTokenExePath)->default_value(""), + ""); # endif # if ZEN_WITH_NOMAD @@ -313,6 +357,30 @@ ZenComputeServerConfigurator::ValidateOptions() # if ZEN_WITH_HORDE horde::FromString(m_ServerOptions.HordeConfig.Mode, m_HordeModeStr); horde::FromString(m_ServerOptions.HordeConfig.EncryptionMode, m_HordeEncryptionStr); + + // Set up OidcToken-based authentication if no static token was provided + if (m_ServerOptions.HordeConfig.AuthToken.empty() && !m_ServerOptions.HordeConfig.ServerUrl.empty()) + { + std::filesystem::path OidcExePath = FindOidcTokenExePath(m_HordeOidcTokenExePath); + if (!OidcExePath.empty()) + { + ZEN_INFO("using OidcToken executable for Horde authentication: {}", OidcExePath); + auto Provider = httpclientauth::CreateFromOidcTokenExecutable(OidcExePath, + m_ServerOptions.HordeConfig.ServerUrl, + /*Quiet=*/true, + /*Unattended=*/false, + /*Hidden=*/true, + /*IsHordeUrl=*/true); + if (Provider) + { + m_ServerOptions.HordeConfig.AccessTokenProvider = std::move(*Provider); + } + else + { + ZEN_WARN("OidcToken authentication failed; Horde requests will be 
unauthenticated"); + } + } + } # endif # if ZEN_WITH_NOMAD @@ -347,6 +415,8 @@ ZenComputeServer::Initialize(const ZenComputeServerConfig& ServerConfig, ZenServ } m_CoordinatorEndpoint = ServerConfig.CoordinatorEndpoint; + m_CoordinatorSession = ServerConfig.CoordinatorSession; + m_AnnounceUrl = ServerConfig.AnnounceUrl; m_InstanceId = ServerConfig.InstanceId; m_EnableWorkerWebSocket = ServerConfig.EnableWorkerWebSocket; @@ -379,13 +449,20 @@ ZenComputeServer::Cleanup() m_AnnounceTimer.cancel(); # if ZEN_WITH_HORDE - // Shut down Horde provisioner first — this signals all agent threads + // Disconnect the provisioner state provider before destroying the + // provisioner so the orchestrator HTTP layer cannot call into it. + if (m_OrchestratorService) + { + m_OrchestratorService->SetProvisionerStateProvider(nullptr); + } + + // Shut down Horde provisioner - this signals all agent threads // to exit and joins them before we tear down HTTP services. m_HordeProvisioner.reset(); # endif # if ZEN_WITH_NOMAD - // Shut down Nomad provisioner — stops the management thread and + // Shut down Nomad provisioner - stops the management thread and // sends stop requests for all tracked jobs. 
m_NomadProvisioner.reset(); # endif @@ -419,12 +496,12 @@ ZenComputeServer::Cleanup() m_IoRunner.join(); } - ShutdownServices(); - if (m_Http) { m_Http->Close(); } + + ShutdownServices(); } catch (const std::exception& Ex) { @@ -444,11 +521,12 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig) ZEN_TRACE_CPU("ZenComputeServer::InitializeServices"); ZEN_INFO("initializing compute services"); - CidStoreConfiguration Config; - Config.RootDirectory = m_DataRoot / "cas"; + m_ActionStore = std::make_unique<MemoryCidStore>(); - m_CidStore = std::make_unique<CidStore>(m_GcManager); - m_CidStore->Initialize(Config); + CidStoreConfiguration WorkerStoreConfig; + WorkerStoreConfig.RootDirectory = m_DataRoot / "cas"; + m_WorkerStore = std::make_unique<CidStore>(m_GcManager); + m_WorkerStore->Initialize(WorkerStoreConfig); if (!ServerConfig.IdmsEndpoint.empty()) { @@ -476,10 +554,12 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig) std::make_unique<zen::compute::HttpOrchestratorService>(ServerConfig.DataDir / "orch", ServerConfig.EnableWorkerWebSocket); ZEN_INFO("instantiating function service"); - m_ComputeService = std::make_unique<zen::compute::HttpComputeService>(*m_CidStore, + m_ComputeService = std::make_unique<zen::compute::HttpComputeService>(*m_ActionStore, + *m_WorkerStore, m_StatsService, ServerConfig.DataDir / "functions", ServerConfig.MaxConcurrentActions); + m_ComputeService->SetShutdownCallback([this] { RequestExit(0); }); m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); @@ -504,7 +584,11 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig) OrchestratorEndpoint << '/'; } - m_NomadProvisioner = std::make_unique<nomad::NomadProvisioner>(NomadCfg, OrchestratorEndpoint); + m_NomadProvisioner = std::make_unique<nomad::NomadProvisioner>(NomadCfg, + OrchestratorEndpoint, + 
m_OrchestratorService->GetSessionId().ToString(), + ServerConfig.ProvisionClean, + ServerConfig.ProvisionTraceHost); } } # endif @@ -535,7 +619,14 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig) : std::filesystem::path(HordeConfig.BinariesPath); std::filesystem::path WorkingDir = ServerConfig.DataDir / "horde"; - m_HordeProvisioner = std::make_unique<horde::HordeProvisioner>(HordeConfig, BinariesPath, WorkingDir, OrchestratorEndpoint); + m_HordeProvisioner = std::make_unique<horde::HordeProvisioner>(HordeConfig, + BinariesPath, + WorkingDir, + OrchestratorEndpoint, + m_OrchestratorService->GetSessionId().ToString(), + ServerConfig.ProvisionClean, + ServerConfig.ProvisionTraceHost); + m_OrchestratorService->SetProvisionerStateProvider(m_HordeProvisioner.get()); } } # endif @@ -563,6 +654,10 @@ ZenComputeServer::GetInstanceId() const std::string ZenComputeServer::GetAnnounceUrl() const { + if (!m_AnnounceUrl.empty()) + { + return m_AnnounceUrl; + } return m_Http->GetServiceUri(nullptr); } @@ -633,6 +728,11 @@ ZenComputeServer::BuildAnnounceBody() << "nomad"; } + if (!m_CoordinatorSession.empty()) + { + AnnounceBody << "coordinator_session" << m_CoordinatorSession; + } + ResolveCloudMetadata(); if (m_CloudMetadata) { @@ -779,8 +879,10 @@ ZenComputeServer::ProvisionerMaintenanceTick() # if ZEN_WITH_HORDE if (m_HordeProvisioner) { - m_HordeProvisioner->SetTargetCoreCount(UINT32_MAX); + // Re-apply current target to spawn agent threads for any that have + // exited since the last tick, without overwriting a user-set target. 
auto Stats = m_HordeProvisioner->GetStats(); + m_HordeProvisioner->SetTargetCoreCount(Stats.TargetCoreCount); ZEN_DEBUG("Horde maintenance: target={}, estimated={}, active={}", Stats.TargetCoreCount, Stats.EstimatedCoreCount, @@ -882,12 +984,14 @@ ZenComputeServer::Run() OnReady(); + StartSelfSession("zencompute"); + PostAnnounce(); EnqueueAnnounceTimer(); InitializeOrchestratorWebSocket(); # if ZEN_WITH_HORDE - // Start Horde provisioning if configured — request maximum allowed cores. + // Start Horde provisioning if configured - request maximum allowed cores. // SetTargetCoreCount clamps to HordeConfig::MaxCores internally. if (m_HordeProvisioner) { @@ -899,7 +1003,7 @@ ZenComputeServer::Run() # endif # if ZEN_WITH_NOMAD - // Start Nomad provisioning if configured — request maximum allowed cores. + // Start Nomad provisioning if configured - request maximum allowed cores. // SetTargetCoreCount clamps to NomadConfig::MaxCores internally. if (m_NomadProvisioner) { diff --git a/src/zenserver/compute/computeserver.h b/src/zenserver/compute/computeserver.h index 8f4edc0f0..aa9c1a5b3 100644 --- a/src/zenserver/compute/computeserver.h +++ b/src/zenserver/compute/computeserver.h @@ -10,6 +10,7 @@ # include <zencore/system.h> # include <zenhttp/httpwsclient.h> # include <zenstore/gc.h> +# include <zenstore/memorycidstore.h> # include "frontend/frontend.h" namespace cxxopts { @@ -41,7 +42,6 @@ class NomadProvisioner; namespace zen { -class CidStore; class HttpApiService; struct ZenComputeServerConfig : public ZenServerConfig @@ -49,9 +49,13 @@ struct ZenComputeServerConfig : public ZenServerConfig std::string UpstreamNotificationEndpoint; std::string InstanceId; // For use in notifications std::string CoordinatorEndpoint; + std::string CoordinatorSession; ///< Session ID for stale-instance rejection + std::string AnnounceUrl; ///< Override for self-announced URL (e.g. 
relay-visible endpoint) std::string IdmsEndpoint; int32_t MaxConcurrentActions = 0; // 0 = auto (LogicalProcessorCount * 2) - bool EnableWorkerWebSocket = false; // Use WebSocket for worker↔orchestrator link + bool EnableWorkerWebSocket = false; // Use WebSocket for worker<->orchestrator link + bool ProvisionClean = false; // Pass --clean to provisioned workers + std::string ProvisionTraceHost; // Pass --tracehost to provisioned workers # if ZEN_WITH_HORDE horde::HordeConfig HordeConfig; @@ -84,6 +88,7 @@ private: # if ZEN_WITH_HORDE std::string m_HordeModeStr = "direct"; std::string m_HordeEncryptionStr = "none"; + std::string m_HordeOidcTokenExePath; # endif # if ZEN_WITH_NOMAD @@ -131,7 +136,8 @@ public: private: GcManager m_GcManager; GcScheduler m_GcScheduler{m_GcManager}; - std::unique_ptr<CidStore> m_CidStore; + std::unique_ptr<MemoryCidStore> m_ActionStore; + std::unique_ptr<CidStore> m_WorkerStore; std::unique_ptr<HttpApiService> m_ApiService; std::unique_ptr<zen::compute::HttpComputeService> m_ComputeService; std::unique_ptr<zen::compute::HttpOrchestratorService> m_OrchestratorService; @@ -146,6 +152,8 @@ private: # endif SystemMetricsTracker m_MetricsTracker; std::string m_CoordinatorEndpoint; + std::string m_CoordinatorSession; + std::string m_AnnounceUrl; std::string m_InstanceId; asio::steady_timer m_AnnounceTimer{m_IoContext}; @@ -163,7 +171,7 @@ private: std::string GetInstanceId() const; CbObject BuildAnnounceBody(); - // Worker→orchestrator WebSocket client + // Worker->orchestrator WebSocket client struct OrchestratorWsHandler : public IWsClientHandler { ZenComputeServer& Server; diff --git a/src/zenserver/config/config.cpp b/src/zenserver/config/config.cpp index daad154bc..2a89fc637 100644 --- a/src/zenserver/config/config.cpp +++ b/src/zenserver/config/config.cpp @@ -12,6 +12,7 @@ #include <zencore/compactbinaryutil.h> #include <zencore/compactbinaryvalidation.h> #include <zencore/except.h> +#include <zencore/filesystem.h> #include 
<zencore/fmtutils.h> #include <zencore/iobuffer.h> #include <zencore/logging.h> @@ -266,6 +267,10 @@ ZenServerCmdLineOptions::AddCliOptions(cxxopts::Options& options, ZenServerConfi options.add_options()("powercycle", "Exit immediately after initialization is complete", cxxopts::value<bool>(ServerOptions.IsPowerCycle)); + options.add_options()("enable-execution-history", + "Record this invocation in the per-user execution history " + "(use --enable-execution-history=false to suppress)", + cxxopts::value<bool>()->default_value("true")->implicit_value("true")); options.add_option("diagnostics", "", @@ -478,15 +483,27 @@ ZenServerCmdLineOptions::ApplyOptions(cxxopts::Options& options, ZenServerConfig throw std::runtime_error(fmt::format("'--snapshot-dir' ('{}') must be a directory", ServerOptions.BaseSnapshotDir)); } - ServerOptions.SystemRootDir = MakeSafeAbsolutePath(SystemRootDir); - ServerOptions.DataDir = MakeSafeAbsolutePath(DataDir); - ServerOptions.ContentDir = MakeSafeAbsolutePath(ContentDir); - ServerOptions.ConfigFile = MakeSafeAbsolutePath(ConfigFile); - ServerOptions.BaseSnapshotDir = MakeSafeAbsolutePath(BaseSnapshotDir); + SystemRootDir = ExpandEnvironmentVariables(SystemRootDir); + ServerOptions.SystemRootDir = MakeSafeAbsolutePath(SystemRootDir); + + DataDir = ExpandEnvironmentVariables(DataDir); + ServerOptions.DataDir = MakeSafeAbsolutePath(DataDir); + + ContentDir = ExpandEnvironmentVariables(ContentDir); + ServerOptions.ContentDir = MakeSafeAbsolutePath(ContentDir); + + ConfigFile = ExpandEnvironmentVariables(ConfigFile); + ServerOptions.ConfigFile = MakeSafeAbsolutePath(ConfigFile); + + BaseSnapshotDir = ExpandEnvironmentVariables(BaseSnapshotDir); + ServerOptions.BaseSnapshotDir = MakeSafeAbsolutePath(BaseSnapshotDir); + + SecurityConfigPath = ExpandEnvironmentVariables(SecurityConfigPath); ServerOptions.SecurityConfigPath = MakeSafeAbsolutePath(SecurityConfigPath); if (!UnixSocketPath.empty()) { + UnixSocketPath = ExpandEnvironmentVariables(UnixSocketPath); 
ServerOptions.HttpConfig.UnixSocketPath = MakeSafeAbsolutePath(UnixSocketPath); } diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp index f3d8dbfe3..e1a8fed7d 100644 --- a/src/zenserver/diag/logging.cpp +++ b/src/zenserver/diag/logging.cpp @@ -112,7 +112,7 @@ InitializeServerLogging(const ZenServerConfig& InOptions, bool WithCacheService) const zen::Oid ServerSessionId = zen::GetSessionId(); logging::Registry::Instance().ApplyAll([&](auto Logger) { - static constinit logging::LogPoint SessionIdPoint{{}, logging::Info, "server session id: {}"}; + static constinit logging::LogPoint SessionIdPoint{0, 0, logging::Info, "server session id: {}"}; ZEN_MEMSCOPE(ELLMTag::Logging); Logger->Log(SessionIdPoint, fmt::make_format_args(ServerSessionId)); }); diff --git a/src/zenserver/frontend/frontend.cpp b/src/zenserver/frontend/frontend.cpp index 52ec5b8b3..812536074 100644 --- a/src/zenserver/frontend/frontend.cpp +++ b/src/zenserver/frontend/frontend.cpp @@ -160,7 +160,7 @@ HttpFrontendService::HandleRequest(zen::HttpServerRequest& Request) ContentType = ParseContentType(DotExt); - // Extensions used only for static file serving — not in the global + // Extensions used only for static file serving - not in the global // ParseContentType table because that table also drives URI extension // stripping for content negotiation, and we don't want /api/foo.txt to // have its extension removed. 
diff --git a/src/zenserver/frontend/frontend.h b/src/zenserver/frontend/frontend.h index e0b86f1de..0e7a4fe3c 100644 --- a/src/zenserver/frontend/frontend.h +++ b/src/zenserver/frontend/frontend.h @@ -4,7 +4,7 @@ #include <zenhttp/httpserver.h> #include <zenhttp/httpstatus.h> -#include "zipfs.h" +#include <zenhttp/zipfs.h> #include <filesystem> #include <memory> diff --git a/src/zenserver/frontend/html/banner.js b/src/zenserver/frontend/html/banner.js index 01679e621..5b54f6fb9 100644 --- a/src/zenserver/frontend/html/banner.js +++ b/src/zenserver/frontend/html/banner.js @@ -14,12 +14,14 @@ * load 0–100 integer, shown as a percentage (default: hidden) * tagline custom tagline text (default: "Orchestrator Overview" / "Orchestrator") * subtitle text after "ZEN" in the wordmark (default: "COMPUTE") + * logo-src URL for the logo image (default: inline SVG) + * version version string shown next to the wordmark (default: hidden) */ class ZenBanner extends HTMLElement { static get observedAttributes() { - return ['variant', 'cluster-status', 'load', 'tagline', 'subtitle', 'logo-src']; + return ['variant', 'cluster-status', 'load', 'tagline', 'subtitle', 'logo-src', 'version']; } attributeChangedCallback() { @@ -41,6 +43,7 @@ class ZenBanner extends HTMLElement { get _tagline() { return this.getAttribute('tagline'); } // null → default get _subtitle() { return this.getAttribute('subtitle'); } // null → "COMPUTE" get _logoSrc() { return this.getAttribute('logo-src'); } // null → inline SVG + get _version() { return this.getAttribute('version'); } // null → hidden get _statusColor() { return { nominal: '#7ecfb8', degraded: '#d4a84b', offline: '#c0504d' }[this._status] ?? '#7ecfb8'; @@ -82,6 +85,7 @@ class ZenBanner extends HTMLElement { const divH = compact ? '32px' : '48px'; const nameSize = compact ? '15px' : '22px'; const tagSize = compact ? '9px' : '11px'; + const verSize = compact ? 
'11px' : '15px'; const sc = this._statusColor; const lc = this._loadColor; @@ -159,6 +163,12 @@ class ZenBanner extends HTMLElement { gap: 4px; } + .wordmark-row { + display: flex; + align-items: baseline; + gap: 10px; + } + .wordmark { font-weight: 700; font-size: ${nameSize}; @@ -170,6 +180,14 @@ class ZenBanner extends HTMLElement { .wordmark span { color: #7ecfb8; } + .version { + font-weight: 700; + font-size: ${verSize}; + letter-spacing: 0.08em; + line-height: 1; + color: var(--theme_bright, #e8e4dc); + } + .tagline { font-family: 'Noto Serif JP', serif; font-weight: 300; @@ -290,12 +308,18 @@ class ZenBanner extends HTMLElement { </div> ` : ''; + const version = this._version; + const versionEl = version ? `<div class="version">v${version}</div>` : ''; + return ` <a class="banner" part="banner" href="/dashboard/"> <div class="logo-mark">${this._logoMark()}</div> <div class="divider"></div> <div class="text-block"> - <div class="wordmark">ZEN<span> ${this._subtitle ?? 'COMPUTE'}</span></div> + <div class="wordmark-row"> + <div class="wordmark">ZEN<span> ${this._subtitle ?? 'COMPUTE'}</span></div> + ${versionEl} + </div> <div class="tagline">${this._tagline ?? (compact ? 
'Orchestrator' : 'Orchestrator Overview')}</div> </div> <div class="spacer"></div> diff --git a/src/zenserver/frontend/html/compute/compute.html b/src/zenserver/frontend/html/compute/compute.html deleted file mode 100644 index c07bbb692..000000000 --- a/src/zenserver/frontend/html/compute/compute.html +++ /dev/null @@ -1,925 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Zen Compute Dashboard</title> - <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/chart.umd.min.js"></script> - <link rel="stylesheet" type="text/css" href="../zen.css" /> - <script src="../util/sanitize.js"></script> - <script src="../theme.js"></script> - <script src="../banner.js" defer></script> - <script src="../nav.js" defer></script> - <style> - .grid { - grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); - } - - .chart-container { - position: relative; - height: 300px; - margin-top: 20px; - } - - .stats-row { - display: flex; - justify-content: space-between; - margin-bottom: 12px; - padding: 8px 0; - border-bottom: 1px solid var(--theme_border_subtle); - } - - .stats-row:last-child { - border-bottom: none; - margin-bottom: 0; - } - - .stats-label { - color: var(--theme_g1); - font-size: 13px; - } - - .stats-value { - color: var(--theme_bright); - font-weight: 600; - font-size: 13px; - } - - .rate-stats { - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 16px; - margin-top: 16px; - } - - .rate-item { - text-align: center; - } - - .rate-value { - font-size: 20px; - font-weight: 600; - color: var(--theme_p0); - } - - .rate-label { - font-size: 11px; - color: var(--theme_g1); - margin-top: 4px; - text-transform: uppercase; - } - - .worker-row { - cursor: pointer; - transition: background 0.15s; - } - - .worker-row:hover { - background: var(--theme_p4); - } - - .worker-row.selected { - background: var(--theme_p3); - } - - .worker-detail { - 
margin-top: 20px; - border-top: 1px solid var(--theme_g2); - padding-top: 16px; - } - - .worker-detail-title { - font-size: 15px; - font-weight: 600; - color: var(--theme_bright); - margin-bottom: 12px; - } - - .detail-section { - margin-bottom: 16px; - } - - .detail-section-label { - font-size: 11px; - font-weight: 600; - color: var(--theme_g1); - text-transform: uppercase; - letter-spacing: 0.5px; - margin-bottom: 6px; - } - - .detail-table { - width: 100%; - border-collapse: collapse; - font-size: 12px; - } - - .detail-table td { - padding: 4px 8px; - color: var(--theme_g0); - border-bottom: 1px solid var(--theme_border_subtle); - vertical-align: top; - } - - .detail-table td:first-child { - color: var(--theme_g1); - width: 40%; - font-family: monospace; - } - - .detail-table tr:last-child td { - border-bottom: none; - } - - .detail-mono { - font-family: monospace; - font-size: 11px; - color: var(--theme_g1); - } - - .detail-tag { - display: inline-block; - padding: 2px 8px; - border-radius: 4px; - background: var(--theme_border_subtle); - color: var(--theme_g0); - font-size: 11px; - margin: 2px 4px 2px 0; - } - </style> -</head> -<body> - <div class="container" style="max-width: 1400px; margin: 0 auto;"> - <zen-banner cluster-status="nominal" load="0" tagline="Node Overview" logo-src="../favicon.ico"></zen-banner> - <zen-nav> - <a href="/dashboard/">Home</a> - <a href="compute.html">Node</a> - <a href="orchestrator.html">Orchestrator</a> - </zen-nav> - <div class="timestamp">Last updated: <span id="last-update">Never</span></div> - - <div id="error-container"></div> - - <!-- Action Queue Stats --> - <div class="section-title">Action Queue</div> - <div class="grid"> - <div class="card"> - <div class="card-title">Pending Actions</div> - <div class="metric-value" id="actions-pending">-</div> - <div class="metric-label">Waiting to be scheduled</div> - </div> - <div class="card"> - <div class="card-title">Running Actions</div> - <div class="metric-value" 
id="actions-running">-</div> - <div class="metric-label">Currently executing</div> - </div> - <div class="card"> - <div class="card-title">Completed Actions</div> - <div class="metric-value" id="actions-complete">-</div> - <div class="metric-label">Results available</div> - </div> - </div> - - <!-- Action Queue Chart --> - <div class="card" style="margin-bottom: 30px;"> - <div class="card-title">Action Queue History</div> - <div class="chart-container"> - <canvas id="queue-chart"></canvas> - </div> - </div> - - <!-- Performance Metrics --> - <div class="section-title">Performance Metrics</div> - <div class="card" style="margin-bottom: 30px;"> - <div class="card-title">Completion Rate</div> - <div class="rate-stats"> - <div class="rate-item"> - <div class="rate-value" id="rate-1">-</div> - <div class="rate-label">1 min rate</div> - </div> - <div class="rate-item"> - <div class="rate-value" id="rate-5">-</div> - <div class="rate-label">5 min rate</div> - </div> - <div class="rate-item"> - <div class="rate-value" id="rate-15">-</div> - <div class="rate-label">15 min rate</div> - </div> - </div> - <div style="margin-top: 20px;"> - <div class="stats-row"> - <span class="stats-label">Total Retired</span> - <span class="stats-value" id="retired-count">-</span> - </div> - <div class="stats-row"> - <span class="stats-label">Mean Rate</span> - <span class="stats-value" id="rate-mean">-</span> - </div> - </div> - </div> - - <!-- Workers --> - <div class="section-title">Workers</div> - <div class="card" style="margin-bottom: 30px;"> - <div class="card-title">Worker Status</div> - <div class="stats-row"> - <span class="stats-label">Registered Workers</span> - <span class="stats-value" id="worker-count">-</span> - </div> - <div id="worker-table-container" style="margin-top: 16px; display: none;"> - <table id="worker-table"> - <thead> - <tr> - <th>Name</th> - <th>Platform</th> - <th style="text-align: right;">Cores</th> - <th style="text-align: right;">Timeout</th> - <th 
style="text-align: right;">Functions</th> - <th>Worker ID</th> - </tr> - </thead> - <tbody id="worker-table-body"></tbody> - </table> - <div id="worker-detail" class="worker-detail" style="display: none;"></div> - </div> - </div> - - <!-- Queues --> - <div class="section-title">Queues</div> - <div class="card" style="margin-bottom: 30px;"> - <div class="card-title">Queue Status</div> - <div id="queue-list-empty" class="empty-state" style="text-align: left;">No queues.</div> - <div id="queue-list-container" style="display: none;"> - <table id="queue-list-table"> - <thead> - <tr> - <th style="text-align: right; width: 60px;">ID</th> - <th style="text-align: center; width: 80px;">Status</th> - <th style="text-align: right;">Active</th> - <th style="text-align: right;">Completed</th> - <th style="text-align: right;">Failed</th> - <th style="text-align: right;">Abandoned</th> - <th style="text-align: right;">Cancelled</th> - <th>Token</th> - </tr> - </thead> - <tbody id="queue-list-body"></tbody> - </table> - </div> - </div> - - <!-- Action History --> - <div class="section-title">Recent Actions</div> - <div class="card" style="margin-bottom: 30px;"> - <div class="card-title">Action History</div> - <div id="action-history-empty" class="empty-state" style="text-align: left;">No actions recorded yet.</div> - <div id="action-history-container" style="display: none;"> - <table id="action-history-table"> - <thead> - <tr> - <th style="text-align: right; width: 60px;">LSN</th> - <th style="text-align: right; width: 60px;">Queue</th> - <th style="text-align: center; width: 70px;">Status</th> - <th>Function</th> - <th style="text-align: right; width: 80px;">Started</th> - <th style="text-align: right; width: 80px;">Finished</th> - <th style="text-align: right; width: 80px;">Duration</th> - <th>Worker ID</th> - <th>Action ID</th> - </tr> - </thead> - <tbody id="action-history-body"></tbody> - </table> - </div> - </div> - - <!-- System Resources --> - <div 
class="section-title">System Resources</div> - <div class="grid"> - <div class="card"> - <div class="card-title">CPU Usage</div> - <div class="metric-value" id="cpu-usage">-</div> - <div class="metric-label">Percent</div> - <div class="progress-bar"> - <div class="progress-fill" id="cpu-progress" style="width: 0%"></div> - </div> - <div style="position: relative; height: 60px; margin-top: 12px;"> - <canvas id="cpu-chart"></canvas> - </div> - <div style="margin-top: 12px;"> - <div class="stats-row"> - <span class="stats-label">Packages</span> - <span class="stats-value" id="cpu-packages">-</span> - </div> - <div class="stats-row"> - <span class="stats-label">Physical Cores</span> - <span class="stats-value" id="cpu-cores">-</span> - </div> - <div class="stats-row"> - <span class="stats-label">Logical Processors</span> - <span class="stats-value" id="cpu-lp">-</span> - </div> - </div> - </div> - <div class="card"> - <div class="card-title">Memory</div> - <div class="stats-row"> - <span class="stats-label">Used</span> - <span class="stats-value" id="memory-used">-</span> - </div> - <div class="stats-row"> - <span class="stats-label">Total</span> - <span class="stats-value" id="memory-total">-</span> - </div> - <div class="progress-bar"> - <div class="progress-fill" id="memory-progress" style="width: 0%"></div> - </div> - </div> - <div class="card"> - <div class="card-title">Disk</div> - <div class="stats-row"> - <span class="stats-label">Used</span> - <span class="stats-value" id="disk-used">-</span> - </div> - <div class="stats-row"> - <span class="stats-label">Total</span> - <span class="stats-value" id="disk-total">-</span> - </div> - <div class="progress-bar"> - <div class="progress-fill" id="disk-progress" style="width: 0%"></div> - </div> - </div> - </div> - </div> - - <script> - // Configuration - const BASE_URL = window.location.origin; - const REFRESH_INTERVAL = 2000; // 2 seconds - const MAX_HISTORY_POINTS = 60; // Show last 2 minutes - - // Data storage - 
const history = { - timestamps: [], - pending: [], - running: [], - completed: [], - cpu: [] - }; - - // CPU sparkline chart - const cpuCtx = document.getElementById('cpu-chart').getContext('2d'); - const cpuChart = new Chart(cpuCtx, { - type: 'line', - data: { - labels: [], - datasets: [{ - data: [], - borderColor: '#58a6ff', - backgroundColor: 'rgba(88, 166, 255, 0.15)', - borderWidth: 1.5, - tension: 0.4, - fill: true, - pointRadius: 0 - }] - }, - options: { - responsive: true, - maintainAspectRatio: false, - animation: false, - plugins: { legend: { display: false }, tooltip: { enabled: false } }, - scales: { - x: { display: false }, - y: { display: false, min: 0, max: 100 } - } - } - }); - - // Queue chart setup - const ctx = document.getElementById('queue-chart').getContext('2d'); - const chart = new Chart(ctx, { - type: 'line', - data: { - labels: [], - datasets: [ - { - label: 'Pending', - data: [], - borderColor: '#f0883e', - backgroundColor: 'rgba(240, 136, 62, 0.1)', - tension: 0.4, - fill: true - }, - { - label: 'Running', - data: [], - borderColor: '#58a6ff', - backgroundColor: 'rgba(88, 166, 255, 0.1)', - tension: 0.4, - fill: true - }, - { - label: 'Completed', - data: [], - borderColor: '#3fb950', - backgroundColor: 'rgba(63, 185, 80, 0.1)', - tension: 0.4, - fill: true - } - ] - }, - options: { - responsive: true, - maintainAspectRatio: false, - plugins: { - legend: { - display: true, - labels: { - color: '#8b949e' - } - } - }, - scales: { - x: { - display: false - }, - y: { - beginAtZero: true, - ticks: { - color: '#8b949e' - }, - grid: { - color: '#21262d' - } - } - } - } - }); - - // Helper functions - - function formatBytes(bytes) { - if (bytes === 0) return '0 B'; - const k = 1024; - const sizes = ['B', 'KB', 'MB', 'GB', 'TB']; - const i = Math.floor(Math.log(bytes) / Math.log(k)); - return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; - } - - function formatRate(rate) { - return rate.toFixed(2) + '/s'; - } - - function 
showError(message) { - const container = document.getElementById('error-container'); - container.innerHTML = `<div class="error">Error: ${escapeHtml(message)}</div>`; - } - - function clearError() { - document.getElementById('error-container').innerHTML = ''; - } - - function updateTimestamp() { - const now = new Date(); - document.getElementById('last-update').textContent = now.toLocaleTimeString(); - } - - // Fetch functions - async function fetchJSON(endpoint) { - const response = await fetch(`${BASE_URL}${endpoint}`, { - headers: { - 'Accept': 'application/json' - } - }); - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${response.statusText}`); - } - return await response.json(); - } - - async function fetchHealth() { - try { - const response = await fetch(`${BASE_URL}/compute/ready`); - const isHealthy = response.status === 200; - - const banner = document.querySelector('zen-banner'); - - if (isHealthy) { - banner.setAttribute('cluster-status', 'nominal'); - banner.setAttribute('load', '0'); - } else { - banner.setAttribute('cluster-status', 'degraded'); - banner.setAttribute('load', '0'); - } - - return isHealthy; - } catch (error) { - const banner = document.querySelector('zen-banner'); - banner.setAttribute('cluster-status', 'degraded'); - banner.setAttribute('load', '0'); - throw error; - } - } - - async function fetchStats() { - const data = await fetchJSON('/stats/compute'); - - // Update action counts - document.getElementById('actions-pending').textContent = data.actions_pending || 0; - document.getElementById('actions-running').textContent = data.actions_submitted || 0; - document.getElementById('actions-complete').textContent = data.actions_complete || 0; - - // Update completion rates - if (data.actions_retired) { - document.getElementById('rate-1').textContent = formatRate(data.actions_retired.rate_1 || 0); - document.getElementById('rate-5').textContent = formatRate(data.actions_retired.rate_5 || 0); - 
document.getElementById('rate-15').textContent = formatRate(data.actions_retired.rate_15 || 0); - document.getElementById('retired-count').textContent = data.actions_retired.count || 0; - document.getElementById('rate-mean').textContent = formatRate(data.actions_retired.rate_mean || 0); - } - - // Update chart - const now = new Date().toLocaleTimeString(); - history.timestamps.push(now); - history.pending.push(data.actions_pending || 0); - history.running.push(data.actions_submitted || 0); - history.completed.push(data.actions_complete || 0); - - // Keep only last N points - if (history.timestamps.length > MAX_HISTORY_POINTS) { - history.timestamps.shift(); - history.pending.shift(); - history.running.shift(); - history.completed.shift(); - } - - chart.data.labels = history.timestamps; - chart.data.datasets[0].data = history.pending; - chart.data.datasets[1].data = history.running; - chart.data.datasets[2].data = history.completed; - chart.update('none'); - } - - async function fetchSysInfo() { - const data = await fetchJSON('/compute/sysinfo'); - - // Update CPU - const cpuUsage = data.cpu_usage || 0; - document.getElementById('cpu-usage').textContent = cpuUsage.toFixed(1) + '%'; - document.getElementById('cpu-progress').style.width = cpuUsage + '%'; - - const banner = document.querySelector('zen-banner'); - banner.setAttribute('load', cpuUsage.toFixed(1)); - - history.cpu.push(cpuUsage); - if (history.cpu.length > MAX_HISTORY_POINTS) history.cpu.shift(); - cpuChart.data.labels = history.cpu.map(() => ''); - cpuChart.data.datasets[0].data = history.cpu; - cpuChart.update('none'); - - document.getElementById('cpu-packages').textContent = data.cpu_count ?? '-'; - document.getElementById('cpu-cores').textContent = data.core_count ?? '-'; - document.getElementById('cpu-lp').textContent = data.lp_count ?? 
'-'; - - // Update Memory - const memUsed = data.memory_used || 0; - const memTotal = data.memory_total || 1; - const memPercent = (memUsed / memTotal) * 100; - document.getElementById('memory-used').textContent = formatBytes(memUsed); - document.getElementById('memory-total').textContent = formatBytes(memTotal); - document.getElementById('memory-progress').style.width = memPercent + '%'; - - // Update Disk - const diskUsed = data.disk_used || 0; - const diskTotal = data.disk_total || 1; - const diskPercent = (diskUsed / diskTotal) * 100; - document.getElementById('disk-used').textContent = formatBytes(diskUsed); - document.getElementById('disk-total').textContent = formatBytes(diskTotal); - document.getElementById('disk-progress').style.width = diskPercent + '%'; - } - - // Persists the selected worker ID across refreshes - let selectedWorkerId = null; - - function renderWorkerDetail(id, desc) { - const panel = document.getElementById('worker-detail'); - - if (!desc) { - panel.style.display = 'none'; - return; - } - - function field(label, value) { - return `<tr><td>${label}</td><td>${value ?? '-'}</td></tr>`; - } - - function monoField(label, value) { - return `<tr><td>${label}</td><td class="detail-mono">${value ?? '-'}</td></tr>`; - } - - // Functions - const functions = desc.functions || []; - const functionsHtml = functions.length === 0 ? '<span style="color:var(--theme_faint);font-size:12px;">none</span>' : - `<table class="detail-table">${functions.map(f => - `<tr><td>${escapeHtml(f.name || '-')}</td><td class="detail-mono">${escapeHtml(f.version || '-')}</td></tr>` - ).join('')}</table>`; - - // Executables - const executables = desc.executables || []; - const totalExecSize = executables.reduce((sum, e) => sum + (e.size || 0), 0); - const execHtml = executables.length === 0 ? 
'<span style="color:var(--theme_faint);font-size:12px;">none</span>' : - `<table class="detail-table"> - <tr style="font-size:11px;"> - <td style="color:var(--theme_faint);padding-bottom:4px;">Path</td> - <td style="color:var(--theme_faint);padding-bottom:4px;">Hash</td> - <td style="color:var(--theme_faint);padding-bottom:4px;text-align:right;">Size</td> - </tr> - ${executables.map(e => - `<tr> - <td>${escapeHtml(e.name || '-')}</td> - <td class="detail-mono">${escapeHtml(e.hash || '-')}</td> - <td style="text-align:right;white-space:nowrap;">${e.size != null ? formatBytes(e.size) : '-'}</td> - </tr>` - ).join('')} - <tr style="border-top:1px solid var(--theme_g2);"> - <td style="color:var(--theme_g1);padding-top:6px;">Total</td> - <td></td> - <td style="text-align:right;white-space:nowrap;padding-top:6px;color:var(--theme_bright);font-weight:600;">${formatBytes(totalExecSize)}</td> - </tr> - </table>`; - - // Files - const files = desc.files || []; - const filesHtml = files.length === 0 ? '<span style="color:var(--theme_faint);font-size:12px;">none</span>' : - `<table class="detail-table">${files.map(f => - `<tr><td>${escapeHtml(f.name || f)}</td><td class="detail-mono">${escapeHtml(f.hash || '')}</td></tr>` - ).join('')}</table>`; - - // Dirs - const dirs = desc.dirs || []; - const dirsHtml = dirs.length === 0 ? '<span style="color:var(--theme_faint);font-size:12px;">none</span>' : - dirs.map(d => `<span class="detail-tag">${escapeHtml(d)}</span>`).join(''); - - // Environment - const env = desc.environment || []; - const envHtml = env.length === 0 ? 
'<span style="color:var(--theme_faint);font-size:12px;">none</span>' : - env.map(e => `<span class="detail-tag">${escapeHtml(e)}</span>`).join(''); - - panel.innerHTML = ` - <div class="worker-detail-title">${escapeHtml(desc.name || id)}</div> - <div class="detail-section"> - <table class="detail-table"> - ${field('Worker ID', `<span class="detail-mono">${escapeHtml(id)}</span>`)} - ${field('Path', escapeHtml(desc.path || '-'))} - ${field('Platform', escapeHtml(desc.host || '-'))} - ${monoField('Build System', desc.buildsystem_version)} - ${field('Cores', desc.cores)} - ${field('Timeout', desc.timeout != null ? desc.timeout + 's' : null)} - </table> - </div> - <div class="detail-section"> - <div class="detail-section-label">Functions</div> - ${functionsHtml} - </div> - <div class="detail-section"> - <div class="detail-section-label">Executables</div> - ${execHtml} - </div> - <div class="detail-section"> - <div class="detail-section-label">Files</div> - ${filesHtml} - </div> - <div class="detail-section"> - <div class="detail-section-label">Directories</div> - ${dirsHtml} - </div> - <div class="detail-section"> - <div class="detail-section-label">Environment</div> - ${envHtml} - </div> - `; - panel.style.display = 'block'; - } - - async function fetchWorkers() { - const data = await fetchJSON('/compute/workers'); - const workerIds = data.workers || []; - - document.getElementById('worker-count').textContent = workerIds.length; - - const container = document.getElementById('worker-table-container'); - const tbody = document.getElementById('worker-table-body'); - - if (workerIds.length === 0) { - container.style.display = 'none'; - selectedWorkerId = null; - return; - } - - const descriptors = await Promise.all( - workerIds.map(id => fetchJSON(`/compute/workers/${id}`).catch(() => null)) - ); - - // Build a map for quick lookup by ID - const descriptorMap = {}; - workerIds.forEach((id, i) => { descriptorMap[id] = descriptors[i]; }); - - tbody.innerHTML = ''; - 
descriptors.forEach((desc, i) => { - const id = workerIds[i]; - const name = desc ? (desc.name || '-') : '-'; - const host = desc ? (desc.host || '-') : '-'; - const cores = desc ? (desc.cores != null ? desc.cores : '-') : '-'; - const timeout = desc ? (desc.timeout != null ? desc.timeout + 's' : '-') : '-'; - const functions = desc ? (desc.functions ? desc.functions.length : 0) : '-'; - - const tr = document.createElement('tr'); - tr.className = 'worker-row' + (id === selectedWorkerId ? ' selected' : ''); - tr.dataset.workerId = id; - tr.innerHTML = ` - <td style="color: var(--theme_bright);">${escapeHtml(name)}</td> - <td>${escapeHtml(host)}</td> - <td style="text-align: right;">${escapeHtml(String(cores))}</td> - <td style="text-align: right;">${escapeHtml(String(timeout))}</td> - <td style="text-align: right;">${escapeHtml(String(functions))}</td> - <td style="color: var(--theme_g1); font-family: monospace; font-size: 11px;">${escapeHtml(id)}</td> - `; - tr.addEventListener('click', () => { - document.querySelectorAll('.worker-row').forEach(r => r.classList.remove('selected')); - if (selectedWorkerId === id) { - // Toggle off - selectedWorkerId = null; - document.getElementById('worker-detail').style.display = 'none'; - } else { - selectedWorkerId = id; - tr.classList.add('selected'); - renderWorkerDetail(id, descriptorMap[id]); - } - }); - tbody.appendChild(tr); - }); - - // Re-render detail if selected worker is still present - if (selectedWorkerId && descriptorMap[selectedWorkerId]) { - renderWorkerDetail(selectedWorkerId, descriptorMap[selectedWorkerId]); - } else if (selectedWorkerId && !descriptorMap[selectedWorkerId]) { - selectedWorkerId = null; - document.getElementById('worker-detail').style.display = 'none'; - } - - container.style.display = 'block'; - } - - // Windows FILETIME: 100ns ticks since 1601-01-01. Convert to JS Date. 
- const FILETIME_EPOCH_OFFSET_MS = 11644473600000n; - function filetimeToDate(ticks) { - if (!ticks) return null; - const ms = BigInt(ticks) / 10000n - FILETIME_EPOCH_OFFSET_MS; - return new Date(Number(ms)); - } - - function formatTime(date) { - if (!date) return '-'; - return date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' }); - } - - function formatDuration(startDate, endDate) { - if (!startDate || !endDate) return '-'; - const ms = endDate - startDate; - if (ms < 0) return '-'; - if (ms < 1000) return ms + ' ms'; - if (ms < 60000) return (ms / 1000).toFixed(2) + ' s'; - const m = Math.floor(ms / 60000); - const s = ((ms % 60000) / 1000).toFixed(0).padStart(2, '0'); - return `${m}m ${s}s`; - } - - async function fetchQueues() { - const data = await fetchJSON('/compute/queues'); - const queues = data.queues || []; - - const empty = document.getElementById('queue-list-empty'); - const container = document.getElementById('queue-list-container'); - const tbody = document.getElementById('queue-list-body'); - - if (queues.length === 0) { - empty.style.display = ''; - container.style.display = 'none'; - return; - } - - empty.style.display = 'none'; - tbody.innerHTML = ''; - - for (const q of queues) { - const id = q.queue_id ?? '-'; - const badge = q.state === 'cancelled' - ? '<span class="status-badge failure">cancelled</span>' - : q.state === 'draining' - ? '<span class="status-badge" style="background:color-mix(in srgb, var(--theme_warn) 15%, transparent);color:var(--theme_warn);">draining</span>' - : q.is_complete - ? '<span class="status-badge success">complete</span>' - : '<span class="status-badge" style="background:color-mix(in srgb, var(--theme_p0) 15%, transparent);color:var(--theme_p0);">active</span>'; - const token = q.queue_token - ? 
`<span class="detail-mono">${escapeHtml(q.queue_token)}</span>` - : '<span style="color:var(--theme_faint);">-</span>'; - - const tr = document.createElement('tr'); - tr.innerHTML = ` - <td style="text-align: right; font-family: monospace; color: var(--theme_bright);">${escapeHtml(String(id))}</td> - <td style="text-align: center;">${badge}</td> - <td style="text-align: right;">${q.active_count ?? 0}</td> - <td style="text-align: right; color: var(--theme_ok);">${q.completed_count ?? 0}</td> - <td style="text-align: right; color: var(--theme_fail);">${q.failed_count ?? 0}</td> - <td style="text-align: right; color: var(--theme_warn);">${q.abandoned_count ?? 0}</td> - <td style="text-align: right; color: var(--theme_warn);">${q.cancelled_count ?? 0}</td> - <td>${token}</td> - `; - tbody.appendChild(tr); - } - - container.style.display = 'block'; - } - - async function fetchActionHistory() { - const data = await fetchJSON('/compute/jobs/history?limit=50'); - const entries = data.history || []; - - const empty = document.getElementById('action-history-empty'); - const container = document.getElementById('action-history-container'); - const tbody = document.getElementById('action-history-body'); - - if (entries.length === 0) { - empty.style.display = ''; - container.style.display = 'none'; - return; - } - - empty.style.display = 'none'; - tbody.innerHTML = ''; - - // Entries arrive oldest-first; reverse to show newest at top - for (const entry of [...entries].reverse()) { - const lsn = entry.lsn ?? '-'; - const succeeded = entry.succeeded; - const badge = succeeded == null - ? '<span class="status-badge" style="background:var(--theme_border_subtle);color:var(--theme_g1);">unknown</span>' - : succeeded - ? 
'<span class="status-badge success">ok</span>' - : '<span class="status-badge failure">failed</span>'; - const desc = entry.actionDescriptor || {}; - const fn = desc.Function || '-'; - const workerId = entry.workerId || '-'; - const actionId = entry.actionId || '-'; - - const startDate = filetimeToDate(entry.time_Running); - const endDate = filetimeToDate(entry.time_Completed ?? entry.time_Failed); - - const queueId = entry.queueId || 0; - const queueCell = queueId - ? `<a href="/compute/queues/${queueId}" style="color: var(--theme_ln); text-decoration: none; font-family: monospace;">${escapeHtml(String(queueId))}</a>` - : '<span style="color: var(--theme_faint);">-</span>'; - - const tr = document.createElement('tr'); - tr.innerHTML = ` - <td style="text-align: right; font-family: monospace; color: var(--theme_g1);">${escapeHtml(String(lsn))}</td> - <td style="text-align: right;">${queueCell}</td> - <td style="text-align: center;">${badge}</td> - <td style="color: var(--theme_bright);">${escapeHtml(fn)}</td> - <td style="text-align: right; font-size: 12px; white-space: nowrap; color: var(--theme_g1);">${formatTime(startDate)}</td> - <td style="text-align: right; font-size: 12px; white-space: nowrap; color: var(--theme_g1);">${formatTime(endDate)}</td> - <td style="text-align: right; font-size: 12px; white-space: nowrap;">${formatDuration(startDate, endDate)}</td> - <td style="font-family: monospace; font-size: 11px; color: var(--theme_g1);">${escapeHtml(workerId)}</td> - <td style="font-family: monospace; font-size: 11px; color: var(--theme_g1);">${escapeHtml(actionId)}</td> - `; - tbody.appendChild(tr); - } - - container.style.display = 'block'; - } - - async function updateDashboard() { - try { - await Promise.all([ - fetchHealth(), - fetchStats(), - fetchSysInfo(), - fetchWorkers(), - fetchQueues(), - fetchActionHistory() - ]); - - clearError(); - updateTimestamp(); - } catch (error) { - console.error('Error updating dashboard:', error); - 
showError(error.message); - } - } - - // Start updating - updateDashboard(); - setInterval(updateDashboard, REFRESH_INTERVAL); - </script> -</body> -</html> diff --git a/src/zenserver/frontend/html/compute/hub.html b/src/zenserver/frontend/html/compute/hub.html index b15b34577..41c80d3a3 100644 --- a/src/zenserver/frontend/html/compute/hub.html +++ b/src/zenserver/frontend/html/compute/hub.html @@ -83,7 +83,7 @@ } async function fetchStats() { - var data = await fetchJSON('/hub/stats'); + var data = await fetchJSON('/stats/hub'); var current = data.currentInstanceCount || 0; var max = data.maxInstanceCount || 0; diff --git a/src/zenserver/frontend/html/compute/index.html b/src/zenserver/frontend/html/compute/index.html index 9597fd7f3..aaa09aec0 100644 --- a/src/zenserver/frontend/html/compute/index.html +++ b/src/zenserver/frontend/html/compute/index.html @@ -1 +1 @@ -<meta http-equiv="refresh" content="0; url=compute.html" />
\ No newline at end of file +<meta http-equiv="refresh" content="0; url=/dashboard/?page=compute" />
\ No newline at end of file diff --git a/src/zenserver/frontend/html/compute/orchestrator.html b/src/zenserver/frontend/html/compute/orchestrator.html deleted file mode 100644 index d1a2bb015..000000000 --- a/src/zenserver/frontend/html/compute/orchestrator.html +++ /dev/null @@ -1,669 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <link rel="stylesheet" type="text/css" href="../zen.css" /> - <script src="../util/sanitize.js"></script> - <script src="../theme.js"></script> - <script src="../banner.js" defer></script> - <script src="../nav.js" defer></script> - <title>Zen Orchestrator Dashboard</title> - <style> - .agent-count { - display: flex; - align-items: center; - gap: 8px; - font-size: 14px; - padding: 8px 16px; - border-radius: 6px; - background: var(--theme_g3); - border: 1px solid var(--theme_g2); - } - - .agent-count .count { - font-size: 20px; - font-weight: 600; - color: var(--theme_bright); - } - </style> -</head> -<body> - <div class="container" style="max-width: 1400px; margin: 0 auto;"> - <zen-banner cluster-status="nominal" load="0" logo-src="../favicon.ico"></zen-banner> - <zen-nav> - <a href="/dashboard/">Home</a> - <a href="compute.html">Node</a> - <a href="orchestrator.html">Orchestrator</a> - </zen-nav> - <div class="header"> - <div> - <div class="timestamp">Last updated: <span id="last-update">Never</span></div> - </div> - <div class="agent-count"> - <span>Agents:</span> - <span class="count" id="agent-count">-</span> - </div> - </div> - - <div id="error-container"></div> - - <div class="card"> - <div class="card-title">Compute Agents</div> - <div id="empty-state" class="empty-state">No agents registered.</div> - <table id="agent-table" style="display: none;"> - <thead> - <tr> - <th style="width: 40px; text-align: center;">Health</th> - <th>Hostname</th> - <th style="text-align: right;">CPUs</th> - <th style="text-align: right;">CPU 
Usage</th> - <th style="text-align: right;">Memory</th> - <th style="text-align: right;">Queues</th> - <th style="text-align: right;">Pending</th> - <th style="text-align: right;">Running</th> - <th style="text-align: right;">Completed</th> - <th style="text-align: right;">Traffic</th> - <th style="text-align: right;">Last Seen</th> - </tr> - </thead> - <tbody id="agent-table-body"></tbody> - </table> - </div> - <div class="card" style="margin-top: 20px;"> - <div class="card-title">Connected Clients</div> - <div id="clients-empty" class="empty-state">No clients connected.</div> - <table id="clients-table" style="display: none;"> - <thead> - <tr> - <th style="width: 40px; text-align: center;">Health</th> - <th>Client ID</th> - <th>Hostname</th> - <th>Address</th> - <th style="text-align: right;">Last Seen</th> - </tr> - </thead> - <tbody id="clients-table-body"></tbody> - </table> - </div> - <div class="card" style="margin-top: 20px;"> - <div style="display: flex; align-items: center; gap: 12px; margin-bottom: 12px;"> - <div class="card-title" style="margin-bottom: 0;">Event History</div> - <div class="history-tabs"> - <button class="history-tab active" data-tab="workers" onclick="switchHistoryTab('workers')">Workers</button> - <button class="history-tab" data-tab="clients" onclick="switchHistoryTab('clients')">Clients</button> - </div> - </div> - <div id="history-panel-workers"> - <div id="history-empty" class="empty-state">No provisioning events recorded.</div> - <table id="history-table" style="display: none;"> - <thead> - <tr> - <th>Time</th> - <th>Event</th> - <th>Worker</th> - <th>Hostname</th> - </tr> - </thead> - <tbody id="history-table-body"></tbody> - </table> - </div> - <div id="history-panel-clients" style="display: none;"> - <div id="client-history-empty" class="empty-state">No client events recorded.</div> - <table id="client-history-table" style="display: none;"> - <thead> - <tr> - <th>Time</th> - <th>Event</th> - <th>Client</th> - <th>Hostname</th> 
- </tr> - </thead> - <tbody id="client-history-table-body"></tbody> - </table> - </div> - </div> - </div> - - <script> - const BASE_URL = window.location.origin; - const REFRESH_INTERVAL = 2000; - - function showError(message) { - document.getElementById('error-container').innerHTML = - '<div class="error">Error: ' + escapeHtml(message) + '</div>'; - } - - function clearError() { - document.getElementById('error-container').innerHTML = ''; - } - - function formatLastSeen(dtMs) { - if (dtMs == null) return '-'; - var seconds = Math.floor(dtMs / 1000); - if (seconds < 60) return seconds + 's ago'; - var minutes = Math.floor(seconds / 60); - if (minutes < 60) return minutes + 'm ' + (seconds % 60) + 's ago'; - var hours = Math.floor(minutes / 60); - return hours + 'h ' + (minutes % 60) + 'm ago'; - } - - function healthClass(dtMs, reachable) { - if (reachable === false) return 'health-red'; - if (dtMs == null) return 'health-red'; - var seconds = dtMs / 1000; - if (seconds < 30 && reachable === true) return 'health-green'; - if (seconds < 120) return 'health-yellow'; - return 'health-red'; - } - - function healthTitle(dtMs, reachable) { - var seenStr = dtMs != null ? 
'Last seen ' + formatLastSeen(dtMs) : 'Never seen'; - if (reachable === true) return seenStr + ' · Reachable'; - if (reachable === false) return seenStr + ' · Unreachable'; - return seenStr + ' · Reachability unknown'; - } - - function formatCpuUsage(percent) { - if (percent == null || percent === 0) return '-'; - return percent.toFixed(1) + '%'; - } - - function formatMemory(usedBytes, totalBytes) { - if (!totalBytes) return '-'; - var usedGiB = usedBytes / (1024 * 1024 * 1024); - var totalGiB = totalBytes / (1024 * 1024 * 1024); - return usedGiB.toFixed(1) + ' / ' + totalGiB.toFixed(1) + ' GiB'; - } - - function formatBytes(bytes) { - if (!bytes) return '-'; - if (bytes < 1024) return bytes + ' B'; - if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KiB'; - if (bytes < 1024 * 1024 * 1024) return (bytes / (1024 * 1024)).toFixed(1) + ' MiB'; - if (bytes < 1024 * 1024 * 1024 * 1024) return (bytes / (1024 * 1024 * 1024)).toFixed(1) + ' GiB'; - return (bytes / (1024 * 1024 * 1024 * 1024)).toFixed(1) + ' TiB'; - } - - function formatTraffic(recv, sent) { - if (!recv && !sent) return '-'; - return formatBytes(recv) + ' / ' + formatBytes(sent); - } - - function parseIpFromUri(uri) { - try { - var url = new URL(uri); - var host = url.hostname; - // Strip IPv6 brackets - if (host.startsWith('[') && host.endsWith(']')) host = host.slice(1, -1); - // Only handle IPv4 - var parts = host.split('.'); - if (parts.length !== 4) return null; - var octets = parts.map(Number); - if (octets.some(function(o) { return isNaN(o) || o < 0 || o > 255; })) return null; - return octets; - } catch (e) { - return null; - } - } - - function computeCidr(ips) { - if (ips.length === 0) return null; - if (ips.length === 1) return ips[0].join('.') + '/32'; - - // Convert each IP to a 32-bit integer - var ints = ips.map(function(o) { - return ((o[0] << 24) | (o[1] << 16) | (o[2] << 8) | o[3]) >>> 0; - }); - - // Find common prefix length by ANDing all identical high bits - var common = ~0 
>>> 0; - for (var i = 1; i < ints.length; i++) { - // XOR to find differing bits, then mask away everything from the first difference down - var diff = (ints[0] ^ ints[i]) >>> 0; - if (diff !== 0) { - var bit = 31 - Math.floor(Math.log2(diff)); - var mask = bit > 0 ? ((~0 << (32 - bit)) >>> 0) : 0; - common = (common & mask) >>> 0; - } - } - - // Count leading ones in the common mask - var prefix = 0; - for (var b = 31; b >= 0; b--) { - if ((common >>> b) & 1) prefix++; - else break; - } - - // Network address - var net = (ints[0] & common) >>> 0; - var a = (net >>> 24) & 0xff; - var bv = (net >>> 16) & 0xff; - var c = (net >>> 8) & 0xff; - var d = net & 0xff; - return a + '.' + bv + '.' + c + '.' + d + '/' + prefix; - } - - function renderDashboard(data) { - var banner = document.querySelector('zen-banner'); - if (data.hostname) { - banner.setAttribute('tagline', 'Orchestrator \u2014 ' + data.hostname); - } - var workers = data.workers || []; - - document.getElementById('agent-count').textContent = workers.length; - - if (workers.length === 0) { - banner.setAttribute('cluster-status', 'degraded'); - banner.setAttribute('load', '0'); - } else { - banner.setAttribute('cluster-status', 'nominal'); - } - - var emptyState = document.getElementById('empty-state'); - var table = document.getElementById('agent-table'); - var tbody = document.getElementById('agent-table-body'); - - if (workers.length === 0) { - emptyState.style.display = ''; - table.style.display = 'none'; - } else { - emptyState.style.display = 'none'; - table.style.display = ''; - - tbody.innerHTML = ''; - var totalCpus = 0; - var totalWeightedCpuUsage = 0; - var totalMemUsed = 0; - var totalMemTotal = 0; - var totalQueues = 0; - var totalPending = 0; - var totalRunning = 0; - var totalCompleted = 0; - var totalBytesRecv = 0; - var totalBytesSent = 0; - var allIps = []; - for (var i = 0; i < workers.length; i++) { - var w = workers[i]; - var uri = w.uri || ''; - var dt = w.dt; - var dashboardUrl = uri + 
'/dashboard/compute/'; - - var id = w.id || ''; - - var hostname = w.hostname || ''; - var cpus = w.cpus || 0; - totalCpus += cpus; - if (cpus > 0 && typeof w.cpu_usage === 'number') { - totalWeightedCpuUsage += w.cpu_usage * cpus; - } - - var memTotal = w.memory_total || 0; - var memUsed = w.memory_used || 0; - totalMemTotal += memTotal; - totalMemUsed += memUsed; - - var activeQueues = w.active_queues || 0; - totalQueues += activeQueues; - - var actionsPending = w.actions_pending || 0; - var actionsRunning = w.actions_running || 0; - var actionsCompleted = w.actions_completed || 0; - totalPending += actionsPending; - totalRunning += actionsRunning; - totalCompleted += actionsCompleted; - - var bytesRecv = w.bytes_received || 0; - var bytesSent = w.bytes_sent || 0; - totalBytesRecv += bytesRecv; - totalBytesSent += bytesSent; - - var ip = parseIpFromUri(uri); - if (ip) allIps.push(ip); - - var reachable = w.reachable; - var hClass = healthClass(dt, reachable); - var hTitle = healthTitle(dt, reachable); - - var platform = w.platform || ''; - var badges = ''; - if (platform) { - var platColors = { windows: '#0078d4', wine: '#722f37', linux: '#e95420', macos: '#a2aaad' }; - var platColor = platColors[platform] || '#8b949e'; - badges += ' <span style="display:inline-block;padding:1px 6px;border-radius:10px;font-size:10px;font-weight:600;color:#fff;background:' + platColor + ';vertical-align:middle;margin-left:4px;">' + escapeHtml(platform) + '</span>'; - } - var provisioner = w.provisioner || ''; - if (provisioner) { - var provColors = { horde: '#8957e5', nomad: '#3fb950' }; - var provColor = provColors[provisioner] || '#8b949e'; - badges += ' <span style="display:inline-block;padding:1px 6px;border-radius:10px;font-size:10px;font-weight:600;color:#fff;background:' + provColor + ';vertical-align:middle;margin-left:4px;">' + escapeHtml(provisioner) + '</span>'; - } - - var tr = document.createElement('tr'); - tr.title = id; - tr.innerHTML = - '<td style="text-align: 
center;"><span class="health-dot ' + hClass + '" title="' + escapeHtml(hTitle) + '"></span></td>' + - '<td><a href="' + escapeHtml(dashboardUrl) + '" target="_blank">' + escapeHtml(hostname) + '</a>' + badges + '</td>' + - '<td style="text-align: right;">' + (cpus > 0 ? cpus : '-') + '</td>' + - '<td style="text-align: right;">' + formatCpuUsage(w.cpu_usage) + '</td>' + - '<td style="text-align: right;">' + formatMemory(memUsed, memTotal) + '</td>' + - '<td style="text-align: right;">' + (activeQueues > 0 ? activeQueues : '-') + '</td>' + - '<td style="text-align: right;">' + actionsPending + '</td>' + - '<td style="text-align: right;">' + actionsRunning + '</td>' + - '<td style="text-align: right;">' + actionsCompleted + '</td>' + - '<td style="text-align: right; font-size: 11px; color: var(--theme_g1);">' + formatTraffic(bytesRecv, bytesSent) + '</td>' + - '<td style="text-align: right; color: var(--theme_g1);">' + formatLastSeen(dt) + '</td>'; - tbody.appendChild(tr); - } - - var clusterLoad = totalCpus > 0 ? (totalWeightedCpuUsage / totalCpus) : 0; - banner.setAttribute('load', clusterLoad.toFixed(1)); - - // Total row - var cidr = computeCidr(allIps); - var totalTr = document.createElement('tr'); - totalTr.className = 'total-row'; - totalTr.innerHTML = - '<td></td>' + - '<td style="text-align: right; color: var(--theme_g1); text-transform: uppercase; font-size: 11px;">Total' + (cidr ? 
' <span style="font-family: monospace; font-weight: normal;">' + escapeHtml(cidr) + '</span>' : '') + '</td>' + - '<td style="text-align: right;">' + totalCpus + '</td>' + - '<td></td>' + - '<td style="text-align: right;">' + formatMemory(totalMemUsed, totalMemTotal) + '</td>' + - '<td style="text-align: right;">' + totalQueues + '</td>' + - '<td style="text-align: right;">' + totalPending + '</td>' + - '<td style="text-align: right;">' + totalRunning + '</td>' + - '<td style="text-align: right;">' + totalCompleted + '</td>' + - '<td style="text-align: right; font-size: 11px;">' + formatTraffic(totalBytesRecv, totalBytesSent) + '</td>' + - '<td></td>'; - tbody.appendChild(totalTr); - } - - clearError(); - document.getElementById('last-update').textContent = new Date().toLocaleTimeString(); - - // Render provisioning history if present in WebSocket payload - if (data.events) { - renderProvisioningHistory(data.events); - } - - // Render connected clients if present - if (data.clients) { - renderClients(data.clients); - } - - // Render client history if present - if (data.client_events) { - renderClientHistory(data.client_events); - } - } - - function eventBadge(type) { - var colors = { joined: 'var(--theme_ok)', left: 'var(--theme_fail)', returned: 'var(--theme_warn)' }; - var labels = { joined: 'Joined', left: 'Left', returned: 'Returned' }; - var color = colors[type] || 'var(--theme_g1)'; - var label = labels[type] || type; - return '<span style="display:inline-block;padding:2px 8px;border-radius:4px;font-size:11px;font-weight:600;color:var(--theme_g4);background:' + color + ';">' + escapeHtml(label) + '</span>'; - } - - function formatTimestamp(ts) { - if (!ts) return '-'; - // CbObject DateTime serialized as ticks (100ns since 0001-01-01) or ISO string - var date; - if (typeof ts === 'number') { - // .NET-style ticks: convert to Unix ms - var unixMs = (ts - 621355968000000000) / 10000; - date = new Date(unixMs); - } else { - date = new Date(ts); - } - if 
(isNaN(date.getTime())) return '-'; - return date.toLocaleTimeString(); - } - - var activeHistoryTab = 'workers'; - - function switchHistoryTab(tab) { - activeHistoryTab = tab; - var tabs = document.querySelectorAll('.history-tab'); - for (var i = 0; i < tabs.length; i++) { - tabs[i].classList.toggle('active', tabs[i].getAttribute('data-tab') === tab); - } - document.getElementById('history-panel-workers').style.display = tab === 'workers' ? '' : 'none'; - document.getElementById('history-panel-clients').style.display = tab === 'clients' ? '' : 'none'; - } - - function renderProvisioningHistory(events) { - var emptyState = document.getElementById('history-empty'); - var table = document.getElementById('history-table'); - var tbody = document.getElementById('history-table-body'); - - if (!events || events.length === 0) { - emptyState.style.display = ''; - table.style.display = 'none'; - return; - } - - emptyState.style.display = 'none'; - table.style.display = ''; - tbody.innerHTML = ''; - - for (var i = 0; i < events.length; i++) { - var evt = events[i]; - var tr = document.createElement('tr'); - tr.innerHTML = - '<td style="color: var(--theme_g1);">' + formatTimestamp(evt.ts) + '</td>' + - '<td>' + eventBadge(evt.type) + '</td>' + - '<td>' + escapeHtml(evt.worker_id || '') + '</td>' + - '<td>' + escapeHtml(evt.hostname || '') + '</td>'; - tbody.appendChild(tr); - } - } - - function clientHealthClass(dtMs) { - if (dtMs == null) return 'health-red'; - var seconds = dtMs / 1000; - if (seconds < 30) return 'health-green'; - if (seconds < 120) return 'health-yellow'; - return 'health-red'; - } - - function renderClients(clients) { - var emptyState = document.getElementById('clients-empty'); - var table = document.getElementById('clients-table'); - var tbody = document.getElementById('clients-table-body'); - - if (!clients || clients.length === 0) { - emptyState.style.display = ''; - table.style.display = 'none'; - return; - } - - emptyState.style.display = 'none'; - 
table.style.display = ''; - tbody.innerHTML = ''; - - for (var i = 0; i < clients.length; i++) { - var c = clients[i]; - var dt = c.dt; - var hClass = clientHealthClass(dt); - var hTitle = dt != null ? 'Last seen ' + formatLastSeen(dt) : 'Never seen'; - - var sessionBadge = ''; - if (c.session_id) { - sessionBadge = ' <span style="font-family:monospace;font-size:10px;color:var(--theme_faint);" title="Session ' + escapeHtml(c.session_id) + '">' + escapeHtml(c.session_id.substring(0, 8)) + '</span>'; - } - - var tr = document.createElement('tr'); - tr.innerHTML = - '<td style="text-align: center;"><span class="health-dot ' + hClass + '" title="' + escapeHtml(hTitle) + '"></span></td>' + - '<td>' + escapeHtml(c.id || '') + sessionBadge + '</td>' + - '<td>' + escapeHtml(c.hostname || '') + '</td>' + - '<td style="font-family: monospace; font-size: 12px; color: var(--theme_g1);">' + escapeHtml(c.address || '') + '</td>' + - '<td style="text-align: right; color: var(--theme_g1);">' + formatLastSeen(dt) + '</td>'; - tbody.appendChild(tr); - } - } - - function clientEventBadge(type) { - var colors = { connected: 'var(--theme_ok)', disconnected: 'var(--theme_fail)', updated: 'var(--theme_warn)' }; - var labels = { connected: 'Connected', disconnected: 'Disconnected', updated: 'Updated' }; - var color = colors[type] || 'var(--theme_g1)'; - var label = labels[type] || type; - return '<span style="display:inline-block;padding:2px 8px;border-radius:4px;font-size:11px;font-weight:600;color:var(--theme_g4);background:' + color + ';">' + escapeHtml(label) + '</span>'; - } - - function renderClientHistory(events) { - var emptyState = document.getElementById('client-history-empty'); - var table = document.getElementById('client-history-table'); - var tbody = document.getElementById('client-history-table-body'); - - if (!events || events.length === 0) { - emptyState.style.display = ''; - table.style.display = 'none'; - return; - } - - emptyState.style.display = 'none'; - 
table.style.display = ''; - tbody.innerHTML = ''; - - for (var i = 0; i < events.length; i++) { - var evt = events[i]; - var tr = document.createElement('tr'); - tr.innerHTML = - '<td style="color: var(--theme_g1);">' + formatTimestamp(evt.ts) + '</td>' + - '<td>' + clientEventBadge(evt.type) + '</td>' + - '<td>' + escapeHtml(evt.client_id || '') + '</td>' + - '<td>' + escapeHtml(evt.hostname || '') + '</td>'; - tbody.appendChild(tr); - } - } - - // Fetch-based polling fallback - var pollTimer = null; - - async function fetchProvisioningHistory() { - try { - var response = await fetch(BASE_URL + '/orch/history?limit=50', { - headers: { 'Accept': 'application/json' } - }); - if (response.ok) { - var data = await response.json(); - renderProvisioningHistory(data.events || []); - } - } catch (e) { - console.error('Error fetching provisioning history:', e); - } - } - - async function fetchClients() { - try { - var response = await fetch(BASE_URL + '/orch/clients', { - headers: { 'Accept': 'application/json' } - }); - if (response.ok) { - var data = await response.json(); - renderClients(data.clients || []); - } - } catch (e) { - console.error('Error fetching clients:', e); - } - } - - async function fetchClientHistory() { - try { - var response = await fetch(BASE_URL + '/orch/clients/history?limit=50', { - headers: { 'Accept': 'application/json' } - }); - if (response.ok) { - var data = await response.json(); - renderClientHistory(data.client_events || []); - } - } catch (e) { - console.error('Error fetching client history:', e); - } - } - - async function fetchDashboard() { - var banner = document.querySelector('zen-banner'); - try { - var response = await fetch(BASE_URL + '/orch/agents', { - headers: { 'Accept': 'application/json' } - }); - - if (!response.ok) { - banner.setAttribute('cluster-status', 'degraded'); - throw new Error('HTTP ' + response.status + ': ' + response.statusText); - } - - renderDashboard(await response.json()); - fetchProvisioningHistory(); - 
fetchClients(); - fetchClientHistory(); - } catch (error) { - console.error('Error updating dashboard:', error); - showError(error.message); - banner.setAttribute('cluster-status', 'offline'); - } - } - - function startPolling() { - if (pollTimer) return; - fetchDashboard(); - pollTimer = setInterval(fetchDashboard, REFRESH_INTERVAL); - } - - function stopPolling() { - if (pollTimer) { - clearInterval(pollTimer); - pollTimer = null; - } - } - - // WebSocket connection with automatic reconnect and polling fallback - var ws = null; - - function connectWebSocket() { - var proto = window.location.protocol === 'https:' ? 'wss:' : 'ws:'; - ws = new WebSocket(proto + '//' + window.location.host + '/orch/ws'); - - ws.onopen = function() { - stopPolling(); - clearError(); - }; - - ws.onmessage = function(event) { - try { - renderDashboard(JSON.parse(event.data)); - } catch (e) { - console.error('WebSocket message parse error:', e); - } - }; - - ws.onclose = function() { - ws = null; - startPolling(); - setTimeout(connectWebSocket, 3000); - }; - - ws.onerror = function() { - // onclose will fire after onerror - }; - } - - // Fetch orchestrator hostname for the banner - fetch(BASE_URL + '/orch/status', { headers: { 'Accept': 'application/json' } }) - .then(function(r) { return r.ok ? 
r.json() : null; }) - .then(function(d) { - if (d && d.hostname) { - document.querySelector('zen-banner').setAttribute('tagline', 'Orchestrator \u2014 ' + d.hostname); - } - }) - .catch(function() {}); - - // Initial load via fetch, then try WebSocket - fetchDashboard(); - connectWebSocket(); - </script> -</body> -</html> diff --git a/src/zenserver/frontend/html/pages/builds.js b/src/zenserver/frontend/html/pages/builds.js index 6b3426378..c63d13b91 100644 --- a/src/zenserver/frontend/html/pages/builds.js +++ b/src/zenserver/frontend/html/pages/builds.js @@ -39,6 +39,7 @@ export class Page extends ZenPage _render_stats(stats) { + stats = this._merge_last_stats(stats); const grid = this._stats_grid; const safe = (obj, path) => path.split(".").reduce((a, b) => a && a[b], obj); @@ -49,39 +50,30 @@ export class Page extends ZenPage // Build Store tile { - const blobs = safe(stats, "store.blobs"); - const metadata = safe(stats, "store.metadata"); - if (blobs || metadata) - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Build Store"); - const columns = tile.tag().classify("tile-columns"); + const blobs = safe(stats, "store.blobs") || {}; + const metadata = safe(stats, "store.metadata") || {}; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("Build Store"); + const columns = tile.tag().classify("tile-columns"); - const left = columns.tag().classify("tile-metrics"); - this._metric(left, Friendly.bytes(safe(stats, "store.size.disk") || 0), "disk", true); - if (blobs) - { - this._metric(left, Friendly.sep(blobs.count || 0), "blobs"); - this._metric(left, Friendly.sep(blobs.readcount || 0), "blob reads"); - this._metric(left, Friendly.sep(blobs.writecount || 0), "blob writes"); - const blobHitRatio = (blobs.readcount || 0) > 0 - ? 
(((blobs.hitcount || 0) / blobs.readcount) * 100).toFixed(1) + "%" - : "-"; - this._metric(left, blobHitRatio, "blob hit ratio"); - } + const left = columns.tag().classify("tile-metrics"); + this._metric(left, Friendly.bytes(safe(stats, "store.size.disk") || 0), "disk", true); + this._metric(left, Friendly.sep(blobs.count || 0), "blobs"); + this._metric(left, Friendly.sep(blobs.readcount || 0), "blob reads"); + this._metric(left, Friendly.sep(blobs.writecount || 0), "blob writes"); + const blobHitRatio = (blobs.readcount || 0) > 0 + ? (((blobs.hitcount || 0) / blobs.readcount) * 100).toFixed(1) + "%" + : "-"; + this._metric(left, blobHitRatio, "blob hit ratio"); - const right = columns.tag().classify("tile-metrics"); - if (metadata) - { - this._metric(right, Friendly.sep(metadata.count || 0), "metadata entries", true); - this._metric(right, Friendly.sep(metadata.readcount || 0), "meta reads"); - this._metric(right, Friendly.sep(metadata.writecount || 0), "meta writes"); - const metaHitRatio = (metadata.readcount || 0) > 0 - ? (((metadata.hitcount || 0) / metadata.readcount) * 100).toFixed(1) + "%" - : "-"; - this._metric(right, metaHitRatio, "meta hit ratio"); - } - } + const right = columns.tag().classify("tile-metrics"); + this._metric(right, Friendly.sep(metadata.count || 0), "metadata entries", true); + this._metric(right, Friendly.sep(metadata.readcount || 0), "meta reads"); + this._metric(right, Friendly.sep(metadata.writecount || 0), "meta writes"); + const metaHitRatio = (metadata.readcount || 0) > 0 + ? 
(((metadata.hitcount || 0) / metadata.readcount) * 100).toFixed(1) + "%" + : "-"; + this._metric(right, metaHitRatio, "meta hit ratio"); } } diff --git a/src/zenserver/frontend/html/pages/cache.js b/src/zenserver/frontend/html/pages/cache.js index e0f6f73b6..683f7df4f 100644 --- a/src/zenserver/frontend/html/pages/cache.js +++ b/src/zenserver/frontend/html/pages/cache.js @@ -6,7 +6,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" import { Modal } from "../util/modal.js" -import { Table, Toolbar } from "../util/widgets.js" +import { Table, Toolbar, Pager, add_copy_button } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// export class Page extends ZenPage @@ -44,8 +44,6 @@ export class Page extends ZenPage // Cache Namespaces var section = this._collapsible_section("Cache Namespaces"); - section.tag().classify("dropall").text("drop-all").on_click(() => this.drop_all()); - var columns = [ "namespace", "dir", @@ -56,31 +54,30 @@ export class Page extends ZenPage "actions", ]; - var zcache_info = await new Fetcher().resource("/z$/").json(); this._cache_table = section.add_widget(Table, columns, Table.Flag_FitLeft|Table.Flag_PackRight|Table.Flag_AlignNumeric); - for (const namespace of zcache_info["Namespaces"] || []) - { - new Fetcher().resource(`/z$/${namespace}/`).json().then((data) => { - const row = this._cache_table.add_row( - "", - data["Configuration"]["RootDir"], - data["Buckets"].length, - data["EntryCount"], - Friendly.bytes(data["StorageSize"].DiskSize), - Friendly.bytes(data["StorageSize"].MemorySize) - ); - var cell = row.get_cell(0); - cell.tag().text(namespace).on_click(() => this.view_namespace(namespace)); - - cell = row.get_cell(-1); - const action_tb = new Toolbar(cell, true); - action_tb.left().add("view").on_click(() => this.view_namespace(namespace)); - action_tb.left().add("drop").on_click(() => 
this.drop_namespace(namespace)); - - row.attr("zs_name", namespace); - }); - } + this._cache_pager = new Pager(section, 25, () => this._render_cache_page(), + Pager.make_search_fn(() => this._cache_data, item => item.namespace)); + const cache_drop_link = document.createElement("span"); + cache_drop_link.className = "dropall zen_action"; + cache_drop_link.style.position = "static"; + cache_drop_link.textContent = "drop-all"; + cache_drop_link.addEventListener("click", () => this.drop_all()); + this._cache_pager.prepend(cache_drop_link); + + const loading = Pager.loading(section); + const zcache_info = await new Fetcher().resource("/z$/").json(); + const namespaces = zcache_info["Namespaces"] || []; + const results = await Promise.allSettled( + namespaces.map(ns => new Fetcher().resource(`/z$/${ns}/`).json().then(data => ({ namespace: ns, data }))) + ); + this._cache_data = results + .filter(r => r.status === "fulfilled") + .map(r => r.value) + .sort((a, b) => a.namespace.localeCompare(b.namespace)); + this._cache_pager.set_total(this._cache_data.length); + this._render_cache_page(); + loading.remove(); // Namespace detail area (inside namespaces section so it collapses together) this._namespace_host = section; @@ -95,51 +92,79 @@ export class Page extends ZenPage } } + _render_cache_page() + { + const { start, end } = this._cache_pager.page_range(); + this._cache_table.clear(start); + for (let i = start; i < end; i++) + { + const item = this._cache_data[i]; + const data = item.data; + const row = this._cache_table.add_row( + "", + data["Configuration"]["RootDir"], + data["Buckets"].length, + data["EntryCount"], + Friendly.bytes(data["StorageSize"].DiskSize), + Friendly.bytes(data["StorageSize"].MemorySize) + ); + + const cell = row.get_cell(0); + cell.tag().text(item.namespace).on_click(() => this.view_namespace(item.namespace)); + add_copy_button(cell.inner(), item.namespace); + add_copy_button(row.get_cell(1).inner(), data["Configuration"]["RootDir"]); + + const 
action_cell = row.get_cell(-1); + const action_tb = new Toolbar(action_cell, true); + action_tb.left().add("view").on_click(() => this.view_namespace(item.namespace)); + action_tb.left().add("drop").on_click(() => this.drop_namespace(item.namespace)); + + row.attr("zs_name", item.namespace); + } + } + _render_stats(stats) { + stats = this._merge_last_stats(stats); const safe = (obj, path) => path.split(".").reduce((a, b) => a && a[b], obj); const grid = this._stats_grid; - this._last_stats = stats; grid.inner().innerHTML = ""; // Store I/O tile { - const store = safe(stats, "cache.store"); - if (store) - { - const tile = grid.tag().classify("card").classify("stats-tile").classify("stats-tile-detailed"); - if (this._selected_category === "store") tile.classify("stats-tile-selected"); - tile.on_click(() => this._select_category("store")); - tile.tag().classify("card-title").text("Store I/O"); - const columns = tile.tag().classify("tile-columns"); - - const left = columns.tag().classify("tile-metrics"); - const storeHits = store.hits || 0; - const storeMisses = store.misses || 0; - const storeTotal = storeHits + storeMisses; - const storeRatio = storeTotal > 0 ? 
((storeHits / storeTotal) * 100).toFixed(1) + "%" : "-"; - this._metric(left, storeRatio, "store hit ratio", true); - this._metric(left, Friendly.sep(storeHits), "hits"); - this._metric(left, Friendly.sep(storeMisses), "misses"); - this._metric(left, Friendly.sep(store.writes || 0), "writes"); - this._metric(left, Friendly.sep(store.rejected_reads || 0), "rejected reads"); - this._metric(left, Friendly.sep(store.rejected_writes || 0), "rejected writes"); - - const right = columns.tag().classify("tile-metrics"); - const readRateMean = safe(store, "read.bytes.rate_mean") || 0; - const readRate1 = safe(store, "read.bytes.rate_1") || 0; - const readRate5 = safe(store, "read.bytes.rate_5") || 0; - const writeRateMean = safe(store, "write.bytes.rate_mean") || 0; - const writeRate1 = safe(store, "write.bytes.rate_1") || 0; - const writeRate5 = safe(store, "write.bytes.rate_5") || 0; - this._metric(right, Friendly.bytes(readRateMean) + "/s", "read rate (mean)", true); - this._metric(right, Friendly.bytes(readRate1) + "/s", "read rate (1m)"); - this._metric(right, Friendly.bytes(readRate5) + "/s", "read rate (5m)"); - this._metric(right, Friendly.bytes(writeRateMean) + "/s", "write rate (mean)"); - this._metric(right, Friendly.bytes(writeRate1) + "/s", "write rate (1m)"); - this._metric(right, Friendly.bytes(writeRate5) + "/s", "write rate (5m)"); - } + const store = safe(stats, "cache.store") || {}; + const tile = grid.tag().classify("card").classify("stats-tile").classify("stats-tile-detailed"); + if (this._selected_category === "store") tile.classify("stats-tile-selected"); + tile.on_click(() => this._select_category("store")); + tile.tag().classify("card-title").text("Store I/O"); + const columns = tile.tag().classify("tile-columns"); + + const left = columns.tag().classify("tile-metrics"); + const storeHits = store.hits || 0; + const storeMisses = store.misses || 0; + const storeTotal = storeHits + storeMisses; + const storeRatio = storeTotal > 0 ? 
((storeHits / storeTotal) * 100).toFixed(1) + "%" : "-"; + this._metric(left, storeRatio, "store hit ratio", true); + this._metric(left, Friendly.sep(storeHits), "hits"); + this._metric(left, Friendly.sep(storeMisses), "misses"); + this._metric(left, Friendly.sep(store.writes || 0), "writes"); + this._metric(left, Friendly.sep(store.rejected_reads || 0), "rejected reads"); + this._metric(left, Friendly.sep(store.rejected_writes || 0), "rejected writes"); + + const right = columns.tag().classify("tile-metrics"); + const readRateMean = safe(store, "read.bytes.rate_mean") || 0; + const readRate1 = safe(store, "read.bytes.rate_1") || 0; + const readRate5 = safe(store, "read.bytes.rate_5") || 0; + const writeRateMean = safe(store, "write.bytes.rate_mean") || 0; + const writeRate1 = safe(store, "write.bytes.rate_1") || 0; + const writeRate5 = safe(store, "write.bytes.rate_5") || 0; + this._metric(right, Friendly.bytes(readRateMean) + "/s", "read rate (mean)", true); + this._metric(right, Friendly.bytes(readRate1) + "/s", "read rate (1m)"); + this._metric(right, Friendly.bytes(readRate5) + "/s", "read rate (5m)"); + this._metric(right, Friendly.bytes(writeRateMean) + "/s", "write rate (mean)"); + this._metric(right, Friendly.bytes(writeRate1) + "/s", "write rate (1m)"); + this._metric(right, Friendly.bytes(writeRate5) + "/s", "write rate (5m)"); } // Hit/Miss tile @@ -175,89 +200,83 @@ export class Page extends ZenPage // HTTP Requests tile { - const req = safe(stats, "requests"); - if (req) - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("HTTP Requests"); - const columns = tile.tag().classify("tile-columns"); + const req = safe(stats, "requests") || {}; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("HTTP Requests"); + const columns = tile.tag().classify("tile-columns"); - const left = columns.tag().classify("tile-metrics"); - const reqData = 
req.requests || req; - this._metric(left, Friendly.sep(reqData.count || 0), "total requests", true); - if (reqData.rate_mean > 0) - { - this._metric(left, Friendly.sep(reqData.rate_mean, 1) + "/s", "req/sec (mean)"); - } - if (reqData.rate_1 > 0) - { - this._metric(left, Friendly.sep(reqData.rate_1, 1) + "/s", "req/sec (1m)"); - } - if (reqData.rate_5 > 0) - { - this._metric(left, Friendly.sep(reqData.rate_5, 1) + "/s", "req/sec (5m)"); - } - if (reqData.rate_15 > 0) - { - this._metric(left, Friendly.sep(reqData.rate_15, 1) + "/s", "req/sec (15m)"); - } - const badRequests = safe(stats, "cache.badrequestcount") || 0; - this._metric(left, Friendly.sep(badRequests), "bad requests"); + const left = columns.tag().classify("tile-metrics"); + const reqData = req.requests || req; + this._metric(left, Friendly.sep(reqData.count || 0), "total requests", true); + if (reqData.rate_mean > 0) + { + this._metric(left, Friendly.sep(reqData.rate_mean, 1) + "/s", "req/sec (mean)"); + } + if (reqData.rate_1 > 0) + { + this._metric(left, Friendly.sep(reqData.rate_1, 1) + "/s", "req/sec (1m)"); + } + if (reqData.rate_5 > 0) + { + this._metric(left, Friendly.sep(reqData.rate_5, 1) + "/s", "req/sec (5m)"); + } + if (reqData.rate_15 > 0) + { + this._metric(left, Friendly.sep(reqData.rate_15, 1) + "/s", "req/sec (15m)"); + } + const badRequests = safe(stats, "cache.badrequestcount") || 0; + this._metric(left, Friendly.sep(badRequests), "bad requests"); - const right = columns.tag().classify("tile-metrics"); - this._metric(right, Friendly.duration(reqData.t_avg || 0), "avg latency", true); - if (reqData.t_p75) - { - this._metric(right, Friendly.duration(reqData.t_p75), "p75"); - } - if (reqData.t_p95) - { - this._metric(right, Friendly.duration(reqData.t_p95), "p95"); - } - if (reqData.t_p99) - { - this._metric(right, Friendly.duration(reqData.t_p99), "p99"); - } - if (reqData.t_p999) - { - this._metric(right, Friendly.duration(reqData.t_p999), "p999"); - } - if (reqData.t_max) - { - 
this._metric(right, Friendly.duration(reqData.t_max), "max"); - } + const right = columns.tag().classify("tile-metrics"); + this._metric(right, Friendly.duration(reqData.t_avg || 0), "avg latency", true); + if (reqData.t_p75) + { + this._metric(right, Friendly.duration(reqData.t_p75), "p75"); + } + if (reqData.t_p95) + { + this._metric(right, Friendly.duration(reqData.t_p95), "p95"); + } + if (reqData.t_p99) + { + this._metric(right, Friendly.duration(reqData.t_p99), "p99"); + } + if (reqData.t_p999) + { + this._metric(right, Friendly.duration(reqData.t_p999), "p999"); + } + if (reqData.t_max) + { + this._metric(right, Friendly.duration(reqData.t_max), "max"); } } // RPC tile { - const rpc = safe(stats, "cache.rpc"); - if (rpc) - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("RPC"); - const columns = tile.tag().classify("tile-columns"); + const rpc = safe(stats, "cache.rpc") || {}; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("RPC"); + const columns = tile.tag().classify("tile-columns"); - const left = columns.tag().classify("tile-metrics"); - this._metric(left, Friendly.sep(rpc.count || 0), "rpc calls", true); - this._metric(left, Friendly.sep(rpc.ops || 0), "batch ops"); + const left = columns.tag().classify("tile-metrics"); + this._metric(left, Friendly.sep(rpc.count || 0), "rpc calls", true); + this._metric(left, Friendly.sep(rpc.ops || 0), "batch ops"); - const right = columns.tag().classify("tile-metrics"); - if (rpc.records) - { - this._metric(right, Friendly.sep(rpc.records.count || 0), "record calls"); - this._metric(right, Friendly.sep(rpc.records.ops || 0), "record ops"); - } - if (rpc.values) - { - this._metric(right, Friendly.sep(rpc.values.count || 0), "value calls"); - this._metric(right, Friendly.sep(rpc.values.ops || 0), "value ops"); - } - if (rpc.chunks) - { - this._metric(right, Friendly.sep(rpc.chunks.count || 0), 
"chunk calls"); - this._metric(right, Friendly.sep(rpc.chunks.ops || 0), "chunk ops"); - } + const right = columns.tag().classify("tile-metrics"); + if (rpc.records) + { + this._metric(right, Friendly.sep(rpc.records.count || 0), "record calls"); + this._metric(right, Friendly.sep(rpc.records.ops || 0), "record ops"); + } + if (rpc.values) + { + this._metric(right, Friendly.sep(rpc.values.count || 0), "value calls"); + this._metric(right, Friendly.sep(rpc.values.ops || 0), "value ops"); + } + if (rpc.chunks) + { + this._metric(right, Friendly.sep(rpc.chunks.count || 0), "chunk calls"); + this._metric(right, Friendly.sep(rpc.chunks.ops || 0), "chunk ops"); } } @@ -280,7 +299,7 @@ export class Page extends ZenPage this._metric(right, safe(stats, "cid.size.large") != null ? Friendly.bytes(safe(stats, "cid.size.large")) : "-", "cid large"); } - // Upstream tile (only if upstream is active) + // Upstream tile (only shown when upstream is active) { const upstream = safe(stats, "upstream"); if (upstream) @@ -611,10 +630,9 @@ export class Page extends ZenPage async drop_all() { const drop = async () => { - for (const row of this._cache_table) + for (const item of this._cache_data || []) { - const namespace = row.attr("zs_name"); - await new Fetcher().resource("z$", namespace).delete(); + await new Fetcher().resource("z$", item.namespace).delete(); } this.reload(); }; diff --git a/src/zenserver/frontend/html/pages/compute.js b/src/zenserver/frontend/html/pages/compute.js index 2eb4d4e9b..c2257029e 100644 --- a/src/zenserver/frontend/html/pages/compute.js +++ b/src/zenserver/frontend/html/pages/compute.js @@ -5,7 +5,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" -import { Table } from "../util/widgets.js" +import { Table, add_copy_button } from "../util/widgets.js" const MAX_HISTORY_POINTS = 60; @@ -352,8 +352,9 @@ export class Page extends ZenPage id, ); - // Worker ID column: monospace 
for hex readability + // Worker ID column: monospace for hex readability, copy button row.get_cell(5).style("fontFamily", "'SF Mono', 'Cascadia Mono', Consolas, 'DejaVu Sans Mono', monospace"); + add_copy_button(row.get_cell(5).inner(), id); // Make name clickable to expand detail const cell = row.get_cell(0); @@ -524,7 +525,7 @@ export class Page extends ZenPage : q.state === "draining" ? "draining" : q.is_complete ? "complete" : "active"; - this._queues_table.add_row( + const qrow = this._queues_table.add_row( id, status, String(q.active_count ?? 0), @@ -534,6 +535,10 @@ export class Page extends ZenPage String(q.cancelled_count ?? 0), q.queue_token || "-", ); + if (q.queue_token) + { + add_copy_button(qrow.get_cell(7).inner(), q.queue_token); + } } } @@ -590,7 +595,9 @@ export class Page extends ZenPage // use monospace for readability, and show full value on hover const mono = "'SF Mono', 'Cascadia Mono', Consolas, 'DejaVu Sans Mono', monospace"; row.get_cell(7).style("textAlign", "right").style("fontFamily", mono).attr("title", workerId); + if (workerId !== "-") { add_copy_button(row.get_cell(7).inner(), workerId); } row.get_cell(8).style("textAlign", "right").style("fontFamily", mono).attr("title", actionId); + if (actionId !== "-") { add_copy_button(row.get_cell(8).inner(), actionId); } } } diff --git a/src/zenserver/frontend/html/pages/entry.js b/src/zenserver/frontend/html/pages/entry.js index 1e4c82e3f..e381f4a71 100644 --- a/src/zenserver/frontend/html/pages/entry.js +++ b/src/zenserver/frontend/html/pages/entry.js @@ -168,7 +168,7 @@ export class Page extends ZenPage if (key === "cook.artifacts") { action_tb.left().add("view-raw").on_click(() => { - window.location = "/" + ["prj", project, "oplog", oplog, value+".json"].join("/"); + window.open("/" + ["prj", project, "oplog", oplog, value+".json"].join("/"), "_self"); }); } diff --git a/src/zenserver/frontend/html/pages/hub.js b/src/zenserver/frontend/html/pages/hub.js index c6f96d496..b2bca9324 100644 
--- a/src/zenserver/frontend/html/pages/hub.js +++ b/src/zenserver/frontend/html/pages/hub.js @@ -6,6 +6,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" import { Modal } from "../util/modal.js" +import { flash_highlight, copy_button } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// const STABLE_STATES = new Set(["provisioned", "hibernated", "crashed"]); @@ -20,6 +21,7 @@ function _btn_enabled(state, action) if (action === "hibernate") { return state === "provisioned"; } if (action === "wake") { return state === "hibernated"; } if (action === "deprovision") { return _is_actionable(state); } + if (action === "obliterate") { return _is_actionable(state); } return false; } @@ -96,20 +98,24 @@ export class Page extends ZenPage this._bulk_label.className = "module-bulk-label"; this._btn_bulk_hibernate = _make_bulk_btn("\u23F8", "Hibernate", () => this._exec_action("hibernate", [...this._selected])); this._btn_bulk_wake = _make_bulk_btn("\u25B6", "Wake", () => this._exec_action("wake", [...this._selected])); - this._btn_bulk_deprov = _make_bulk_btn("\u2715", "Deprovision",() => this._confirm_deprovision([...this._selected])); + this._btn_bulk_deprov = _make_bulk_btn("\u23F9", "Deprovision",() => this._confirm_deprovision([...this._selected])); + this._btn_bulk_oblit = _make_bulk_btn("\uD83D\uDD25", "Obliterate", () => this._confirm_obliterate([...this._selected])); const bulk_sep = document.createElement("div"); bulk_sep.className = "module-bulk-sep"; this._btn_hibernate_all = _make_bulk_btn("\u23F8", "Hibernate All", () => this._confirm_all("hibernate", "Hibernate All")); this._btn_wake_all = _make_bulk_btn("\u25B6", "Wake All", () => this._confirm_all("wake", "Wake All")); - this._btn_deprov_all = _make_bulk_btn("\u2715", "Deprovision All",() => this._confirm_all("deprovision", "Deprovision All")); + this._btn_deprov_all = 
_make_bulk_btn("\u23F9", "Deprovision All",() => this._confirm_all("deprovision", "Deprovision All")); + this._btn_oblit_all = _make_bulk_btn("\uD83D\uDD25", "Obliterate All", () => this._confirm_obliterate(this._modules_data.map(m => m.moduleId))); this._bulk_bar.appendChild(this._bulk_label); this._bulk_bar.appendChild(this._btn_bulk_hibernate); this._bulk_bar.appendChild(this._btn_bulk_wake); this._bulk_bar.appendChild(this._btn_bulk_deprov); + this._bulk_bar.appendChild(this._btn_bulk_oblit); this._bulk_bar.appendChild(bulk_sep); this._bulk_bar.appendChild(this._btn_hibernate_all); this._bulk_bar.appendChild(this._btn_wake_all); this._bulk_bar.appendChild(this._btn_deprov_all); + this._bulk_bar.appendChild(this._btn_oblit_all); mod_host.appendChild(this._bulk_bar); // Module table @@ -152,6 +158,38 @@ export class Page extends ZenPage this._btn_next.className = "module-pager-btn"; this._btn_next.textContent = "Next \u2192"; this._btn_next.addEventListener("click", () => this._go_page(this._page + 1)); + this._btn_provision = _make_bulk_btn("+", "Provision", () => this._show_provision_modal()); + this._btn_obliterate = _make_bulk_btn("\uD83D\uDD25", "Obliterate", () => this._show_obliterate_modal()); + this._search_input = document.createElement("input"); + this._search_input.type = "text"; + this._search_input.className = "module-pager-search"; + this._search_input.placeholder = "Search module\u2026"; + this._search_input.addEventListener("keydown", (e) => + { + if (e.key === "Enter") + { + const term = this._search_input.value.trim().toLowerCase(); + if (!term) { return; } + const idx = this._modules_data.findIndex(m => + (m.moduleId || "").toLowerCase().includes(term) + ); + if (idx >= 0) + { + const id = this._modules_data[idx].moduleId; + this._navigate_to_module(id); + this._flash_module(id); + } + else + { + this._search_input.style.outline = "2px solid var(--theme_fail)"; + setTimeout(() => { this._search_input.style.outline = ""; }, 1000); + } + } + }); 
+ + pager.appendChild(this._btn_provision); + pager.appendChild(this._btn_obliterate); + pager.appendChild(this._search_input); pager.appendChild(this._btn_prev); pager.appendChild(this._pager_label); pager.appendChild(this._btn_next); @@ -164,8 +202,11 @@ export class Page extends ZenPage this._row_cache = new Map(); // moduleId → row refs, for in-place DOM updates this._updating = false; this._page = 0; - this._page_size = 50; + this._page_size = 25; this._expanded = new Set(); // moduleIds with open metrics panel + this._pending_highlight = null; // moduleId to navigate+flash after next poll + this._pending_highlight_timer = null; + this._loading = mod_section.tag().classify("pager-loading").text("Loading\u2026").inner(); await this._update(); this._poll_timer = setInterval(() => this._update(), 2000); @@ -184,6 +225,15 @@ export class Page extends ZenPage this._render_capacity(stats); this._render_modules(status); + if (this._loading) { this._loading.remove(); this._loading = null; } + if (this._pending_highlight && this._module_map.has(this._pending_highlight)) + { + const id = this._pending_highlight; + this._pending_highlight = null; + clearTimeout(this._pending_highlight_timer); + this._navigate_to_module(id); + this._flash_module(id); + } } catch (e) { /* service unavailable */ } finally { this._updating = false; } @@ -234,10 +284,12 @@ export class Page extends ZenPage const left = columns.tag().classify("tile-metrics"); this._metric(left, Friendly.bytes(disk_used), "disk used", true); + this._metric(left, Friendly.bytes(machine.disk_total_bytes), "disk total"); if (disk_limit > 0) { this._metric(left, Friendly.bytes(disk_limit), "disk limit"); } const right = columns.tag().classify("tile-metrics"); this._metric(right, Friendly.bytes(mem_used), "memory used", true); + this._metric(right, Friendly.bytes(machine.memory_total_mib * 1024 * 1024), "memory total"); if (mem_limit > 0) { this._metric(right, Friendly.bytes(mem_limit), "memory limit"); } if 
(machine.virtual_memory_total_mib > 0) { @@ -293,7 +345,7 @@ export class Page extends ZenPage row.idx.textContent = i + 1; row.cb.checked = this._selected.has(id); row.dot.setAttribute("data-state", state); - if (state === "deprovisioning") + if (state === "deprovisioning" || state === "obliterating") { row.dot.setAttribute("data-prev-state", prev); } @@ -303,6 +355,7 @@ export class Page extends ZenPage } row.state_text.nodeValue = state; row.port_text.nodeValue = m.port ? String(m.port) : ""; + row.copy_port_btn.style.display = m.port ? "" : "none"; if (m.state_change_time) { const state_label = state.charAt(0).toUpperCase() + state.slice(1); @@ -315,6 +368,7 @@ export class Page extends ZenPage row.btn_hibernate.disabled = !_btn_enabled(state, "hibernate"); row.btn_wake.disabled = !_btn_enabled(state, "wake"); row.btn_deprov.disabled = !_btn_enabled(state, "deprovision"); + row.btn_oblit.disabled = !_btn_enabled(state, "obliterate"); if (m.process_metrics) { @@ -374,6 +428,8 @@ export class Page extends ZenPage id_wrap.style.cssText = "display:inline-flex;align-items:center;font-family:monospace;font-size:14px;"; id_wrap.appendChild(btn_expand); id_wrap.appendChild(document.createTextNode("\u00A0" + id)); + const copy_id_btn = copy_button(id); + id_wrap.appendChild(copy_id_btn); td_id.appendChild(id_wrap); tr.appendChild(td_id); @@ -381,7 +437,7 @@ export class Page extends ZenPage const dot = document.createElement("span"); dot.className = "module-state-dot"; dot.setAttribute("data-state", state); - if (state === "deprovisioning") + if (state === "deprovisioning" || state === "obliterating") { dot.setAttribute("data-prev-state", prev); } @@ -395,23 +451,29 @@ export class Page extends ZenPage td_port.style.cssText = "font-variant-numeric:tabular-nums;"; const port_node = document.createTextNode(port ? String(port) : ""); td_port.appendChild(port_node); + const copy_port_btn = copy_button(() => port_node.nodeValue); + copy_port_btn.style.display = port ? 
"" : "none"; + td_port.appendChild(copy_port_btn); tr.appendChild(td_port); const td_action = document.createElement("td"); td_action.className = "module-action-cell"; const [wrap_o, btn_o] = _make_action_btn("\u2197", "Open dashboard", () => { - window.open(`${window.location.protocol}//${window.location.hostname}:${port}`, "_blank"); + window.open(`/hub/proxy/${port}/dashboard/`, "_blank"); }); btn_o.disabled = state !== "provisioned"; const [wrap_h, btn_h] = _make_action_btn("\u23F8", "Hibernate", () => this._post_module_action(id, "hibernate").then(() => this._update())); const [wrap_w, btn_w] = _make_action_btn("\u25B6", "Wake", () => this._post_module_action(id, "wake").then(() => this._update())); - const [wrap_d, btn_d] = _make_action_btn("\u2715", "Deprovision", () => this._confirm_deprovision([id])); + const [wrap_d, btn_d] = _make_action_btn("\u23F9", "Deprovision", () => this._confirm_deprovision([id])); + const [wrap_x, btn_x] = _make_action_btn("\uD83D\uDD25", "Obliterate", () => this._confirm_obliterate([id])); btn_h.disabled = !_btn_enabled(state, "hibernate"); btn_w.disabled = !_btn_enabled(state, "wake"); btn_d.disabled = !_btn_enabled(state, "deprovision"); + btn_x.disabled = !_btn_enabled(state, "obliterate"); td_action.appendChild(wrap_h); td_action.appendChild(wrap_w); td_action.appendChild(wrap_d); + td_action.appendChild(wrap_x); td_action.appendChild(wrap_o); tr.appendChild(td_action); @@ -472,7 +534,7 @@ export class Page extends ZenPage metrics_td.appendChild(metrics_grid); metrics_tr.appendChild(metrics_td); - row = { tr, metrics_tr, idx: td_idx, cb, dot, state_text: state_node, port_text: port_node, btn_expand, btn_open: btn_o, btn_hibernate: btn_h, btn_wake: btn_w, btn_deprov: btn_d, metric_nodes, state_since_node, state_age_node, state_since_label, state_age_label }; + row = { tr, metrics_tr, idx: td_idx, cb, dot, state_text: state_node, port_text: port_node, copy_port_btn, btn_expand, btn_open: btn_o, btn_hibernate: btn_h, btn_wake: 
btn_w, btn_deprov: btn_d, btn_oblit: btn_x, metric_nodes, state_since_node, state_age_node, state_since_label, state_age_label }; this._row_cache.set(id, row); } @@ -582,6 +644,7 @@ export class Page extends ZenPage this._btn_bulk_hibernate.disabled = !this._all_selected_in_state("provisioned"); this._btn_bulk_wake.disabled = !this._all_selected_in_state("hibernated"); this._btn_bulk_deprov.disabled = selected === 0; + this._btn_bulk_oblit.disabled = selected === 0; this._select_all_cb.disabled = total === 0; this._select_all_cb.checked = selected === total && total > 0; @@ -594,6 +657,7 @@ export class Page extends ZenPage this._btn_hibernate_all.disabled = empty; this._btn_wake_all.disabled = empty; this._btn_deprov_all.disabled = empty; + this._btn_oblit_all.disabled = empty; } _on_select_all() @@ -639,6 +703,35 @@ export class Page extends ZenPage .option("Deprovision", () => this._exec_action("deprovision", ids)); } + _confirm_obliterate(ids) + { + const warn = "\uD83D\uDD25 WARNING: This action is irreversible! 
\uD83D\uDD25"; + const detail = "All local and backend data will be permanently destroyed.\nThis cannot be undone."; + let message; + if (ids.length === 1) + { + const id = ids[0]; + const state = this._module_state(id) || "unknown"; + message = `${warn}\n\n${detail}\n\nModule ID: ${id}\nCurrent state: ${state}`; + } + else + { + message = `${warn}\n\nObliterate ${ids.length} modules.\n\n${detail}`; + } + + new Modal() + .title("\uD83D\uDD25 Obliterate") + .message(message) + .option("Cancel", null) + .option("\uD83D\uDD25 Obliterate", () => this._exec_obliterate(ids)); + } + + async _exec_obliterate(ids) + { + await Promise.allSettled(ids.map(id => fetch(`/hub/modules/${encodeURIComponent(id)}`, { method: "DELETE" }))); + await this._update(); + } + _confirm_all(action, label) { // Capture IDs at modal-open time so action targets the displayed list @@ -663,4 +756,191 @@ export class Page extends ZenPage await fetch(`/hub/modules/${moduleId}/${action}`, { method: "POST" }); } + _show_module_input_modal({ title, submit_label, warning, on_submit }) + { + const MODULE_ID_RE = /^[A-Za-z0-9][A-Za-z0-9-]*$/; + + const overlay = document.createElement("div"); + overlay.className = "zen_modal"; + + const bg = document.createElement("div"); + bg.className = "zen_modal_bg"; + bg.addEventListener("click", () => overlay.remove()); + overlay.appendChild(bg); + + const dialog = document.createElement("div"); + overlay.appendChild(dialog); + + const title_el = document.createElement("div"); + title_el.className = "zen_modal_title"; + title_el.textContent = title; + dialog.appendChild(title_el); + + const content = document.createElement("div"); + content.className = "zen_modal_message"; + content.style.textAlign = "center"; + + if (warning) + { + const warn = document.createElement("div"); + warn.style.cssText = "color:var(--theme_fail);font-weight:bold;margin-bottom:12px;"; + warn.textContent = warning; + content.appendChild(warn); + } + + const input = 
document.createElement("input"); + input.type = "text"; + input.placeholder = "module-name"; + input.style.cssText = "width:100%;font-size:14px;padding:8px 12px;"; + content.appendChild(input); + + const error_div = document.createElement("div"); + error_div.style.cssText = "color:var(--theme_fail);font-size:12px;margin-top:8px;min-height:1.2em;"; + content.appendChild(error_div); + + dialog.appendChild(content); + + const buttons = document.createElement("div"); + buttons.className = "zen_modal_buttons"; + + const btn_cancel = document.createElement("div"); + btn_cancel.textContent = "Cancel"; + btn_cancel.addEventListener("click", () => overlay.remove()); + + const btn_submit = document.createElement("div"); + btn_submit.textContent = submit_label; + + buttons.appendChild(btn_cancel); + buttons.appendChild(btn_submit); + dialog.appendChild(buttons); + + let submitting = false; + + const set_submit_enabled = (enabled) => { + btn_submit.style.opacity = enabled ? "" : "0.4"; + btn_submit.style.pointerEvents = enabled ? 
"" : "none"; + }; + + set_submit_enabled(false); + + const validate = () => { + if (submitting) { return false; } + const val = input.value.trim(); + if (val.length === 0) + { + error_div.textContent = ""; + set_submit_enabled(false); + return false; + } + if (!MODULE_ID_RE.test(val)) + { + error_div.textContent = "Only letters, numbers, and hyphens allowed (must start with a letter or number)"; + set_submit_enabled(false); + return false; + } + error_div.textContent = ""; + set_submit_enabled(true); + return true; + }; + + input.addEventListener("input", validate); + + const submit = async () => { + if (submitting) { return; } + const moduleId = input.value.trim(); + if (!MODULE_ID_RE.test(moduleId)) { return; } + + submitting = true; + set_submit_enabled(false); + error_div.textContent = ""; + + try + { + const ok = await on_submit(moduleId); + if (ok) + { + overlay.remove(); + await this._update(); + return; + } + } + catch (e) + { + error_div.textContent = e.message || "Request failed"; + } + submitting = false; + set_submit_enabled(true); + }; + + btn_submit.addEventListener("click", submit); + input.addEventListener("keydown", (e) => { + if (e.key === "Enter" && validate()) { submit(); } + if (e.key === "Escape") { overlay.remove(); } + }); + + document.body.appendChild(overlay); + input.focus(); + + return { error_div }; + } + + _show_provision_modal() + { + const { error_div } = this._show_module_input_modal({ + title: "Provision Module", + submit_label: "Provision", + on_submit: async (moduleId) => { + const resp = await fetch(`/hub/modules/${encodeURIComponent(moduleId)}/provision`, { method: "POST" }); + if (!resp.ok) + { + const msg = await resp.text(); + error_div.textContent = msg || ("HTTP " + resp.status); + return false; + } + // Endpoint returns compact binary (CbObjectWriter), not text + if (resp.status === 200 || resp.status === 202) + { + this._pending_highlight = moduleId; + this._pending_highlight_timer = setTimeout(() => { 
this._pending_highlight = null; }, 5000); + } + return true; + } + }); + } + + _show_obliterate_modal() + { + const { error_div } = this._show_module_input_modal({ + title: "\uD83D\uDD25 Obliterate Module", + submit_label: "\uD83D\uDD25 Obliterate", + warning: "\uD83D\uDD25 WARNING: This action is irreversible! \uD83D\uDD25\nAll local and backend data will be permanently destroyed.", + on_submit: async (moduleId) => { + const resp = await fetch(`/hub/modules/${encodeURIComponent(moduleId)}`, { method: "DELETE" }); + if (resp.ok) + { + return true; + } + const msg = await resp.text(); + error_div.textContent = msg || ("HTTP " + resp.status); + return false; + } + }); + } + + _navigate_to_module(moduleId) + { + const idx = this._modules_data.findIndex(m => m.moduleId === moduleId); + if (idx >= 0) + { + this._page = Math.floor(idx / this._page_size); + this._render_page(); + } + } + + _flash_module(id) + { + const cached = this._row_cache.get(id); + if (cached) { flash_highlight(cached.tr); } + } + } diff --git a/src/zenserver/frontend/html/pages/orchestrator.js b/src/zenserver/frontend/html/pages/orchestrator.js index a280fabdb..d11306998 100644 --- a/src/zenserver/frontend/html/pages/orchestrator.js +++ b/src/zenserver/frontend/html/pages/orchestrator.js @@ -5,7 +5,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" -import { Table } from "../util/widgets.js" +import { Table, add_copy_button } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// export class Page extends ZenPage @@ -14,6 +14,14 @@ export class Page extends ZenPage { this.set_title("orchestrator"); + // Provisioner section (hidden until data arrives) + this._prov_section = this._collapsible_section("Provisioner"); + this._prov_section._parent.inner().style.display = "none"; + this._prov_grid = null; + this._prov_target_dirty = false; + this._prov_commit_timer = null; 
+ this._prov_last_target = null; + // Agents section const agents_section = this._collapsible_section("Compute Agents"); this._agents_host = agents_section; @@ -50,11 +58,12 @@ export class Page extends ZenPage { try { - const [agents, history, clients, client_history] = await Promise.all([ + const [agents, history, clients, client_history, prov] = await Promise.all([ new Fetcher().resource("/orch/agents").json(), new Fetcher().resource("/orch/history").param("limit", "50").json().catch(() => null), new Fetcher().resource("/orch/clients").json().catch(() => null), new Fetcher().resource("/orch/clients/history").param("limit", "50").json().catch(() => null), + new Fetcher().resource("/orch/provisioner/status").json().catch(() => null), ]); this._render_agents(agents); @@ -70,6 +79,7 @@ export class Page extends ZenPage { this._render_client_history(client_history.client_events || []); } + this._render_provisioner(prov); } catch (e) { /* service unavailable */ } } @@ -109,6 +119,7 @@ export class Page extends ZenPage { this._render_client_history(data.client_events); } + this._render_provisioner(data.provisioner); } catch (e) { /* ignore parse errors */ } }; @@ -156,7 +167,7 @@ export class Page extends ZenPage return; } - let totalCpus = 0, totalWeightedCpu = 0; + let totalCpus = 0, activeCpus = 0, totalWeightedCpu = 0; let totalMemUsed = 0, totalMemTotal = 0; let totalQueues = 0, totalPending = 0, totalRunning = 0, totalCompleted = 0; let totalRecv = 0, totalSent = 0; @@ -173,8 +184,14 @@ export class Page extends ZenPage const completed = w.actions_completed || 0; const recv = w.bytes_received || 0; const sent = w.bytes_sent || 0; + const provisioner = w.provisioner || ""; + const isProvisioned = provisioner !== ""; totalCpus += cpus; + if (w.provisioner_status === "active") + { + activeCpus += cpus; + } if (cpus > 0 && typeof cpuUsage === "number") { totalWeightedCpu += cpuUsage * cpus; @@ -209,12 +226,49 @@ export class Page extends ZenPage 
cell.inner().textContent = ""; cell.tag("a").text(hostname).attr("href", w.uri + "/dashboard/compute/").attr("target", "_blank"); } + + // Visual treatment based on provisioner status + const provStatus = w.provisioner_status || ""; + if (!isProvisioned) + { + row.inner().style.opacity = "0.45"; + } + else + { + const hostCell = row.get_cell(0); + const el = hostCell.inner(); + const badge = document.createElement("span"); + const badgeBase = "display:inline-block;margin-left:6px;padding:1px 5px;border-radius:8px;" + + "font-size:9px;font-weight:600;color:#fff;vertical-align:middle;"; + + if (provStatus === "draining") + { + badge.textContent = "draining"; + badge.style.cssText = badgeBase + "background:var(--theme_warn);"; + row.inner().style.opacity = "0.6"; + } + else if (provStatus === "active") + { + badge.textContent = provisioner; + badge.style.cssText = badgeBase + "background:#8957e5;"; + } + else + { + badge.textContent = "deallocated"; + badge.style.cssText = badgeBase + "background:var(--theme_fail);"; + row.inner().style.opacity = "0.45"; + } + el.appendChild(badge); + } } - // Total row + // Total row — show active / total in CPUs column + const cpuLabel = activeCpus < totalCpus + ? Friendly.sep(activeCpus) + " / " + Friendly.sep(totalCpus) + : Friendly.sep(totalCpus); const total = this._agents_table.add_row( "TOTAL", - Friendly.sep(totalCpus), + cpuLabel, "", totalMemTotal > 0 ? 
Friendly.bytes(totalMemUsed) + " / " + Friendly.bytes(totalMemTotal) : "-", Friendly.sep(totalQueues), @@ -244,12 +298,13 @@ export class Page extends ZenPage for (const c of clients) { - this._clients_table.add_row( + const crow = this._clients_table.add_row( c.id || "", c.hostname || "", c.address || "", this._format_last_seen(c.dt), ); + if (c.id) { add_copy_button(crow.get_cell(0).inner(), c.id); } } } @@ -305,6 +360,154 @@ export class Page extends ZenPage } } + _render_provisioner(prov) + { + const container = this._prov_section._parent.inner(); + + if (!prov || !prov.name) + { + container.style.display = "none"; + return; + } + container.style.display = ""; + + if (!this._prov_grid) + { + this._prov_grid = this._prov_section.tag().classify("grid").classify("stats-tiles"); + this._prov_tiles = {}; + + // Target cores tile with editable input + const target_tile = this._prov_grid.tag().classify("card").classify("stats-tile"); + target_tile.tag().classify("card-title").text("Target Cores"); + const target_body = target_tile.tag().classify("tile-metrics"); + const target_m = target_body.tag().classify("tile-metric").classify("tile-metric-hero"); + const input = document.createElement("input"); + input.type = "number"; + input.min = "0"; + input.style.cssText = "width:100px;padding:4px 8px;border:1px solid var(--theme_g2);border-radius:4px;" + + "background:var(--theme_g4);color:var(--theme_bright);font-size:20px;font-weight:600;text-align:right;"; + target_m.inner().appendChild(input); + target_m.tag().classify("metric-label").text("target"); + this._prov_tiles.target_input = input; + + input.addEventListener("focus", () => { this._prov_target_dirty = true; }); + input.addEventListener("input", () => { + this._prov_target_dirty = true; + if (this._prov_commit_timer) + { + clearTimeout(this._prov_commit_timer); + } + this._prov_commit_timer = setTimeout(() => this._commit_provisioner_target(), 800); + }); + input.addEventListener("keydown", (e) => { + if (e.key 
=== "Enter") + { + if (this._prov_commit_timer) + { + clearTimeout(this._prov_commit_timer); + } + this._commit_provisioner_target(); + input.blur(); + } + }); + input.addEventListener("blur", () => { + if (this._prov_commit_timer) + { + clearTimeout(this._prov_commit_timer); + } + this._commit_provisioner_target(); + }); + + // Active cores + const active_tile = this._prov_grid.tag().classify("card").classify("stats-tile"); + active_tile.tag().classify("card-title").text("Active Cores"); + const active_body = active_tile.tag().classify("tile-metrics"); + this._prov_tiles.active = active_body; + + // Estimated cores + const est_tile = this._prov_grid.tag().classify("card").classify("stats-tile"); + est_tile.tag().classify("card-title").text("Estimated Cores"); + const est_body = est_tile.tag().classify("tile-metrics"); + this._prov_tiles.estimated = est_body; + + // Agents + const agents_tile = this._prov_grid.tag().classify("card").classify("stats-tile"); + agents_tile.tag().classify("card-title").text("Agents"); + const agents_body = agents_tile.tag().classify("tile-metrics"); + this._prov_tiles.agents = agents_body; + + // Draining + const drain_tile = this._prov_grid.tag().classify("card").classify("stats-tile"); + drain_tile.tag().classify("card-title").text("Draining"); + const drain_body = drain_tile.tag().classify("tile-metrics"); + this._prov_tiles.draining = drain_body; + } + + // Update values + const input = this._prov_tiles.target_input; + if (!this._prov_target_dirty && document.activeElement !== input) + { + input.value = prov.target_cores; + } + this._prov_last_target = prov.target_cores; + + // Re-render metric tiles (clear and recreate content) + for (const key of ["active", "estimated", "agents", "draining"]) + { + this._prov_tiles[key].inner().innerHTML = ""; + } + this._metric(this._prov_tiles.active, Friendly.sep(prov.active_cores), "cores", true); + this._metric(this._prov_tiles.estimated, Friendly.sep(prov.estimated_cores), "cores", true); + 
this._metric(this._prov_tiles.agents, Friendly.sep(prov.agents), "active", true); + this._metric(this._prov_tiles.draining, Friendly.sep(prov.agents_draining || 0), "agents", true); + } + + async _commit_provisioner_target() + { + const input = this._prov_tiles?.target_input; + if (!input || this._prov_committing) + { + return; + } + const value = parseInt(input.value, 10); + if (isNaN(value) || value < 0) + { + return; + } + if (value === this._prov_last_target) + { + this._prov_target_dirty = false; + return; + } + this._prov_committing = true; + try + { + const resp = await fetch("/orch/provisioner/target", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ target_cores: value }), + }); + if (resp.ok) + { + this._prov_target_dirty = false; + console.log("Target cores set to", value); + } + else + { + const text = await resp.text(); + console.error("Failed to set target cores: HTTP", resp.status, text); + } + } + catch (e) + { + console.error("Failed to set target cores:", e); + } + finally + { + this._prov_committing = false; + } + } + _metric(parent, value, label, hero = false) { const m = parent.tag().classify("tile-metric"); diff --git a/src/zenserver/frontend/html/pages/page.js b/src/zenserver/frontend/html/pages/page.js index ff530ff8e..d3069f506 100644 --- a/src/zenserver/frontend/html/pages/page.js +++ b/src/zenserver/frontend/html/pages/page.js @@ -6,6 +6,26 @@ import { WidgetHost } from "../util/widgets.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" +function _deep_merge_stats(base, update) +{ + const result = Object.assign({}, base); + for (const key of Object.keys(update)) + { + const bv = result[key]; + const uv = update[key]; + if (uv && typeof uv === "object" && !Array.isArray(uv) + && bv && typeof bv === "object" && !Array.isArray(bv)) + { + result[key] = _deep_merge_stats(bv, uv); + } + else + { + result[key] = uv; + } + } + return result; +} + 
//////////////////////////////////////////////////////////////////////////////// export class PageBase extends WidgetHost { @@ -79,6 +99,11 @@ export class ZenPage extends PageBase this._banner = banner; this._poll_status(); + + new Fetcher().resource("/health/version").text().then((data) => { + const v = data ? data.trim() : ""; + if (v) banner.attr("version", v); + }).catch(() => {}); } static _mode_taglines = { @@ -282,10 +307,7 @@ export class ZenPage extends PageBase _render_http_requests_tile(grid, req, bad_requests = undefined) { - if (!req) - { - return; - } + req = req || {}; const tile = grid.tag().classify("card").classify("stats-tile"); tile.tag().classify("card-title").text("HTTP Requests"); const columns = tile.tag().classify("tile-columns"); @@ -338,6 +360,16 @@ export class ZenPage extends PageBase } } + _merge_last_stats(stats) + { + if (this._last_stats) + { + stats = _deep_merge_stats(this._last_stats, stats); + } + this._last_stats = stats; + return stats; + } + _collapsible_section(name) { const section = this.add_section(name); diff --git a/src/zenserver/frontend/html/pages/projects.js b/src/zenserver/frontend/html/pages/projects.js index dfe4faeb8..2e76a80f1 100644 --- a/src/zenserver/frontend/html/pages/projects.js +++ b/src/zenserver/frontend/html/pages/projects.js @@ -6,7 +6,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" import { Modal } from "../util/modal.js" -import { Table, Toolbar } from "../util/widgets.js" +import { Table, Toolbar, Pager, add_copy_button } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// export class Page extends ZenPage @@ -39,8 +39,6 @@ export class Page extends ZenPage // Projects list var section = this._collapsible_section("Projects"); - section.tag().classify("dropall").text("drop-all").on_click(() => this.drop_all()); - var columns = [ "name", "project dir", @@ -51,51 
+49,21 @@ export class Page extends ZenPage this._project_table = section.add_widget(Table, columns, Table.Flag_FitLeft|Table.Flag_PackRight|Table.Flag_Sortable|Table.Flag_AlignNumeric); - var projects = await new Fetcher().resource("/prj/list").json(); - projects.sort((a, b) => (b.LastAccessTime || 0) - (a.LastAccessTime || 0)); - - for (const project of projects) - { - var row = this._project_table.add_row( - "", - "", - "", - "", - ); - - var cell = row.get_cell(0); - cell.tag().text(project.Id).on_click(() => this.view_project(project.Id)); - - if (project.ProjectRootDir) - { - row.get_cell(1).tag("a").text(project.ProjectRootDir) - .attr("href", "vscode://" + project.ProjectRootDir.replace(/\\/g, "/")); - } - if (project.EngineRootDir) - { - row.get_cell(2).tag("a").text(project.EngineRootDir) - .attr("href", "vscode://" + project.EngineRootDir.replace(/\\/g, "/")); - } - - cell = row.get_cell(-1); - const action_tb = new Toolbar(cell, true).left(); - action_tb.add("view").on_click(() => this.view_project(project.Id)); - action_tb.add("drop").on_click(() => this.drop_project(project.Id)); - - row.attr("zs_name", project.Id); - - // Fetch project details to get oplog count - new Fetcher().resource("prj", project.Id).json().then((info) => { - const oplogs = info["oplogs"] || []; - row.get_cell(3).text(Friendly.sep(oplogs.length)).style("textAlign", "right"); - // Right-align the corresponding header cell - const header = this._project_table._element.firstElementChild; - if (header && header.children[4]) - { - header.children[4].style.textAlign = "right"; - } - }).catch(() => {}); - } + this._project_pager = new Pager(section, 25, () => this._render_projects_page(), + Pager.make_search_fn(() => this._projects_data, p => p.Id)); + const drop_link = document.createElement("span"); + drop_link.className = "dropall zen_action"; + drop_link.style.position = "static"; + drop_link.textContent = "drop-all"; + drop_link.addEventListener("click", () => this.drop_all()); + 
this._project_pager.prepend(drop_link); + + const loading = Pager.loading(section); + this._projects_data = await new Fetcher().resource("/prj/list").json(); + this._projects_data.sort((a, b) => a.Id.localeCompare(b.Id)); + this._project_pager.set_total(this._projects_data.length); + this._render_projects_page(); + loading.remove(); // Project detail area (inside projects section so it collapses together) this._project_host = section; @@ -120,6 +88,7 @@ export class Page extends ZenPage _render_stats(stats) { + stats = this._merge_last_stats(stats); const safe = (obj, path) => path.split(".").reduce((a, b) => a && a[b], obj); const grid = this._stats_grid; @@ -130,54 +99,48 @@ export class Page extends ZenPage // Store Operations tile { - const store = safe(stats, "store"); - if (store) - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Store Operations"); - const columns = tile.tag().classify("tile-columns"); - - const left = columns.tag().classify("tile-metrics"); - const proj = store.project || {}; - this._metric(left, Friendly.sep(proj.readcount || 0), "project reads", true); - this._metric(left, Friendly.sep(proj.writecount || 0), "project writes"); - this._metric(left, Friendly.sep(proj.deletecount || 0), "project deletes"); - - const right = columns.tag().classify("tile-metrics"); - const oplog = store.oplog || {}; - this._metric(right, Friendly.sep(oplog.readcount || 0), "oplog reads", true); - this._metric(right, Friendly.sep(oplog.writecount || 0), "oplog writes"); - this._metric(right, Friendly.sep(oplog.deletecount || 0), "oplog deletes"); - } + const store = safe(stats, "store") || {}; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("Store Operations"); + const columns = tile.tag().classify("tile-columns"); + + const left = columns.tag().classify("tile-metrics"); + const proj = store.project || {}; + this._metric(left, 
Friendly.sep(proj.readcount || 0), "project reads", true); + this._metric(left, Friendly.sep(proj.writecount || 0), "project writes"); + this._metric(left, Friendly.sep(proj.deletecount || 0), "project deletes"); + + const right = columns.tag().classify("tile-metrics"); + const oplog = store.oplog || {}; + this._metric(right, Friendly.sep(oplog.readcount || 0), "oplog reads", true); + this._metric(right, Friendly.sep(oplog.writecount || 0), "oplog writes"); + this._metric(right, Friendly.sep(oplog.deletecount || 0), "oplog deletes"); } // Op & Chunk tile { - const store = safe(stats, "store"); - if (store) - { - const tile = grid.tag().classify("card").classify("stats-tile"); - tile.tag().classify("card-title").text("Ops & Chunks"); - const columns = tile.tag().classify("tile-columns"); - - const left = columns.tag().classify("tile-metrics"); - const op = store.op || {}; - const opTotal = (op.hitcount || 0) + (op.misscount || 0); - const opRatio = opTotal > 0 ? (((op.hitcount || 0) / opTotal) * 100).toFixed(1) + "%" : "-"; - this._metric(left, opRatio, "op hit ratio", true); - this._metric(left, Friendly.sep(op.hitcount || 0), "op hits"); - this._metric(left, Friendly.sep(op.misscount || 0), "op misses"); - this._metric(left, Friendly.sep(op.writecount || 0), "op writes"); - - const right = columns.tag().classify("tile-metrics"); - const chunk = store.chunk || {}; - const chunkTotal = (chunk.hitcount || 0) + (chunk.misscount || 0); - const chunkRatio = chunkTotal > 0 ? 
(((chunk.hitcount || 0) / chunkTotal) * 100).toFixed(1) + "%" : "-"; - this._metric(right, chunkRatio, "chunk hit ratio", true); - this._metric(right, Friendly.sep(chunk.hitcount || 0), "chunk hits"); - this._metric(right, Friendly.sep(chunk.misscount || 0), "chunk misses"); - this._metric(right, Friendly.sep(chunk.writecount || 0), "chunk writes"); - } + const store = safe(stats, "store") || {}; + const tile = grid.tag().classify("card").classify("stats-tile"); + tile.tag().classify("card-title").text("Ops & Chunks"); + const columns = tile.tag().classify("tile-columns"); + + const left = columns.tag().classify("tile-metrics"); + const op = store.op || {}; + const opTotal = (op.hitcount || 0) + (op.misscount || 0); + const opRatio = opTotal > 0 ? (((op.hitcount || 0) / opTotal) * 100).toFixed(1) + "%" : "-"; + this._metric(left, opRatio, "op hit ratio", true); + this._metric(left, Friendly.sep(op.hitcount || 0), "op hits"); + this._metric(left, Friendly.sep(op.misscount || 0), "op misses"); + this._metric(left, Friendly.sep(op.writecount || 0), "op writes"); + + const right = columns.tag().classify("tile-metrics"); + const chunk = store.chunk || {}; + const chunkTotal = (chunk.hitcount || 0) + (chunk.misscount || 0); + const chunkRatio = chunkTotal > 0 ? 
(((chunk.hitcount || 0) / chunkTotal) * 100).toFixed(1) + "%" : "-"; + this._metric(right, chunkRatio, "chunk hit ratio", true); + this._metric(right, Friendly.sep(chunk.hitcount || 0), "chunk hits"); + this._metric(right, Friendly.sep(chunk.misscount || 0), "chunk misses"); + this._metric(right, Friendly.sep(chunk.writecount || 0), "chunk writes"); } // Storage tile @@ -198,6 +161,57 @@ export class Page extends ZenPage } } + _render_projects_page() + { + const { start, end } = this._project_pager.page_range(); + this._project_table.clear(start); + for (let i = start; i < end; i++) + { + const project = this._projects_data[i]; + const row = this._project_table.add_row( + "", + "", + "", + "", + ); + + const cell = row.get_cell(0); + cell.tag().text(project.Id).on_click(() => this.view_project(project.Id)); + add_copy_button(cell.inner(), project.Id); + + if (project.ProjectRootDir) + { + row.get_cell(1).tag("a").text(project.ProjectRootDir) + .attr("href", "vscode://" + project.ProjectRootDir.replace(/\\/g, "/")); + add_copy_button(row.get_cell(1).inner(), project.ProjectRootDir); + } + if (project.EngineRootDir) + { + row.get_cell(2).tag("a").text(project.EngineRootDir) + .attr("href", "vscode://" + project.EngineRootDir.replace(/\\/g, "/")); + add_copy_button(row.get_cell(2).inner(), project.EngineRootDir); + } + + const action_cell = row.get_cell(-1); + const action_tb = new Toolbar(action_cell, true).left(); + action_tb.add("view").on_click(() => this.view_project(project.Id)); + action_tb.add("drop").on_click(() => this.drop_project(project.Id)); + + row.attr("zs_name", project.Id); + + new Fetcher().resource("prj", project.Id).json().then((info) => { + const oplogs = info["oplogs"] || []; + row.get_cell(3).text(Friendly.sep(oplogs.length)).style("textAlign", "right"); + }).catch(() => {}); + } + + const header = this._project_table._element.firstElementChild; + if (header && header.children[4]) + { + header.children[4].style.textAlign = "right"; + } + } + 
async view_project(project_id) { // Toggle off if already selected @@ -318,10 +332,9 @@ export class Page extends ZenPage async drop_all() { const drop = async () => { - for (const row of this._project_table) + for (const project of this._projects_data || []) { - const project_id = row.attr("zs_name"); - await new Fetcher().resource("prj", project_id).delete(); + await new Fetcher().resource("prj", project.Id).delete(); } this.reload(); }; diff --git a/src/zenserver/frontend/html/pages/start.js b/src/zenserver/frontend/html/pages/start.js index e5b4d14f1..d06040b2f 100644 --- a/src/zenserver/frontend/html/pages/start.js +++ b/src/zenserver/frontend/html/pages/start.js @@ -6,7 +6,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" import { Modal } from "../util/modal.js" -import { Table, Toolbar } from "../util/widgets.js" +import { Table, Toolbar, Pager } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// export class Page extends ZenPage @@ -50,54 +50,40 @@ export class Page extends ZenPage this._render_stats(all_stats); // project list - var project_table = null; if (available.has("/prj/")) { var section = this.add_section("Cooked Projects"); - section.tag().classify("dropall").text("drop-all").on_click(() => this.drop_all("projects")); - var columns = [ "name", "project_dir", "engine_dir", "actions", ]; - project_table = section.add_widget(Table, columns); - - var projects = await new Fetcher().resource("/prj/list").json(); - projects.sort((a, b) => (b.LastAccessTime || 0) - (a.LastAccessTime || 0)); - projects = projects.slice(0, 25); - projects.sort((a, b) => a.Id.localeCompare(b.Id)); - - for (const project of projects) - { - var row = project_table.add_row( - "", - project.ProjectRootDir, - project.EngineRootDir, - ); - - var cell = row.get_cell(0); - cell.tag().text(project.Id).on_click((x) => this.view_project(x), 
project.Id); - - var cell = row.get_cell(-1); - var action_tb = new Toolbar(cell, true); - action_tb.left().add("view").on_click((x) => this.view_project(x), project.Id); - action_tb.left().add("drop").on_click((x) => this.drop_project(x), project.Id); - - row.attr("zs_name", project.Id); - } + this._project_table = section.add_widget(Table, columns); + + this._project_pager = new Pager(section, 25, () => this._render_projects_page(), + Pager.make_search_fn(() => this._projects_data, p => p.Id)); + const drop_link = document.createElement("span"); + drop_link.className = "dropall zen_action"; + drop_link.style.position = "static"; + drop_link.textContent = "drop-all"; + drop_link.addEventListener("click", () => this.drop_all("projects")); + this._project_pager.prepend(drop_link); + + const prj_loading = Pager.loading(section); + this._projects_data = await new Fetcher().resource("/prj/list").json(); + this._projects_data.sort((a, b) => a.Id.localeCompare(b.Id)); + this._project_pager.set_total(this._projects_data.length); + this._render_projects_page(); + prj_loading.remove(); } // cache - var cache_table = null; if (available.has("/z$/")) { var section = this.add_section("Cache"); - section.tag().classify("dropall").text("drop-all").on_click(() => this.drop_all("z$")); - var columns = [ "namespace", "dir", @@ -107,30 +93,30 @@ export class Page extends ZenPage "size mem", "actions", ]; - var zcache_info = await new Fetcher().resource("/z$/").json(); - cache_table = section.add_widget(Table, columns, Table.Flag_FitLeft|Table.Flag_PackRight); - for (const namespace of zcache_info["Namespaces"] || []) - { - new Fetcher().resource(`/z$/${namespace}/`).json().then((data) => { - const row = cache_table.add_row( - "", - data["Configuration"]["RootDir"], - data["Buckets"].length, - data["EntryCount"], - Friendly.bytes(data["StorageSize"].DiskSize), - Friendly.bytes(data["StorageSize"].MemorySize) - ); - var cell = row.get_cell(0); - cell.tag().text(namespace).on_click(() 
=> this.view_zcache(namespace)); - - cell = row.get_cell(-1); - const action_tb = new Toolbar(cell, true); - action_tb.left().add("view").on_click(() => this.view_zcache(namespace)); - action_tb.left().add("drop").on_click(() => this.drop_zcache(namespace)); - - row.attr("zs_name", namespace); - }); - } + this._cache_table = section.add_widget(Table, columns, Table.Flag_FitLeft|Table.Flag_PackRight); + + this._cache_pager = new Pager(section, 25, () => this._render_cache_page(), + Pager.make_search_fn(() => this._cache_data, item => item.namespace)); + const cache_drop_link = document.createElement("span"); + cache_drop_link.className = "dropall zen_action"; + cache_drop_link.style.position = "static"; + cache_drop_link.textContent = "drop-all"; + cache_drop_link.addEventListener("click", () => this.drop_all("z$")); + this._cache_pager.prepend(cache_drop_link); + + const cache_loading = Pager.loading(section); + const zcache_info = await new Fetcher().resource("/z$/").json(); + const namespaces = zcache_info["Namespaces"] || []; + const results = await Promise.allSettled( + namespaces.map(ns => new Fetcher().resource(`/z$/${ns}/`).json().then(data => ({ namespace: ns, data }))) + ); + this._cache_data = results + .filter(r => r.status === "fulfilled") + .map(r => r.value) + .sort((a, b) => a.namespace.localeCompare(b.namespace)); + this._cache_pager.set_total(this._cache_data.length); + this._render_cache_page(); + cache_loading.remove(); } // version @@ -139,15 +125,13 @@ export class Page extends ZenPage version.param("detailed", "true"); version.text().then((data) => ver_tag.text(data)); - this._project_table = project_table; - this._cache_table = cache_table; - // WebSocket for live stats updates this.connect_stats_ws((all_stats) => this._render_stats(all_stats)); } _render_stats(all_stats) { + all_stats = this._merge_last_stats(all_stats); const grid = this._stats_grid; const safe_lookup = this._safe_lookup; @@ -316,6 +300,60 @@ export class Page extends 
ZenPage m.tag().classify("metric-label").text(label); } + _render_projects_page() + { + const { start, end } = this._project_pager.page_range(); + this._project_table.clear(start); + for (let i = start; i < end; i++) + { + const project = this._projects_data[i]; + const row = this._project_table.add_row( + "", + project.ProjectRootDir, + project.EngineRootDir, + ); + + const cell = row.get_cell(0); + cell.tag().text(project.Id).on_click((x) => this.view_project(x), project.Id); + + const action_cell = row.get_cell(-1); + const action_tb = new Toolbar(action_cell, true); + action_tb.left().add("view").on_click((x) => this.view_project(x), project.Id); + action_tb.left().add("drop").on_click((x) => this.drop_project(x), project.Id); + + row.attr("zs_name", project.Id); + } + } + + _render_cache_page() + { + const { start, end } = this._cache_pager.page_range(); + this._cache_table.clear(start); + for (let i = start; i < end; i++) + { + const item = this._cache_data[i]; + const data = item.data; + const row = this._cache_table.add_row( + "", + data["Configuration"]["RootDir"], + data["Buckets"].length, + data["EntryCount"], + Friendly.bytes(data["StorageSize"].DiskSize), + Friendly.bytes(data["StorageSize"].MemorySize) + ); + + const cell = row.get_cell(0); + cell.tag().text(item.namespace).on_click(() => this.view_zcache(item.namespace)); + + const action_cell = row.get_cell(-1); + const action_tb = new Toolbar(action_cell, true); + action_tb.left().add("view").on_click(() => this.view_zcache(item.namespace)); + action_tb.left().add("drop").on_click(() => this.drop_zcache(item.namespace)); + + row.attr("zs_name", item.namespace); + } + } + view_stat(provider) { window.location = "?page=stat&provider=" + provider; @@ -361,20 +399,18 @@ export class Page extends ZenPage async drop_all_projects() { - for (const row of this._project_table) + for (const project of this._projects_data || []) { - const project_id = row.attr("zs_name"); - await new Fetcher().resource("prj", 
project_id).delete(); + await new Fetcher().resource("prj", project.Id).delete(); } this.reload(); } async drop_all_zcache() { - for (const row of this._cache_table) + for (const item of this._cache_data || []) { - const namespace = row.attr("zs_name"); - await new Fetcher().resource("z$", namespace).delete(); + await new Fetcher().resource("z$", item.namespace).delete(); } this.reload(); } diff --git a/src/zenserver/frontend/html/pages/workspaces.js b/src/zenserver/frontend/html/pages/workspaces.js index 2442fb35b..db02e8be1 100644 --- a/src/zenserver/frontend/html/pages/workspaces.js +++ b/src/zenserver/frontend/html/pages/workspaces.js @@ -4,6 +4,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" +import { copy_button } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// export class Page extends ZenPage @@ -157,6 +158,7 @@ export class Page extends ZenPage id_wrap.className = "ws-id-wrap"; id_wrap.appendChild(btn_expand); id_wrap.appendChild(document.createTextNode("\u00A0" + id)); + id_wrap.appendChild(copy_button(id)); const td_id = document.createElement("td"); td_id.appendChild(id_wrap); tr.appendChild(td_id); @@ -200,6 +202,7 @@ export class Page extends ZenPage _render_stats(stats) { + stats = this._merge_last_stats(stats); const grid = this._stats_grid; grid.inner().innerHTML = ""; diff --git a/src/zenserver/frontend/html/util/widgets.js b/src/zenserver/frontend/html/util/widgets.js index 17bd2fde7..651686a11 100644 --- a/src/zenserver/frontend/html/util/widgets.js +++ b/src/zenserver/frontend/html/util/widgets.js @@ -6,6 +6,58 @@ import { Component } from "./component.js" import { Friendly } from "../util/friendly.js" //////////////////////////////////////////////////////////////////////////////// +export function flash_highlight(element) +{ + if (!element) { return; } + element.classList.add("pager-search-highlight"); + setTimeout(() => { 
element.classList.remove("pager-search-highlight"); }, 1500); +} + +//////////////////////////////////////////////////////////////////////////////// +export function copy_button(value_or_fn) +{ + if (!navigator.clipboard) + { + const stub = document.createElement("span"); + stub.style.display = "none"; + return stub; + } + + let reset_timer = 0; + const btn = document.createElement("button"); + btn.className = "zen-copy-btn"; + btn.title = "Copy to clipboard"; + btn.textContent = "\u29C9"; + btn.addEventListener("click", async (e) => { + e.stopPropagation(); + const v = typeof value_or_fn === "function" ? value_or_fn() : value_or_fn; + if (!v) { return; } + try + { + await navigator.clipboard.writeText(v); + clearTimeout(reset_timer); + btn.classList.add("zen-copy-ok"); + btn.textContent = "\u2713"; + reset_timer = setTimeout(() => { btn.classList.remove("zen-copy-ok"); btn.textContent = "\u29C9"; }, 800); + } + catch (_e) { /* clipboard not available */ } + }); + return btn; +} + +// Wraps the existing children of `element` plus a copy button into an +// inline-flex nowrap container so the button never wraps to a new line. 
+export function add_copy_button(element, value_or_fn) +{ + if (!navigator.clipboard) { return; } + const wrap = document.createElement("span"); + wrap.className = "zen-copy-wrap"; + while (element.firstChild) { wrap.appendChild(element.firstChild); } + wrap.appendChild(copy_button(value_or_fn)); + element.appendChild(wrap); +} + +//////////////////////////////////////////////////////////////////////////////// class Widget extends Component { } @@ -402,6 +454,135 @@ export class ProgressBar extends Widget //////////////////////////////////////////////////////////////////////////////// +export class Pager +{ + constructor(section, page_size, on_change, search_fn) + { + this._page = 0; + this._page_size = page_size; + this._total = 0; + this._on_change = on_change; + this._search_fn = search_fn || null; + this._search_input = null; + + const pager = section.tag().classify("module-pager").inner(); + this._btn_prev = document.createElement("button"); + this._btn_prev.className = "module-pager-btn"; + this._btn_prev.textContent = "\u2190 Prev"; + this._btn_prev.addEventListener("click", () => this._go_page(this._page - 1)); + this._label = document.createElement("span"); + this._label.className = "module-pager-label"; + this._btn_next = document.createElement("button"); + this._btn_next.className = "module-pager-btn"; + this._btn_next.textContent = "Next \u2192"; + this._btn_next.addEventListener("click", () => this._go_page(this._page + 1)); + + if (this._search_fn) + { + this._search_input = document.createElement("input"); + this._search_input.type = "text"; + this._search_input.className = "module-pager-search"; + this._search_input.placeholder = "Search\u2026"; + this._search_input.addEventListener("keydown", (e) => + { + if (e.key === "Enter") + { + this._do_search(this._search_input.value.trim()); + } + }); + pager.appendChild(this._search_input); + } + + pager.appendChild(this._btn_prev); + pager.appendChild(this._label); + pager.appendChild(this._btn_next); + 
this._pager = pager; + + this._update_ui(); + } + + prepend(element) + { + const ref = this._search_input || this._btn_prev; + this._pager.insertBefore(element, ref); + } + + set_total(n) + { + this._total = n; + const max_page = Math.max(0, Math.ceil(n / this._page_size) - 1); + if (this._page > max_page) + { + this._page = max_page; + } + this._update_ui(); + } + + page_range() + { + const start = this._page * this._page_size; + const end = Math.min(start + this._page_size, this._total); + return { start, end }; + } + + _go_page(n) + { + const max = Math.max(0, Math.ceil(this._total / this._page_size) - 1); + this._page = Math.max(0, Math.min(n, max)); + this._update_ui(); + this._on_change(); + } + + _do_search(term) + { + if (!term || !this._search_fn) + { + return; + } + const result = this._search_fn(term); + if (!result) + { + this._search_input.style.outline = "2px solid var(--theme_fail)"; + setTimeout(() => { this._search_input.style.outline = ""; }, 1000); + return; + } + this._go_page(Math.floor(result.index / this._page_size)); + flash_highlight(this._pager.parentNode.querySelector(`[zs_name="${CSS.escape(result.name)}"]`)); + } + + _update_ui() + { + const total = this._total; + const page_count = Math.max(1, Math.ceil(total / this._page_size)); + const start = this._page * this._page_size + 1; + const end = Math.min(start + this._page_size - 1, total); + + this._btn_prev.disabled = this._page === 0; + this._btn_next.disabled = this._page >= page_count - 1; + this._label.textContent = total === 0 + ? "No items" + : `${start}\u2013${end} of ${total}`; + } + + static make_search_fn(get_data, get_key) + { + return (term) => { + const t = term.toLowerCase(); + const data = get_data(); + const i = data.findIndex(item => get_key(item).toLowerCase().includes(t)); + return i < 0 ? 
null : { index: i, name: get_key(data[i]) }; + }; + } + + static loading(section) + { + return section.tag().classify("pager-loading").text("Loading\u2026").inner(); + } +} + + + +//////////////////////////////////////////////////////////////////////////////// export class WidgetHost { constructor(parent, depth=1) diff --git a/src/zenserver/frontend/html/zen.css b/src/zenserver/frontend/html/zen.css index cb3d78cf2..d3c6c9036 100644 --- a/src/zenserver/frontend/html/zen.css +++ b/src/zenserver/frontend/html/zen.css @@ -1611,6 +1611,25 @@ tr:last-child td { animation: module-dot-deprovisioning-from-provisioned 1s steps(1, end) infinite; } +@keyframes module-dot-obliterating-from-provisioned { + 0%, 59.9% { background: var(--theme_fail); } + 60%, 100% { background: var(--theme_ok); } +} +@keyframes module-dot-obliterating-from-hibernated { + 0%, 59.9% { background: var(--theme_fail); } + 60%, 100% { background: var(--theme_warn); } +} + +.module-state-dot[data-state="obliterating"][data-prev-state="provisioned"] { + animation: module-dot-obliterating-from-provisioned 0.5s steps(1, end) infinite; +} +.module-state-dot[data-state="obliterating"][data-prev-state="hibernated"] { + animation: module-dot-obliterating-from-hibernated 0.5s steps(1, end) infinite; +} +.module-state-dot[data-state="obliterating"] { + animation: module-dot-obliterating-from-provisioned 0.5s steps(1, end) infinite; +} + .module-action-cell { white-space: nowrap; display: flex; @@ -1730,6 +1749,53 @@ tr:last-child td { text-align: center; } +.module-pager-search { + font-size: 12px; + padding: 4px 8px; + width: 14em; + border: 1px solid var(--theme_g2); + border-radius: 4px; + background: var(--theme_g4); + color: var(--theme_g0); + outline: none; + transition: border-color 0.15s, outline 0.3s; +} + +.module-pager-search:focus { + border-color: var(--theme_p0); +} + +.module-pager-search::placeholder { + color: var(--theme_g1); +} + +@keyframes pager-search-flash { + from { box-shadow: inset 0 0 
0 100px var(--theme_p2); } + to { box-shadow: inset 0 0 0 100px transparent; } +} + +.zen_table > .pager-search-highlight > div { + animation: pager-search-flash 1s linear forwards; +} + +.module-table .pager-search-highlight td { + animation: pager-search-flash 1s linear forwards; +} + +@keyframes pager-loading-pulse { + 0%, 100% { opacity: 0.6; } + 50% { opacity: 0.2; } +} + +.pager-loading { + color: var(--theme_g1); + font-style: italic; + font-size: 14px; + font-weight: 600; + padding: 12px 0; + animation: pager-loading-pulse 1.5s ease-in-out infinite; +} + .module-table td, .module-table th { padding-top: 4px; padding-bottom: 4px; @@ -1750,6 +1816,35 @@ tr:last-child td { color: var(--theme_bright); } +.zen-copy-btn { + background: transparent; + border: 1px solid var(--theme_g2); + border-radius: 4px; + color: var(--theme_g1); + cursor: pointer; + font-size: 12px; + line-height: 1; + padding: 2px 5px; + margin-left: 6px; + vertical-align: middle; + flex-shrink: 0; + transition: background 0.1s, color 0.1s; +} +.zen-copy-btn:hover { + background: var(--theme_g2); + color: var(--theme_bright); +} +.zen-copy-btn.zen-copy-ok { + color: var(--theme_ok); + border-color: var(--theme_ok); +} + +.zen-copy-wrap { + display: inline-flex; + align-items: center; + white-space: nowrap; +} + .module-metrics-row td { padding: 6px 10px 10px 42px; background: var(--theme_g3); diff --git a/src/zenserver/frontend/zipfs.cpp b/src/zenserver/frontend/zipfs.cpp deleted file mode 100644 index c7c8687ca..000000000 --- a/src/zenserver/frontend/zipfs.cpp +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. 
- -#include "zipfs.h" - -#include <zencore/logging.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <zlib.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -////////////////////////////////////////////////////////////////////////// -namespace { - -#if ZEN_COMPILER_MSC -# pragma warning(push) -# pragma warning(disable : 4200) -#endif - - using ZipInt16 = uint16_t; - - struct ZipInt32 - { - operator uint32_t() const { return *(uint32_t*)Parts; } - uint16_t Parts[2]; - }; - - struct EocdRecord - { - enum : uint32_t - { - Magic = 0x0605'4b50, - }; - ZipInt32 Signature; - ZipInt16 ThisDiskIndex; - ZipInt16 CdStartDiskIndex; - ZipInt16 CdRecordThisDiskCount; - ZipInt16 CdRecordCount; - ZipInt32 CdSize; - ZipInt32 CdOffset; - ZipInt16 CommentSize; - char Comment[]; - }; - - struct CentralDirectoryRecord - { - enum : uint32_t - { - Magic = 0x0201'4b50, - }; - - ZipInt32 Signature; - ZipInt16 VersionMadeBy; - ZipInt16 VersionRequired; - ZipInt16 Flags; - ZipInt16 CompressionMethod; - ZipInt16 LastModTime; - ZipInt16 LastModDate; - ZipInt32 Crc32; - ZipInt32 CompressedSize; - ZipInt32 OriginalSize; - ZipInt16 FileNameLength; - ZipInt16 ExtraFieldLength; - ZipInt16 CommentLength; - ZipInt16 DiskIndex; - ZipInt16 InternalFileAttr; - ZipInt32 ExternalFileAttr; - ZipInt32 Offset; - char FileName[]; - }; - - struct LocalFileHeader - { - enum : uint32_t - { - Magic = 0x0403'4b50, - }; - - ZipInt32 Signature; - ZipInt16 VersionRequired; - ZipInt16 Flags; - ZipInt16 CompressionMethod; - ZipInt16 LastModTime; - ZipInt16 LastModDate; - ZipInt32 Crc32; - ZipInt32 CompressedSize; - ZipInt32 OriginalSize; - ZipInt16 FileNameLength; - ZipInt16 ExtraFieldLength; - char FileName[]; - }; - -#if ZEN_COMPILER_MSC -# pragma warning(pop) -#endif - -} // namespace - -////////////////////////////////////////////////////////////////////////// -ZipFs::ZipFs(IoBuffer&& Buffer) -{ - MemoryView View = Buffer.GetView(); - - uint8_t* Cursor = (uint8_t*)(View.GetData()) + View.GetSize(); - if 
(View.GetSize() < sizeof(EocdRecord)) - { - return; - } - - const auto* EocdCursor = (EocdRecord*)(Cursor - sizeof(EocdRecord)); - - // It is more correct to search backwards for EocdRecord::Magic as the - // comment can be of a variable length. But here we're not going to support - // zip files with comments. - if (EocdCursor->Signature != EocdRecord::Magic) - { - return; - } - - // Zip64 isn't supported either - if (EocdCursor->ThisDiskIndex == 0xffff) - { - return; - } - - Cursor = (uint8_t*)EocdCursor - uint32_t(EocdCursor->CdOffset) - uint32_t(EocdCursor->CdSize); - - const auto* CdCursor = (CentralDirectoryRecord*)(Cursor + EocdCursor->CdOffset); - for (int i = 0, n = EocdCursor->CdRecordCount; i < n; ++i) - { - const CentralDirectoryRecord& Cd = *CdCursor; - - bool Acceptable = true; - Acceptable &= (Cd.OriginalSize > 0); // has some content - Acceptable &= (Cd.CompressionMethod == 0 || Cd.CompressionMethod == 8); // stored or deflate - if (Acceptable) - { - const uint8_t* Lfh = Cursor + Cd.Offset; - if (uintptr_t(Lfh - Cursor) < View.GetSize()) - { - std::string_view FileName(Cd.FileName, Cd.FileNameLength); - FileItem Item; - Item.View = MemoryView{Lfh, size_t(0)}; - Item.CompressionMethod = Cd.CompressionMethod; - Item.CompressedSize = Cd.CompressedSize; - Item.UncompressedSize = Cd.OriginalSize; - m_Files.insert(std::make_pair(FileName, std::move(Item))); - } - } - - uint32_t ExtraBytes = Cd.FileNameLength + Cd.ExtraFieldLength + Cd.CommentLength; - CdCursor = (CentralDirectoryRecord*)(Cd.FileName + ExtraBytes); - } - - m_Buffer = std::move(Buffer); -} - -////////////////////////////////////////////////////////////////////////// -IoBuffer -ZipFs::GetFile(const std::string_view& FileName) const -{ - { - RwLock::SharedLockScope _(m_FilesLock); - - FileMap::const_iterator Iter = m_Files.find(FileName); - if (Iter == m_Files.end()) - { - return {}; - } - - const FileItem& Item = Iter->second; - if (Item.View.GetSize() > 0) - { - return 
IoBuffer(IoBuffer::Wrap, Item.View.GetData(), Item.View.GetSize()); - } - } - - RwLock::ExclusiveLockScope _(m_FilesLock); - - FileItem& Item = m_Files.find(FileName)->second; - if (Item.View.GetSize() > 0) - { - return IoBuffer(IoBuffer::Wrap, Item.View.GetData(), Item.View.GetSize()); - } - - const auto* Lfh = (LocalFileHeader*)(Item.View.GetData()); - const uint8_t* FileData = (const uint8_t*)(Lfh->FileName + Lfh->FileNameLength + Lfh->ExtraFieldLength); - - if (Item.CompressionMethod == 0) - { - // Stored — point directly into the buffer - Item.View = MemoryView(FileData, Item.UncompressedSize); - } - else - { - // Deflate — decompress using zlib - Item.DecompressedData = IoBuffer(Item.UncompressedSize); - - z_stream Stream = {}; - Stream.next_in = const_cast<Bytef*>(FileData); - Stream.avail_in = Item.CompressedSize; - Stream.next_out = (Bytef*)Item.DecompressedData.GetMutableView().GetData(); - Stream.avail_out = Item.UncompressedSize; - - // Use raw inflate (-MAX_WBITS) since zip stores raw deflate streams - if (inflateInit2(&Stream, -MAX_WBITS) != Z_OK) - { - ZEN_WARN("failed to initialize inflate for '{}'", FileName); - return {}; - } - - int Result = inflate(&Stream, Z_FINISH); - inflateEnd(&Stream); - - if (Result != Z_STREAM_END) - { - ZEN_WARN("failed to decompress '{}' (zlib error {})", FileName, Result); - return {}; - } - - Item.View = Item.DecompressedData.GetView(); - } - - return IoBuffer(IoBuffer::Wrap, Item.View.GetData(), Item.View.GetSize()); -} - -} // namespace zen diff --git a/src/zenserver/frontend/zipfs.h b/src/zenserver/frontend/zipfs.h deleted file mode 100644 index c6acf7334..000000000 --- a/src/zenserver/frontend/zipfs.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. 
- -#pragma once - -#include <zencore/iobuffer.h> -#include <zencore/thread.h> - -#include <unordered_map> - -namespace zen { - -class ZipFs -{ -public: - explicit ZipFs(IoBuffer&& Buffer); - - IoBuffer GetFile(const std::string_view& FileName) const; - -private: - struct FileItem - { - MemoryView View; // Initially points to LFH (size=0); resolved to file data on first access - uint32_t CompressedSize = 0; - uint32_t UncompressedSize = 0; - uint16_t CompressionMethod = 0; - IoBuffer DecompressedData; // Owns decompressed buffer for deflate entries - }; - - using FileMap = std::unordered_map<std::string_view, FileItem>; - mutable RwLock m_FilesLock; - FileMap mutable m_Files; - IoBuffer m_Buffer; -}; - -} // namespace zen diff --git a/src/zenserver/frontend/zipfs_test.cpp b/src/zenserver/frontend/zipfs_test.cpp deleted file mode 100644 index b5937b71c..000000000 --- a/src/zenserver/frontend/zipfs_test.cpp +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. 
- -#include "zipfs.h" - -#include <zencore/iobuffer.h> - -#if ZEN_WITH_TESTS - -ZEN_THIRD_PARTY_INCLUDES_START -# include <doctest/doctest.h> -# include <zlib.h> -ZEN_THIRD_PARTY_INCLUDES_END - -# include <cstring> -# include <vector> - -TEST_SUITE_BEGIN("server.zipfs"); - -namespace { - -// Helpers to build a minimal zip file in memory -struct ZipBuilder -{ - std::vector<uint8_t> Data; - - struct Entry - { - std::string Name; - uint32_t LocalHeaderOffset; - uint16_t CompressionMethod; - uint32_t CompressedSize; - uint32_t UncompressedSize; - }; - - std::vector<Entry> Entries; - - void Append(const void* Src, size_t Size) - { - const uint8_t* Bytes = (const uint8_t*)Src; - Data.insert(Data.end(), Bytes, Bytes + Size); - } - - void AppendU16(uint16_t V) { Append(&V, 2); } - void AppendU32(uint32_t V) { Append(&V, 4); } - - void AddFile(const std::string& Name, const void* Content, size_t ContentSize, bool Deflate) - { - std::vector<uint8_t> FileData; - uint16_t Method = 0; - - if (Deflate) - { - // Compress with raw deflate (no zlib/gzip header) - uLongf BoundSize = compressBound((uLong)ContentSize); - std::vector<uint8_t> TempBuf(BoundSize); - - z_stream Stream = {}; - Stream.next_in = (Bytef*)Content; - Stream.avail_in = (uInt)ContentSize; - Stream.next_out = TempBuf.data(); - Stream.avail_out = (uInt)TempBuf.size(); - - deflateInit2(&Stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY); - deflate(&Stream, Z_FINISH); - deflateEnd(&Stream); - - TempBuf.resize(Stream.total_out); - FileData = std::move(TempBuf); - Method = 8; - } - else - { - FileData.assign((const uint8_t*)Content, (const uint8_t*)Content + ContentSize); - } - - Entry E; - E.Name = Name; - E.LocalHeaderOffset = (uint32_t)Data.size(); - E.CompressionMethod = Method; - E.CompressedSize = (uint32_t)FileData.size(); - E.UncompressedSize = (uint32_t)ContentSize; - Entries.push_back(E); - - // Local file header - AppendU32(0x04034b50); // signature - AppendU16(20); // version 
needed - AppendU16(0); // flags - AppendU16(Method); // compression method - AppendU16(0); // last mod time - AppendU16(0); // last mod date - AppendU32(0); // crc32 (not validated by ZipFs) - AppendU32(E.CompressedSize); // compressed size - AppendU32(E.UncompressedSize); // uncompressed size - AppendU16((uint16_t)Name.size()); // file name length - AppendU16(0); // extra field length - Append(Name.data(), Name.size()); // file name - Append(FileData.data(), FileData.size()); - } - - zen::IoBuffer Build() - { - uint32_t CdOffset = (uint32_t)Data.size(); - - for (const Entry& E : Entries) - { - // Central directory record - AppendU32(0x02014b50); // signature - AppendU16(20); // version made by - AppendU16(20); // version needed - AppendU16(0); // flags - AppendU16(E.CompressionMethod); // compression method - AppendU16(0); // last mod time - AppendU16(0); // last mod date - AppendU32(0); // crc32 - AppendU32(E.CompressedSize); // compressed size - AppendU32(E.UncompressedSize); // uncompressed size - AppendU16((uint16_t)E.Name.size()); // file name length - AppendU16(0); // extra field length - AppendU16(0); // comment length - AppendU16(0); // disk index - AppendU16(0); // internal file attr - AppendU32(0); // external file attr - AppendU32(E.LocalHeaderOffset); // offset - Append(E.Name.data(), E.Name.size()); - } - - uint32_t CdSize = (uint32_t)Data.size() - CdOffset; - - // End of central directory record - AppendU32(0x06054b50); // signature - AppendU16(0); // this disk - AppendU16(0); // cd start disk - AppendU16((uint16_t)Entries.size()); // cd records this disk - AppendU16((uint16_t)Entries.size()); // cd records total - AppendU32(CdSize); // cd size - AppendU32(CdOffset); // cd offset - AppendU16(0); // comment length - - zen::IoBuffer Buffer(Data.size()); - std::memcpy(Buffer.GetMutableView().GetData(), Data.data(), Data.size()); - return Buffer; - } -}; - -} // namespace - -TEST_CASE("zipfs.stored") -{ - const char* Content = "Hello, World!"; - - 
ZipBuilder Zip; - Zip.AddFile("test.txt", Content, std::strlen(Content), false); - - zen::ZipFs Fs(Zip.Build()); - - zen::IoBuffer Result = Fs.GetFile("test.txt"); - REQUIRE(Result); - CHECK(Result.GetView().GetSize() == std::strlen(Content)); - CHECK(std::memcmp(Result.GetView().GetData(), Content, std::strlen(Content)) == 0); -} - -TEST_CASE("zipfs.deflate") -{ - const char* Content = "This is some content that will be deflate compressed in the zip file."; - - ZipBuilder Zip; - Zip.AddFile("compressed.txt", Content, std::strlen(Content), true); - - zen::ZipFs Fs(Zip.Build()); - - zen::IoBuffer Result = Fs.GetFile("compressed.txt"); - REQUIRE(Result); - CHECK(Result.GetView().GetSize() == std::strlen(Content)); - CHECK(std::memcmp(Result.GetView().GetData(), Content, std::strlen(Content)) == 0); -} - -TEST_CASE("zipfs.mixed") -{ - const char* StoredContent = "stored content"; - const char* DeflateContent = "deflate content that is compressed"; - - ZipBuilder Zip; - Zip.AddFile("stored.txt", StoredContent, std::strlen(StoredContent), false); - Zip.AddFile("deflated.txt", DeflateContent, std::strlen(DeflateContent), true); - - zen::ZipFs Fs(Zip.Build()); - - zen::IoBuffer Stored = Fs.GetFile("stored.txt"); - REQUIRE(Stored); - CHECK(Stored.GetView().GetSize() == std::strlen(StoredContent)); - CHECK(std::memcmp(Stored.GetView().GetData(), StoredContent, std::strlen(StoredContent)) == 0); - - zen::IoBuffer Deflated = Fs.GetFile("deflated.txt"); - REQUIRE(Deflated); - CHECK(Deflated.GetView().GetSize() == std::strlen(DeflateContent)); - CHECK(std::memcmp(Deflated.GetView().GetData(), DeflateContent, std::strlen(DeflateContent)) == 0); -} - -TEST_CASE("zipfs.not_found") -{ - const char* Content = "data"; - - ZipBuilder Zip; - Zip.AddFile("exists.txt", Content, std::strlen(Content), false); - - zen::ZipFs Fs(Zip.Build()); - - zen::IoBuffer Result = Fs.GetFile("missing.txt"); - CHECK(!Result); -} - -TEST_SUITE_END(); - -#endif // ZEN_WITH_TESTS diff --git 
a/src/zenserver/hub/README.md b/src/zenserver/hub/README.md index 322be3649..c75349fa5 100644 --- a/src/zenserver/hub/README.md +++ b/src/zenserver/hub/README.md @@ -3,23 +3,32 @@ The Zen Server can act in a "hub" mode. In this mode, the only services offered are the basic health and diagnostic services alongside an API to provision and deprovision Storage server instances. +A module ID is an alphanumeric identifier (hyphens allowed) that identifies a dataset, typically +associated with a content plug-in module. + ## Generic Server API GET `/health` - returns an `OK!` payload when all enabled services are up and responding ## Hub API -GET `{moduleid}` - alphanumeric identifier to identify a dataset (typically associated with a content plug-in module) - -GET `/hub/status` - obtain a summary of the currently live instances +GET `/hub/status` - obtain a summary of all currently live instances GET `/hub/modules/{moduleid}` - retrieve information about a module +DELETE `/hub/modules/{moduleid}` - obliterate a module (permanently destroys all data) + POST `/hub/modules/{moduleid}/provision` - provision service for module POST `/hub/modules/{moduleid}/deprovision` - deprovision service for module -GET `/hub/stats` - retrieve stats for service +POST `/hub/modules/{moduleid}/hibernate` - hibernate a provisioned module + +POST `/hub/modules/{moduleid}/wake` - wake a hibernated module + +GET `/stats/hub` - retrieve stats for the hub service + +`/hub/proxy/{port}/{path}` - reverse proxy to a child instance dashboard (all HTTP verbs) ## Hub Configuration diff --git a/src/zenserver/hub/httphubservice.cpp b/src/zenserver/hub/httphubservice.cpp index d52da5ae7..e4b0c28d0 100644 --- a/src/zenserver/hub/httphubservice.cpp +++ b/src/zenserver/hub/httphubservice.cpp @@ -2,6 +2,7 @@ #include "httphubservice.h" +#include "httpproxyhandler.h" #include "hub.h" #include "storageserverinstance.h" @@ -43,10 +44,11 @@ namespace { } } // namespace -HttpHubService::HttpHubService(Hub& Hub, 
HttpStatsService& StatsService, HttpStatusService& StatusService) +HttpHubService::HttpHubService(Hub& Hub, HttpProxyHandler& Proxy, HttpStatsService& StatsService, HttpStatusService& StatusService) : m_Hub(Hub) , m_StatsService(StatsService) , m_StatusService(StatusService) +, m_Proxy(Proxy) { using namespace std::literals; @@ -67,6 +69,23 @@ HttpHubService::HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpSta return true; }); + m_Router.AddMatcher("port", [](std::string_view Str) -> bool { + if (Str.empty()) + { + return false; + } + for (const auto C : Str) + { + if (!std::isdigit(C)) + { + return false; + } + } + return true; + }); + + m_Router.AddMatcher("proxypath", [](std::string_view Str) -> bool { return !Str.empty(); }); + m_Router.RegisterRoute( "status", [this](HttpRouterRequest& Req) { @@ -102,6 +121,11 @@ HttpHubService::HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpSta HttpVerb::kGet); m_Router.RegisterRoute( + "deprovision", + [this](HttpRouterRequest& Req) { HandleDeprovisionAll(Req.ServerRequest()); }, + HttpVerb::kPost); + + m_Router.RegisterRoute( "modules/{moduleid}", [this](HttpRouterRequest& Req) { std::string_view ModuleId = Req.GetCapture(1); @@ -232,6 +256,25 @@ HttpHubService::HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpSta }, HttpVerb::kPost); + m_Router.RegisterRoute( + "proxy/{port}/{proxypath}", + [this](HttpRouterRequest& Req) { + std::string_view PortStr = Req.GetCapture(1); + + // Use RelativeUriWithExtension to preserve the file extension that the + // router's URI parser strips (e.g. ".css", ".js") - the upstream server + // needs the full path including the extension. + std::string_view FullUri = Req.ServerRequest().RelativeUriWithExtension(); + std::string_view Prefix = "proxy/"; + + // FullUri is "proxy/{port}/{path...}" - skip past "proxy/{port}/" + size_t PathStart = Prefix.size() + PortStr.size() + 1; + std::string_view PathTail = (PathStart < FullUri.size()) ? 
FullUri.substr(PathStart) : std::string_view{}; + + m_Proxy.HandleProxyRequest(Req.ServerRequest(), PortStr, PathTail); + }, + HttpVerb::kGet | HttpVerb::kPost | HttpVerb::kPut | HttpVerb::kDelete | HttpVerb::kHead); + m_StatsService.RegisterHandler("hub", *this); m_StatusService.RegisterHandler("hub", *this); } @@ -333,6 +376,81 @@ HttpHubService::GetActivityCounter() } void +HttpHubService::HandleDeprovisionAll(HttpServerRequest& Request) +{ + std::vector<std::string> ModulesToDeprovision; + m_Hub.EnumerateModules([&ModulesToDeprovision](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) { + if (InstanceInfo.State == HubInstanceState::Provisioned || InstanceInfo.State == HubInstanceState::Hibernated) + { + ModulesToDeprovision.push_back(std::string(ModuleId)); + } + }); + + if (ModulesToDeprovision.empty()) + { + return Request.WriteResponse(HttpResponseCode::OK); + } + std::vector<std::string> Rejected; + std::vector<std::string> Accepted; + std::vector<std::string> Completed; + for (const std::string& ModuleId : ModulesToDeprovision) + { + Hub::Response Response = m_Hub.Deprovision(ModuleId); + switch (Response.ResponseCode) + { + case Hub::EResponseCode::NotFound: + // Ignore + break; + case Hub::EResponseCode::Rejected: + Rejected.push_back(ModuleId); + break; + case Hub::EResponseCode::Accepted: + Accepted.push_back(ModuleId); + break; + case Hub::EResponseCode::Completed: + Completed.push_back(ModuleId); + break; + } + } + if (Rejected.empty() && Accepted.empty() && Completed.empty()) + { + return Request.WriteResponse(HttpResponseCode::OK); + } + HttpResponseCode Response = HttpResponseCode::OK; + CbObjectWriter Writer; + if (!Completed.empty()) + { + Writer.BeginArray("Completed"); + for (const std::string& ModuleId : Completed) + { + Writer.AddString(ModuleId); + } + Writer.EndArray(); // Completed + } + if (!Accepted.empty()) + { + Writer.BeginArray("Accepted"); + for (const std::string& ModuleId : Accepted) + { + 
Writer.AddString(ModuleId); + } + Writer.EndArray(); // Accepted + Response = HttpResponseCode::Accepted; + } + if (!Rejected.empty()) + { + Writer.BeginArray("Rejected"); + for (const std::string& ModuleId : Rejected) + { + Writer.AddString(ModuleId); + } + Writer.EndArray(); // Rejected + Response = HttpResponseCode::Conflict; + } + Request.WriteResponse(Response, Writer.Save()); +} + +void HttpHubService::HandleModuleGet(HttpServerRequest& Request, std::string_view ModuleId) { Hub::InstanceInfo InstanceInfo; @@ -351,45 +469,36 @@ HttpHubService::HandleModuleGet(HttpServerRequest& Request, std::string_view Mod void HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId) { - Hub::InstanceInfo InstanceInfo; - if (!m_Hub.Find(ModuleId, &InstanceInfo)) + Hub::Response Resp = m_Hub.Obliterate(std::string(ModuleId)); + + if (HandleFailureResults(Request, Resp)) { - Request.WriteResponse(HttpResponseCode::NotFound); return; } - if (InstanceInfo.State == HubInstanceState::Provisioned || InstanceInfo.State == HubInstanceState::Hibernated || - InstanceInfo.State == HubInstanceState::Crashed) - { - try - { - Hub::Response Resp = m_Hub.Deprovision(std::string(ModuleId)); - - if (HandleFailureResults(Request, Resp)) - { - return; - } - - // TODO: nuke all related storage + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; + CbObjectWriter Obj; + Obj << "moduleId" << ModuleId; + Request.WriteResponse(HttpCode, Obj.Save()); +} - const HttpResponseCode HttpCode = - (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? 
HttpResponseCode::Accepted : HttpResponseCode::OK; - CbObjectWriter Obj; - Obj << "moduleId" << ModuleId; - return Request.WriteResponse(HttpCode, Obj.Save()); - } - catch (const std::exception& Ex) - { - ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what()); - throw; - } - } +void +HttpHubService::OnWebSocketOpen(Ref<WebSocketConnection> Connection, std::string_view RelativeUri) +{ + m_Proxy.OnWebSocketOpen(std::move(Connection), RelativeUri); +} - // TODO: nuke all related storage +void +HttpHubService::OnWebSocketMessage(WebSocketConnection& Conn, const WebSocketMessage& Msg) +{ + m_Proxy.OnWebSocketMessage(Conn, Msg); +} - CbObjectWriter Obj; - Obj << "moduleId" << ModuleId; - Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +void +HttpHubService::OnWebSocketClose(WebSocketConnection& Conn, uint16_t Code, std::string_view Reason) +{ + m_Proxy.OnWebSocketClose(Conn, Code, Reason); } } // namespace zen diff --git a/src/zenserver/hub/httphubservice.h b/src/zenserver/hub/httphubservice.h index 1bb1c303e..f4d1b0b89 100644 --- a/src/zenserver/hub/httphubservice.h +++ b/src/zenserver/hub/httphubservice.h @@ -2,11 +2,16 @@ #pragma once +#include <zencore/thread.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstatus.h> +#include <zenhttp/websocket.h> + +#include <memory> namespace zen { +class HttpProxyHandler; class HttpStatsService; class Hub; @@ -16,10 +21,10 @@ class Hub; * use in UEFN content worker style scenarios. 
* */ -class HttpHubService : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider +class HttpHubService : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider, public IWebSocketHandler { public: - HttpHubService(Hub& Hub, HttpStatsService& StatsService, HttpStatusService& StatusService); + HttpHubService(Hub& Hub, HttpProxyHandler& Proxy, HttpStatsService& StatsService, HttpStatusService& StatusService); ~HttpHubService(); HttpHubService(const HttpHubService&) = delete; @@ -32,6 +37,11 @@ public: virtual CbObject CollectStats() override; virtual uint64_t GetActivityCounter() override; + // IWebSocketHandler + void OnWebSocketOpen(Ref<WebSocketConnection> Connection, std::string_view RelativeUri) override; + void OnWebSocketMessage(WebSocketConnection& Conn, const WebSocketMessage& Msg) override; + void OnWebSocketClose(WebSocketConnection& Conn, uint16_t Code, std::string_view Reason) override; + void SetNotificationEndpoint(std::string_view UpstreamNotificationEndpoint, std::string_view InstanceId); private: @@ -43,8 +53,11 @@ private: HttpStatsService& m_StatsService; HttpStatusService& m_StatusService; + void HandleDeprovisionAll(HttpServerRequest& Request); void HandleModuleGet(HttpServerRequest& Request, std::string_view ModuleId); void HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId); + + HttpProxyHandler& m_Proxy; }; } // namespace zen diff --git a/src/zenserver/hub/httpproxyhandler.cpp b/src/zenserver/hub/httpproxyhandler.cpp new file mode 100644 index 000000000..235d7388f --- /dev/null +++ b/src/zenserver/hub/httpproxyhandler.cpp @@ -0,0 +1,528 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "httpproxyhandler.h" + +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/string.h> +#include <zenhttp/httpclient.h> +#include <zenhttp/httpwsclient.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <fmt/format.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <charconv> + +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +#endif // ZEN_WITH_TESTS + +namespace zen { + +namespace { + + std::string InjectProxyScript(std::string_view Html, uint16_t Port) + { + ExtendableStringBuilder<2048> Script; + Script.Append("<script>\n(function(){\n var P = \"/hub/proxy/"); + Script.Append(fmt::format("{}", Port)); + Script.Append( + "\";\n" + " var OF = window.fetch;\n" + " window.fetch = function(u, o) {\n" + " if (typeof u === \"string\") {\n" + " try {\n" + " var p = new URL(u, location.origin);\n" + " if (p.origin === location.origin && !p.pathname.startsWith(P))\n" + " { p.pathname = P + p.pathname; u = p.toString(); }\n" + " } catch(e) {\n" + " if (u.startsWith(\"/\") && !u.startsWith(P)) u = P + u;\n" + " }\n" + " }\n" + " return OF.call(this, u, o);\n" + " };\n" + " var OW = window.WebSocket;\n" + " window.WebSocket = function(u, pr) {\n" + " try {\n" + " var p = new URL(u);\n" + " if (p.hostname === location.hostname\n" + " && String(p.port || (p.protocol === \"wss:\" ? \"443\" : \"80\"))\n" + " === String(location.port || (location.protocol === \"https:\" ? \"443\" : \"80\"))\n" + " && !p.pathname.startsWith(P))\n" + " { p.pathname = P + p.pathname; u = p.toString(); }\n" + " } catch(e) {}\n" + " return pr !== undefined ? 
new OW(u, pr) : new OW(u);\n" + " };\n" + " window.WebSocket.prototype = OW.prototype;\n" + " window.WebSocket.CONNECTING = OW.CONNECTING;\n" + " window.WebSocket.OPEN = OW.OPEN;\n" + " window.WebSocket.CLOSING = OW.CLOSING;\n" + " window.WebSocket.CLOSED = OW.CLOSED;\n" + " var OO = window.open;\n" + " window.open = function(u, t, f) {\n" + " if (typeof u === \"string\") {\n" + " try {\n" + " var p = new URL(u, location.origin);\n" + " if (p.origin === location.origin && !p.pathname.startsWith(P))\n" + " { p.pathname = P + p.pathname; u = p.toString(); }\n" + " } catch(e) {}\n" + " }\n" + " return OO.call(this, u, t, f);\n" + " };\n" + " document.addEventListener(\"click\", function(e) {\n" + " var t = e.composedPath ? e.composedPath()[0] : e.target;\n" + " while (t && t.tagName !== \"A\") t = t.parentNode || t.host;\n" + " if (!t || !t.href) return;\n" + " try {\n" + " var h = new URL(t.href);\n" + " if (h.origin === location.origin && !h.pathname.startsWith(P))\n" + " { h.pathname = P + h.pathname; e.preventDefault(); window.location.href = h.toString(); }\n" + " } catch(x) {}\n" + " }, true);\n" + "})();\n</script>"); + + std::string ScriptStr = Script.ToString(); + + size_t HeadClose = Html.find("</head>"); + if (HeadClose != std::string_view::npos) + { + std::string Result; + Result.reserve(Html.size() + ScriptStr.size()); + Result.append(Html.substr(0, HeadClose)); + Result.append(ScriptStr); + Result.append(Html.substr(HeadClose)); + return Result; + } + + std::string Result; + Result.reserve(Html.size() + ScriptStr.size()); + Result.append(ScriptStr); + Result.append(Html); + return Result; + } + +} // namespace + +struct HttpProxyHandler::WsBridge : public RefCounted, public IWsClientHandler +{ + Ref<WebSocketConnection> ClientConn; + std::unique_ptr<HttpWsClient> UpstreamClient; + uint16_t Port = 0; + + void OnWsOpen() override {} + + void OnWsMessage(const WebSocketMessage& Msg) override + { + if (!ClientConn->IsOpen()) + { + return; + } + switch 
(Msg.Opcode) + { + case WebSocketOpcode::kText: + ClientConn->SendText(std::string_view(static_cast<const char*>(Msg.Payload.GetData()), Msg.Payload.GetSize())); + break; + case WebSocketOpcode::kBinary: + ClientConn->SendBinary(std::span<const uint8_t>(static_cast<const uint8_t*>(Msg.Payload.GetData()), Msg.Payload.GetSize())); + break; + default: + break; + } + } + + void OnWsClose(uint16_t Code, std::string_view Reason) override + { + if (ClientConn->IsOpen()) + { + ClientConn->Close(Code, Reason); + } + } +}; + +HttpProxyHandler::HttpProxyHandler() +{ +} + +HttpProxyHandler::HttpProxyHandler(PortValidator ValidatePort) : m_ValidatePort(std::move(ValidatePort)) +{ +} + +void +HttpProxyHandler::SetPortValidator(PortValidator ValidatePort) +{ + m_ValidatePort = std::move(ValidatePort); +} + +HttpProxyHandler::~HttpProxyHandler() +{ + try + { + Shutdown(); + } + catch (...) + { + } +} + +HttpClient& +HttpProxyHandler::GetOrCreateProxyClient(uint16_t Port) +{ + HttpClient* Result = nullptr; + m_ProxyClientsLock.WithExclusiveLock([&] { + auto It = m_ProxyClients.find(Port); + if (It == m_ProxyClients.end()) + { + HttpClientSettings Settings; + Settings.LogCategory = "hub-proxy"; + Settings.ConnectTimeout = std::chrono::milliseconds(5000); + Settings.Timeout = std::chrono::milliseconds(30000); + auto Client = std::make_unique<HttpClient>(fmt::format("http://127.0.0.1:{}", Port), Settings); + Result = Client.get(); + m_ProxyClients.emplace(Port, std::move(Client)); + } + else + { + Result = It->second.get(); + } + }); + return *Result; +} + +void +HttpProxyHandler::HandleProxyRequest(HttpServerRequest& Request, std::string_view PortStr, std::string_view PathTail) +{ + uint16_t Port = 0; + auto [Ptr, Ec] = std::from_chars(PortStr.data(), PortStr.data() + PortStr.size(), Port); + if (Ec != std::errc{} || Ptr != PortStr.data() + PortStr.size()) + { + Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "invalid proxy URL"); + return; + } + + if 
(!m_ValidatePort(Port)) + { + Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, "target instance not available"); + return; + } + + HttpClient& Client = GetOrCreateProxyClient(Port); + + std::string RequestPath; + RequestPath.reserve(1 + PathTail.size()); + RequestPath.push_back('/'); + RequestPath.append(PathTail); + + std::string_view QueryString = Request.QueryString(); + if (!QueryString.empty()) + { + RequestPath.push_back('?'); + RequestPath.append(QueryString); + } + + HttpClient::KeyValueMap ForwardHeaders; + HttpContentType AcceptType = Request.AcceptContentType(); + if (AcceptType != HttpContentType::kUnknownContentType) + { + ForwardHeaders->emplace("Accept", std::string(MapContentTypeToString(AcceptType))); + } + + std::string_view Auth = Request.GetAuthorizationHeader(); + if (!Auth.empty()) + { + ForwardHeaders->emplace("Authorization", std::string(Auth)); + } + + HttpContentType ReqContentType = Request.RequestContentType(); + if (ReqContentType != HttpContentType::kUnknownContentType) + { + ForwardHeaders->emplace("Content-Type", std::string(MapContentTypeToString(ReqContentType))); + } + + HttpClient::Response Response; + + switch (Request.RequestVerb()) + { + case HttpVerb::kGet: + Response = Client.Get(RequestPath, ForwardHeaders); + break; + case HttpVerb::kPost: + { + IoBuffer Payload = Request.ReadPayload(); + Response = Client.Post(RequestPath, Payload, ForwardHeaders); + break; + } + case HttpVerb::kPut: + { + IoBuffer Payload = Request.ReadPayload(); + Response = Client.Put(RequestPath, Payload, ForwardHeaders); + break; + } + case HttpVerb::kDelete: + Response = Client.Delete(RequestPath, ForwardHeaders); + break; + case HttpVerb::kHead: + Response = Client.Head(RequestPath, ForwardHeaders); + break; + default: + Request.WriteResponse(HttpResponseCode::MethodNotAllowed, HttpContentType::kText, "method not supported"); + return; + } + + if (Response.Error) + { + if (!m_ValidatePort(Port)) + { + 
Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, "target instance not available"); + return; + } + + ZEN_WARN("proxy request to port {} failed: {}", Port, Response.Error->ErrorMessage); + switch (Response.Error->ErrorCode) + { + case HttpClientErrorCode::kConnectionFailure: + case HttpClientErrorCode::kHostResolutionFailure: + return Request.WriteResponse(HttpResponseCode::NotFound, + HttpContentType::kText, + fmt::format("instance not reachable: {}", Response.Error->ErrorMessage)); + case HttpClientErrorCode::kOperationTimedOut: + return Request.WriteResponse(HttpResponseCode::GatewayTimeout, + HttpContentType::kText, + fmt::format("upstream request timed out: {}", Response.Error->ErrorMessage)); + case HttpClientErrorCode::kRequestCancelled: + return Request.WriteResponse(HttpResponseCode::ServiceUnavailable, + HttpContentType::kText, + fmt::format("upstream request cancelled: {}", Response.Error->ErrorMessage)); + default: + return Request.WriteResponse(HttpResponseCode::BadGateway, + HttpContentType::kText, + fmt::format("upstream request failed: {}", Response.Error->ErrorMessage)); + } + } + + HttpContentType ContentType = Response.ResponsePayload.GetContentType(); + + if (ContentType == HttpContentType::kHTML) + { + std::string_view Html(static_cast<const char*>(Response.ResponsePayload.GetData()), Response.ResponsePayload.GetSize()); + std::string Injected = InjectProxyScript(Html, Port); + Request.WriteResponse(Response.StatusCode, HttpContentType::kHTML, std::string_view(Injected)); + } + else + { + Request.WriteResponse(Response.StatusCode, ContentType, std::move(Response.ResponsePayload)); + } +} + +void +HttpProxyHandler::PrunePort(uint16_t Port) +{ + m_ProxyClientsLock.WithExclusiveLock([&] { m_ProxyClients.erase(Port); }); + + std::vector<Ref<WsBridge>> Stale; + m_WsBridgesLock.WithExclusiveLock([&] { + for (auto It = m_WsBridges.begin(); It != m_WsBridges.end();) + { + if (It->second->Port == Port) + { + 
Stale.push_back(std::move(It->second)); + It = m_WsBridges.erase(It); + } + else + { + ++It; + } + } + }); + + for (auto& Bridge : Stale) + { + if (Bridge->UpstreamClient) + { + Bridge->UpstreamClient->Close(1001, "instance shutting down"); + } + if (Bridge->ClientConn->IsOpen()) + { + Bridge->ClientConn->Close(1001, "instance shutting down"); + } + } +} + +void +HttpProxyHandler::Shutdown() +{ + m_WsBridgesLock.WithExclusiveLock([&] { m_WsBridges.clear(); }); + m_ProxyClientsLock.WithExclusiveLock([&] { m_ProxyClients.clear(); }); +} + +////////////////////////////////////////////////////////////////////////// +// +// WebSocket proxy +// + +void +HttpProxyHandler::OnWebSocketOpen(Ref<WebSocketConnection> Connection, std::string_view RelativeUri) +{ + const std::string_view ProxyPrefix = "proxy/"; + if (!RelativeUri.starts_with(ProxyPrefix)) + { + Connection->Close(1008, "unsupported WebSocket endpoint"); + return; + } + + std::string_view ProxyTail = RelativeUri.substr(ProxyPrefix.size()); + + size_t SlashPos = ProxyTail.find('/'); + std::string_view PortStr = (SlashPos != std::string_view::npos) ? ProxyTail.substr(0, SlashPos) : ProxyTail; + std::string_view Path = (SlashPos != std::string_view::npos) ? 
ProxyTail.substr(SlashPos) : "/"; + + uint16_t Port = 0; + auto [Ptr, Ec] = std::from_chars(PortStr.data(), PortStr.data() + PortStr.size(), Port); + if (Ec != std::errc{} || Ptr != PortStr.data() + PortStr.size()) + { + Connection->Close(1008, "invalid proxy URL"); + return; + } + + if (!m_ValidatePort(Port)) + { + Connection->Close(1008, "target instance not available"); + return; + } + + std::string WsUrl = HttpToWsUrl(fmt::format("http://127.0.0.1:{}", Port), Path); + + Ref<WsBridge> Bridge(new WsBridge()); + Bridge->ClientConn = Connection; + Bridge->Port = Port; + + Bridge->UpstreamClient = std::make_unique<HttpWsClient>(WsUrl, *Bridge); + + try + { + Bridge->UpstreamClient->Connect(); + } + catch (const std::exception& Ex) + { + ZEN_WARN("proxy WebSocket connect to {} failed: {}", WsUrl, Ex.what()); + Connection->Close(1011, "upstream connect failed"); + return; + } + + WebSocketConnection* Key = Connection.Get(); + m_WsBridgesLock.WithExclusiveLock([&] { m_WsBridges.emplace(Key, std::move(Bridge)); }); +} + +void +HttpProxyHandler::OnWebSocketMessage(WebSocketConnection& Conn, const WebSocketMessage& Msg) +{ + Ref<WsBridge> Bridge; + m_WsBridgesLock.WithSharedLock([&] { + auto It = m_WsBridges.find(&Conn); + if (It != m_WsBridges.end()) + { + Bridge = It->second; + } + }); + + if (!Bridge || !Bridge->UpstreamClient) + { + return; + } + + switch (Msg.Opcode) + { + case WebSocketOpcode::kText: + Bridge->UpstreamClient->SendText(std::string_view(static_cast<const char*>(Msg.Payload.GetData()), Msg.Payload.GetSize())); + break; + case WebSocketOpcode::kBinary: + Bridge->UpstreamClient->SendBinary( + std::span<const uint8_t>(static_cast<const uint8_t*>(Msg.Payload.GetData()), Msg.Payload.GetSize())); + break; + case WebSocketOpcode::kClose: + Bridge->UpstreamClient->Close(Msg.CloseCode, {}); + break; + default: + break; + } +} + +void +HttpProxyHandler::OnWebSocketClose(WebSocketConnection& Conn, uint16_t Code, std::string_view Reason) +{ + Ref<WsBridge> Bridge 
= m_WsBridgesLock.WithExclusiveLock([this, &Conn]() -> Ref<WsBridge> { + auto It = m_WsBridges.find(&Conn); + if (It != m_WsBridges.end()) + { + Ref<WsBridge> Bridge = std::move(It->second); + m_WsBridges.erase(It); + return Bridge; + } + return {}; + }); + + if (Bridge && Bridge->UpstreamClient) + { + Bridge->UpstreamClient->Close(Code, Reason); + } +} + +#if ZEN_WITH_TESTS + +TEST_SUITE_BEGIN("server.httpproxyhandler"); + +TEST_CASE("server.httpproxyhandler.html_injection") +{ + SUBCASE("injects before </head>") + { + std::string Result = InjectProxyScript("<html><head></head><body></body></html>", 21005); + CHECK(Result.find("<script>") != std::string::npos); + CHECK(Result.find("/hub/proxy/21005") != std::string::npos); + size_t ScriptEnd = Result.find("</script>"); + size_t HeadClose = Result.find("</head>"); + REQUIRE(ScriptEnd != std::string::npos); + REQUIRE(HeadClose != std::string::npos); + CHECK(ScriptEnd < HeadClose); + } + + SUBCASE("prepends when no </head>") + { + std::string Result = InjectProxyScript("<body>content</body>", 21005); + CHECK(Result.find("<script>") == 0); + CHECK(Result.find("<body>content</body>") != std::string::npos); + } + + SUBCASE("empty html") + { + std::string Result = InjectProxyScript("", 21005); + CHECK(Result.find("<script>") != std::string::npos); + CHECK(Result.find("/hub/proxy/21005") != std::string::npos); + } + + SUBCASE("preserves original content") + { + std::string_view Html = "<html><head><title>Test</title></head><body><h1>Dashboard</h1></body></html>"; + std::string Result = InjectProxyScript(Html, 21005); + CHECK(Result.find("<title>Test</title>") != std::string::npos); + CHECK(Result.find("<h1>Dashboard</h1>") != std::string::npos); + } +} + +TEST_CASE("server.httpproxyhandler.port_embedding") +{ + std::string Result = InjectProxyScript("<head></head>", 80); + CHECK(Result.find("/hub/proxy/80") != std::string::npos); + + Result = InjectProxyScript("<head></head>", 65535); + 
CHECK(Result.find("/hub/proxy/65535") != std::string::npos); +} + +TEST_SUITE_END(); + +void +httpproxyhandler_forcelink() +{ +} +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenserver/hub/httpproxyhandler.h b/src/zenserver/hub/httpproxyhandler.h new file mode 100644 index 000000000..8667c0ca1 --- /dev/null +++ b/src/zenserver/hub/httpproxyhandler.h @@ -0,0 +1,52 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/thread.h> +#include <zenhttp/httpserver.h> +#include <zenhttp/websocket.h> + +#include <functional> +#include <memory> +#include <unordered_map> + +namespace zen { + +class HttpClient; + +class HttpProxyHandler +{ +public: + using PortValidator = std::function<bool(uint16_t)>; + + HttpProxyHandler(); + explicit HttpProxyHandler(PortValidator ValidatePort); + ~HttpProxyHandler(); + + void SetPortValidator(PortValidator ValidatePort); + + HttpProxyHandler(const HttpProxyHandler&) = delete; + HttpProxyHandler& operator=(const HttpProxyHandler&) = delete; + + void HandleProxyRequest(HttpServerRequest& Request, std::string_view PortStr, std::string_view PathTail); + void PrunePort(uint16_t Port); + void Shutdown(); + + void OnWebSocketOpen(Ref<WebSocketConnection> Connection, std::string_view RelativeUri); + void OnWebSocketMessage(WebSocketConnection& Conn, const WebSocketMessage& Msg); + void OnWebSocketClose(WebSocketConnection& Conn, uint16_t Code, std::string_view Reason); + +private: + PortValidator m_ValidatePort; + + HttpClient& GetOrCreateProxyClient(uint16_t Port); + + RwLock m_ProxyClientsLock; + std::unordered_map<uint16_t, std::unique_ptr<HttpClient>> m_ProxyClients; + + struct WsBridge; + RwLock m_WsBridgesLock; + std::unordered_map<WebSocketConnection*, Ref<WsBridge>> m_WsBridges; +}; + +} // namespace zen diff --git a/src/zenserver/hub/hub.cpp b/src/zenserver/hub/hub.cpp index 76c7a8f6d..c03c1a9a0 100644 --- a/src/zenserver/hub/hub.cpp +++ b/src/zenserver/hub/hub.cpp @@ -9,6 +9,7 @@ 
#include <zencore/fmtutils.h> #include <zencore/logging.h> #include <zencore/scopeguard.h> +#include <zencore/string.h> #include <zencore/timer.h> #include <zencore/workthreadpool.h> #include <zenhttp/httpclient.h> @@ -170,32 +171,35 @@ Hub::GetMachineMetrics(SystemMetrics& OutSystemMetrict, DiskSpace& OutDiskSpace) ////////////////////////////////////////////////////////////////////////// -Hub::Hub(const Configuration& Config, - ZenServerEnvironment&& RunEnvironment, - WorkerThreadPool* OptionalWorkerPool, - AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback) +Hub::Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback) : m_Config(Config) , m_RunEnvironment(std::move(RunEnvironment)) -, m_WorkerPool(OptionalWorkerPool) +, m_WorkerPool(Config.OptionalProvisionWorkerPool) , m_BackgroundWorkLatch(1) , m_ModuleStateChangeCallback(std::move(ModuleStateChangeCallback)) , m_ActiveInstances(Config.InstanceLimit) , m_FreeActiveInstanceIndexes(Config.InstanceLimit) { + ZEN_ASSERT_FORMAT( + Config.OptionalProvisionWorkerPool != Config.OptionalHydrationWorkerPool || Config.OptionalProvisionWorkerPool == nullptr, + "Provision and hydration worker pools must be distinct to avoid deadlocks"); + + HydrationBase::Configuration HydrationConfig; if (!m_Config.HydrationTargetSpecification.empty()) { - m_HydrationTargetSpecification = m_Config.HydrationTargetSpecification; + HydrationConfig.TargetSpecification = m_Config.HydrationTargetSpecification; } else if (!m_Config.HydrationOptions) { std::filesystem::path FileHydrationPath = m_RunEnvironment.CreateChildDir("hydration_storage"); ZEN_INFO("using file hydration path: '{}'", FileHydrationPath); - m_HydrationTargetSpecification = fmt::format("file://{}", WideToUtf8(FileHydrationPath.native())); + HydrationConfig.TargetSpecification = fmt::format("file://{}", WideToUtf8(FileHydrationPath.native())); } else { - m_HydrationOptions = 
m_Config.HydrationOptions; + HydrationConfig.Options = m_Config.HydrationOptions; } + m_Hydration = InitHydration(HydrationConfig); m_HydrationTempPath = m_RunEnvironment.CreateChildDir("hydration_temp"); ZEN_INFO("using hydration temp path: '{}'", m_HydrationTempPath); @@ -323,13 +327,18 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo) { auto NewInstance = std::make_unique<StorageServerInstance>( m_RunEnvironment, - StorageServerInstance::Configuration{.BasePort = GetInstanceIndexAssignedPort(ActiveInstanceIndex), - .HydrationTempPath = m_HydrationTempPath, - .HydrationTargetSpecification = m_HydrationTargetSpecification, - .HydrationOptions = m_HydrationOptions, - .HttpThreadCount = m_Config.InstanceHttpThreadCount, - .CoreLimit = m_Config.InstanceCoreLimit, - .ConfigPath = m_Config.InstanceConfigPath}, + *m_Hydration, + StorageServerInstance::Configuration{.BasePort = GetInstanceIndexAssignedPort(ActiveInstanceIndex), + .StateDir = m_RunEnvironment.CreateChildDir(ModuleId), + .TempDir = m_HydrationTempPath / ModuleId, + .HttpThreadCount = m_Config.InstanceHttpThreadCount, + .CoreLimit = m_Config.InstanceCoreLimit, + .ConfigPath = m_Config.InstanceConfigPath, + .Malloc = m_Config.InstanceMalloc, + .Trace = m_Config.InstanceTrace, + .TraceHost = m_Config.InstanceTraceHost, + .TraceFile = m_Config.InstanceTraceFile, + .OptionalWorkerPool = m_Config.OptionalHydrationWorkerPool}, ModuleId); #if ZEN_PLATFORM_WINDOWS @@ -383,11 +392,14 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo) switch (CurrentState) { case HubInstanceState::Provisioning: + case HubInstanceState::Recovering: + case HubInstanceState::Waking: return Response{EResponseCode::Accepted}; case HubInstanceState::Crashed: case HubInstanceState::Unprovisioned: break; case HubInstanceState::Provisioned: + m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.store(std::chrono::system_clock::now()); return Response{EResponseCode::Completed}; 
case HubInstanceState::Hibernated: _.ReleaseNow(); @@ -408,6 +420,7 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo) Instance = {}; if (ActualState == HubInstanceState::Provisioned) { + m_ActiveInstances[ActiveInstanceIndex].LastActivityTime.store(std::chrono::system_clock::now()); return Response{EResponseCode::Completed}; } if (ActualState == HubInstanceState::Provisioning) @@ -594,6 +607,7 @@ Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveI switch (CurrentState) { case HubInstanceState::Deprovisioning: + case HubInstanceState::Obliterating: return Response{EResponseCode::Accepted}; case HubInstanceState::Crashed: case HubInstanceState::Hibernated: @@ -639,11 +653,11 @@ Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveI try { m_WorkerPool->ScheduleWork( - [this, ModuleId = std::string(ModuleId), ActiveInstanceIndex, Instance = std::move(SharedInstancePtr)]() mutable { + [this, ModuleId = std::string(ModuleId), ActiveInstanceIndex, Instance = std::move(SharedInstancePtr), OldState]() mutable { auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); }); try { - CompleteDeprovision(*Instance, ActiveInstanceIndex); + CompleteDeprovision(*Instance, ActiveInstanceIndex, OldState); } catch (const std::exception& Ex) { @@ -671,20 +685,235 @@ Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveI } else { - CompleteDeprovision(Instance, ActiveInstanceIndex); + CompleteDeprovision(Instance, ActiveInstanceIndex, OldState); + } + + return Response{m_WorkerPool ? 
EResponseCode::Accepted : EResponseCode::Completed}; +} + +Hub::Response +Hub::Obliterate(const std::string& ModuleId) +{ + ZEN_ASSERT(!m_ShutdownFlag.load()); + + StorageServerInstance::ExclusiveLockedPtr Instance; + size_t ActiveInstanceIndex = (size_t)-1; + { + RwLock::ExclusiveLockScope Lock(m_Lock); + + if (auto It = m_InstanceLookup.find(ModuleId); It != m_InstanceLookup.end()) + { + ActiveInstanceIndex = It->second; + ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); + + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + + switch (CurrentState) + { + case HubInstanceState::Obliterating: + return Response{EResponseCode::Accepted}; + case HubInstanceState::Provisioned: + case HubInstanceState::Hibernated: + case HubInstanceState::Crashed: + break; + case HubInstanceState::Deprovisioning: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is being deprovisioned, retry after completion", ModuleId)}; + case HubInstanceState::Recovering: + return Response{EResponseCode::Rejected, fmt::format("Module '{}' is currently recovering from a crash", ModuleId)}; + case HubInstanceState::Unprovisioned: + return Response{EResponseCode::Completed}; + default: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))}; + } + + std::unique_ptr<StorageServerInstance>& RawInstance = m_ActiveInstances[ActiveInstanceIndex].Instance; + ZEN_ASSERT(RawInstance != nullptr); + + Instance = RawInstance->LockExclusive(/*Wait*/ true); + } + else + { + // Module not tracked by hub - obliterate backend data directly. + // Covers the deprovisioned case where data was preserved via dehydration. 
+ if (m_ObliteratingInstances.contains(ModuleId)) + { + return Response{EResponseCode::Accepted}; + } + + m_ObliteratingInstances.insert(ModuleId); + Lock.ReleaseNow(); + + if (m_WorkerPool) + { + m_BackgroundWorkLatch.AddCount(1); + try + { + m_WorkerPool->ScheduleWork( + [this, ModuleId = std::string(ModuleId)]() { + auto Guard = MakeGuard([this, ModuleId]() { + m_Lock.WithExclusiveLock([this, ModuleId]() { m_ObliteratingInstances.erase(ModuleId); }); + m_BackgroundWorkLatch.CountDown(); + }); + try + { + ObliterateBackendData(ModuleId); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async obliterate of untracked module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + catch (const std::exception& DispatchEx) + { + ZEN_ERROR("Failed to dispatch async obliterate of untracked module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + { + RwLock::ExclusiveLockScope _(m_Lock); + m_ObliteratingInstances.erase(ModuleId); + } + throw; + } + + return Response{EResponseCode::Accepted}; + } + + auto _ = MakeGuard([this, &ModuleId]() { + RwLock::ExclusiveLockScope _(m_Lock); + m_ObliteratingInstances.erase(ModuleId); + }); + + ObliterateBackendData(ModuleId); + + return Response{EResponseCode::Completed}; + } + } + + HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Obliterating); + const uint16_t Port = Instance.GetBasePort(); + NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Obliterating, Port, {}); + + if (m_WorkerPool) + { + std::shared_ptr<StorageServerInstance::ExclusiveLockedPtr> SharedInstancePtr = + std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance)); + + m_BackgroundWorkLatch.AddCount(1); + try + { + m_WorkerPool->ScheduleWork( + [this, ModuleId = std::string(ModuleId), ActiveInstanceIndex, Instance = std::move(SharedInstancePtr)]() mutable { + auto _ = MakeGuard([this]() { 
m_BackgroundWorkLatch.CountDown(); }); + try + { + CompleteObliterate(*Instance, ActiveInstanceIndex); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async obliterate of module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + catch (const std::exception& DispatchEx) + { + ZEN_ERROR("Failed async dispatch obliterate of module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + + NotifyStateUpdate(ModuleId, HubInstanceState::Obliterating, OldState, Port, {}); + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState); + } + + throw; + } + } + else + { + CompleteObliterate(Instance, ActiveInstanceIndex); } return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; } void -Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex) +Hub::CompleteObliterate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex) { const std::string ModuleId(Instance.GetModuleId()); const uint16_t Port = Instance.GetBasePort(); try { + Instance.Obliterate(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to obliterate storage server instance for module '{}': {}", ModuleId, Ex.what()); + Instance = {}; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Crashed); + } + NotifyStateUpdate(ModuleId, HubInstanceState::Obliterating, HubInstanceState::Crashed, Port, {}); + throw; + } + + NotifyStateUpdate(ModuleId, HubInstanceState::Obliterating, HubInstanceState::Unprovisioned, Port, {}); + RemoveInstance(Instance, ActiveInstanceIndex, ModuleId); +} + +void 
+Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState) +{ + const std::string ModuleId(Instance.GetModuleId()); + const uint16_t Port = Instance.GetBasePort(); + + try + { + if (OldState == HubInstanceState::Provisioned) + { + ZEN_INFO("Triggering GC for module {}", ModuleId); + Stopwatch GcTimer; + + HttpClient GcClient(fmt::format("http://localhost:{}", Port)); + + HttpClient::KeyValueMap Params; + Params.Entries.insert({"smallobjects", "true"}); + Params.Entries.insert({"skipcid", "false"}); + HttpClient::Response Response = GcClient.Post("/admin/gc", HttpClient::Accept(HttpContentType::kCbObject), Params); + bool GcCompleted = false; + Stopwatch DeadlineTimer; + while (Response && DeadlineTimer.GetElapsedTimeMs() < 5000) + { + Response = GcClient.Get("/admin/gc", HttpClient::Accept(HttpContentType::kCbObject)); + if (Response) + { + bool Complete = Response.AsObject()["Status"].AsString() != "Running"; + if (Complete) + { + GcCompleted = true; + break; + } + Sleep(50); + } + } + if (GcCompleted) + { + ZEN_INFO("GC for module {} completed in {}", ModuleId, NiceLatencyNs(GcTimer.GetElapsedTimeUs() * 1000)); + } + else + { + ZEN_WARN("GC for module {} did not complete after {}, proceeding with shutdown", + ModuleId, + NiceLatencyNs(GcTimer.GetElapsedTimeUs() * 1000)); + } + } Instance.Deprovision(); } catch (const std::exception& Ex) @@ -703,20 +932,7 @@ Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, si } NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, HubInstanceState::Unprovisioned, Port, {}); - Instance = {}; - - std::unique_ptr<StorageServerInstance> DeleteInstance; - { - RwLock::ExclusiveLockScope HubLock(m_Lock); - auto It = m_InstanceLookup.find(std::string(ModuleId)); - ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); - ZEN_ASSERT_SLOW(It->second == ActiveInstanceIndex); - DeleteInstance = 
std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); - m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); - m_InstanceLookup.erase(It); - UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); - } - DeleteInstance.reset(); + RemoveInstance(Instance, ActiveInstanceIndex, ModuleId); } Hub::Response @@ -989,6 +1205,46 @@ Hub::CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t Ac } } +void +Hub::RemoveInstance(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, std::string_view ModuleId) +{ + Instance = {}; + + std::unique_ptr<StorageServerInstance> DeleteInstance; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + auto It = m_InstanceLookup.find(std::string(ModuleId)); + ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(It->second == ActiveInstanceIndex); + DeleteInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + m_InstanceLookup.erase(It); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); + } + DeleteInstance.reset(); +} + +void +Hub::ObliterateBackendData(std::string_view ModuleId) +{ + std::filesystem::path ServerStateDir = m_RunEnvironment.GetChildBaseDir() / ModuleId; + std::filesystem::path TempDir = m_HydrationTempPath / ModuleId; + + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = TempDir, .ModuleId = std::string(ModuleId)}; + if (m_Config.OptionalHydrationWorkerPool) + { + Config.Threading.emplace(HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalHydrationWorkerPool, + .AbortFlag = &AbortFlag, + .PauseFlag = &PauseFlag}); + } + + std::unique_ptr<HydrationStrategyBase> Hydrator = m_Hydration->CreateHydrator(Config); + Hydrator->Obliterate(); +} + bool Hub::Find(std::string_view ModuleId, InstanceInfo* OutInstanceInfo) { 
@@ -1047,7 +1303,12 @@ Hub::GetInstanceCount() bool Hub::CanProvisionInstanceLocked(std::string_view ModuleId, std::string& OutReason) { - ZEN_UNUSED(ModuleId); + if (m_ObliteratingInstances.contains(std::string(ModuleId))) + { + OutReason = fmt::format("module '{}' is being obliterated", ModuleId); + return false; + } + if (m_FreeActiveInstanceIndexes.empty()) { OutReason = fmt::format("instance limit ({}) exceeded", m_Config.InstanceLimit); @@ -1083,6 +1344,21 @@ Hub::GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const return gsl::narrow<uint16_t>(m_Config.BasePortNumber + ActiveInstanceIndex); } +bool +Hub::IsInstancePort(uint16_t Port) const +{ + if (Port < m_Config.BasePortNumber) + { + return false; + } + size_t Index = Port - m_Config.BasePortNumber; + if (Index >= m_ActiveInstances.size()) + { + return false; + } + return m_ActiveInstances[Index].State.load(std::memory_order_relaxed) != HubInstanceState::Unprovisioned; +} + HubInstanceState Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState) { @@ -1093,11 +1369,13 @@ Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewS case HubInstanceState::Unprovisioned: return To == HubInstanceState::Provisioning; case HubInstanceState::Provisioned: - return To == HubInstanceState::Hibernating || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Crashed; + return To == HubInstanceState::Hibernating || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Crashed || + To == HubInstanceState::Obliterating; case HubInstanceState::Hibernated: - return To == HubInstanceState::Waking || To == HubInstanceState::Deprovisioning; + return To == HubInstanceState::Waking || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Obliterating; case HubInstanceState::Crashed: - return To == HubInstanceState::Provisioning || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Recovering; + return To == 
HubInstanceState::Provisioning || To == HubInstanceState::Deprovisioning || + To == HubInstanceState::Recovering || To == HubInstanceState::Obliterating; case HubInstanceState::Provisioning: return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned || To == HubInstanceState::Crashed; case HubInstanceState::Hibernating: @@ -1109,6 +1387,8 @@ Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewS To == HubInstanceState::Crashed; case HubInstanceState::Recovering: return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned; + case HubInstanceState::Obliterating: + return To == HubInstanceState::Unprovisioned || To == HubInstanceState::Crashed; } return false; }(m_ActiveInstances[ActiveInstanceIndex].State.load(), NewState)); @@ -1124,10 +1404,14 @@ Hub::AttemptRecoverInstance(std::string_view ModuleId) { StorageServerInstance::ExclusiveLockedPtr Instance; size_t ActiveInstanceIndex = (size_t)-1; - { RwLock::ExclusiveLockScope _(m_Lock); + if (m_ShutdownFlag.load()) + { + return; + } + auto It = m_InstanceLookup.find(std::string(ModuleId)); if (It == m_InstanceLookup.end()) { @@ -1351,7 +1635,7 @@ Hub::CheckInstanceStatus(HttpClient& ActivityCheckClient, } else { - // transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering) - expected, skip. + // transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering, Obliterating) - expected, skip. // Crashed is handled above via AttemptRecoverInstance; it appears here only when the instance // lock was busy on a previous cycle and recovery is already pending. 
return true; @@ -1511,6 +1795,14 @@ static const HttpClientSettings kFastTimeout{.ConnectTimeout = std::chrono::mill namespace hub_testutils { + struct TestHubPools + { + WorkerThreadPool ProvisionPool; + WorkerThreadPool HydrationPool; + + explicit TestHubPools(int ThreadCount) : ProvisionPool(ThreadCount, "hub_test_prov"), HydrationPool(ThreadCount, "hub_test_hydr") {} + }; + ZenServerEnvironment MakeHubEnvironment(const std::filesystem::path& BaseDir) { return ZenServerEnvironment(ZenServerEnvironment::Hub, GetRunningExecutablePath().parent_path(), BaseDir); @@ -1519,9 +1811,14 @@ namespace hub_testutils { std::unique_ptr<Hub> MakeHub(const std::filesystem::path& BaseDir, Hub::Configuration Config = {}, Hub::AsyncModuleStateChangeCallbackFunc StateChangeCallback = {}, - WorkerThreadPool* WorkerPool = nullptr) + TestHubPools* Pools = nullptr) { - return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), WorkerPool, std::move(StateChangeCallback)); + if (Pools) + { + Config.OptionalProvisionWorkerPool = &Pools->ProvisionPool; + Config.OptionalHydrationWorkerPool = &Pools->HydrationPool; + } + return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), std::move(StateChangeCallback)); } struct CallbackRecord @@ -1593,14 +1890,32 @@ namespace hub_testutils { } // namespace hub_testutils -TEST_CASE("hub.provision_basic") +TEST_CASE("hub.provision") { ScopedTemporaryDirectory TempDir; - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path()); + + struct TransitionRecord + { + HubInstanceState OldState; + HubInstanceState NewState; + }; + RwLock CaptureMutex; + std::vector<TransitionRecord> Transitions; + + hub_testutils::StateChangeCapture CaptureInstance; + + auto CaptureFunc = + [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info, HubInstanceState OldState, HubInstanceState NewState) { + CaptureMutex.WithExclusiveLock([&]() { Transitions.push_back({OldState, NewState}); }); + CaptureInstance.CaptureFunc()(ModuleId, 
Info, OldState, NewState); + }; + + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, std::move(CaptureFunc)); CHECK_EQ(HubInstance->GetInstanceCount(), 0); CHECK_FALSE(HubInstance->Find("module_a")); + // Provision HubProvisionedInstanceInfo Info; const Hub::Response ProvisionResult = HubInstance->Provision("module_a", Info); REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); @@ -1617,6 +1932,15 @@ TEST_CASE("hub.provision_basic") CHECK(ModClient.Get("/health/")); } + // Verify provision callback + { + RwLock::SharedLockScope _(CaptureInstance.CallbackMutex); + REQUIRE_EQ(CaptureInstance.ProvisionCallbacks.size(), 1u); + CHECK_EQ(CaptureInstance.ProvisionCallbacks[0].ModuleId, "module_a"); + CHECK_EQ(CaptureInstance.ProvisionCallbacks[0].Port, Info.Port); + } + + // Deprovision const Hub::Response DeprovisionResult = HubInstance->Deprovision("module_a"); CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); CHECK_EQ(HubInstance->GetInstanceCount(), 0); @@ -1626,6 +1950,28 @@ TEST_CASE("hub.provision_basic") HttpClient ModClient(fmt::format("http://localhost:{}", Info.Port), kFastTimeout); CHECK(!ModClient.Get("/health/")); } + + // Verify deprovision callback + { + RwLock::SharedLockScope _(CaptureInstance.CallbackMutex); + REQUIRE_EQ(CaptureInstance.DeprovisionCallbacks.size(), 1u); + CHECK_EQ(CaptureInstance.DeprovisionCallbacks[0].ModuleId, "module_a"); + CHECK_EQ(CaptureInstance.DeprovisionCallbacks[0].Port, Info.Port); + } + + // Verify full transition sequence + { + RwLock::SharedLockScope _(CaptureMutex); + REQUIRE_EQ(Transitions.size(), 4u); + CHECK_EQ(Transitions[0].OldState, HubInstanceState::Unprovisioned); + CHECK_EQ(Transitions[0].NewState, HubInstanceState::Provisioning); + CHECK_EQ(Transitions[1].OldState, HubInstanceState::Provisioning); + CHECK_EQ(Transitions[1].NewState, HubInstanceState::Provisioned); + CHECK_EQ(Transitions[2].OldState, 
HubInstanceState::Provisioned); + CHECK_EQ(Transitions[2].NewState, HubInstanceState::Deprovisioning); + CHECK_EQ(Transitions[3].OldState, HubInstanceState::Deprovisioning); + CHECK_EQ(Transitions[3].NewState, HubInstanceState::Unprovisioned); + } } TEST_CASE("hub.provision_config") @@ -1678,92 +2024,6 @@ TEST_CASE("hub.provision_config") } } -TEST_CASE("hub.provision_callbacks") -{ - ScopedTemporaryDirectory TempDir; - - hub_testutils::StateChangeCapture CaptureInstance; - - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, CaptureInstance.CaptureFunc()); - - HubProvisionedInstanceInfo Info; - - const Hub::Response ProvisionResult = HubInstance->Provision("cb_module", Info); - REQUIRE_MESSAGE(ProvisionResult.ResponseCode == Hub::EResponseCode::Completed, ProvisionResult.Message); - - { - RwLock::SharedLockScope _(CaptureInstance.CallbackMutex); - REQUIRE_EQ(CaptureInstance.ProvisionCallbacks.size(), 1u); - CHECK_EQ(CaptureInstance.ProvisionCallbacks[0].ModuleId, "cb_module"); - CHECK_EQ(CaptureInstance.ProvisionCallbacks[0].Port, Info.Port); - CHECK_NE(CaptureInstance.ProvisionCallbacks[0].Port, 0); - } - - { - HttpClient ModClient(fmt::format("http://localhost:{}", Info.Port), kFastTimeout); - CHECK(ModClient.Get("/health/")); - } - - const Hub::Response DeprovisionResult = HubInstance->Deprovision("cb_module"); - CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); - - { - HttpClient ModClient(fmt::format("http://localhost:{}", Info.Port), kFastTimeout); - CHECK(!ModClient.Get("/health/")); - } - - { - RwLock::SharedLockScope _(CaptureInstance.CallbackMutex); - REQUIRE_EQ(CaptureInstance.DeprovisionCallbacks.size(), 1u); - CHECK_EQ(CaptureInstance.DeprovisionCallbacks[0].ModuleId, "cb_module"); - CHECK_EQ(CaptureInstance.DeprovisionCallbacks[0].Port, Info.Port); - CHECK_EQ(CaptureInstance.DeprovisionCallbacks.size(), 1u); - } -} - -TEST_CASE("hub.provision_callback_sequence") -{ - ScopedTemporaryDirectory TempDir; - 
- struct TransitionRecord - { - HubInstanceState OldState; - HubInstanceState NewState; - }; - RwLock CaptureMutex; - std::vector<TransitionRecord> Transitions; - - auto CaptureFunc = - [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info, HubInstanceState OldState, HubInstanceState NewState) { - ZEN_UNUSED(ModuleId); - ZEN_UNUSED(Info); - CaptureMutex.WithExclusiveLock([&]() { Transitions.push_back({OldState, NewState}); }); - }; - - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, std::move(CaptureFunc)); - - HubProvisionedInstanceInfo Info; - { - const Hub::Response R = HubInstance->Provision("seq_module", Info); - REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); - } - { - const Hub::Response R = HubInstance->Deprovision("seq_module"); - REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); - } - - RwLock::SharedLockScope _(CaptureMutex); - REQUIRE_EQ(Transitions.size(), 4u); - CHECK_EQ(Transitions[0].OldState, HubInstanceState::Unprovisioned); - CHECK_EQ(Transitions[0].NewState, HubInstanceState::Provisioning); - CHECK_EQ(Transitions[1].OldState, HubInstanceState::Provisioning); - CHECK_EQ(Transitions[1].NewState, HubInstanceState::Provisioned); - CHECK_EQ(Transitions[2].OldState, HubInstanceState::Provisioned); - CHECK_EQ(Transitions[2].NewState, HubInstanceState::Deprovisioning); - CHECK_EQ(Transitions[3].OldState, HubInstanceState::Deprovisioning); - CHECK_EQ(Transitions[3].NewState, HubInstanceState::Unprovisioned); -} - TEST_CASE("hub.instance_limit") { ScopedTemporaryDirectory TempDir; @@ -1795,54 +2055,7 @@ TEST_CASE("hub.instance_limit") CHECK_EQ(HubInstance->GetInstanceCount(), 2); } -TEST_CASE("hub.enumerate_modules") -{ - ScopedTemporaryDirectory TempDir; - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path()); - - HubProvisionedInstanceInfo Info; - - { - const Hub::Response R = HubInstance->Provision("enum_a", Info); - 
REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); - } - { - const Hub::Response R = HubInstance->Provision("enum_b", Info); - REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); - } - - std::vector<std::string> Ids; - int ProvisionedCount = 0; - HubInstance->EnumerateModules([&](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) { - Ids.push_back(std::string(ModuleId)); - if (InstanceInfo.State == HubInstanceState::Provisioned) - { - ProvisionedCount++; - } - }); - CHECK_EQ(Ids.size(), 2u); - CHECK_EQ(ProvisionedCount, 2); - const bool FoundA = std::find(Ids.begin(), Ids.end(), "enum_a") != Ids.end(); - const bool FoundB = std::find(Ids.begin(), Ids.end(), "enum_b") != Ids.end(); - CHECK(FoundA); - CHECK(FoundB); - - HubInstance->Deprovision("enum_a"); - Ids.clear(); - ProvisionedCount = 0; - HubInstance->EnumerateModules([&](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) { - Ids.push_back(std::string(ModuleId)); - if (InstanceInfo.State == HubInstanceState::Provisioned) - { - ProvisionedCount++; - } - }); - REQUIRE_EQ(Ids.size(), 1u); - CHECK_EQ(Ids[0], "enum_b"); - CHECK_EQ(ProvisionedCount, 1); -} - -TEST_CASE("hub.max_instance_count") +TEST_CASE("hub.enumerate_and_instance_tracking") { ScopedTemporaryDirectory TempDir; std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path()); @@ -1852,22 +2065,56 @@ TEST_CASE("hub.max_instance_count") HubProvisionedInstanceInfo Info; { - const Hub::Response R = HubInstance->Provision("max_a", Info); + const Hub::Response R = HubInstance->Provision("track_a", Info); REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); } CHECK_GE(HubInstance->GetMaxInstanceCount(), 1); { - const Hub::Response R = HubInstance->Provision("max_b", Info); + const Hub::Response R = HubInstance->Provision("track_b", Info); REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); } 
CHECK_GE(HubInstance->GetMaxInstanceCount(), 2); + // Enumerate both modules + { + std::vector<std::string> Ids; + int ProvisionedCount = 0; + HubInstance->EnumerateModules([&](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) { + Ids.push_back(std::string(ModuleId)); + if (InstanceInfo.State == HubInstanceState::Provisioned) + { + ProvisionedCount++; + } + }); + CHECK_EQ(Ids.size(), 2u); + CHECK_EQ(ProvisionedCount, 2); + CHECK(std::find(Ids.begin(), Ids.end(), "track_a") != Ids.end()); + CHECK(std::find(Ids.begin(), Ids.end(), "track_b") != Ids.end()); + } + const int MaxAfterTwo = HubInstance->GetMaxInstanceCount(); - HubInstance->Deprovision("max_a"); + // Deprovision one - max instance count must not decrease + HubInstance->Deprovision("track_a"); CHECK_EQ(HubInstance->GetInstanceCount(), 1); CHECK_EQ(HubInstance->GetMaxInstanceCount(), MaxAfterTwo); + + // Enumerate after deprovision + { + std::vector<std::string> Ids; + int ProvisionedCount = 0; + HubInstance->EnumerateModules([&](std::string_view ModuleId, const Hub::InstanceInfo& InstanceInfo) { + Ids.push_back(std::string(ModuleId)); + if (InstanceInfo.State == HubInstanceState::Provisioned) + { + ProvisionedCount++; + } + }); + REQUIRE_EQ(Ids.size(), 1u); + CHECK_EQ(Ids[0], "track_b"); + CHECK_EQ(ProvisionedCount, 1); + } } TEST_CASE("hub.concurrent_callbacks") @@ -2013,7 +2260,7 @@ TEST_CASE("hub.job_object") } # endif // ZEN_PLATFORM_WINDOWS -TEST_CASE("hub.hibernate_wake") +TEST_CASE("hub.hibernate_wake_obliterate") { ScopedTemporaryDirectory TempDir; Hub::Configuration Config; @@ -2023,6 +2270,11 @@ TEST_CASE("hub.hibernate_wake") HubProvisionedInstanceInfo ProvInfo; Hub::InstanceInfo Info; + // Error cases on non-existent modules (no provision needed) + CHECK(HubInstance->Hibernate("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); + CHECK(HubInstance->Wake("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); + 
CHECK(HubInstance->Deprovision("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); + // Provision { const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo); @@ -2038,9 +2290,14 @@ TEST_CASE("hub.hibernate_wake") CHECK(ModClient.Get("/health/")); } + // Double-wake on provisioned module is idempotent + CHECK(HubInstance->Wake("hib_a").ResponseCode == Hub::EResponseCode::Completed); + // Hibernate - const Hub::Response HibernateResult = HubInstance->Hibernate("hib_a"); - REQUIRE_MESSAGE(HibernateResult.ResponseCode == Hub::EResponseCode::Completed, HibernateResult.Message); + { + const Hub::Response R = HubInstance->Hibernate("hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } REQUIRE(HubInstance->Find("hib_a", &Info)); CHECK_EQ(Info.State, HubInstanceState::Hibernated); const std::chrono::system_clock::time_point HibernatedTime = Info.StateChangeTime; @@ -2050,9 +2307,14 @@ TEST_CASE("hub.hibernate_wake") CHECK(!ModClient.Get("/health/")); } + // Double-hibernate on already-hibernated module is idempotent + CHECK(HubInstance->Hibernate("hib_a").ResponseCode == Hub::EResponseCode::Completed); + // Wake - const Hub::Response WakeResult = HubInstance->Wake("hib_a"); - REQUIRE_MESSAGE(WakeResult.ResponseCode == Hub::EResponseCode::Completed, WakeResult.Message); + { + const Hub::Response R = HubInstance->Wake("hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } REQUIRE(HubInstance->Find("hib_a", &Info)); CHECK_EQ(Info.State, HubInstanceState::Provisioned); CHECK_GE(Info.StateChangeTime, HibernatedTime); @@ -2061,57 +2323,63 @@ TEST_CASE("hub.hibernate_wake") CHECK(ModClient.Get("/health/")); } - // Deprovision - const Hub::Response DeprovisionResult = HubInstance->Deprovision("hib_a"); - CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); - CHECK_FALSE(HubInstance->Find("hib_a")); + // Hibernate again for obliterate-from-hibernated test { - 
HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); - CHECK(!ModClient.Get("/health/")); + const Hub::Response R = HubInstance->Hibernate("hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); } -} - -TEST_CASE("hub.hibernate_wake_errors") -{ - ScopedTemporaryDirectory TempDir; - Hub::Configuration Config; - Config.BasePortNumber = 22700; - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); - - HubProvisionedInstanceInfo ProvInfo; + REQUIRE(HubInstance->Find("hib_a", &Info)); + CHECK_EQ(Info.State, HubInstanceState::Hibernated); - // Hibernate/wake on a non-existent module - returns NotFound (-> 404) - CHECK(HubInstance->Hibernate("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); - CHECK(HubInstance->Wake("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); + // Obliterate from hibernated + { + const Hub::Response R = HubInstance->Obliterate("hib_a"); + CHECK(R.ResponseCode == Hub::EResponseCode::Completed); + } + CHECK_EQ(HubInstance->GetInstanceCount(), 0); + CHECK_FALSE(HubInstance->Find("hib_a")); - // Double-hibernate: second hibernate on already-hibernated module returns Completed (idempotent) + // Re-provision for obliterate-from-provisioned test { - const Hub::Response R = HubInstance->Provision("err_b", ProvInfo); + const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo); REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); } + REQUIRE(HubInstance->Find("hib_a", &Info)); + CHECK_EQ(Info.State, HubInstanceState::Provisioned); { - const Hub::Response R = HubInstance->Hibernate("err_b"); - REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); + CHECK(ModClient.Get("/health/")); } + // Obliterate from provisioned + { + const Hub::Response R = HubInstance->Obliterate("hib_a"); + 
CHECK(R.ResponseCode == Hub::EResponseCode::Completed); + } + CHECK_EQ(HubInstance->GetInstanceCount(), 0); + CHECK_FALSE(HubInstance->Find("hib_a")); { - const Hub::Response HibResp = HubInstance->Hibernate("err_b"); - CHECK(HibResp.ResponseCode == Hub::EResponseCode::Completed); + HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); + CHECK(!ModClient.Get("/health/")); } - // Wake on provisioned: succeeds (-> Provisioned), then wake again returns Completed (idempotent) + // Obliterate deprovisioned module (not tracked by hub, backend data may exist) { - const Hub::Response R = HubInstance->Wake("err_b"); + const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo); REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); } - { - const Hub::Response WakeResp = HubInstance->Wake("err_b"); - CHECK(WakeResp.ResponseCode == Hub::EResponseCode::Completed); + const Hub::Response R = HubInstance->Deprovision("hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + CHECK_FALSE(HubInstance->Find("hib_a")); + { + const Hub::Response R = HubInstance->Obliterate("hib_a"); + CHECK(R.ResponseCode == Hub::EResponseCode::Completed); } - // Deprovision not-found - returns NotFound (-> 404) - CHECK(HubInstance->Deprovision("never_provisioned").ResponseCode == Hub::EResponseCode::NotFound); + // Obliterate of a never-provisioned module also succeeds (no-op backend cleanup) + CHECK(HubInstance->Obliterate("never_existed").ResponseCode == Hub::EResponseCode::Completed); } TEST_CASE("hub.async_hibernate_wake") @@ -2121,8 +2389,8 @@ TEST_CASE("hub.async_hibernate_wake") Hub::Configuration Config; Config.BasePortNumber = 23000; - WorkerThreadPool WorkerPool(2, "hub_async_hib_wake"); - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + hub_testutils::TestHubPools Pools(2); + std::unique_ptr<Hub> HubInstance = 
hub_testutils::MakeHub(TempDir.Path(), Config, {}, &Pools); HubProvisionedInstanceInfo ProvInfo; Hub::InstanceInfo Info; @@ -2252,25 +2520,21 @@ TEST_CASE("hub.recover_process_crash") if (HubInstance->Find("module_a", &InstanceInfo) && InstanceInfo.State == HubInstanceState::Provisioned && ModClient.Get("/health/")) { - // Recovery must reuse the same port - the instance was never removed from the hub's - // port table during recovery, so AttemptRecoverInstance reuses m_Config.BasePort. CHECK_EQ(InstanceInfo.Port, Info.Port); Recovered = true; break; } } - CHECK_MESSAGE(Recovered, "Instance did not recover within timeout"); + REQUIRE_MESSAGE(Recovered, "Instance did not recover within timeout"); // Verify the full crash/recovery callback sequence { RwLock::SharedLockScope _(CaptureMutex); REQUIRE_GE(Transitions.size(), 3u); - // Find the Provisioned->Crashed transition const auto CrashedIt = std::find_if(Transitions.begin(), Transitions.end(), [](const TransitionRecord& R) { return R.OldState == HubInstanceState::Provisioned && R.NewState == HubInstanceState::Crashed; }); REQUIRE_NE(CrashedIt, Transitions.end()); - // Recovery sequence follows: Crashed->Recovering, Recovering->Provisioned const auto RecoveringIt = CrashedIt + 1; REQUIRE_NE(RecoveringIt, Transitions.end()); CHECK_EQ(RecoveringIt->OldState, HubInstanceState::Crashed); @@ -2280,44 +2544,6 @@ TEST_CASE("hub.recover_process_crash") CHECK_EQ(RecoveredIt->OldState, HubInstanceState::Recovering); CHECK_EQ(RecoveredIt->NewState, HubInstanceState::Provisioned); } -} - -TEST_CASE("hub.recover_process_crash_then_deprovision") -{ - ScopedTemporaryDirectory TempDir; - - // Fast watchdog cycle so crash detection is near-instant instead of waiting up to the 3s default. 
- Hub::Configuration Config; - Config.WatchDog.CycleInterval = std::chrono::milliseconds(10); - Config.WatchDog.InstanceCheckThrottle = std::chrono::milliseconds(1); - - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config); - - HubProvisionedInstanceInfo Info; - { - const Hub::Response R = HubInstance->Provision("module_a", Info); - REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); - } - - // Kill the child process, wait for the watchdog to detect and recover the instance. - HubInstance->TerminateModuleForTesting("module_a"); - - constexpr auto kPollIntervalMs = std::chrono::milliseconds(50); - constexpr auto kTimeoutMs = std::chrono::seconds(15); - const auto Deadline = std::chrono::steady_clock::now() + kTimeoutMs; - - bool Recovered = false; - while (std::chrono::steady_clock::now() < Deadline) - { - std::this_thread::sleep_for(kPollIntervalMs); - Hub::InstanceInfo InstanceInfo; - if (HubInstance->Find("module_a", &InstanceInfo) && InstanceInfo.State == HubInstanceState::Provisioned) - { - Recovered = true; - break; - } - } - REQUIRE_MESSAGE(Recovered, "Instance did not recover within timeout"); // After recovery, deprovision should succeed and a re-provision should work. 
{ @@ -2346,8 +2572,8 @@ TEST_CASE("hub.async_provision_concurrent") Config.BasePortNumber = 22800; Config.InstanceLimit = kModuleCount; - WorkerThreadPool WorkerPool(4, "hub_async_concurrent"); - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + hub_testutils::TestHubPools Pools(4); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &Pools); std::vector<HubProvisionedInstanceInfo> Infos(kModuleCount); std::vector<std::string> Reasons(kModuleCount); @@ -2428,8 +2654,8 @@ TEST_CASE("hub.async_provision_shutdown_waits") Config.InstanceLimit = kModuleCount; Config.BasePortNumber = 22900; - WorkerThreadPool WorkerPool(2, "hub_async_shutdown"); - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + hub_testutils::TestHubPools Pools(2); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &Pools); std::vector<HubProvisionedInstanceInfo> Infos(kModuleCount); @@ -2461,8 +2687,8 @@ TEST_CASE("hub.async_provision_rejected") Config.InstanceLimit = 1; Config.BasePortNumber = 23100; - WorkerThreadPool WorkerPool(2, "hub_async_rejected"); - std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &WorkerPool); + hub_testutils::TestHubPools Pools(2); + std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, {}, &Pools); HubProvisionedInstanceInfo Info; @@ -2550,12 +2776,12 @@ TEST_CASE("hub.instance.inactivity.deprovision") // Phase 1: immediately after setup all three instances must still be alive. // No timeout has elapsed yet (only 100ms have passed). 
- CHECK_MESSAGE(HubInstance->Find("idle"), "idle was deprovisioned within 100ms - its 2s provisioned timeout has not elapsed"); + CHECK_MESSAGE(HubInstance->Find("idle"), "idle was deprovisioned within 100ms - its 4s provisioned timeout has not elapsed"); CHECK_MESSAGE(HubInstance->Find("idle_hib"), "idle_hib was deprovisioned within 100ms - its 1s hibernated timeout has not elapsed"); CHECK_MESSAGE(HubInstance->Find("persistent"), - "persistent was deprovisioned within 100ms - its 2s provisioned timeout has not elapsed"); + "persistent was deprovisioned within 100ms - its 4s provisioned timeout has not elapsed"); // Phase 2: idle_hib must be deprovisioned by the watchdog within its 1s hibernated timeout. // idle must remain alive - its 4s provisioned timeout has not elapsed yet. @@ -2579,7 +2805,7 @@ TEST_CASE("hub.instance.inactivity.deprovision") CHECK_MESSAGE(!HubInstance->Find("idle_hib"), "idle_hib should still be gone - it was deprovisioned in phase 2"); - CHECK_MESSAGE(!HubInstance->Find("idle"), "idle should be gone after its 3s provisioned timeout elapsed"); + CHECK_MESSAGE(!HubInstance->Find("idle"), "idle should be gone after its 4s provisioned timeout elapsed"); CHECK_MESSAGE(HubInstance->Find("persistent"), "persistent was incorrectly deprovisioned - its activity timer was reset by PokeInstance"); diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h index ac3e680ae..40d046ce0 100644 --- a/src/zenserver/hub/hub.h +++ b/src/zenserver/hub/hub.h @@ -18,6 +18,7 @@ #include <memory> #include <thread> #include <unordered_map> +#include <unordered_set> namespace zen { @@ -66,6 +67,10 @@ public: uint32_t InstanceHttpThreadCount = 0; // Automatic int InstanceCoreLimit = 0; // Automatic + std::string InstanceMalloc; + std::string InstanceTrace; + std::string InstanceTraceHost; + std::string InstanceTraceFile; std::filesystem::path InstanceConfigPath; std::string HydrationTargetSpecification; CbObject HydrationOptions; @@ -73,6 +78,9 @@ public: 
WatchDogConfiguration WatchDog; ResourceMetrics ResourceLimits; + + WorkerThreadPool* OptionalProvisionWorkerPool = nullptr; + WorkerThreadPool* OptionalHydrationWorkerPool = nullptr; }; typedef std::function< @@ -81,7 +89,6 @@ public: Hub(const Configuration& Config, ZenServerEnvironment&& RunEnvironment, - WorkerThreadPool* OptionalWorkerPool = nullptr, AsyncModuleStateChangeCallbackFunc&& ModuleStateChangeCallback = {}); ~Hub(); @@ -131,6 +138,14 @@ public: Response Deprovision(const std::string& ModuleId); /** + * Obliterate a storage server instance and all associated data. + * Shuts down the process, deletes backend hydration data, and cleans local state. + * + * @param ModuleId The ID of the module to obliterate. + */ + Response Obliterate(const std::string& ModuleId); + + /** * Hibernate a storage server instance for the given module ID. * The instance is shut down but its data is preserved; it can be woken later. * @@ -167,6 +182,8 @@ public: void GetMachineMetrics(SystemMetrics& OutSystemMetrict, DiskSpace& OutDiskSpace) const; + bool IsInstancePort(uint16_t Port) const; + const Configuration& GetConfig() const { return m_Config; } #if ZEN_WITH_TESTS @@ -182,9 +199,8 @@ private: AsyncModuleStateChangeCallbackFunc m_ModuleStateChangeCallback; - std::string m_HydrationTargetSpecification; - CbObject m_HydrationOptions; - std::filesystem::path m_HydrationTempPath; + std::unique_ptr<HydrationBase> m_Hydration; + std::filesystem::path m_HydrationTempPath; #if ZEN_PLATFORM_WINDOWS JobObject m_JobObject; @@ -253,12 +269,13 @@ private: } HubInstanceState UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState); - std::vector<ActiveInstance> m_ActiveInstances; - std::deque<size_t> m_FreeActiveInstanceIndexes; - SystemMetrics m_SystemMetrics; - DiskSpace m_DiskSpace; - std::atomic<int> m_MaxInstanceCount = 0; - std::thread m_WatchDog; + std::vector<ActiveInstance> m_ActiveInstances; + std::deque<size_t> m_FreeActiveInstanceIndexes; + 
SystemMetrics m_SystemMetrics; + DiskSpace m_DiskSpace; + std::atomic<int> m_MaxInstanceCount = 0; + std::thread m_WatchDog; + std::unordered_set<std::string> m_ObliteratingInstances; Event m_WatchDogEvent; void WatchDog(); @@ -276,9 +293,12 @@ private: size_t ActiveInstanceIndex, HubInstanceState OldState, bool IsNewInstance); - void CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex); - void CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); - void CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void CompleteObliterate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex); + void CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void RemoveInstance(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, std::string_view ModuleId); + void ObliterateBackendData(std::string_view ModuleId); // Notifications may fire slightly out of sync with the Hub's internal State flag. 
// The guarantee is that notifications are sent in the correct order, but the State diff --git a/src/zenserver/hub/hubinstancestate.cpp b/src/zenserver/hub/hubinstancestate.cpp index c47fdd294..310305e5d 100644 --- a/src/zenserver/hub/hubinstancestate.cpp +++ b/src/zenserver/hub/hubinstancestate.cpp @@ -29,6 +29,8 @@ ToString(HubInstanceState State) return "crashed"; case HubInstanceState::Recovering: return "recovering"; + case HubInstanceState::Obliterating: + return "obliterating"; } ZEN_ASSERT(false); return "unknown"; diff --git a/src/zenserver/hub/hubinstancestate.h b/src/zenserver/hub/hubinstancestate.h index c895f75d1..c7188aa5c 100644 --- a/src/zenserver/hub/hubinstancestate.h +++ b/src/zenserver/hub/hubinstancestate.h @@ -20,7 +20,8 @@ enum class HubInstanceState : uint32_t Hibernating, // Provisioned -> Hibernated (Shutting down process, preserving data on disk) Waking, // Hibernated -> Provisioned (Starting process from preserved data) Deprovisioning, // Provisioned/Hibernated/Crashed -> Unprovisioned (Shutting down process and cleaning up data) - Recovering, // Crashed -> Provisioned/Deprovisioned (Attempting in-place restart after a crash) + Recovering, // Crashed -> Provisioned/Unprovisioned (Attempting in-place restart after a crash) + Obliterating, // Provisioned/Hibernated/Crashed -> Unprovisioned (Destroying all local and backend data) }; std::string_view ToString(HubInstanceState State); diff --git a/src/zenserver/hub/hydration.cpp b/src/zenserver/hub/hydration.cpp index ed16bfe56..c7f25bab6 100644 --- a/src/zenserver/hub/hydration.cpp +++ b/src/zenserver/hub/hydration.cpp @@ -5,24 +5,28 @@ #include <zencore/basicfile.h> #include <zencore/compactbinary.h> #include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryutil.h> +#include <zencore/compress.h> #include <zencore/except_fmt.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/parallelwork.h> +#include 
<zencore/stream.h> #include <zencore/system.h> +#include <zencore/thread.h> #include <zencore/timer.h> #include <zenutil/cloud/imdscredentials.h> #include <zenutil/cloud/s3client.h> +#include <zenutil/filesystemutils.h> -ZEN_THIRD_PARTY_INCLUDES_START -#include <json11.hpp> -ZEN_THIRD_PARTY_INCLUDES_END +#include <numeric> +#include <unordered_map> +#include <unordered_set> #if ZEN_WITH_TESTS -# include <zencore/parallelwork.h> # include <zencore/testing.h> # include <zencore/testutils.h> -# include <zencore/thread.h> # include <zencore/workthreadpool.h> # include <zenutil/cloud/minioprocess.h> # include <cstring> @@ -30,7 +34,7 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { -namespace { +namespace hydration_impl { /// UTC time decomposed to calendar fields with sub-second milliseconds. struct UtcTime @@ -56,597 +60,1343 @@ namespace { } }; -} // namespace + std::filesystem::path FastRelativePath(const std::filesystem::path& Root, const std::filesystem::path& Abs) + { + auto [_, ItAbs] = std::mismatch(Root.begin(), Root.end(), Abs.begin(), Abs.end()); + std::filesystem::path RelativePath; + for (auto I = ItAbs; I != Abs.end(); I++) + { + RelativePath = RelativePath / *I; + } + return RelativePath; + } -/////////////////////////////////////////////////////////////////////////// + void CleanDirectory(WorkerThreadPool& WorkerPool, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + const std::filesystem::path& Path) + { + CleanDirectory(WorkerPool, AbortFlag, PauseFlag, Path, std::vector<std::string>{}, {}, 0); + } -constexpr std::string_view FileHydratorPrefix = "file://"; -constexpr std::string_view FileHydratorType = "file"; + /////////////////////////////////////////////////////////////////////// + // Hydration / dehydration statistics. Atomics so they are safe to update + // from parallel worker lambdas. Summary is emitted once after the operation + // completes (success or failure). 
-struct FileHydrator : public HydrationStrategyBase -{ - virtual void Configure(const HydrationConfig& Config) override; - virtual void Hydrate() override; - virtual void Dehydrate() override; + struct PhaseStats + { + std::atomic<uint64_t> Files{0}; // host-side: count of work scheduled in this phase + std::atomic<uint64_t> Bytes{0}; // lambda-side: bytes transferred on successful completion + std::atomic<uint64_t> ElapsedUs{0}; // wall time around Work.Wait() -private: - HydrationConfig m_Config; - std::filesystem::path m_StorageModuleRootDir; -}; + RwLock ThreadIdsLock; + std::unordered_set<int> ThreadIds; -void -FileHydrator::Configure(const HydrationConfig& Config) -{ - m_Config = Config; + void RecordThread() + { + int Tid = zen::GetCurrentThreadId(); + ThreadIdsLock.WithExclusiveLock([&] { ThreadIds.insert(Tid); }); + } + }; - std::filesystem::path ConfigPath; - if (!m_Config.TargetSpecification.empty()) + struct DehydrateStatistics { - ConfigPath = Utf8ToWide(m_Config.TargetSpecification.substr(FileHydratorPrefix.length())); - } - else + PhaseStats Hash; + PhaseStats Upload; + PhaseStats Touch; // Touch shares Upload's ParallelWork / ElapsedUs + + std::atomic<uint64_t> LoadStateUs{0}; + std::atomic<uint64_t> DirScanUs{0}; + std::atomic<uint64_t> ListExistingUs{0}; + std::atomic<uint64_t> MetadataSaveUs{0}; + std::atomic<uint64_t> CleanUs{0}; + + std::atomic<uint64_t> TotalFiles{0}; + std::atomic<uint64_t> TotalBytes{0}; + std::atomic<uint64_t> TotalUs{0}; + }; + + struct HydrateStatistics { - CbObjectView Settings = m_Config.Options["settings"].AsObjectView(); - std::string_view Path = Settings["path"].AsString(); - if (Path.empty()) - { - throw zen::runtime_error("Hydration config 'file' type requires 'settings.path'"); - } - ConfigPath = Utf8ToWide(std::string(Path)); - } - MakeSafeAbsolutePathInPlace(ConfigPath); + PhaseStats Download; - if (!std::filesystem::exists(ConfigPath)) + std::atomic<uint64_t> LoadMetadataUs{0}; + std::atomic<uint64_t> 
CleanUs{0}; + std::atomic<uint64_t> RenameOrCopyUs{0}; + std::atomic<uint64_t> VerifyScanUs{0}; + + std::atomic<uint64_t> TotalFiles{0}; + std::atomic<uint64_t> TotalBytes{0}; + std::atomic<uint64_t> TotalUs{0}; + }; + + // Bits-per-second rate computed at microsecond precision. Zero-safe. + inline uint64_t BitsPerSecond(uint64_t Bytes, uint64_t ElapsedUs) { - throw std::invalid_argument(fmt::format("Target does not exist: '{}'", ConfigPath.string())); + if (ElapsedUs == 0) + { + return 0; + } + return Bytes * 8 * 1'000'000ull / ElapsedUs; } - m_StorageModuleRootDir = ConfigPath / m_Config.ModuleId; + /////////////////////////////////////////////////////////////////////// + // Per-module storage interface driven by IncrementalHydrator. + + class StorageBase + { + public: + virtual ~StorageBase() = default; + + virtual std::string Describe() const = 0; + virtual void SaveMetadata(const CbObject& Data) = 0; + virtual CbObject LoadMetadata() = 0; + virtual CbObject GetSettings() = 0; + virtual void ParseSettings(const CbObjectView& Settings) = 0; + virtual std::vector<IoHash> List() = 0; + virtual void Put(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& SourcePath, + PhaseStats& Stats) = 0; + virtual void Get(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& DestinationPath, + PhaseStats& Stats) = 0; + virtual void Touch(ParallelWork& Work, WorkerThreadPool& WorkerPool, const IoHash& Hash, PhaseStats& Stats) = 0; + virtual void Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) = 0; + }; - CreateDirectories(m_StorageModuleRootDir); -} + class FileStorage : public StorageBase + { + public: + static constexpr std::string_view Prefix = "file://"; + static constexpr std::string_view Type = "file"; + + explicit FileStorage(std::filesystem::path ModulePath); + + virtual std::string Describe() const override { return 
fmt::format("file://{}", m_StoragePath.generic_string()); } + virtual void SaveMetadata(const CbObject& Data) override; + virtual CbObject LoadMetadata() override; + virtual CbObject GetSettings() override { return {}; } + virtual void ParseSettings(const CbObjectView&) override {} + virtual std::vector<IoHash> List() override; + virtual void Put(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& SourcePath, + PhaseStats& Stats) override; + virtual void Get(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& DestinationPath, + PhaseStats& Stats) override; + virtual void Touch(ParallelWork&, WorkerThreadPool&, const IoHash&, PhaseStats&) override {} + virtual void Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) override; + + private: + std::filesystem::path m_StoragePath; + std::filesystem::path m_StatePathName; + std::filesystem::path m_CASPath; + }; -void -FileHydrator::Hydrate() -{ - ZEN_INFO("Hydrating state from '{}' to '{}'", m_StorageModuleRootDir, m_Config.ServerStateDir); + class S3Storage : public StorageBase + { + public: + static constexpr std::string_view Prefix = "s3://"; + static constexpr std::string_view Type = "s3"; + static constexpr uint64_t DefaultMultipartChunkSize = 32u * 1024u * 1024u; + + S3Storage(S3Client& Client, std::string KeyPrefix, std::filesystem::path TempDir, uint64_t MultipartChunkSize); + + virtual std::string Describe() const override { return fmt::format("s3://{}/{}", m_Client.BucketName(), m_KeyPrefix); } + virtual void SaveMetadata(const CbObject& Data) override; + virtual CbObject LoadMetadata() override; + virtual CbObject GetSettings() override; + virtual void ParseSettings(const CbObjectView& Settings) override; + virtual std::vector<IoHash> List() override; + virtual void Put(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const 
std::filesystem::path& SourcePath, + PhaseStats& Stats) override; + virtual void Get(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& DestinationPath, + PhaseStats& Stats) override; + virtual void Touch(ParallelWork& Work, WorkerThreadPool& WorkerPool, const IoHash& Hash, PhaseStats& Stats) override; + virtual void Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) override; + + private: + S3Client& m_Client; + std::string m_KeyPrefix; + std::filesystem::path m_TempDir; + uint64_t m_MultipartChunkSize; + }; - Stopwatch Timer; + /////////////////////////////////////////////////////////////////////// + // FileStorage implementations - // Ensure target is clean - ZEN_DEBUG("Wiping server state at '{}'", m_Config.ServerStateDir); - const bool ForceRemoveReadOnlyFiles = true; - CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); + FileStorage::FileStorage(std::filesystem::path ModulePath) : m_StoragePath(std::move(ModulePath)) + { + MakeSafeAbsolutePathInPlace(m_StoragePath); + m_StatePathName = m_StoragePath / "current-state.cbo"; + m_CASPath = m_StoragePath / "cas"; + CreateDirectories(m_CASPath); + } - bool WipeServerState = false; + void FileStorage::SaveMetadata(const CbObject& Data) + { + BinaryWriter Output; + SaveCompactBinary(Output, Data); + WriteFile(m_StatePathName, IoBuffer(IoBuffer::Wrap, Output.GetData(), Output.GetSize())); + } - try + CbObject FileStorage::LoadMetadata() { - ZEN_DEBUG("Copying '{}' to '{}'", m_StorageModuleRootDir, m_Config.ServerStateDir); - CopyTree(m_StorageModuleRootDir, m_Config.ServerStateDir, {.EnableClone = true}); + if (!IsFile(m_StatePathName)) + { + return {}; + } + FileContents Content = ReadFile(m_StatePathName); + if (Content.ErrorCode) + { + ThrowSystemError(Content.ErrorCode.value(), "Failed to read state file"); + } + IoBuffer Payload = Content.Flatten(); + CbValidateError Error; + CbObject Result = 
ValidateAndReadCompactBinaryObject(std::move(Payload), Error); + if (Error != CbValidateError::None) + { + throw std::runtime_error(fmt::format("Failed to read {} state file. Reason: {}", m_StatePathName, ToString(Error))); + } + return Result; } - catch (std::exception& Ex) + + std::vector<IoHash> FileStorage::List() { - ZEN_WARN("Copy failed: {}. Will wipe any partially copied state from '{}'", Ex.what(), m_Config.ServerStateDir); + DirectoryContent DirContent; + GetDirectoryContent(m_CASPath, DirectoryContentFlags::IncludeFiles, DirContent); + std::vector<IoHash> Result; + Result.reserve(DirContent.Files.size()); + for (const std::filesystem::path& Path : DirContent.Files) + { + IoHash Hash; + if (IoHash::TryParse(Path.filename().string(), Hash)) + { + Result.push_back(Hash); + } + } + return Result; + } + + void FileStorage::Put(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& SourcePath, + PhaseStats& Stats) + { + Work.ScheduleWork( + WorkerPool, + [this, Hash = IoHash(Hash), Size, SourcePath = std::filesystem::path(SourcePath), &Stats](std::atomic<bool>& AbortFlag) { + Stats.RecordThread(); + if (!AbortFlag.load()) + { + std::filesystem::path DestPath = m_CASPath / fmt::format("{}", Hash); + if (std::error_code Ec = CopyFile(SourcePath, DestPath, CopyFileOptions{.EnableClone = true}); Ec) + { + throw std::system_error(Ec, fmt::format("Failed to copy '{}' to '{}'", SourcePath, DestPath)); + } + Stats.Bytes.fetch_add(Size, std::memory_order_relaxed); + } + }); + } - // We don't do the clean right here to avoid potentially running into double-throws - WipeServerState = true; + void FileStorage::Get(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& DestinationPath, + PhaseStats& Stats) + { + Work.ScheduleWork(WorkerPool, + [this, Hash = IoHash(Hash), Size, DestinationPath = std::filesystem::path(DestinationPath), 
&Stats]( + std::atomic<bool>& AbortFlag) { + Stats.RecordThread(); + if (!AbortFlag.load()) + { + std::filesystem::path SourcePath = m_CASPath / fmt::format("{}", Hash); + if (std::error_code Ec = CopyFile(SourcePath, DestinationPath, CopyFileOptions{.EnableClone = true}); Ec) + { + throw std::system_error(Ec, fmt::format("Failed to copy '{}' to '{}'", SourcePath, DestinationPath)); + } + Stats.Bytes.fetch_add(Size, std::memory_order_relaxed); + } + }); } - if (WipeServerState) + void FileStorage::Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) { - ZEN_DEBUG("Cleaning server state '{}'", m_Config.ServerStateDir); - CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); + ZEN_UNUSED(Work); + ZEN_UNUSED(WorkerPool); + DeleteDirectories(m_StoragePath); } - else + + /////////////////////////////////////////////////////////////////////// + // S3Storage implementations + + S3Storage::S3Storage(S3Client& Client, std::string KeyPrefix, std::filesystem::path TempDir, uint64_t MultipartChunkSize) + : m_Client(Client) + , m_KeyPrefix(std::move(KeyPrefix)) + , m_TempDir(std::move(TempDir)) + , m_MultipartChunkSize(MultipartChunkSize) { - ZEN_INFO("Hydration complete in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } -} -void -FileHydrator::Dehydrate() -{ - ZEN_INFO("Dehydrating state from '{}' to '{}'", m_Config.ServerStateDir, m_StorageModuleRootDir); + void S3Storage::SaveMetadata(const CbObject& Data) + { + BinaryWriter Output; + SaveCompactBinary(Output, Data); + IoBuffer Payload(IoBuffer::Clone, Output.GetData(), Output.GetSize()); - Stopwatch Timer; + std::string Key = m_KeyPrefix + "/incremental-state.cbo"; + S3Result Result = m_Client.PutObject(Key, std::move(Payload)); + if (!Result.IsSuccess()) + { + throw zen::runtime_error("Failed to save incremental metadata to '{}': {}", Key, Result.Error); + } + } - const std::filesystem::path TargetDir = m_StorageModuleRootDir; + CbObject S3Storage::LoadMetadata() + { + std::string Key = m_KeyPrefix + 
"/incremental-state.cbo"; + S3GetObjectResult Result = m_Client.GetObject(Key); + if (!Result.IsSuccess()) + { + if (Result.Error == S3GetObjectResult::NotFoundErrorText) + { + return {}; + } + throw zen::runtime_error("Failed to load incremental metadata from '{}': {}", Key, Result.Error); + } - // Ensure target is clean. This could be replaced with an atomic copy at a later date - // (i.e copy into a temporary directory name and rename it once complete) + CbValidateError Error; + CbObject Meta = ValidateAndReadCompactBinaryObject(std::move(Result.Content), Error); + if (Error != CbValidateError::None) + { + throw zen::runtime_error("Failed to parse incremental metadata from '{}': {}", Key, ToString(Error)); + } + return Meta; + } - ZEN_DEBUG("Cleaning storage root '{}'", TargetDir); - const bool ForceRemoveReadOnlyFiles = true; - CleanDirectory(TargetDir, ForceRemoveReadOnlyFiles); + CbObject S3Storage::GetSettings() + { + CbObjectWriter Writer; + Writer << "MultipartChunkSize" << m_MultipartChunkSize; + return Writer.Save(); + } - bool CopySuccess = true; + void S3Storage::ParseSettings(const CbObjectView& Settings) + { + m_MultipartChunkSize = Settings["MultipartChunkSize"].AsUInt64(S3Storage::DefaultMultipartChunkSize); + } - try + std::vector<IoHash> S3Storage::List() { - ZEN_DEBUG("Copying '{}' to '{}'", m_Config.ServerStateDir, TargetDir); - for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(m_Config.ServerStateDir)) + std::string CasPrefix = m_KeyPrefix + "/cas/"; + S3ListObjectsResult Result = m_Client.ListObjects(CasPrefix); + if (!Result.IsSuccess()) + { + throw zen::runtime_error("Failed to list S3 objects under '{}': {}", CasPrefix, Result.Error); + } + + std::vector<IoHash> Hashes; + Hashes.reserve(Result.Objects.size()); + for (const S3ObjectInfo& Obj : Result.Objects) { - if (Entry.path().filename() == ".sentry-native") + size_t LastSlash = Obj.Key.rfind('/'); + if (LastSlash == std::string::npos) { continue; } 
- std::filesystem::path Dest = TargetDir / Entry.path().filename(); - if (Entry.is_directory()) + IoHash Hash; + if (IoHash::TryParse(Obj.Key.substr(LastSlash + 1), Hash)) { - CreateDirectories(Dest); - CopyTree(Entry.path(), Dest, {.EnableClone = true}); - } - else - { - CopyFile(Entry.path(), Dest, {.EnableClone = true}); + Hashes.push_back(Hash); } } + return Hashes; } - catch (std::exception& Ex) - { - ZEN_WARN("Copy failed: {}. Will wipe any partially copied state from '{}'", Ex.what(), m_StorageModuleRootDir); - // We don't do the clean right here to avoid potentially running into double-throws - CopySuccess = false; - } + void S3Storage::Put(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& SourcePath, + PhaseStats& Stats) + { + Work.ScheduleWork( + WorkerPool, + [this, Hash = IoHash(Hash), Size, SourcePath = std::filesystem::path(SourcePath), &Stats](std::atomic<bool>& AbortFlag) { + Stats.RecordThread(); + if (AbortFlag.load()) + { + return; + } + S3Client& Client = m_Client; + std::string Key = m_KeyPrefix + "/cas/" + fmt::format("{}", Hash); - if (!CopySuccess) - { - ZEN_DEBUG("Removing partially copied state from '{}'", TargetDir); - CleanDirectory(TargetDir, ForceRemoveReadOnlyFiles); + if (Size >= (m_MultipartChunkSize + (m_MultipartChunkSize / 4))) + { + BasicFile File(SourcePath, BasicFile::Mode::kRead); + S3Result Result = Client.PutObjectMultipart( + Key, + Size, + [&File](uint64_t Offset, uint64_t ChunkSize) { return File.ReadRange(Offset, ChunkSize); }, + m_MultipartChunkSize); + if (!Result.IsSuccess()) + { + throw zen::runtime_error("Failed to upload '{}' to S3: {}", Key, Result.Error); + } + } + else + { + BasicFile File(SourcePath, BasicFile::Mode::kRead); + S3Result Result = Client.PutObject(Key, File.ReadAll()); + if (!Result.IsSuccess()) + { + throw zen::runtime_error("Failed to upload '{}' to S3: {}", Key, Result.Error); + } + } + Stats.Bytes.fetch_add(Size, 
std::memory_order_relaxed); + }); } - ZEN_DEBUG("Wiping server state '{}'", m_Config.ServerStateDir); - CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); - - if (CopySuccess) + void S3Storage::Get(ParallelWork& Work, + WorkerThreadPool& WorkerPool, + const IoHash& Hash, + uint64_t Size, + const std::filesystem::path& DestinationPath, + PhaseStats& Stats) { - ZEN_INFO("Dehydration complete in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - } -} + std::string Key = m_KeyPrefix + "/cas/" + fmt::format("{}", Hash); -/////////////////////////////////////////////////////////////////////////// + if (Size >= (m_MultipartChunkSize + (m_MultipartChunkSize / 4))) + { + class WorkData + { + public: + WorkData(const std::filesystem::path& DestPath, uint64_t Size) : m_DestFile(DestPath, BasicFile::Mode::kTruncate) + { + PrepareFileForScatteredWrite(m_DestFile.Handle(), Size); + } + ~WorkData() { m_DestFile.Flush(); } + void Write(const void* Data, uint64_t Size, uint64_t Offset) { m_DestFile.Write(Data, Size, Offset); } -constexpr std::string_view S3HydratorPrefix = "s3://"; -constexpr std::string_view S3HydratorType = "s3"; + private: + BasicFile m_DestFile; + }; -struct S3Hydrator : public HydrationStrategyBase -{ - void Configure(const HydrationConfig& Config) override; - void Dehydrate() override; - void Hydrate() override; + std::shared_ptr<WorkData> Data = std::make_shared<WorkData>(DestinationPath, Size); -private: - S3Client CreateS3Client() const; - std::string BuildTimestampFolderName() const; - std::string MakeObjectKey(std::string_view FolderName, const std::filesystem::path& RelPath) const; + uint64_t Offset = 0; + while (Offset < Size) + { + uint64_t ChunkSize = std::min<uint64_t>(m_MultipartChunkSize, Size - Offset); - HydrationConfig m_Config; - std::string m_Bucket; - std::string m_KeyPrefix; // "<user-prefix>/<ModuleId>" or just "<ModuleId>" - no trailing slash - std::string m_Region; - SigV4Credentials m_Credentials; - 
Ref<ImdsCredentialProvider> m_CredentialProvider; + Work.ScheduleWork(WorkerPool, [this, Key = Key, Offset, ChunkSize, Data, &Stats](std::atomic<bool>& AbortFlag) { + Stats.RecordThread(); + if (AbortFlag) + { + return; + } + S3GetObjectResult Chunk = m_Client.GetObjectRange(Key, Offset, ChunkSize); + if (!Chunk.IsSuccess()) + { + throw zen::runtime_error("Failed to download '{}' bytes [{}-{}] from S3: {}", + Key, + Offset, + Offset + ChunkSize - 1, + Chunk.Error); + } - static constexpr uint64_t MultipartChunkSize = 8 * 1024 * 1024; -}; + Data->Write(Chunk.Content.GetData(), Chunk.Content.GetSize(), Offset); + Stats.Bytes.fetch_add(ChunkSize, std::memory_order_relaxed); + }); + Offset += ChunkSize; + } + } + else + { + Work.ScheduleWork( + WorkerPool, + [this, Key = Key, Size, DestinationPath = std::filesystem::path(DestinationPath), &Stats](std::atomic<bool>& AbortFlag) { + Stats.RecordThread(); + if (AbortFlag) + { + return; + } + S3GetObjectResult Chunk = m_Client.GetObject(Key, m_TempDir); + if (!Chunk.IsSuccess()) + { + throw zen::runtime_error("Failed to download '{}' from S3: {}", Key, Chunk.Error); + } -void -S3Hydrator::Configure(const HydrationConfig& Config) -{ - m_Config = Config; + if (IoBufferFileReference FileRef; Chunk.Content.GetFileReference(FileRef)) + { + std::error_code Ec; + std::filesystem::path ChunkPath = PathFromHandle(FileRef.FileHandle, Ec); + if (Ec) + { + WriteFile(DestinationPath, Chunk.Content); + } + else + { + Chunk.Content.SetDeleteOnClose(false); + Chunk.Content = {}; + RenameFile(ChunkPath, DestinationPath, Ec); + if (Ec) + { + Chunk.Content = IoBufferBuilder::MakeFromFile(ChunkPath); + Chunk.Content.SetDeleteOnClose(true); + WriteFile(DestinationPath, Chunk.Content); + } + } + } + else + { + WriteFile(DestinationPath, Chunk.Content); + } + Stats.Bytes.fetch_add(Size, std::memory_order_relaxed); + }); + } + } - CbObjectView Settings = m_Config.Options["settings"].AsObjectView(); - std::string_view Spec; - if 
(!m_Config.TargetSpecification.empty()) + void S3Storage::Touch(ParallelWork& Work, WorkerThreadPool& WorkerPool, const IoHash& Hash, PhaseStats& Stats) { - Spec = m_Config.TargetSpecification; - Spec.remove_prefix(S3HydratorPrefix.size()); + Work.ScheduleWork(WorkerPool, [this, Hash = IoHash(Hash), &Stats](std::atomic<bool>& AbortFlag) { + Stats.RecordThread(); + if (AbortFlag.load()) + { + return; + } + std::string Key = m_KeyPrefix + "/cas/" + fmt::format("{}", Hash); + S3Result Result = m_Client.Touch(Key); + if (!Result.IsSuccess()) + { + throw zen::runtime_error("Failed to touch '{}' in S3: {}", Key, Result.Error); + } + }); } - else + + void S3Storage::Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) { - std::string_view Uri = Settings["uri"].AsString(); - if (Uri.empty()) + std::string ModulePrefix = m_KeyPrefix + "/"; + S3ListObjectsResult ListResult = m_Client.ListObjects(ModulePrefix); + if (!ListResult.IsSuccess()) { - throw zen::runtime_error("Hydration config 's3' type requires 'settings.uri'"); + throw zen::runtime_error("Failed to list S3 objects for deletion under '{}': {}", ModulePrefix, ListResult.Error); + } + for (const S3ObjectInfo& Obj : ListResult.Objects) + { + Work.ScheduleWork(WorkerPool, [this, Key = Obj.Key](std::atomic<bool>& AbortFlag) { + if (AbortFlag.load()) + { + return; + } + S3Result DelResult = m_Client.DeleteObject(Key); + if (!DelResult.IsSuccess()) + { + throw zen::runtime_error("Failed to delete S3 object '{}': {}", Key, DelResult.Error); + } + }); } - Spec = Uri; - Spec.remove_prefix(S3HydratorPrefix.size()); } - size_t SlashPos = Spec.find('/'); - std::string UserPrefix = SlashPos != std::string_view::npos ? std::string(Spec.substr(SlashPos + 1)) : std::string{}; - m_Bucket = std::string(SlashPos != std::string_view::npos ? Spec.substr(0, SlashPos) : Spec); - m_KeyPrefix = UserPrefix.empty() ? 
m_Config.ModuleId : UserPrefix + "/" + m_Config.ModuleId; - - ZEN_ASSERT(!m_Bucket.empty()); - - std::string Region = std::string(Settings["region"].AsString()); - if (Region.empty()) - { - Region = GetEnvVariable("AWS_DEFAULT_REGION"); + /////////////////////////////////////////////////////////////////////// + // IncrementalHydrator: the only HydrationStrategyBase implementation. + // Summary emission for hydrate/dehydrate operations. + + void LogDehydrateSummary(std::string_view Prefix, + const DehydrateStatistics& Stats, + std::string_view ModuleId, + const std::filesystem::path& Source, + std::string_view Target) + { + const uint64_t HashUs = Stats.Hash.ElapsedUs.load(); + const uint64_t UploadUs = Stats.Upload.ElapsedUs.load(); + ZEN_INFO( + "{} module '{}': {} files ({}) in {}\n" + " Source: {}\n" + " Target: {}\n" + " Load state: {}\n" + " Dir scan: {}\n" + " Hash phase: {} {}/{} ({}) hashed, {}bits/s, {} threads\n" + " List existing: {}\n" + " Upload phase: {} {}/{} ({}) uploaded, {} ({}) touched, {}bits/s, {} threads\n" + " Metadata save: {}\n" + " Clean: {}", + Prefix, + ModuleId, + ThousandsNum(Stats.TotalFiles.load()), + NiceBytes(Stats.TotalBytes.load()), + NiceLatencyNs(Stats.TotalUs.load() * 1000), + Source.generic_string(), + Target, + NiceLatencyNs(Stats.LoadStateUs.load() * 1000), + NiceLatencyNs(Stats.DirScanUs.load() * 1000), + NiceLatencyNs(HashUs * 1000), + ThousandsNum(Stats.Hash.Files.load()), + ThousandsNum(Stats.TotalFiles.load()), + NiceBytes(Stats.Hash.Bytes.load()), + NiceNum(BitsPerSecond(Stats.Hash.Bytes.load(), HashUs)), + Stats.Hash.ThreadIds.size(), + NiceLatencyNs(Stats.ListExistingUs.load() * 1000), + NiceLatencyNs(UploadUs * 1000), + ThousandsNum(Stats.Upload.Files.load()), + ThousandsNum(Stats.TotalFiles.load()), + NiceBytes(Stats.Upload.Bytes.load()), + ThousandsNum(Stats.Touch.Files.load()), + NiceBytes(Stats.Touch.Bytes.load()), + NiceNum(BitsPerSecond(Stats.Upload.Bytes.load(), UploadUs)), + Stats.Upload.ThreadIds.size(), + 
NiceLatencyNs(Stats.MetadataSaveUs.load() * 1000), + NiceLatencyNs(Stats.CleanUs.load() * 1000)); } - if (Region.empty()) - { - Region = GetEnvVariable("AWS_REGION"); - } - if (Region.empty()) - { - Region = "us-east-1"; - } - m_Region = std::move(Region); - std::string AccessKeyId = GetEnvVariable("AWS_ACCESS_KEY_ID"); - if (AccessKeyId.empty()) - { - m_CredentialProvider = Ref<ImdsCredentialProvider>(new ImdsCredentialProvider({})); + void LogHydrateSummary(std::string_view Prefix, + const HydrateStatistics& Stats, + std::string_view ModuleId, + std::string_view Source, + const std::filesystem::path& Target) + { + const uint64_t DownloadUs = Stats.Download.ElapsedUs.load(); + ZEN_INFO( + "{} module '{}': {} files ({}) in {}\n" + " Source: {}\n" + " Target: {}\n" + " Load metadata: {}\n" + " Download phase: {} {}/{} ({}) downloaded, {}bits/s, {} threads\n" + " Clean: {}\n" + " Rename/copy: {}\n" + " Verify scan: {}", + Prefix, + ModuleId, + ThousandsNum(Stats.TotalFiles.load()), + NiceBytes(Stats.TotalBytes.load()), + NiceLatencyNs(Stats.TotalUs.load() * 1000), + Source, + Target.generic_string(), + NiceLatencyNs(Stats.LoadMetadataUs.load() * 1000), + NiceLatencyNs(DownloadUs * 1000), + ThousandsNum(Stats.Download.Files.load()), + ThousandsNum(Stats.TotalFiles.load()), + NiceBytes(Stats.Download.Bytes.load()), + NiceNum(BitsPerSecond(Stats.Download.Bytes.load(), DownloadUs)), + Stats.Download.ThreadIds.size(), + NiceLatencyNs(Stats.CleanUs.load() * 1000), + NiceLatencyNs(Stats.RenameOrCopyUs.load() * 1000), + NiceLatencyNs(Stats.VerifyScanUs.load() * 1000)); } - else - { - m_Credentials.AccessKeyId = std::move(AccessKeyId); - m_Credentials.SecretAccessKey = GetEnvVariable("AWS_SECRET_ACCESS_KEY"); - m_Credentials.SessionToken = GetEnvVariable("AWS_SESSION_TOKEN"); - } -} -S3Client -S3Hydrator::CreateS3Client() const -{ - S3ClientOptions Options; - Options.BucketName = m_Bucket; - Options.Region = m_Region; + 
/////////////////////////////////////////////////////////////////////// + // Holds a per-module StorageBase and threading context; drives the + // hydrate/dehydrate algorithm. - CbObjectView Settings = m_Config.Options["settings"].AsObjectView(); - std::string_view Endpoint = Settings["endpoint"].AsString(); - if (!Endpoint.empty()) + class IncrementalHydrator : public HydrationStrategyBase { - Options.Endpoint = std::string(Endpoint); - Options.PathStyle = Settings["path-style"].AsBool(); - } + public: + IncrementalHydrator(const HydrationConfig& Config, std::unique_ptr<StorageBase> Storage); + virtual ~IncrementalHydrator() override; - if (m_CredentialProvider) - { - Options.CredentialProvider = m_CredentialProvider; - } - else - { - Options.Credentials = m_Credentials; - } + virtual void Dehydrate(const CbObject& CachedState) override; + virtual CbObject Hydrate() override; + virtual void Obliterate() override; - Options.HttpSettings.MaximumInMemoryDownloadSize = 16u * 1024u; + private: + struct Entry + { + std::filesystem::path RelativePath; + uint64_t Size; + uint64_t ModTick; + IoHash Hash; + }; - return S3Client(Options); -} + std::unique_ptr<StorageBase> m_Storage; + HydrationConfig m_Config; + WorkerThreadPool m_FallbackWorkPool; + std::atomic<bool> m_FallbackAbortFlag{false}; + std::atomic<bool> m_FallbackPauseFlag{false}; + HydrationConfig::ThreadingOptions m_Threading{.WorkerPool = &m_FallbackWorkPool, + .AbortFlag = &m_FallbackAbortFlag, + .PauseFlag = &m_FallbackPauseFlag}; + }; -std::string -S3Hydrator::BuildTimestampFolderName() const -{ - UtcTime Now = UtcTime::Now(); - return fmt::format("{:04d}{:02d}{:02d}-{:02d}{:02d}{:02d}-{:03d}", - Now.Tm.tm_year + 1900, - Now.Tm.tm_mon + 1, - Now.Tm.tm_mday, - Now.Tm.tm_hour, - Now.Tm.tm_min, - Now.Tm.tm_sec, - Now.Ms); -} + /////////////////////////////////////////////////////////////////////// + // IncrementalHydrator implementations -std::string -S3Hydrator::MakeObjectKey(std::string_view FolderName, 
const std::filesystem::path& RelPath) const -{ - return m_KeyPrefix + "/" + std::string(FolderName) + "/" + RelPath.generic_string(); -} + IncrementalHydrator::IncrementalHydrator(const HydrationConfig& Config, std::unique_ptr<StorageBase> Storage) + : m_Storage(std::move(Storage)) + , m_Config(Config) + , m_FallbackWorkPool(0) + { + if (Config.Threading) + { + m_Threading = *Config.Threading; + } + } -void -S3Hydrator::Dehydrate() -{ - ZEN_INFO("Dehydrating state from '{}' to s3://{}/{}", m_Config.ServerStateDir, m_Bucket, m_KeyPrefix); + IncrementalHydrator::~IncrementalHydrator() { m_Storage.reset(); } - try + void IncrementalHydrator::Dehydrate(const CbObject& CachedState) { - S3Client Client = CreateS3Client(); - std::string FolderName = BuildTimestampFolderName(); - uint64_t TotalBytes = 0; - uint32_t FileCount = 0; - Stopwatch Timer; + Stopwatch TotalTimer; + DehydrateStatistics Stats; + const std::string StorageTarget = m_Storage->Describe(); - DirectoryContent DirContent; - GetDirectoryContent(m_Config.ServerStateDir, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive, DirContent); - - for (const std::filesystem::path& AbsPath : DirContent.Files) + const std::filesystem::path ServerStateDir = MakeSafeAbsolutePath(m_Config.ServerStateDir); + try { - std::filesystem::path RelPath = AbsPath.lexically_relative(m_Config.ServerStateDir); - if (RelPath.empty() || *RelPath.begin() == "..") + std::unordered_map<std::string, size_t> StateEntryLookup; + std::vector<Entry> StateEntries; { - throw zen::runtime_error( - "lexically_relative produced a '..'-escape path for '{}' relative to '{}' - " - "path form mismatch (e.g. 
\\\\?\\ prefix on one but not the other)", - AbsPath.string(), - m_Config.ServerStateDir.string()); + Stopwatch LoadStateTimer; + for (CbFieldView FieldView : CachedState["Files"].AsArrayView()) + { + CbObjectView EntryView = FieldView.AsObjectView(); + std::filesystem::path RelativePath(EntryView["Path"].AsString()); + uint64_t Size = EntryView["Size"].AsUInt64(); + uint64_t ModTick = EntryView["ModTick"].AsUInt64(); + IoHash Hash = EntryView["Hash"].AsHash(); + + StateEntryLookup.insert_or_assign(RelativePath.generic_string(), StateEntries.size()); + StateEntries.push_back(Entry{.RelativePath = RelativePath, .Size = Size, .ModTick = ModTick, .Hash = Hash}); + } + Stats.LoadStateUs = LoadStateTimer.GetElapsedTimeUs(); } - if (*RelPath.begin() == ".sentry-native") + + DirectoryContent DirContent; { - continue; + Stopwatch DirScanTimer; + GetDirectoryContent(*m_Threading.WorkerPool, + ServerStateDir, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | + DirectoryContentFlags::IncludeFileSizes | DirectoryContentFlags::IncludeModificationTick, + DirContent); + Stats.DirScanUs = DirScanTimer.GetElapsedTimeUs(); } - std::string Key = MakeObjectKey(FolderName, RelPath); - BasicFile File(AbsPath, BasicFile::Mode::kRead); - uint64_t FileSize = File.FileSize(); + ZEN_INFO("Dehydrating module '{}' from folder '{}'. 
{} ({}) files", + m_Config.ModuleId, + m_Config.ServerStateDir, + DirContent.Files.size(), + NiceBytes(std::accumulate(DirContent.FileSizes.begin(), DirContent.FileSizes.end(), uint64_t(0)))); - S3Result UploadResult = Client.PutObjectMultipart( - Key, - FileSize, - [&File](uint64_t Offset, uint64_t Size) { return File.ReadRange(Offset, Size); }, - MultipartChunkSize); - if (!UploadResult.IsSuccess()) - { - throw zen::runtime_error("Failed to upload '{}' to S3: {}", Key, UploadResult.Error); - } + std::vector<Entry> Entries; + Entries.resize(DirContent.Files.size()); - TotalBytes += FileSize; - ++FileCount; - } + uint64_t TotalBytes = 0; + uint64_t TotalFiles = 0; - // Write current-state.json - uint64_t UploadDurationMs = Timer.GetElapsedTimeMs(); - - UtcTime Now = UtcTime::Now(); - std::string UploadTimeUtc = fmt::format("{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:03d}Z", - Now.Tm.tm_year + 1900, - Now.Tm.tm_mon + 1, - Now.Tm.tm_mday, - Now.Tm.tm_hour, - Now.Tm.tm_min, - Now.Tm.tm_sec, - Now.Ms); - - CbObjectWriter Meta; - Meta << "FolderName" << FolderName; - Meta << "ModuleId" << m_Config.ModuleId; - Meta << "HostName" << GetMachineName(); - Meta << "UploadTimeUtc" << UploadTimeUtc; - Meta << "UploadDurationMs" << UploadDurationMs; - Meta << "TotalSizeBytes" << TotalBytes; - Meta << "FileCount" << FileCount; - - ExtendableStringBuilder<1024> JsonBuilder; - Meta.Save().ToJson(JsonBuilder); - - std::string MetaKey = m_KeyPrefix + "/current-state.json"; - std::string_view JsonText = JsonBuilder.ToView(); - IoBuffer MetaBuf(IoBuffer::Clone, JsonText.data(), JsonText.size()); - S3Result MetaUploadResult = Client.PutObject(MetaKey, std::move(MetaBuf)); - if (!MetaUploadResult.IsSuccess()) - { - throw zen::runtime_error("Failed to write current-state.json to '{}': {}", MetaKey, MetaUploadResult.Error); - } + std::unordered_set<IoHash> ExistsLookup; - ZEN_INFO("Dehydration complete: {} files, {}, {}", FileCount, NiceBytes(TotalBytes), 
NiceTimeSpanMs(UploadDurationMs)); - } - catch (std::exception& Ex) - { - // Any in-progress multipart upload has already been aborted by PutObjectMultipart. - // current-state.json is only written on success, so the previous S3 state remains valid. - ZEN_WARN("S3 dehydration failed: {}. S3 state not updated.", Ex.what()); - } -} + { + Stopwatch HashTimer; + ParallelWork Work(*m_Threading.AbortFlag, *m_Threading.PauseFlag, WorkerThreadPool::EMode::EnableBacklog); -void -S3Hydrator::Hydrate() -{ - ZEN_INFO("Hydrating state from s3://{}/{} to '{}'", m_Bucket, m_KeyPrefix, m_Config.ServerStateDir); + for (size_t FileIndex = 0; FileIndex < DirContent.Files.size(); FileIndex++) + { + const std::filesystem::path AbsPath = MakeSafeAbsolutePath(DirContent.Files[FileIndex]); + if (AbsPath.filename() == "reserve.gc") + { + continue; + } + const std::filesystem::path RelativePath = FastRelativePath(ServerStateDir, DirContent.Files[FileIndex]); + if (*RelativePath.begin() == ".sentry-native") + { + continue; + } + if (RelativePath == ".lock") + { + continue; + } + + Entry& CurrentEntry = Entries[TotalFiles]; + CurrentEntry.RelativePath = RelativePath; + CurrentEntry.Size = DirContent.FileSizes[FileIndex]; + CurrentEntry.ModTick = DirContent.FileModificationTicks[FileIndex]; + + bool FoundHash = false; + if (auto KnownIt = StateEntryLookup.find(CurrentEntry.RelativePath.generic_string()); KnownIt != StateEntryLookup.end()) + { + const Entry& StateEntry = StateEntries[KnownIt->second]; + if (StateEntry.Size == CurrentEntry.Size && StateEntry.ModTick == CurrentEntry.ModTick) + { + CurrentEntry.Hash = StateEntry.Hash; + FoundHash = true; + } + } - Stopwatch Timer; - const bool ForceRemoveReadOnlyFiles = true; + if (!FoundHash) + { + Work.ScheduleWork(*m_Threading.WorkerPool, + [AbsPath, EntryIndex = TotalFiles, &Entries, &Stats](std::atomic<bool>& AbortFlag) { + Stats.Hash.RecordThread(); + if (AbortFlag) + { + return; + } + + Entry& CurrentEntry = Entries[EntryIndex]; + + bool 
FoundHash = false; + if (AbsPath.extension().empty()) + { + auto It = CurrentEntry.RelativePath.begin(); + if (It != CurrentEntry.RelativePath.end() && It->filename().string().ends_with("cas")) + { + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed( + SharedBuffer(IoBufferBuilder::MakeFromFile(AbsPath)), + RawHash, + RawSize); + if (Compressed) + { + // We compose a meta-hash since taking the RawHash might collide with an + // existing non-compressed file with the same content The collision is + // unlikely except if the compressed data is zero bytes causing RawHash + // to be the same as an empty file. + IoHashStream Hasher; + Hasher.Append(RawHash.Hash, sizeof(RawHash.Hash)); + Hasher.Append(&CurrentEntry.Size, sizeof(CurrentEntry.Size)); + CurrentEntry.Hash = Hasher.GetHash(); + FoundHash = true; + } + } + } + + if (!FoundHash) + { + CurrentEntry.Hash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(AbsPath)); + } + Stats.Hash.Bytes.fetch_add(CurrentEntry.Size, std::memory_order_relaxed); + }); + Stats.Hash.Files.fetch_add(1, std::memory_order_relaxed); + } + TotalFiles++; + TotalBytes += CurrentEntry.Size; + } - // Clean temp dir before starting in case of leftover state from a previous failed hydration - ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); - CleanDirectory(m_Config.TempDir, ForceRemoveReadOnlyFiles); + { + Stopwatch ListTimer; + std::vector<IoHash> ExistingEntries = m_Storage->List(); + ExistsLookup.insert(ExistingEntries.begin(), ExistingEntries.end()); + Stats.ListExistingUs = ListTimer.GetElapsedTimeUs(); + } - bool WipeServerState = false; + Work.Wait(); - try - { - S3Client Client = CreateS3Client(); - std::string MetaKey = m_KeyPrefix + "/current-state.json"; + Entries.resize(TotalFiles); + Stats.Hash.ElapsedUs = HashTimer.GetElapsedTimeUs(); + Stats.TotalFiles = TotalFiles; + Stats.TotalBytes = TotalBytes; + } - S3GetObjectResult MetaResult = Client.GetObject(MetaKey); - if 
(!MetaResult.IsSuccess()) - { - if (MetaResult.Error == S3GetObjectResult::NotFoundErrorText) + uint64_t UploadDurationMs = 0; { - ZEN_INFO("No state found in S3 at {}", MetaKey); + Stopwatch UploadTimer; + ParallelWork Work(*m_Threading.AbortFlag, *m_Threading.PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - ZEN_DEBUG("Wiping server state '{}'", m_Config.ServerStateDir); - CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); - return; + for (const Entry& CurrentEntry : Entries) + { + if (!ExistsLookup.contains(CurrentEntry.Hash)) + { + m_Storage->Put(Work, + *m_Threading.WorkerPool, + CurrentEntry.Hash, + CurrentEntry.Size, + MakeSafeAbsolutePath(ServerStateDir / CurrentEntry.RelativePath), + Stats.Upload); + Stats.Upload.Files.fetch_add(1, std::memory_order_relaxed); + } + else + { + // Refresh the backend's modification time so lifecycle-expiration policies + // do not evict CAS entries that are still referenced by this module. + m_Storage->Touch(Work, *m_Threading.WorkerPool, CurrentEntry.Hash, Stats.Touch); + Stats.Touch.Files.fetch_add(1, std::memory_order_relaxed); + Stats.Touch.Bytes.fetch_add(CurrentEntry.Size, std::memory_order_relaxed); + } + } + + Work.Wait(); + Stats.Upload.ElapsedUs = UploadTimer.GetElapsedTimeUs(); + UploadDurationMs = TotalTimer.GetElapsedTimeMs(); + + Stopwatch MetadataTimer; + UtcTime Now = UtcTime::Now(); + std::string UploadTimeUtc = fmt::format("{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:03d}Z", + Now.Tm.tm_year + 1900, + Now.Tm.tm_mon + 1, + Now.Tm.tm_mday, + Now.Tm.tm_hour, + Now.Tm.tm_min, + Now.Tm.tm_sec, + Now.Ms); + + CbObjectWriter Meta; + Meta << "SourceFolder" << ServerStateDir.generic_string(); + Meta << "ModuleId" << m_Config.ModuleId; + Meta << "HostName" << GetMachineName(); + Meta << "UploadTimeUtc" << UploadTimeUtc; + Meta << "UploadDurationMs" << UploadDurationMs; + Meta << "TotalSizeBytes" << TotalBytes; + Meta << "StorageSettings" << m_Storage->GetSettings(); + + Meta.BeginArray("Files"); + 
for (const Entry& CurrentEntry : Entries) + { + Meta.BeginObject(); + { + Meta << "Path" << CurrentEntry.RelativePath.generic_string(); + Meta << "Size" << CurrentEntry.Size; + Meta << "ModTick" << CurrentEntry.ModTick; + Meta << "Hash" << CurrentEntry.Hash; + } + Meta.EndObject(); + } + Meta.EndArray(); + + m_Storage->SaveMetadata(Meta.Save()); + Stats.MetadataSaveUs = MetadataTimer.GetElapsedTimeUs(); } - throw zen::runtime_error("Failed to read current-state.json from '{}': {}", MetaKey, MetaResult.Error); - } - std::string ParseError; - json11::Json MetaJson = json11::Json::parse(std::string(MetaResult.AsText()), ParseError); - if (!ParseError.empty()) - { - throw zen::runtime_error("Failed to parse current-state.json from '{}': {}", MetaKey, ParseError); - } + ZEN_DEBUG("Cleaning server state '{}'", m_Config.ServerStateDir); + { + Stopwatch CleanTimer; + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, ServerStateDir); + Stats.CleanUs = CleanTimer.GetElapsedTimeUs(); + } - std::string FolderName = MetaJson["FolderName"].string_value(); - if (FolderName.empty()) - { - throw zen::runtime_error("current-state.json from '{}' has missing or empty FolderName", MetaKey); + Stats.TotalUs = TotalTimer.GetElapsedTimeUs(); + LogDehydrateSummary("Dehydration complete", Stats, m_Config.ModuleId, ServerStateDir, StorageTarget); } - - std::string FolderPrefix = m_KeyPrefix + "/" + FolderName + "/"; - S3ListObjectsResult ListResult = Client.ListObjects(FolderPrefix); - if (!ListResult.IsSuccess()) + catch (const std::exception& Ex) { - throw zen::runtime_error("Failed to list S3 objects under '{}': {}", FolderPrefix, ListResult.Error); + ZEN_WARN("Dehydration of module '{}' failed: {}. 
Leaving server state '{}'", + m_Config.ModuleId, + Ex.what(), + m_Config.ServerStateDir); + Stats.TotalUs = TotalTimer.GetElapsedTimeUs(); + LogDehydrateSummary("Dehydration failed", Stats, m_Config.ModuleId, ServerStateDir, StorageTarget); } + } - for (const S3ObjectInfo& Obj : ListResult.Objects) + CbObject IncrementalHydrator::Hydrate() + { + Stopwatch TotalTimer; + HydrateStatistics Stats; + const std::string StorageSource = m_Storage->Describe(); + + const std::filesystem::path ServerStateDir = MakeSafeAbsolutePath(m_Config.ServerStateDir); + const std::filesystem::path TempDir = MakeSafeAbsolutePath(m_Config.TempDir); + try { - if (!Obj.Key.starts_with(FolderPrefix)) + CbObject Meta; { - ZEN_WARN("Skipping unexpected S3 key '{}' (expected prefix '{}')", Obj.Key, FolderPrefix); - continue; + Stopwatch LoadTimer; + Meta = m_Storage->LoadMetadata(); + Stats.LoadMetadataUs = LoadTimer.GetElapsedTimeUs(); } + if (!Meta) + { + ZEN_INFO("No dehydrated state for module {} found, cleaning server state: '{}'", + m_Config.ModuleId, + m_Config.ServerStateDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, ServerStateDir); + return CbObject(); + } + + std::unordered_map<std::string, size_t> EntryLookup; + std::vector<Entry> Entries; + uint64_t TotalSize = 0; - std::string RelKey = Obj.Key.substr(FolderPrefix.size()); - if (RelKey.empty()) + for (CbFieldView FieldView : Meta["Files"]) { - continue; + CbObjectView EntryView = FieldView.AsObjectView(); + if (EntryView) + { + Entry NewEntry = {.RelativePath = std::filesystem::path(EntryView["Path"].AsString()), + .Size = EntryView["Size"].AsUInt64(), + .ModTick = EntryView["ModTick"].AsUInt64(), + .Hash = EntryView["Hash"].AsHash()}; + TotalSize += NewEntry.Size; + EntryLookup.insert_or_assign(NewEntry.RelativePath.generic_string(), Entries.size()); + Entries.emplace_back(std::move(NewEntry)); + } } - std::filesystem::path DestPath = MakeSafeAbsolutePath(m_Config.TempDir / 
std::filesystem::path(RelKey)); - CreateDirectories(DestPath.parent_path()); - if (Obj.Size > MultipartChunkSize) + Stats.TotalFiles = Entries.size(); + Stats.TotalBytes = TotalSize; + + ZEN_INFO("Hydrating module '{}' to folder '{}'. {} ({}) files", + m_Config.ModuleId, + m_Config.ServerStateDir, + Entries.size(), + NiceBytes(TotalSize)); + + m_Storage->ParseSettings(Meta["StorageSettings"].AsObjectView()); + { - BasicFile DestFile(DestPath, BasicFile::Mode::kTruncate); - DestFile.SetFileSize(Obj.Size); + Stopwatch DownloadTimer; + ParallelWork Work(*m_Threading.AbortFlag, *m_Threading.PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (const Entry& CurrentEntry : Entries) + { + std::filesystem::path Path = MakeSafeAbsolutePath(TempDir / CurrentEntry.RelativePath); + CreateDirectories(Path.parent_path()); + m_Storage->Get(Work, *m_Threading.WorkerPool, CurrentEntry.Hash, CurrentEntry.Size, Path, Stats.Download); + Stats.Download.Files.fetch_add(1, std::memory_order_relaxed); + } - BasicFileWriter Writer(DestFile, 64 * 1024); + Work.Wait(); + Stats.Download.ElapsedUs = DownloadTimer.GetElapsedTimeUs(); + } + + // Downloaded successfully - swap into ServerStateDir + ZEN_DEBUG("Cleaning server state '{}'", m_Config.ServerStateDir); + { + Stopwatch CleanTimer; + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, ServerStateDir); + Stats.CleanUs = CleanTimer.GetElapsedTimeUs(); + } - uint64_t Offset = 0; - while (Offset < Obj.Size) + { + Stopwatch RenameTimer; + // If the two paths share at least one common component they are on the same drive/volume + // and atomic renames will succeed. Otherwise fall back to a full copy. 
+ auto [ItTmp, ItState] = std::mismatch(TempDir.begin(), TempDir.end(), ServerStateDir.begin(), ServerStateDir.end()); + if (ItTmp != TempDir.begin()) { - uint64_t ChunkSize = std::min<uint64_t>(MultipartChunkSize, Obj.Size - Offset); - S3GetObjectResult Chunk = Client.GetObjectRange(Obj.Key, Offset, ChunkSize); - if (!Chunk.IsSuccess()) + DirectoryContent DirContent; + GetDirectoryContent(*m_Threading.WorkerPool, + TempDir, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeDirs, + DirContent); + + for (const std::filesystem::path& AbsPath : DirContent.Directories) { - throw zen::runtime_error("Failed to download '{}' bytes [{}-{}] from S3: {}", - Obj.Key, - Offset, - Offset + ChunkSize - 1, - Chunk.Error); + std::filesystem::path Dest = MakeSafeAbsolutePath(ServerStateDir / AbsPath.filename()); + std::error_code Ec = RenameDirectoryWithRetry(AbsPath, Dest); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to rename directory from '{}' to '{}'", AbsPath, Dest)); + } + } + for (const std::filesystem::path& AbsPath : DirContent.Files) + { + std::filesystem::path Dest = MakeSafeAbsolutePath(ServerStateDir / AbsPath.filename()); + std::error_code Ec = RenameFileWithRetry(AbsPath, Dest); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to rename file from '{}' to '{}'", AbsPath, Dest)); + } } - Writer.Write(Chunk.Content.GetData(), Chunk.Content.GetSize(), Offset); - Offset += ChunkSize; + ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, TempDir); } - - Writer.Flush(); - } - else - { - S3GetObjectResult Chunk = Client.GetObject(Obj.Key, m_Config.TempDir); - if (!Chunk.IsSuccess()) + else { - throw zen::runtime_error("Failed to download '{}' from S3: {}", Obj.Key, Chunk.Error); + // Slow path: TempDir and ServerStateDir are on different filesystems, so rename + // would fail. 
Copy the tree instead and clean up the temp files afterwards. + ZEN_DEBUG("TempDir and ServerStateDir are on different filesystems - using CopyTree"); + CopyTree(TempDir, ServerStateDir, {.EnableClone = true}); + ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, TempDir); } + Stats.RenameOrCopyUs = RenameTimer.GetElapsedTimeUs(); + } - if (IoBufferFileReference FileRef; Chunk.Content.GetFileReference(FileRef)) + CbObject StateObject; + { + Stopwatch VerifyTimer; + DirectoryContent DirContent; + GetDirectoryContent(*m_Threading.WorkerPool, + ServerStateDir, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | + DirectoryContentFlags::IncludeFileSizes | DirectoryContentFlags::IncludeModificationTick, + DirContent); + + CbObjectWriter HydrateState; + HydrateState.BeginArray("Files"); + for (size_t FileIndex = 0; FileIndex < DirContent.Files.size(); FileIndex++) { - std::error_code Ec; - std::filesystem::path ChunkPath = PathFromHandle(FileRef.FileHandle, Ec); - if (Ec) + std::filesystem::path RelativePath = FastRelativePath(ServerStateDir, DirContent.Files[FileIndex]); + + if (auto It = EntryLookup.find(RelativePath.generic_string()); It != EntryLookup.end()) { - WriteFile(DestPath, Chunk.Content); + HydrateState.BeginObject(); + { + HydrateState << "Path" << RelativePath.generic_string(); + HydrateState << "Size" << DirContent.FileSizes[FileIndex]; + HydrateState << "ModTick" << DirContent.FileModificationTicks[FileIndex]; + HydrateState << "Hash" << Entries[It->second].Hash; + } + HydrateState.EndObject(); } else { - Chunk.Content.SetDeleteOnClose(false); - Chunk.Content = {}; - RenameFile(ChunkPath, DestPath, Ec); + ZEN_ASSERT(false); } } - else - { - WriteFile(DestPath, Chunk.Content); - } + HydrateState.EndArray(); + + StateObject = HydrateState.Save(); + Stats.VerifyScanUs = VerifyTimer.GetElapsedTimeUs(); } + + Stats.TotalUs = 
TotalTimer.GetElapsedTimeUs(); + LogHydrateSummary("Hydration complete", Stats, m_Config.ModuleId, StorageSource, ServerStateDir); + + return StateObject; + } + catch (const std::exception& Ex) + { + ZEN_WARN("Hydration of module '{}' failed: {}. Cleaning server state '{}'", + m_Config.ModuleId, + Ex.what(), + m_Config.ServerStateDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, ServerStateDir); + ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, TempDir); + Stats.TotalUs = TotalTimer.GetElapsedTimeUs(); + LogHydrateSummary("Hydration failed", Stats, m_Config.ModuleId, StorageSource, ServerStateDir); + return {}; } + } - // Downloaded successfully - swap into ServerStateDir - ZEN_DEBUG("Wiping server state '{}'", m_Config.ServerStateDir); - CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); + void IncrementalHydrator::Obliterate() + { + const std::filesystem::path ServerStateDir = MakeSafeAbsolutePath(m_Config.ServerStateDir); + const std::filesystem::path TempDir = MakeSafeAbsolutePath(m_Config.TempDir); - // If the two paths share at least one common component they are on the same drive/volume - // and atomic renames will succeed. Otherwise fall back to a full copy. - auto [ItTmp, ItState] = - std::mismatch(m_Config.TempDir.begin(), m_Config.TempDir.end(), m_Config.ServerStateDir.begin(), m_Config.ServerStateDir.end()); - if (ItTmp != m_Config.TempDir.begin()) + try { - DirectoryContent DirContent; - GetDirectoryContent(m_Config.TempDir, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeDirs, DirContent); + ParallelWork Work(*m_Threading.AbortFlag, *m_Threading.PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + m_Storage->Delete(Work, *m_Threading.WorkerPool); + Work.Wait(); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed to delete backend storage for module '{}': {}. 
Proceeding with local cleanup.", m_Config.ModuleId, Ex.what()); + } - for (const std::filesystem::path& AbsPath : DirContent.Directories) - { - std::filesystem::path Dest = MakeSafeAbsolutePath(m_Config.ServerStateDir / AbsPath.filename()); - RenameDirectory(AbsPath, Dest); - } - for (const std::filesystem::path& AbsPath : DirContent.Files) - { - std::filesystem::path Dest = MakeSafeAbsolutePath(m_Config.ServerStateDir / AbsPath.filename()); - RenameFile(AbsPath, Dest); - } + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, ServerStateDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, TempDir); + } - ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); - CleanDirectory(m_Config.TempDir, ForceRemoveReadOnlyFiles); +} // namespace hydration_impl + +/////////////////////////////////////////////////////////////////////////// +// HydrationBase subclasses - own hub-wide backend state, hand per-module +// storages the exact inputs they need in CreateHydrator. 
+ +class FileHydration : public HydrationBase +{ +public: + explicit FileHydration(const Configuration& Config); + + virtual std::unique_ptr<HydrationStrategyBase> CreateHydrator(const HydrationConfig& Config) override; + +private: + std::filesystem::path m_StorageRoot; +}; + +class S3Hydration : public HydrationBase +{ +public: + explicit S3Hydration(const Configuration& Config); + + virtual std::unique_ptr<HydrationStrategyBase> CreateHydrator(const HydrationConfig& Config) override; + +private: + std::string m_Bucket; + std::string m_Region; + std::string m_Endpoint; + bool m_PathStyle = false; + std::string m_KeyPrefixRoot; + SigV4Credentials m_Credentials; + Ref<ImdsCredentialProvider> m_CredentialProvider; + std::unique_ptr<S3Client> m_Client; + uint64_t m_DefaultMultipartChunkSize; +}; + +/////////////////////////////////////////////////////////////////////////// +// Implementations + +FileHydration::FileHydration(const Configuration& Config) +{ + if (!Config.TargetSpecification.empty()) + { + m_StorageRoot = Utf8ToWide(Config.TargetSpecification.substr(hydration_impl::FileStorage::Prefix.length())); + if (m_StorageRoot.empty()) + { + throw zen::runtime_error("Hydration config 'file' type requires a directory path"); } - else + } + else + { + CbObjectView Settings = Config.Options["settings"].AsObjectView(); + std::string_view Path = Settings["path"].AsString(); + if (Path.empty()) { - // Slow path: TempDir and ServerStateDir are on different filesystems, so rename - // would fail. Copy the tree instead and clean up the temp files afterwards. 
- ZEN_DEBUG("TempDir and ServerStateDir are on different filesystems - using CopyTree"); - CopyTree(m_Config.TempDir, m_Config.ServerStateDir, {.EnableClone = true}); - ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); - CleanDirectory(m_Config.TempDir, ForceRemoveReadOnlyFiles); + throw zen::runtime_error("Hydration config 'file' type requires 'settings.path'"); + } + m_StorageRoot = Utf8ToWide(std::string(Path)); + } + MakeSafeAbsolutePathInPlace(m_StorageRoot); +} + +std::unique_ptr<HydrationStrategyBase> +FileHydration::CreateHydrator(const HydrationConfig& Config) +{ + using namespace hydration_impl; + return std::make_unique<IncrementalHydrator>(Config, std::make_unique<FileStorage>(m_StorageRoot / Config.ModuleId)); +} + +S3Hydration::S3Hydration(const Configuration& Config) +{ + using namespace hydration_impl; + + CbObjectView Settings = Config.Options["settings"].AsObjectView(); + std::string_view Spec; + if (!Config.TargetSpecification.empty()) + { + Spec = Config.TargetSpecification; + Spec.remove_prefix(S3Storage::Prefix.size()); + } + else + { + std::string_view Uri = Settings["uri"].AsString(); + if (Uri.empty()) + { + throw zen::runtime_error("Incremental S3 hydration config requires 'settings.uri'"); } + Spec = Uri; + Spec.remove_prefix(S3Storage::Prefix.size()); + } + + size_t SlashPos = Spec.find('/'); + m_Bucket = std::string(SlashPos != std::string_view::npos ? Spec.substr(0, SlashPos) : Spec); + m_KeyPrefixRoot = SlashPos != std::string_view::npos ? 
std::string(Spec.substr(SlashPos + 1)) : std::string{}; + + if (m_Bucket.empty()) + { + throw zen::runtime_error("Incremental S3 hydration config requires a bucket name"); + } - ZEN_INFO("Hydration complete from folder '{}' in {}", FolderName, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + std::string Region = std::string(Settings["region"].AsString()); + if (Region.empty()) + { + Region = GetEnvVariable("AWS_DEFAULT_REGION"); + } + if (Region.empty()) + { + Region = GetEnvVariable("AWS_REGION"); + } + if (Region.empty()) + { + Region = "us-east-1"; } - catch (std::exception& Ex) + m_Region = std::move(Region); + + std::string_view Endpoint = Settings["endpoint"].AsString(); + if (!Endpoint.empty()) { - ZEN_WARN("S3 hydration failed: {}. Will wipe any partially installed state.", Ex.what()); + m_Endpoint = std::string(Endpoint); + m_PathStyle = Settings["path-style"].AsBool(); + } - // We don't do the clean right here to avoid potentially running into double-throws - WipeServerState = true; + std::string AccessKeyId = GetEnvVariable("AWS_ACCESS_KEY_ID"); + if (AccessKeyId.empty()) + { + m_CredentialProvider = Ref<ImdsCredentialProvider>(new ImdsCredentialProvider({})); + } + else + { + m_Credentials.AccessKeyId = std::move(AccessKeyId); + m_Credentials.SecretAccessKey = GetEnvVariable("AWS_SECRET_ACCESS_KEY"); + m_Credentials.SessionToken = GetEnvVariable("AWS_SESSION_TOKEN"); } - if (WipeServerState) + m_DefaultMultipartChunkSize = Settings["chunksize"].AsUInt64(S3Storage::DefaultMultipartChunkSize); + + S3ClientOptions ClientOptions; + ClientOptions.BucketName = m_Bucket; + ClientOptions.Region = m_Region; + ClientOptions.Endpoint = m_Endpoint; + ClientOptions.PathStyle = m_PathStyle; + if (m_CredentialProvider) { - ZEN_DEBUG("Cleaning server state '{}'", m_Config.ServerStateDir); - CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); - ZEN_DEBUG("Cleaning temp dir '{}'", m_Config.TempDir); - CleanDirectory(m_Config.TempDir, ForceRemoveReadOnlyFiles); 
+ ClientOptions.CredentialProvider = m_CredentialProvider; } + else + { + ClientOptions.Credentials = m_Credentials; + } + ClientOptions.HttpSettings.MaximumInMemoryDownloadSize = 16u * 1024u; + + m_Client = std::make_unique<S3Client>(ClientOptions); } std::unique_ptr<HydrationStrategyBase> -CreateHydrator(const HydrationConfig& Config) +S3Hydration::CreateHydrator(const HydrationConfig& Config) +{ + using namespace hydration_impl; + std::string KeyPrefix = m_KeyPrefixRoot.empty() ? std::string(Config.ModuleId) : fmt::format("{}/{}", m_KeyPrefixRoot, Config.ModuleId); + return std::make_unique<IncrementalHydrator>( + Config, + std::make_unique<S3Storage>(*m_Client, std::move(KeyPrefix), Config.TempDir, m_DefaultMultipartChunkSize)); +} + +std::unique_ptr<HydrationBase> +InitHydration(const HydrationBase::Configuration& Config) { + using namespace hydration_impl; + if (!Config.TargetSpecification.empty()) { - if (StrCaseCompare(Config.TargetSpecification.substr(0, FileHydratorPrefix.length()), FileHydratorPrefix) == 0) + if (StrCaseCompare(Config.TargetSpecification.substr(0, FileStorage::Prefix.length()), FileStorage::Prefix) == 0) { - std::unique_ptr<HydrationStrategyBase> Hydrator = std::make_unique<FileHydrator>(); - Hydrator->Configure(Config); - return Hydrator; + return std::make_unique<FileHydration>(Config); } - if (StrCaseCompare(Config.TargetSpecification.substr(0, S3HydratorPrefix.length()), S3HydratorPrefix) == 0) + if (StrCaseCompare(Config.TargetSpecification.substr(0, S3Storage::Prefix.length()), S3Storage::Prefix) == 0) { - std::unique_ptr<HydrationStrategyBase> Hydrator = std::make_unique<S3Hydrator>(); - Hydrator->Configure(Config); - return Hydrator; + return std::make_unique<S3Hydration>(Config); } - throw std::runtime_error(fmt::format("Unknown hydration strategy: {}", Config.TargetSpecification)); + throw zen::runtime_error("Unknown hydration strategy: {}", Config.TargetSpecification); } std::string_view Type = 
Config.Options["type"].AsString(); - if (Type == FileHydratorType) + if (Type == FileStorage::Type) { - std::unique_ptr<HydrationStrategyBase> Hydrator = std::make_unique<FileHydrator>(); - Hydrator->Configure(Config); - return Hydrator; + return std::make_unique<FileHydration>(Config); } - if (Type == S3HydratorType) + if (Type == S3Storage::Type) { - std::unique_ptr<HydrationStrategyBase> Hydrator = std::make_unique<S3Hydrator>(); - Hydrator->Configure(Config); - return Hydrator; + return std::make_unique<S3Hydration>(Config); } if (!Type.empty()) { @@ -659,60 +1409,14 @@ CreateHydrator(const HydrationConfig& Config) namespace { - /// Scoped RAII helper to set/restore a single environment variable within a test. - /// Used to configure AWS credentials for each S3 test's MinIO instance - /// without polluting the global environment. - struct ScopedEnvVar + struct TestThreading { - std::string m_Name; - std::optional<std::string> m_OldValue; // nullopt = was not set; "" = was set to empty string + WorkerThreadPool WorkerPool; + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig::ThreadingOptions Options{.WorkerPool = &WorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}; - ScopedEnvVar(std::string_view Name, std::string_view Value) : m_Name(Name) - { -# if ZEN_PLATFORM_WINDOWS - // Use the raw API so we can distinguish "not set" (ERROR_ENVVAR_NOT_FOUND) - // from "set to empty string" (returns 0 with no error). - char Buf[1]; - DWORD Len = GetEnvironmentVariableA(m_Name.c_str(), Buf, sizeof(Buf)); - if (Len == 0 && GetLastError() == ERROR_ENVVAR_NOT_FOUND) - { - m_OldValue = std::nullopt; - } - else - { - // Len == 0 with no error: variable exists but is empty. - // Len > sizeof(Buf): value is non-empty; Len is the required buffer size - // (including null terminator) - allocate and re-read. - std::string Old(Len == 0 ? 
0 : Len - 1, '\0'); - if (Len > sizeof(Buf)) - { - GetEnvironmentVariableA(m_Name.c_str(), Old.data(), Len); - } - m_OldValue = std::move(Old); - } - SetEnvironmentVariableA(m_Name.c_str(), std::string(Value).c_str()); -# else - // getenv returns nullptr when not set, "" when set to empty string. - const char* Existing = getenv(m_Name.c_str()); - m_OldValue = Existing ? std::optional<std::string>(Existing) : std::nullopt; - setenv(m_Name.c_str(), std::string(Value).c_str(), 1); -# endif - } - ~ScopedEnvVar() - { -# if ZEN_PLATFORM_WINDOWS - SetEnvironmentVariableA(m_Name.c_str(), m_OldValue.has_value() ? m_OldValue->c_str() : nullptr); -# else - if (m_OldValue.has_value()) - { - setenv(m_Name.c_str(), m_OldValue->c_str(), 1); - } - else - { - unsetenv(m_Name.c_str()); - } -# endif - } + explicit TestThreading(int ThreadCount) : WorkerPool(ThreadCount) {} }; /// Create a small file hierarchy under BaseDir: @@ -720,10 +1424,10 @@ namespace { /// subdir/file_b.bin /// subdir/nested/file_c.bin /// Returns a vector of (relative path, content) pairs for later verification. 
- std::vector<std::pair<std::filesystem::path, IoBuffer>> CreateTestTree(const std::filesystem::path& BaseDir) - { - std::vector<std::pair<std::filesystem::path, IoBuffer>> Files; + typedef std::vector<std::pair<std::filesystem::path, IoBuffer>> TestFileList; + TestFileList AddTestFiles(const std::filesystem::path& BaseDir, TestFileList& Files) + { auto AddFile = [&](std::filesystem::path RelPath, IoBuffer Content) { std::filesystem::path FullPath = BaseDir / RelPath; CreateDirectories(FullPath.parent_path()); @@ -737,9 +1441,33 @@ namespace { AddFile("subdir/nested/file_d.bin", CreateSemiRandomBlob(512)); AddFile("subdir/nested/file_e.bin", CreateSemiRandomBlob(512)); AddFile("subdir/nested/file_f.bin", CreateSemiRandomBlob(512)); + + return Files; + } + + TestFileList CreateSmallTestTree(const std::filesystem::path& BaseDir) + { + TestFileList Files; + AddTestFiles(BaseDir, Files); + return Files; + } + + TestFileList CreateTestTree(const std::filesystem::path& BaseDir) + { + TestFileList Files; + AddTestFiles(BaseDir, Files); + + auto AddFile = [&](std::filesystem::path RelPath, IoBuffer Content) { + std::filesystem::path FullPath = BaseDir / RelPath; + CreateDirectories(FullPath.parent_path()); + WriteFile(FullPath, Content); + Files.emplace_back(std::move(RelPath), std::move(Content)); + }; + AddFile("subdir/nested/medium.bulk", CreateSemiRandomBlob(256u * 1024u)); AddFile("subdir/nested/big.bulk", CreateSemiRandomBlob(512u * 1024u)); AddFile("subdir/nested/huge.bulk", CreateSemiRandomBlob(9u * 1024u * 1024u)); + AddFile("subdir/nested/biggest.bulk", CreateSemiRandomBlob(63u * 1024u * 1024u)); return Files; } @@ -777,35 +1505,27 @@ TEST_CASE("hydration.file.dehydrate_hydrate") CreateDirectories(HydrationTemp); const std::string ModuleId = "testmodule"; - auto TestFiles = CreateTestTree(ServerStateDir); + auto TestFiles = CreateSmallTestTree(ServerStateDir); + + auto Hydration = InitHydration({.TargetSpecification = "file://" + HydrationStore.string()}); - 
HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = ModuleId; - Config.TargetSpecification = "file://" + HydrationStore.string(); + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = ModuleId}; // Dehydrate: copy server state to file store - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); // Verify the module folder exists in the store and ServerStateDir was wiped CHECK(std::filesystem::exists(HydrationStore / ModuleId)); CHECK(std::filesystem::is_empty(ServerStateDir)); // Hydrate: restore server state from file store - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); - } + Hydration->CreateHydrator(Config)->Hydrate(); // Verify restored contents match the original VerifyTree(ServerStateDir, TestFiles); } -TEST_CASE("hydration.file.dehydrate_cleans_server_state") +TEST_CASE("hydration.file.hydrate_overwrites_existing_state") { ScopedTemporaryDirectory TempDir; @@ -816,22 +1536,25 @@ TEST_CASE("hydration.file.dehydrate_cleans_server_state") CreateDirectories(HydrationStore); CreateDirectories(HydrationTemp); - CreateTestTree(ServerStateDir); + auto TestFiles = CreateSmallTestTree(ServerStateDir); - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = "testmodule"; - Config.TargetSpecification = "file://" + HydrationStore.string(); + auto Hydration = InitHydration({.TargetSpecification = "file://" + HydrationStore.string()}); - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = "testmodule"}; - // FileHydrator::Dehydrate() must wipe ServerStateDir when done - 
CHECK(std::filesystem::is_empty(ServerStateDir)); + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); + + // Put a stale file in ServerStateDir to simulate leftover state + WriteFile(ServerStateDir / "stale.bin", CreateSemiRandomBlob(256)); + + // Hydrate - must wipe stale file and restore original + Hydration->CreateHydrator(Config)->Hydrate(); + + CHECK_FALSE(std::filesystem::exists(ServerStateDir / "stale.bin")); + VerifyTree(ServerStateDir, TestFiles); } -TEST_CASE("hydration.file.hydrate_overwrites_existing_state") +TEST_CASE("hydration.file.excluded_files_not_dehydrated") { ScopedTemporaryDirectory TempDir; @@ -842,31 +1565,70 @@ TEST_CASE("hydration.file.hydrate_overwrites_existing_state") CreateDirectories(HydrationStore); CreateDirectories(HydrationTemp); - auto TestFiles = CreateTestTree(ServerStateDir); + auto TestFiles = CreateSmallTestTree(ServerStateDir); - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = "testmodule"; - Config.TargetSpecification = "file://" + HydrationStore.string(); + // Add files that the dehydrator should skip + WriteFile(ServerStateDir / "reserve.gc", CreateSemiRandomBlob(64)); + CreateDirectories(ServerStateDir / ".sentry-native"); + WriteFile(ServerStateDir / ".sentry-native" / "db.lock", CreateSemiRandomBlob(32)); + WriteFile(ServerStateDir / ".sentry-native" / "breadcrumb.json", CreateSemiRandomBlob(128)); - // Dehydrate the original state - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } + auto Hydration = InitHydration({.TargetSpecification = "file://" + HydrationStore.string()}); - // Put a stale file in ServerStateDir to simulate leftover state - WriteFile(ServerStateDir / "stale.bin", CreateSemiRandomBlob(256)); + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = "testmodule_excl"}; - // Hydrate - must wipe stale file and restore original - 
{ - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); - } + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); - CHECK_FALSE(std::filesystem::exists(ServerStateDir / "stale.bin")); + // Hydrate into a clean directory + CleanDirectory(ServerStateDir, true); + Hydration->CreateHydrator(Config)->Hydrate(); + + // Normal files must be restored VerifyTree(ServerStateDir, TestFiles); + // Excluded files must NOT be restored + CHECK_FALSE(std::filesystem::exists(ServerStateDir / "reserve.gc")); + CHECK_FALSE(std::filesystem::exists(ServerStateDir / ".sentry-native")); +} + +// --------------------------------------------------------------------------- +// FileHydrator obliterate test +// --------------------------------------------------------------------------- + +TEST_CASE("hydration.file.obliterate") +{ + ScopedTemporaryDirectory TempDir; + + std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; + std::filesystem::path HydrationStore = TempDir.Path() / "hydration_store"; + std::filesystem::path HydrationTemp = TempDir.Path() / "hydration_temp"; + CreateDirectories(ServerStateDir); + CreateDirectories(HydrationStore); + CreateDirectories(HydrationTemp); + + const std::string ModuleId = "obliterate_test"; + CreateSmallTestTree(ServerStateDir); + + auto Hydration = InitHydration({.TargetSpecification = "file://" + HydrationStore.string()}); + + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = ModuleId}; + + // Dehydrate so the backend store has data + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); + CHECK(std::filesystem::exists(HydrationStore / ModuleId)); + + // Put some files back in ServerStateDir and TempDir to verify cleanup + CreateSmallTestTree(ServerStateDir); + WriteFile(HydrationTemp / "leftover.tmp", CreateSemiRandomBlob(64)); + + // Obliterate + Hydration->CreateHydrator(Config)->Obliterate(); + + // Backend store directory deleted 
+ CHECK_FALSE(std::filesystem::exists(HydrationStore / ModuleId)); + // ServerStateDir cleaned + CHECK(std::filesystem::is_empty(ServerStateDir)); + // TempDir cleaned + CHECK(std::filesystem::is_empty(HydrationTemp)); } // --------------------------------------------------------------------------- @@ -883,6 +1645,8 @@ TEST_CASE("hydration.file.concurrent") std::filesystem::path HydrationStore = TempDir.Path() / "hydration_store"; CreateDirectories(HydrationStore); + TestThreading Threading(8); + struct ModuleData { HydrationConfig Config; @@ -890,6 +1654,8 @@ TEST_CASE("hydration.file.concurrent") }; std::vector<ModuleData> Modules(kModuleCount); + auto Hydration = InitHydration({.TargetSpecification = "file://" + HydrationStore.string()}); + for (int I = 0; I < kModuleCount; ++I) { std::string ModuleId = fmt::format("file_concurrent_{}", I); @@ -898,11 +1664,11 @@ TEST_CASE("hydration.file.concurrent") CreateDirectories(StateDir); CreateDirectories(TempPath); - Modules[I].Config.ServerStateDir = StateDir; - Modules[I].Config.TempDir = TempPath; - Modules[I].Config.ModuleId = ModuleId; - Modules[I].Config.TargetSpecification = "file://" + HydrationStore.string(); - Modules[I].Files = CreateTestTree(StateDir); + Modules[I].Config.ServerStateDir = StateDir; + Modules[I].Config.TempDir = TempPath; + Modules[I].Config.ModuleId = ModuleId; + Modules[I].Config.Threading = Threading.Options; + Modules[I].Files = CreateSmallTestTree(StateDir); } // Concurrent dehydrate @@ -914,9 +1680,8 @@ TEST_CASE("hydration.file.concurrent") for (int I = 0; I < kModuleCount; ++I) { - Work.ScheduleWork(Pool, [&Config = Modules[I].Config](std::atomic<bool>&) { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); + Work.ScheduleWork(Pool, [&Hydration, &Config = Modules[I].Config](std::atomic<bool>&) { + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); }); } Work.Wait(); @@ -932,9 +1697,8 @@ TEST_CASE("hydration.file.concurrent") for 
(int I = 0; I < kModuleCount; ++I) { - Work.ScheduleWork(Pool, [&Config = Modules[I].Config](std::atomic<bool>&) { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); + Work.ScheduleWork(Pool, [&Hydration, &Config = Modules[I].Config](std::atomic<bool>&) { + Hydration->CreateHydrator(Config)->Hydrate(); }); } Work.Wait(); @@ -951,76 +1715,13 @@ TEST_CASE("hydration.file.concurrent") // --------------------------------------------------------------------------- // S3Hydrator tests // -// Each test case spawns its own local MinIO instance (self-contained, no external setup needed). +// Each test case spawns a local MinIO instance (self-contained, no external setup needed). // The MinIO binary must be present in the same directory as the test executable (copied by xmake). // --------------------------------------------------------------------------- TEST_CASE("hydration.s3.dehydrate_hydrate") { MinioProcessOptions MinioOpts; - MinioOpts.Port = 19010; - MinioProcess Minio(MinioOpts); - Minio.SpawnMinioServer(); - Minio.CreateBucket("zen-hydration-test"); - - ScopedEnvVar EnvAccessKey("AWS_ACCESS_KEY_ID", Minio.RootUser()); - ScopedEnvVar EnvSecretKey("AWS_SECRET_ACCESS_KEY", Minio.RootPassword()); - - ScopedTemporaryDirectory TempDir; - - std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; - std::filesystem::path HydrationTemp = TempDir.Path() / "hydration_temp"; - CreateDirectories(ServerStateDir); - CreateDirectories(HydrationTemp); - - const std::string ModuleId = "s3test_roundtrip"; - auto TestFiles = CreateTestTree(ServerStateDir); - - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = ModuleId; - std::string ConfigJson = - fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", - Minio.Endpoint()); - std::string ParseError; - CbFieldIterator Root = 
LoadCompactBinaryFromJson(ConfigJson, ParseError); - ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Config.Options = std::move(Root).AsObject(); - - // Dehydrate: upload server state to MinIO - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } - - // Wipe server state - CleanDirectory(ServerStateDir, true); - CHECK(std::filesystem::is_empty(ServerStateDir)); - - // Hydrate: download from MinIO back to server state - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); - } - - // Verify restored contents match the original - VerifyTree(ServerStateDir, TestFiles); -} - -TEST_CASE("hydration.s3.current_state_json_selects_latest_folder") -{ - // Each Dehydrate() uploads files to a new timestamp-named folder and then overwrites - // current-state.json to point at that folder. Old folders are NOT deleted. - // Hydrate() must read current-state.json to determine which folder to restore from. - // - // This test verifies that: - // 1. After two dehydrations, Hydrate() restores from the second snapshot, not the first, - // confirming that current-state.json was updated between dehydrations. - // 2. current-state.json is updated to point at the second (latest) folder. - // 3. Hydrate() restores the v2 snapshot (identified by v2marker.bin), NOT the v1 snapshot. 
- - MinioProcessOptions MinioOpts; MinioOpts.Port = 19011; MinioProcess Minio(MinioOpts); Minio.SpawnMinioServer(); @@ -1036,12 +1737,7 @@ TEST_CASE("hydration.s3.current_state_json_selects_latest_folder") CreateDirectories(ServerStateDir); CreateDirectories(HydrationTemp); - const std::string ModuleId = "s3test_folder_select"; - - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = ModuleId; + HydrationBase::Configuration BaseConfig; { std::string ConfigJson = fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", @@ -1049,108 +1745,37 @@ TEST_CASE("hydration.s3.current_state_json_selects_latest_folder") std::string ParseError; CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Config.Options = std::move(Root).AsObject(); + BaseConfig.Options = std::move(Root).AsObject(); } + auto Hydration = InitHydration(BaseConfig); - // v1: dehydrate without a marker file - CreateTestTree(ServerStateDir); - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = "s3test_roundtrip"}; - // ServerStateDir is now empty. Wait so the v2 timestamp folder name is strictly later - // (timestamp resolution is 1 ms, but macOS scheduler granularity requires a larger margin). - Sleep(100); + // Hydrate with no prior S3 state (first-boot path). Pre-populate ServerStateDir + // with a stale file to confirm the cleanup branch wipes it. 
+ WriteFile(ServerStateDir / "stale.bin", CreateSemiRandomBlob(256)); + Hydration->CreateHydrator(Config)->Hydrate(); + CHECK(std::filesystem::is_empty(ServerStateDir)); + + // v1: dehydrate without a marker file + CreateSmallTestTree(ServerStateDir); + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); // v2: dehydrate WITH a marker file that only v2 has - CreateTestTree(ServerStateDir); + CreateSmallTestTree(ServerStateDir); WriteFile(ServerStateDir / "v2marker.bin", CreateSemiRandomBlob(64)); - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); - // Hydrate must restore v2 (current-state.json points to the v2 folder) + // Hydrate must restore v2 (the latest dehydrated state) CleanDirectory(ServerStateDir, true); - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); - } + Hydration->CreateHydrator(Config)->Hydrate(); - // v2 marker must be present - confirms current-state.json pointed to the v2 folder + // v2 marker must be present - confirms the second dehydration overwrote the first CHECK(std::filesystem::exists(ServerStateDir / "v2marker.bin")); - // Subdirectory hierarchy must also be intact CHECK(std::filesystem::exists(ServerStateDir / "subdir" / "file_b.bin")); CHECK(std::filesystem::exists(ServerStateDir / "subdir" / "nested" / "file_c.bin")); } -TEST_CASE("hydration.s3.module_isolation") -{ - // Two independent modules dehydrate/hydrate without interfering with each other. - // Uses VerifyTree with per-module byte content to detect cross-module data mixing. 
- MinioProcessOptions MinioOpts; - MinioOpts.Port = 19012; - MinioProcess Minio(MinioOpts); - Minio.SpawnMinioServer(); - Minio.CreateBucket("zen-hydration-test"); - - ScopedEnvVar EnvAccessKey("AWS_ACCESS_KEY_ID", Minio.RootUser()); - ScopedEnvVar EnvSecretKey("AWS_SECRET_ACCESS_KEY", Minio.RootPassword()); - - ScopedTemporaryDirectory TempDir; - - struct ModuleData - { - HydrationConfig Config; - std::vector<std::pair<std::filesystem::path, IoBuffer>> Files; - }; - - std::vector<ModuleData> Modules; - for (const char* ModuleId : {"s3test_iso_a", "s3test_iso_b"}) - { - std::filesystem::path StateDir = TempDir.Path() / ModuleId / "state"; - std::filesystem::path TempPath = TempDir.Path() / ModuleId / "temp"; - CreateDirectories(StateDir); - CreateDirectories(TempPath); - - ModuleData Data; - Data.Config.ServerStateDir = StateDir; - Data.Config.TempDir = TempPath; - Data.Config.ModuleId = ModuleId; - { - std::string ConfigJson = - fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", - Minio.Endpoint()); - std::string ParseError; - CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); - ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Data.Config.Options = std::move(Root).AsObject(); - } - Data.Files = CreateTestTree(StateDir); - - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Data.Config); - Hydrator->Dehydrate(); - - Modules.push_back(std::move(Data)); - } - - for (ModuleData& Module : Modules) - { - CleanDirectory(Module.Config.ServerStateDir, true); - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Module.Config); - Hydrator->Hydrate(); - - // Each module's files must be independently restorable with correct byte content. - // If S3 key prefixes were mixed up, CreateSemiRandomBlob content would differ. 
- VerifyTree(Module.Config.ServerStateDir, Module.Files); - } -} - -// --------------------------------------------------------------------------- -// S3Hydrator concurrent test -// --------------------------------------------------------------------------- - TEST_CASE("hydration.s3.concurrent") { // N modules dehydrate and hydrate concurrently against MinIO. @@ -1164,9 +1789,11 @@ TEST_CASE("hydration.s3.concurrent") ScopedEnvVar EnvAccessKey("AWS_ACCESS_KEY_ID", Minio.RootUser()); ScopedEnvVar EnvSecretKey("AWS_SECRET_ACCESS_KEY", Minio.RootPassword()); - constexpr int kModuleCount = 16; + constexpr int kModuleCount = 6; constexpr int kThreadCount = 4; + TestThreading Threading(kThreadCount); + ScopedTemporaryDirectory TempDir; struct ModuleData @@ -1176,6 +1803,18 @@ TEST_CASE("hydration.s3.concurrent") }; std::vector<ModuleData> Modules(kModuleCount); + HydrationBase::Configuration BaseConfig; + { + std::string ConfigJson = + fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", + Minio.Endpoint()); + std::string ParseError; + CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); + ZEN_ASSERT(ParseError.empty() && Root.IsObject()); + BaseConfig.Options = std::move(Root).AsObject(); + } + auto Hydration = InitHydration(BaseConfig); + for (int I = 0; I < kModuleCount; ++I) { std::string ModuleId = fmt::format("s3_concurrent_{}", I); @@ -1187,16 +1826,8 @@ TEST_CASE("hydration.s3.concurrent") Modules[I].Config.ServerStateDir = StateDir; Modules[I].Config.TempDir = TempPath; Modules[I].Config.ModuleId = ModuleId; - { - std::string ConfigJson = - fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", - Minio.Endpoint()); - std::string ParseError; - CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); - ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Modules[I].Config.Options = 
std::move(Root).AsObject(); - } - Modules[I].Files = CreateTestTree(StateDir); + Modules[I].Config.Threading = Threading.Options; + Modules[I].Files = CreateTestTree(StateDir); } // Concurrent dehydrate @@ -1208,9 +1839,8 @@ TEST_CASE("hydration.s3.concurrent") for (int I = 0; I < kModuleCount; ++I) { - Work.ScheduleWork(Pool, [&Config = Modules[I].Config](std::atomic<bool>&) { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); + Work.ScheduleWork(Pool, [&Hydration, &Config = Modules[I].Config](std::atomic<bool>&) { + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); }); } Work.Wait(); @@ -1226,10 +1856,9 @@ TEST_CASE("hydration.s3.concurrent") for (int I = 0; I < kModuleCount; ++I) { - Work.ScheduleWork(Pool, [&Config = Modules[I].Config](std::atomic<bool>&) { + Work.ScheduleWork(Pool, [&Hydration, &Config = Modules[I].Config](std::atomic<bool>&) { CleanDirectory(Config.ServerStateDir, true); - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); + Hydration->CreateHydrator(Config)->Hydrate(); }); } Work.Wait(); @@ -1243,17 +1872,10 @@ TEST_CASE("hydration.s3.concurrent") } } -// --------------------------------------------------------------------------- -// S3Hydrator: no prior state (first-boot path) -// --------------------------------------------------------------------------- - -TEST_CASE("hydration.s3.no_prior_state") +TEST_CASE("hydration.s3.obliterate") { - // Hydrate() against an empty bucket (first-boot scenario) must leave ServerStateDir empty. - // The "No state found in S3" path goes through the error-cleanup branch, which wipes - // ServerStateDir to ensure no partial or stale content is left for the server to start on. 
MinioProcessOptions MinioOpts; - MinioOpts.Port = 19014; + MinioOpts.Port = 19019; MinioProcess Minio(MinioOpts); Minio.SpawnMinioServer(); Minio.CreateBucket("zen-hydration-test"); @@ -1268,13 +1890,9 @@ TEST_CASE("hydration.s3.no_prior_state") CreateDirectories(ServerStateDir); CreateDirectories(HydrationTemp); - // Pre-populate ServerStateDir to confirm the wipe actually runs. - WriteFile(ServerStateDir / "stale.bin", CreateSemiRandomBlob(256)); + const std::string ModuleId = "s3test_obliterate"; - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = "s3test_no_prior"; + HydrationBase::Configuration BaseConfig; { std::string ConfigJson = fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", @@ -1282,26 +1900,46 @@ TEST_CASE("hydration.s3.no_prior_state") std::string ParseError; CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Config.Options = std::move(Root).AsObject(); + BaseConfig.Options = std::move(Root).AsObject(); } + auto Hydration = InitHydration(BaseConfig); + + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = ModuleId}; + + // Dehydrate to populate backend + CreateSmallTestTree(ServerStateDir); + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); + + auto ListModuleObjects = [&]() { + S3ClientOptions Opts; + Opts.BucketName = "zen-hydration-test"; + Opts.Endpoint = Minio.Endpoint(); + Opts.PathStyle = true; + Opts.Credentials.AccessKeyId = Minio.RootUser(); + Opts.Credentials.SecretAccessKey = Minio.RootPassword(); + S3Client Client(Opts); + return Client.ListObjects(ModuleId + "/"); + }; + + // Verify objects exist in S3 + CHECK(!ListModuleObjects().Objects.empty()); - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); + // Re-populate ServerStateDir 
and TempDir for cleanup verification + CreateSmallTestTree(ServerStateDir); + WriteFile(HydrationTemp / "leftover.tmp", CreateSemiRandomBlob(64)); - // ServerStateDir must be empty: the error path wipes it to prevent a server start - // against stale or partially-installed content. + // Obliterate + Hydration->CreateHydrator(Config)->Obliterate(); + + // Verify S3 objects deleted + CHECK(ListModuleObjects().Objects.empty()); + // Local directories cleaned CHECK(std::filesystem::is_empty(ServerStateDir)); + CHECK(std::filesystem::is_empty(HydrationTemp)); } -// --------------------------------------------------------------------------- -// S3Hydrator: bucket path prefix in TargetSpecification -// --------------------------------------------------------------------------- - -TEST_CASE("hydration.s3.path_prefix") +TEST_CASE("hydration.s3.config_overrides") { - // TargetSpecification of the form "s3://bucket/some/prefix" stores objects under - // "some/prefix/<ModuleId>/..." rather than directly under "<ModuleId>/...". - // Tests the second branch of the m_KeyPrefix calculation in S3Hydrator::Configure(). MinioProcessOptions MinioOpts; MinioOpts.Port = 19015; MinioProcess Minio(MinioOpts); @@ -1318,88 +1956,298 @@ TEST_CASE("hydration.s3.path_prefix") CreateDirectories(ServerStateDir); CreateDirectories(HydrationTemp); - std::vector<std::pair<std::filesystem::path, IoBuffer>> TestFiles = CreateTestTree(ServerStateDir); + // Path prefix: "s3://bucket/some/prefix" stores objects under + // "some/prefix/<ModuleId>/..." rather than directly under "<ModuleId>/...". 
+ { + auto TestFiles = CreateSmallTestTree(ServerStateDir); - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = "s3test_prefix"; + HydrationBase::Configuration BaseConfig; + { + std::string ConfigJson = fmt::format( + R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test/team/project","endpoint":"{}","path-style":true}}}})", + Minio.Endpoint()); + std::string ParseError; + CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); + ZEN_ASSERT(ParseError.empty() && Root.IsObject()); + BaseConfig.Options = std::move(Root).AsObject(); + } + auto Hydration = InitHydration(BaseConfig); + + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = "s3test_prefix"}; + + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); + + CleanDirectory(ServerStateDir, true); + + Hydration->CreateHydrator(Config)->Hydrate(); + + VerifyTree(ServerStateDir, TestFiles); + } + + // Region override: 'region' in Options["settings"] takes precedence over AWS_DEFAULT_REGION. + // AWS_DEFAULT_REGION is set to a bogus value; hydration must succeed using the region from Options. 
+ { + CleanDirectory(ServerStateDir, true); + auto TestFiles = CreateSmallTestTree(ServerStateDir); + + ScopedEnvVar EnvRegion("AWS_DEFAULT_REGION", "wrong-region"); + + HydrationBase::Configuration BaseConfig; + { + std::string ConfigJson = fmt::format( + R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true,"region":"us-east-1"}}}})", + Minio.Endpoint()); + std::string ParseError; + CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); + ZEN_ASSERT(ParseError.empty() && Root.IsObject()); + BaseConfig.Options = std::move(Root).AsObject(); + } + auto Hydration = InitHydration(BaseConfig); + + HydrationConfig Config{.ServerStateDir = ServerStateDir, .TempDir = HydrationTemp, .ModuleId = "s3test_region_override"}; + + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); + + CleanDirectory(ServerStateDir, true); + + Hydration->CreateHydrator(Config)->Hydrate(); + + VerifyTree(ServerStateDir, TestFiles); + } +} + +TEST_CASE("hydration.s3.dehydrate_hydrate.performance" * doctest::skip()) +{ + MinioProcessOptions MinioOpts; + MinioOpts.Port = 19010; + MinioProcess Minio(MinioOpts); + Minio.SpawnMinioServer(); + Minio.CreateBucket("zen-hydration-test"); + + ScopedEnvVar EnvAccessKey("AWS_ACCESS_KEY_ID", Minio.RootUser()); + ScopedEnvVar EnvSecretKey("AWS_SECRET_ACCESS_KEY", Minio.RootPassword()); + + ScopedTemporaryDirectory TempDir; + + std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; + std::filesystem::path HydrationTemp = TempDir.Path() / "hydration_temp"; + CreateDirectories(ServerStateDir); + CreateDirectories(HydrationTemp); + + const std::string ModuleId = "s3test_performance"; + CopyTree("E:\\Dev\\hub\\brainrot\\20260402-225355-508", ServerStateDir, {.EnableClone = true}); + // auto TestFiles = CreateTestTree(ServerStateDir); + + TestThreading Threading(4); + + HydrationBase::Configuration BaseConfig; { std::string ConfigJson = - 
fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test/team/project","endpoint":"{}","path-style":true}}}})", + fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", Minio.Endpoint()); std::string ParseError; CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Config.Options = std::move(Root).AsObject(); + BaseConfig.Options = std::move(Root).AsObject(); } + auto Hydration = InitHydration(BaseConfig); - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } + HydrationConfig Config{.ServerStateDir = ServerStateDir, + .TempDir = HydrationTemp, + .ModuleId = ModuleId, + .Threading = Threading.Options}; - CleanDirectory(ServerStateDir, true); + // Dehydrate: upload server state to MinIO + ZEN_INFO("============== DEHYDRATE =============="); + Hydration->CreateHydrator(Config)->Dehydrate(CbObject()); + for (size_t I = 0; I < 1; I++) { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); + // Wipe server state + CleanDirectory(ServerStateDir, true); + CHECK(std::filesystem::is_empty(ServerStateDir)); + + // Hydrate: download from MinIO back to server state + ZEN_INFO("=============== HYDRATE ==============="); + Hydration->CreateHydrator(Config)->Hydrate(); } +} + +//#define REAL_DATA_PATH "E:\\Dev\\hub\\zenddc\\Zen" +//#define REAL_DATA_PATH "E:\\Dev\\hub\\brainrot\\20260402-225355-508" + +TEST_CASE("hydration.file.incremental") +{ + std::filesystem::path TmpPath; +# ifdef REAL_DATA_PATH + TmpPath = std::filesystem::path(REAL_DATA_PATH).parent_path() / "hub"; +# endif + ScopedTemporaryDirectory TempDir(TmpPath); + + std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; + std::filesystem::path HydrationStore = TempDir.Path() / "hydration_store"; + std::filesystem::path HydrationTemp = TempDir.Path() / 
"hydration_temp"; + CreateDirectories(ServerStateDir); + CreateDirectories(HydrationStore); + CreateDirectories(HydrationTemp); + + const std::string ModuleId = "testmodule"; + // auto TestFiles = CreateTestTree(ServerStateDir); + + TestThreading Threading(4); + + auto Hydration = InitHydration({.TargetSpecification = "file://" + HydrationStore.string()}); + + HydrationConfig Config{.ServerStateDir = ServerStateDir, + .TempDir = HydrationTemp, + .ModuleId = ModuleId, + .Threading = Threading.Options}; + + // Hydrate with no prior state + CbObject HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); + CHECK_FALSE(HydrationState); + +# ifdef REAL_DATA_PATH + ZEN_INFO("Writing state data..."); + CopyTree(REAL_DATA_PATH, ServerStateDir, {.EnableClone = true}); + ZEN_INFO("Writing state data complete"); +# else + // Create test files and dehydrate + auto TestFiles = CreateTestTree(ServerStateDir); +# endif + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); + CHECK(std::filesystem::is_empty(ServerStateDir)); + + // Hydrate: restore from file store + HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); +# ifndef REAL_DATA_PATH + VerifyTree(ServerStateDir, TestFiles); +# endif + // Dehydrate again with cached state (should skip re-uploading unchanged files) + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); + CHECK(std::filesystem::is_empty(ServerStateDir)); + + // Hydrate one more time to confirm second dehydrate produced valid state + HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); + // Replace files and dehydrate + TestFiles = CreateTestTree(ServerStateDir); + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); + + // Hydrate one more time to confirm second dehydrate produced valid state + HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); +# ifndef REAL_DATA_PATH VerifyTree(ServerStateDir, TestFiles); +# endif // 0 + + // Dehydrate, nothing touched - no hashing, no upload + 
Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); } -TEST_CASE("hydration.s3.options_region_override") -{ - // Verify that 'region' in Options["settings"] takes precedence over AWS_DEFAULT_REGION env var. - // AWS_DEFAULT_REGION is set to a bogus value; hydration must succeed using the region from Options. +// --------------------------------------------------------------------------- +// S3Storage test +// --------------------------------------------------------------------------- +TEST_CASE("hydration.s3.incremental") +{ MinioProcessOptions MinioOpts; - MinioOpts.Port = 19016; + MinioOpts.Port = 19017; MinioProcess Minio(MinioOpts); Minio.SpawnMinioServer(); Minio.CreateBucket("zen-hydration-test"); ScopedEnvVar EnvAccessKey("AWS_ACCESS_KEY_ID", Minio.RootUser()); ScopedEnvVar EnvSecretKey("AWS_SECRET_ACCESS_KEY", Minio.RootPassword()); - ScopedEnvVar EnvRegion("AWS_DEFAULT_REGION", "wrong-region"); - ScopedTemporaryDirectory TempDir; + std::filesystem::path TmpPath; +# ifdef REAL_DATA_PATH + TmpPath = std::filesystem::path(REAL_DATA_PATH).parent_path() / "hub"; +# endif + ScopedTemporaryDirectory TempDir(TmpPath); std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; std::filesystem::path HydrationTemp = TempDir.Path() / "hydration_temp"; CreateDirectories(ServerStateDir); CreateDirectories(HydrationTemp); - auto TestFiles = CreateTestTree(ServerStateDir); + const std::string ModuleId = "s3test_incremental"; + + TestThreading Threading(8); - HydrationConfig Config; - Config.ServerStateDir = ServerStateDir; - Config.TempDir = HydrationTemp; - Config.ModuleId = "s3test_region_override"; + HydrationBase::Configuration BaseConfig; { - std::string ConfigJson = fmt::format( - R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true,"region":"us-east-1"}}}})", - Minio.Endpoint()); + std::string ConfigJson = + 
fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", + Minio.Endpoint()); std::string ParseError; CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); ZEN_ASSERT(ParseError.empty() && Root.IsObject()); - Config.Options = std::move(Root).AsObject(); + BaseConfig.Options = std::move(Root).AsObject(); } + auto Hydration = InitHydration(BaseConfig); - { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(); - } + HydrationConfig Config{.ServerStateDir = ServerStateDir, + .TempDir = HydrationTemp, + .ModuleId = ModuleId, + .Threading = Threading.Options}; - CleanDirectory(ServerStateDir, true); + // Hydrate with no prior state + CbObject HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); + CHECK_FALSE(HydrationState); + +# ifdef REAL_DATA_PATH + ZEN_INFO("Writing state data..."); + CopyTree(REAL_DATA_PATH, ServerStateDir, {.EnableClone = true}); + ZEN_INFO("Writing state data complete"); +# else + // Create test files and dehydrate + auto TestFiles = CreateTestTree(ServerStateDir); +# endif + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); + CHECK(std::filesystem::is_empty(ServerStateDir)); + + // Hydrate: restore from S3 + HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); +# ifndef REAL_DATA_PATH + VerifyTree(ServerStateDir, TestFiles); +# endif + // Dehydrate again with cached state (should skip re-uploading unchanged files) + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); + CHECK(std::filesystem::is_empty(ServerStateDir)); + + // Hydrate one more time to confirm second dehydrate produced valid state + HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); + // Replace files and dehydrate + TestFiles = CreateTestTree(ServerStateDir); + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); + + // Hydrate one more time to confirm second dehydrate produced valid state + 
HydrationState = Hydration->CreateHydrator(Config)->Hydrate(); + +# ifndef REAL_DATA_PATH + VerifyTree(ServerStateDir, TestFiles); +# endif // 0 + + // Dehydrate, nothing touched - no hashing, no upload + Hydration->CreateHydrator(Config)->Dehydrate(HydrationState); +} + +TEST_CASE("hydration.create_hydrator_rejects_invalid_config") +{ + // Unknown TargetSpecification prefix + CHECK_THROWS(InitHydration({.TargetSpecification = "ftp://somewhere"})); + + // Unknown Options type { - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Hydrate(); + std::string ParseError; + CbFieldIterator Root = LoadCompactBinaryFromJson(R"({"type":"dynamodb"})", ParseError); + ZEN_ASSERT(ParseError.empty() && Root.IsObject()); + CHECK_THROWS(InitHydration({.Options = std::move(Root).AsObject()})); } - VerifyTree(ServerStateDir, TestFiles); + // Empty Options (no type field) + CHECK_THROWS(InitHydration({})); } TEST_SUITE_END(); diff --git a/src/zenserver/hub/hydration.h b/src/zenserver/hub/hydration.h index 19a96c248..0455dda91 100644 --- a/src/zenserver/hub/hydration.h +++ b/src/zenserver/hub/hydration.h @@ -4,10 +4,16 @@ #include <zencore/compactbinary.h> +#include <atomic> #include <filesystem> +#include <memory> +#include <optional> +#include <string> namespace zen { +class WorkerThreadPool; + struct HydrationConfig { // Location of server state to hydrate/dehydrate @@ -16,10 +22,16 @@ struct HydrationConfig std::filesystem::path TempDir; // Module ID of the server state being hydrated/dehydrated std::string ModuleId; - // Back-end specific target specification (e.g. 
S3 bucket, file path, etc) - std::string TargetSpecification; - // Full config object when using --hub-hydration-target-config (mutually exclusive with TargetSpecification) - CbObject Options; + + struct ThreadingOptions + { + WorkerThreadPool* WorkerPool = nullptr; + std::atomic<bool>* AbortFlag = nullptr; + std::atomic<bool>* PauseFlag = nullptr; + }; + + // External threading for parallel I/O and hashing. If not set, work runs inline on the caller's thread. + std::optional<ThreadingOptions> Threading; }; /** @@ -28,18 +40,53 @@ struct HydrationConfig * An instance of this interface is used to perform hydration OR * dehydration of server state. It's expected to be used only once * and not reused. - * */ struct HydrationStrategyBase { virtual ~HydrationStrategyBase() = default; - virtual void Dehydrate() = 0; - virtual void Hydrate() = 0; - virtual void Configure(const HydrationConfig& Config) = 0; + // Upload server state to the configured target. ServerStateDir is wiped on success. + // On failure, ServerStateDir is left intact. + virtual void Dehydrate(const CbObject& CachedState) = 0; + + // Download state from the configured target into ServerStateDir. Returns cached state for the next Dehydrate. + // On failure, ServerStateDir is wiped and an empty CbObject is returned. + virtual CbObject Hydrate() = 0; + + // Delete all stored data for this module from the configured backend, then clean ServerStateDir and TempDir. + virtual void Obliterate() = 0; +}; + +/** + * @brief Hub-wide hydration backend + * + * Constructed once per hub via InitHydration. Holds the shared connection / client / + * credentials state for the configured backend (e.g. a single S3 client and IMDS + * credential provider shared by all modules). CreateHydrator produces a ready-to-use + * per-module HydrationStrategyBase that references the shared state - no per-module + * backend setup cost. 
+ */ +class HydrationBase +{ +public: + struct Configuration + { + // Back-end specific target specification (e.g. "s3://bucket/prefix", "file:///path") + std::string TargetSpecification; + // Full config object (mutually exclusive with TargetSpecification) + CbObject Options; + }; + + virtual ~HydrationBase() = default; + + // Create a configured per-module hydrator, ready to call Hydrate/Dehydrate/Obliterate. + virtual std::unique_ptr<HydrationStrategyBase> CreateHydrator(const HydrationConfig& Config) = 0; }; -std::unique_ptr<HydrationStrategyBase> CreateHydrator(const HydrationConfig& Config); +// Factory: parses Config and returns the concrete backend (FileHydration or S3Hydration). +// Throws zen::runtime_error if the config cannot be resolved to a known backend or if +// backend-specific validation fails. +std::unique_ptr<HydrationBase> InitHydration(const HydrationBase::Configuration& Config); #if ZEN_WITH_TESTS void hydration_forcelink(); diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index 0c9354990..9d477fb10 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -8,16 +8,20 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/string.h> +#include <zencore/timer.h> namespace zen { -StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironment, const Configuration& Config, std::string_view ModuleId) -: m_Config(Config) +StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironment, + HydrationBase& Hydration, + const Configuration& Config, + std::string_view ModuleId) +: m_Hydration(Hydration) +, m_Config(Config) , m_ModuleId(ModuleId) , m_ServerInstance(RunEnvironment, ZenServerInstance::ServerMode::kStorageServer) { - m_BaseDir = RunEnvironment.CreateChildDir(ModuleId); - m_TempDir = Config.HydrationTempPath / ModuleId; } 
StorageServerInstance::~StorageServerInstance() @@ -27,11 +31,13 @@ StorageServerInstance::~StorageServerInstance() void StorageServerInstance::SpawnServerProcess() { + Stopwatch SpawnTimer; + ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); m_ServerInstance.ResetDeadProcess(); m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); - m_ServerInstance.SetDataDir(m_BaseDir); + m_ServerInstance.SetDataDir(m_Config.StateDir); #if ZEN_PLATFORM_WINDOWS m_ServerInstance.SetJobObject(m_JobObject); #endif @@ -50,13 +56,61 @@ StorageServerInstance::SpawnServerProcess() { AdditionalOptions << " --config=\"" << MakeSafeAbsolutePath(m_Config.ConfigPath).string() << "\""; } + if (!m_Config.Malloc.empty()) + { + AdditionalOptions << " --malloc=" << m_Config.Malloc; + } + if (!m_Config.Trace.empty()) + { + AdditionalOptions << " --trace=" << m_Config.Trace; + } + if (!m_Config.TraceHost.empty()) + { + AdditionalOptions << " --tracehost=" << m_Config.TraceHost; + } + if (!m_Config.TraceFile.empty()) + { + constexpr std::string_view ModuleIdPattern = "{moduleid}"; + constexpr std::string_view PortPattern = "{port}"; + + std::string ResolvedTraceFile = m_Config.TraceFile; + for (size_t Pos = ResolvedTraceFile.find(ModuleIdPattern); Pos != std::string::npos; + Pos = ResolvedTraceFile.find(ModuleIdPattern, Pos)) + { + ResolvedTraceFile.replace(Pos, ModuleIdPattern.length(), m_ModuleId); + } + std::string PortStr = fmt::format("{}", m_Config.BasePort); + for (size_t Pos = ResolvedTraceFile.find(PortPattern); Pos != std::string::npos; Pos = ResolvedTraceFile.find(PortPattern, Pos)) + { + ResolvedTraceFile.replace(Pos, PortPattern.length(), PortStr); + } + AdditionalOptions << " --tracefile=\"" << ResolvedTraceFile << "\""; + } m_ServerInstance.SpawnServerAndWaitUntilReady(m_Config.BasePort, AdditionalOptions.ToView()); - ZEN_DEBUG("Storage server instance for module '{}' started, listening on port 
{}", m_ModuleId, m_Config.BasePort); + ZEN_INFO("Storage server instance for module '{}' started, listening on port {}, spawn took {}", + m_ModuleId, + m_Config.BasePort, + NiceLatencyNs(SpawnTimer.GetElapsedTimeUs() * 1000)); m_ServerInstance.EnableShutdownOnDestroy(); } +void +StorageServerInstance::ShutdownServerProcess() +{ + if (!m_ServerInstance.IsRunning()) + { + return; + } + Stopwatch ShutdownTimer; + // m_ServerInstance.Shutdown() never throws. + m_ServerInstance.Shutdown(); + ZEN_INFO("Storage server instance for module '{}' shut down, took {}", + m_ModuleId, + NiceLatencyNs(ShutdownTimer.GetElapsedTimeUs() * 1000)); +} + ProcessMetrics StorageServerInstance::GetProcessMetrics() const { @@ -77,7 +131,7 @@ StorageServerInstance::ProvisionLocked() return; } - ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_BaseDir); + ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_Config.StateDir); try { Hydrate(); @@ -87,7 +141,7 @@ StorageServerInstance::ProvisionLocked() { ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during provisioning. Reason: {}", m_ModuleId, - m_BaseDir, + m_Config.StateDir, Ex.what()); throw; } @@ -96,11 +150,7 @@ StorageServerInstance::ProvisionLocked() void StorageServerInstance::DeprovisionLocked() { - if (m_ServerInstance.IsRunning()) - { - // m_ServerInstance.Shutdown() never throws. - m_ServerInstance.Shutdown(); - } + ShutdownServerProcess(); // Crashed or Hibernated: process already dead; skip Shutdown. // Dehydrate preserves instance state for future re-provisioning. 
Failure means saved state @@ -117,17 +167,22 @@ StorageServerInstance::DeprovisionLocked() } void -StorageServerInstance::HibernateLocked() +StorageServerInstance::ObliterateLocked() { - // Signal server to shut down, but keep data around for later wake + ShutdownServerProcess(); - if (!m_ServerInstance.IsRunning()) - { - return; - } + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr<HydrationStrategyBase> Hydrator = m_Hydration.CreateHydrator(Config); + Hydrator->Obliterate(); +} - // m_ServerInstance.Shutdown() never throws. - m_ServerInstance.Shutdown(); +void +StorageServerInstance::HibernateLocked() +{ + // Signal server to shut down, but keep data around for later wake + ShutdownServerProcess(); } void @@ -146,7 +201,10 @@ StorageServerInstance::WakeLocked() } catch (const std::exception& Ex) { - ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", m_ModuleId, m_BaseDir, Ex.what()); + ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. 
Reason: {}", + m_ModuleId, + m_Config.StateDir, + Ex.what()); throw; } } @@ -154,29 +212,34 @@ StorageServerInstance::WakeLocked() void StorageServerInstance::Hydrate() { - HydrationConfig Config{.ServerStateDir = m_BaseDir, - .TempDir = m_TempDir, - .ModuleId = m_ModuleId, - .TargetSpecification = m_Config.HydrationTargetSpecification, - .Options = m_Config.HydrationOptions}; - - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - - Hydrator->Hydrate(); + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr<HydrationStrategyBase> Hydrator = m_Hydration.CreateHydrator(Config); + m_HydrationState = Hydrator->Hydrate(); } void StorageServerInstance::Dehydrate() { - HydrationConfig Config{.ServerStateDir = m_BaseDir, - .TempDir = m_TempDir, - .ModuleId = m_ModuleId, - .TargetSpecification = m_Config.HydrationTargetSpecification, - .Options = m_Config.HydrationOptions}; + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr<HydrationStrategyBase> Hydrator = m_Hydration.CreateHydrator(Config); + Hydrator->Dehydrate(m_HydrationState); +} - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); +HydrationConfig +StorageServerInstance::MakeHydrationConfig(std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag) +{ + HydrationConfig Config{.ServerStateDir = m_Config.StateDir, .TempDir = m_Config.TempDir, .ModuleId = m_ModuleId}; + if (m_Config.OptionalWorkerPool) + { + Config.Threading.emplace( + HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); + } - Hydrator->Dehydrate(); + return Config; } StorageServerInstance::SharedLockedPtr::SharedLockedPtr() : m_Lock(nullptr), m_Instance(nullptr) @@ -345,6 +408,13 @@ 
StorageServerInstance::ExclusiveLockedPtr::Deprovision() } void +StorageServerInstance::ExclusiveLockedPtr::Obliterate() +{ + ZEN_ASSERT(m_Instance != nullptr); + m_Instance->ObliterateLocked(); +} + +void StorageServerInstance::ExclusiveLockedPtr::Hibernate() { ZEN_ASSERT(m_Instance != nullptr); diff --git a/src/zenserver/hub/storageserverinstance.h b/src/zenserver/hub/storageserverinstance.h index 1b0078d87..21ac1ada3 100644 --- a/src/zenserver/hub/storageserverinstance.h +++ b/src/zenserver/hub/storageserverinstance.h @@ -2,6 +2,8 @@ #pragma once +#include "hydration.h" + #include <zencore/compactbinary.h> #include <zenutil/zenserverprocess.h> @@ -10,6 +12,8 @@ namespace zen { +class WorkerThreadPool; + /** * Storage Server Instance * @@ -23,15 +27,23 @@ public: struct Configuration { uint16_t BasePort; - std::filesystem::path HydrationTempPath; - std::string HydrationTargetSpecification; - CbObject HydrationOptions; + std::filesystem::path StateDir; + std::filesystem::path TempDir; uint32_t HttpThreadCount = 0; // Automatic int CoreLimit = 0; // Automatic std::filesystem::path ConfigPath; + std::string Malloc; + std::string Trace; + std::string TraceHost; + std::string TraceFile; + + WorkerThreadPool* OptionalWorkerPool = nullptr; }; - StorageServerInstance(ZenServerEnvironment& RunEnvironment, const Configuration& Config, std::string_view ModuleId); + StorageServerInstance(ZenServerEnvironment& RunEnvironment, + HydrationBase& Hydration, + const Configuration& Config, + std::string_view ModuleId); ~StorageServerInstance(); inline std::string_view GetModuleId() const { return m_ModuleId; } @@ -109,6 +121,7 @@ public: void Provision(); void Deprovision(); + void Obliterate(); void Hibernate(); void Wake(); @@ -122,27 +135,29 @@ public: private: void ProvisionLocked(); void DeprovisionLocked(); + void ObliterateLocked(); void HibernateLocked(); void WakeLocked(); mutable RwLock m_Lock; + HydrationBase& m_Hydration; const Configuration m_Config; std::string 
m_ModuleId; ZenServerInstance m_ServerInstance; - std::filesystem::path m_BaseDir; - - std::filesystem::path m_TempDir; + CbObject m_HydrationState; #if ZEN_PLATFORM_WINDOWS JobObject* m_JobObject = nullptr; #endif void SpawnServerProcess(); + void ShutdownServerProcess(); - void Hydrate(); - void Dehydrate(); + void Hydrate(); + void Dehydrate(); + HydrationConfig MakeHydrationConfig(std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag); friend class SharedLockedPtr; friend class ExclusiveLockedPtr; diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp index 499586abc..ebc2cf2f1 100644 --- a/src/zenserver/hub/zenhubserver.cpp +++ b/src/zenserver/hub/zenhubserver.cpp @@ -2,8 +2,10 @@ #include "zenhubserver.h" +#include "config/luaconfig.h" #include "frontend/frontend.h" #include "httphubservice.h" +#include "httpproxyhandler.h" #include "hub.h" #include <zencore/compactbinary.h> @@ -12,16 +14,17 @@ #include <zencore/except_fmt.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> +#include <zencore/intmath.h> #include <zencore/memory/llm.h> #include <zencore/memory/memorytrace.h> #include <zencore/memory/tagtrace.h> #include <zencore/scopeguard.h> #include <zencore/sentryintegration.h> #include <zencore/system.h> +#include <zencore/thread.h> #include <zencore/windows.h> #include <zenhttp/httpapiservice.h> #include <zenutil/service.h> -#include <zenutil/workerpools.h> ZEN_THIRD_PARTY_INCLUDES_START #include <cxxopts.hpp> @@ -58,12 +61,19 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) Options.add_option("hub", "", "instance-id", - "Instance ID for use in notifications", + "Instance ID for use in notifications (deprecated, use --upstream-notification-instance-id)", cxxopts::value<std::string>(m_ServerOptions.InstanceId)->default_value(""), ""); Options.add_option("hub", "", + "upstream-notification-instance-id", + "Instance ID for use in notifications", + 
cxxopts::value<std::string>(m_ServerOptions.InstanceId), + ""); + + Options.add_option("hub", + "", "consul-endpoint", "Consul endpoint URL for service registration (empty = disabled)", cxxopts::value<std::string>(m_ServerOptions.ConsulEndpoint)->default_value(""), @@ -93,13 +103,27 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) Options.add_option("hub", "", + "consul-register-hub", + "Register the hub parent service with Consul (instance registration is unaffected)", + cxxopts::value<bool>(m_ServerOptions.ConsulRegisterHub)->default_value("true"), + ""); + + Options.add_option("hub", + "", "hub-base-port-number", - "Base port number for provisioned instances", + "Base port number for provisioned instances (deprecated, use --hub-instance-base-port-number)", cxxopts::value<uint16_t>(m_ServerOptions.HubBasePortNumber)->default_value("21000"), ""); Options.add_option("hub", "", + "hub-instance-base-port-number", + "Base port number for provisioned instances", + cxxopts::value<uint16_t>(m_ServerOptions.HubBasePortNumber), + ""); + + Options.add_option("hub", + "", "hub-instance-limit", "Maximum number of provisioned instances for this hub", cxxopts::value<int>(m_ServerOptions.HubInstanceLimit)->default_value("1000"), @@ -118,6 +142,34 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) Options.add_option("hub", "", + "hub-instance-malloc", + "Select memory allocator for provisioned instances (ansi|stomp|rpmalloc|mimalloc)", + cxxopts::value<std::string>(m_ServerOptions.HubInstanceMalloc)->default_value(""), + "<allocator>"); + + Options.add_option("hub", + "", + "hub-instance-trace", + "Trace channel specification for provisioned instances (e.g. 
default, cpu,log, memory)", + cxxopts::value<std::string>(m_ServerOptions.HubInstanceTrace)->default_value(""), + "<channels>"); + + Options.add_option("hub", + "", + "hub-instance-tracehost", + "Trace host for provisioned instances", + cxxopts::value<std::string>(m_ServerOptions.HubInstanceTraceHost)->default_value(""), + "<host>"); + + Options.add_option("hub", + "", + "hub-instance-tracefile", + "Trace file path for provisioned instances", + cxxopts::value<std::string>(m_ServerOptions.HubInstanceTraceFile)->default_value(""), + "<path>"); + + Options.add_option("hub", + "", "hub-instance-http-threads", "Number of http server connection threads for provisioned instances", cxxopts::value<unsigned int>(m_ServerOptions.HubInstanceHttpThreadCount), @@ -136,6 +188,16 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) cxxopts::value(m_ServerOptions.HubInstanceConfigPath), "<instance config>"); + const uint32_t DefaultHubInstanceProvisionThreadCount = Max(GetHardwareConcurrency() / 4u, 2u); + + Options.add_option("hub", + "", + "hub-instance-provision-threads", + fmt::format("Number of threads for instance provisioning (default {})", DefaultHubInstanceProvisionThreadCount), + cxxopts::value<uint32_t>(m_ServerOptions.HubInstanceProvisionThreadCount) + ->default_value(fmt::format("{}", DefaultHubInstanceProvisionThreadCount)), + "<threads>"); + Options.add_option("hub", "", "hub-hydration-target-spec", @@ -152,6 +214,16 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) cxxopts::value(m_ServerOptions.HydrationTargetConfigPath), "<path>"); + const uint32_t DefaultHubHydrationThreadCount = Max(GetHardwareConcurrency() / 4u, 2u); + + Options.add_option( + "hub", + "", + "hub-hydration-threads", + fmt::format("Number of threads for hydration/dehydration (default {})", DefaultHubHydrationThreadCount), + cxxopts::value<uint32_t>(m_ServerOptions.HubHydrationThreadCount)->default_value(fmt::format("{}", DefaultHubHydrationThreadCount)), + 
"<threads>"); + #if ZEN_PLATFORM_WINDOWS Options.add_option("hub", "", @@ -249,7 +321,79 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) void ZenHubServerConfigurator::AddConfigOptions(LuaConfig::Options& Options) { - ZEN_UNUSED(Options); + using namespace std::literals; + + Options.AddOption("hub.upstreamnotification.endpoint"sv, + m_ServerOptions.UpstreamNotificationEndpoint, + "upstream-notification-endpoint"sv); + Options.AddOption("hub.upstreamnotification.instanceid"sv, m_ServerOptions.InstanceId, "upstream-notification-instance-id"sv); + + Options.AddOption("hub.consul.endpoint"sv, m_ServerOptions.ConsulEndpoint, "consul-endpoint"sv); + Options.AddOption("hub.consul.tokenenv"sv, m_ServerOptions.ConsulTokenEnv, "consul-token-env"sv); + Options.AddOption("hub.consul.healthintervalseconds"sv, + m_ServerOptions.ConsulHealthIntervalSeconds, + "consul-health-interval-seconds"sv); + Options.AddOption("hub.consul.deregisterafterseconds"sv, + m_ServerOptions.ConsulDeregisterAfterSeconds, + "consul-deregister-after-seconds"sv); + Options.AddOption("hub.consul.registerhub"sv, m_ServerOptions.ConsulRegisterHub, "consul-register-hub"sv); + + Options.AddOption("hub.instance.baseportnumber"sv, m_ServerOptions.HubBasePortNumber, "hub-instance-base-port-number"sv); + Options.AddOption("hub.instance.http"sv, m_ServerOptions.HubInstanceHttpClass, "hub-instance-http"sv); + Options.AddOption("hub.instance.malloc"sv, m_ServerOptions.HubInstanceMalloc, "hub-instance-malloc"sv); + Options.AddOption("hub.instance.trace"sv, m_ServerOptions.HubInstanceTrace, "hub-instance-trace"sv); + Options.AddOption("hub.instance.tracehost"sv, m_ServerOptions.HubInstanceTraceHost, "hub-instance-tracehost"sv); + Options.AddOption("hub.instance.tracefile"sv, m_ServerOptions.HubInstanceTraceFile, "hub-instance-tracefile"sv); + Options.AddOption("hub.instance.httpthreads"sv, m_ServerOptions.HubInstanceHttpThreadCount, "hub-instance-http-threads"sv); + 
Options.AddOption("hub.instance.corelimit"sv, m_ServerOptions.HubInstanceCoreLimit, "hub-instance-corelimit"sv); + Options.AddOption("hub.instance.config"sv, m_ServerOptions.HubInstanceConfigPath, "hub-instance-config"sv); + Options.AddOption("hub.instance.limits.count"sv, m_ServerOptions.HubInstanceLimit, "hub-instance-limit"sv); + Options.AddOption("hub.instance.limits.disklimitbytes"sv, + m_ServerOptions.HubProvisionDiskLimitBytes, + "hub-provision-disk-limit-bytes"sv); + Options.AddOption("hub.instance.limits.disklimitpercent"sv, + m_ServerOptions.HubProvisionDiskLimitPercent, + "hub-provision-disk-limit-percent"sv); + Options.AddOption("hub.instance.limits.memorylimitbytes"sv, + m_ServerOptions.HubProvisionMemoryLimitBytes, + "hub-provision-memory-limit-bytes"sv); + Options.AddOption("hub.instance.limits.memorylimitpercent"sv, + m_ServerOptions.HubProvisionMemoryLimitPercent, + "hub-provision-memory-limit-percent"sv); + Options.AddOption("hub.instance.provisionthreads"sv, + m_ServerOptions.HubInstanceProvisionThreadCount, + "hub-instance-provision-threads"sv); + + Options.AddOption("hub.hydration.targetspec"sv, m_ServerOptions.HydrationTargetSpecification, "hub-hydration-target-spec"sv); + Options.AddOption("hub.hydration.targetconfig"sv, m_ServerOptions.HydrationTargetConfigPath, "hub-hydration-target-config"sv); + Options.AddOption("hub.hydration.threads"sv, m_ServerOptions.HubHydrationThreadCount, "hub-hydration-threads"sv); + + Options.AddOption("hub.watchdog.cycleintervalms"sv, m_ServerOptions.WatchdogConfig.CycleIntervalMs, "hub-watchdog-cycle-interval-ms"sv); + Options.AddOption("hub.watchdog.cycleprocessingbudgetms"sv, + m_ServerOptions.WatchdogConfig.CycleProcessingBudgetMs, + "hub-watchdog-cycle-processing-budget-ms"sv); + Options.AddOption("hub.watchdog.instancecheckthrottlems"sv, + m_ServerOptions.WatchdogConfig.InstanceCheckThrottleMs, + "hub-watchdog-instance-check-throttle-ms"sv); + 
Options.AddOption("hub.watchdog.provisionedinactivitytimeoutseconds"sv, + m_ServerOptions.WatchdogConfig.ProvisionedInactivityTimeoutSeconds, + "hub-watchdog-provisioned-inactivity-timeout-seconds"sv); + Options.AddOption("hub.watchdog.hibernatedinactivitytimeoutseconds"sv, + m_ServerOptions.WatchdogConfig.HibernatedInactivityTimeoutSeconds, + "hub-watchdog-hibernated-inactivity-timeout-seconds"sv); + Options.AddOption("hub.watchdog.inactivitycheckmarginseconds"sv, + m_ServerOptions.WatchdogConfig.InactivityCheckMarginSeconds, + "hub-watchdog-inactivity-check-margin-seconds"sv); + Options.AddOption("hub.watchdog.activitycheckconnecttimeoutms"sv, + m_ServerOptions.WatchdogConfig.ActivityCheckConnectTimeoutMs, + "hub-watchdog-activity-check-connect-timeout-ms"sv); + Options.AddOption("hub.watchdog.activitycheckrequesttimeoutms"sv, + m_ServerOptions.WatchdogConfig.ActivityCheckRequestTimeoutMs, + "hub-watchdog-activity-check-request-timeout-ms"sv); + +#if ZEN_PLATFORM_WINDOWS + Options.AddOption("hub.usejobobject"sv, m_ServerOptions.HubUseJobObject, "hub-use-job-object"sv); +#endif } void @@ -310,55 +454,71 @@ ZenHubServer::OnModuleStateChanged(std::string_view HubInstanceId, HubInstanceState NewState) { ZEN_UNUSED(PreviousState); - if (!m_ConsulClient) - { - return; - } - if (NewState == HubInstanceState::Provisioning || NewState == HubInstanceState::Provisioned) + if (NewState == HubInstanceState::Deprovisioning || NewState == HubInstanceState::Hibernating) { - consul::ServiceRegistrationInfo ServiceInfo{ - .ServiceId = std::string(ModuleId), - .ServiceName = "zen-storage", - .Port = Info.Port, - .HealthEndpoint = "health", - .Tags = std::vector<std::pair<std::string, std::string>>{std::make_pair("module", std::string(ModuleId)), - std::make_pair("zen-hub", std::string(HubInstanceId)), - std::make_pair("version", std::string(ZEN_CFG_VERSION))}, - .HealthIntervalSeconds = NewState == HubInstanceState::Provisioning - ? 
0u - : m_ConsulHealthIntervalSeconds, // Disable health checks while not finished provisioning - .DeregisterAfterSeconds = NewState == HubInstanceState::Provisioning - ? 0u - : m_ConsulDeregisterAfterSeconds}; // Disable health checks while not finished provisioning - - if (!m_ConsulClient->RegisterService(ServiceInfo)) - { - ZEN_WARN("Failed to register storage server instance for module '{}' with Consul, continuing anyway", ModuleId); - } - else + if (Info.Port != 0) { - ZEN_INFO("Registered storage server instance for module '{}' at port {} with Consul as '{}'", - ModuleId, - Info.Port, - ServiceInfo.ServiceName); + m_Proxy->PrunePort(Info.Port); } } - else if (NewState == HubInstanceState::Unprovisioned) + + if (!m_ConsulClient) { - if (!m_ConsulClient->DeregisterService(ModuleId)) - { - ZEN_WARN("Failed to deregister storage server instance for module '{}' at port {} from Consul, continuing anyway", - ModuleId, - Info.Port); - } - else - { - ZEN_INFO("Deregistered storage server instance for module '{}' at port {} from Consul", ModuleId, Info.Port); - } + return; + } + + switch (NewState) + { + case HubInstanceState::Provisioning: + case HubInstanceState::Waking: + case HubInstanceState::Recovering: + case HubInstanceState::Provisioned: + { + const bool IsProvisioned = NewState == HubInstanceState::Provisioned; + + consul::ServiceRegistrationInfo ServiceInfo{ + .ServiceId = std::string(ModuleId), + .ServiceName = "zen-storage", + .Port = Info.Port, + .HealthEndpoint = "health", + .Tags = std::vector<std::pair<std::string, std::string>>{std::make_pair("module", std::string(ModuleId)), + std::make_pair("zen-hub", std::string(HubInstanceId)), + std::make_pair("version", std::string(ZEN_CFG_VERSION))}, + .HealthIntervalSeconds = IsProvisioned ? m_ConsulHealthIntervalSeconds : 0u, + .DeregisterAfterSeconds = IsProvisioned ? m_ConsulDeregisterAfterSeconds : 0u, + .InitialStatus = IsProvisioned ? 
"passing" : ""}; + + m_ConsulClient->RegisterService(ServiceInfo); + ZEN_INFO("Submitted Consul registration for storage server instance for module '{}' at port {} as '{}'", + ModuleId, + Info.Port, + ServiceInfo.ServiceName); + break; + } + case HubInstanceState::Deprovisioning: + case HubInstanceState::Hibernating: + case HubInstanceState::Obliterating: + case HubInstanceState::Crashed: + case HubInstanceState::Hibernated: + case HubInstanceState::Unprovisioned: + { + // A Consul registration is "live" while the module is in a register-state + // (Provisioning / Waking / Recovering / Provisioned). Deregister once when + // we leave a register-state into any non-register-state + const bool WasRegisteredState = + PreviousState == HubInstanceState::Provisioning || PreviousState == HubInstanceState::Waking || + PreviousState == HubInstanceState::Recovering || PreviousState == HubInstanceState::Provisioned; + if (WasRegisteredState) + { + m_ConsulClient->DeregisterService(ModuleId); + ZEN_INFO("Submitted Consul deregistration for storage server instance for module '{}' at port {}", ModuleId, Info.Port); + } + } + break; + default: + break; } - // Transitional states (Deprovisioning, Hibernating, Waking, Recovering, Crashed) - // and Hibernated are intentionally ignored. } int @@ -380,6 +540,10 @@ ZenHubServer::Initialize(const ZenHubServerConfig& ServerConfig, ZenServerState: // the main test range. 
ZenServerEnvironment::SetBaseChildId(1000); + m_ProvisionWorkerPool = + std::make_unique<WorkerThreadPool>(gsl::narrow<int>(ServerConfig.HubInstanceProvisionThreadCount), "hub_provision"); + m_HydrationWorkerPool = std::make_unique<WorkerThreadPool>(gsl::narrow<int>(ServerConfig.HubHydrationThreadCount), "hub_hydration"); + m_DebugOptionForcedCrash = ServerConfig.ShouldCrash; InitializeState(ServerConfig); @@ -405,12 +569,18 @@ ZenHubServer::Cleanup() m_IoRunner.join(); } - ShutdownServices(); if (m_Http) { m_Http->Close(); } + ShutdownServices(); + + if (m_Proxy) + { + m_Proxy->Shutdown(); + } + if (m_Hub) { m_Hub->Shutdown(); @@ -420,6 +590,7 @@ ZenHubServer::Cleanup() m_HubService.reset(); m_ApiService.reset(); m_Hub.reset(); + m_Proxy.reset(); m_ConsulRegistration.reset(); m_ConsulClient.reset(); @@ -484,6 +655,10 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) .InstanceLimit = ServerConfig.HubInstanceLimit, .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount, .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit, + .InstanceMalloc = ServerConfig.HubInstanceMalloc, + .InstanceTrace = ServerConfig.HubInstanceTrace, + .InstanceTraceHost = ServerConfig.HubInstanceTraceHost, + .InstanceTraceFile = ServerConfig.HubInstanceTraceFile, .InstanceConfigPath = ServerConfig.HubInstanceConfigPath, .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification, .WatchDog = @@ -497,7 +672,9 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) .ActivityCheckConnectTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckConnectTimeoutMs), .ActivityCheckRequestTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckRequestTimeoutMs), }, - .ResourceLimits = ResolveLimits(ServerConfig)}; + .ResourceLimits = ResolveLimits(ServerConfig), + .OptionalProvisionWorkerPool = m_ProvisionWorkerPool.get(), + .OptionalHydrationWorkerPool = m_HydrationWorkerPool.get()}; if 
(!ServerConfig.HydrationTargetConfigPath.empty()) { @@ -522,27 +699,29 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) HubConfig.HydrationOptions = std::move(Root).AsObject(); } + m_Proxy = std::make_unique<HttpProxyHandler>(); + m_Hub = std::make_unique<Hub>( std::move(HubConfig), ZenServerEnvironment(ZenServerEnvironment::Hub, ServerConfig.DataDir / "hub", ServerConfig.DataDir / "servers", ServerConfig.HubInstanceHttpClass), - &GetMediumWorkerPool(EWorkloadType::Background), - m_ConsulClient ? Hub::AsyncModuleStateChangeCallbackFunc{[this, HubInstanceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId)]( - std::string_view ModuleId, - const HubProvisionedInstanceInfo& Info, - HubInstanceState PreviousState, - HubInstanceState NewState) { - OnModuleStateChanged(HubInstanceId, ModuleId, Info, PreviousState, NewState); - }} - : Hub::AsyncModuleStateChangeCallbackFunc{}); + Hub::AsyncModuleStateChangeCallbackFunc{ + [this, HubInstanceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId)](std::string_view ModuleId, + const HubProvisionedInstanceInfo& Info, + HubInstanceState PreviousState, + HubInstanceState NewState) { + OnModuleStateChanged(HubInstanceId, ModuleId, Info, PreviousState, NewState); + }}); + + m_Proxy->SetPortValidator([Hub = m_Hub.get()](uint16_t Port) { return Hub->IsInstancePort(Port); }); ZEN_INFO("instantiating API service"); m_ApiService = std::make_unique<zen::HttpApiService>(*m_Http); ZEN_INFO("instantiating hub service"); - m_HubService = std::make_unique<HttpHubService>(*m_Hub, m_StatsService, m_StatusService); + m_HubService = std::make_unique<HttpHubService>(*m_Hub, *m_Proxy, m_StatsService, m_StatusService); m_HubService->SetNotificationEndpoint(ServerConfig.UpstreamNotificationEndpoint, ServerConfig.InstanceId); m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); @@ -592,21 +771,32 @@ ZenHubServer::InitializeConsulRegistration(const ZenHubServerConfig& 
ServerConfi } else { - ZEN_INFO("Consul token read from environment variable '{}'", ConsulAccessTokenEnvName); + ZEN_INFO("Consul token will be read from environment variable '{}'", ConsulAccessTokenEnvName); } try { - m_ConsulClient = std::make_unique<consul::ConsulClient>(ServerConfig.ConsulEndpoint, ConsulAccessToken); + m_ConsulClient = std::make_unique<consul::ConsulClient>(consul::ConsulClient::Configuration{ + .BaseUri = ServerConfig.ConsulEndpoint, + .TokenEnvName = ConsulAccessTokenEnvName, + }); m_ConsulHealthIntervalSeconds = ServerConfig.ConsulHealthIntervalSeconds; m_ConsulDeregisterAfterSeconds = ServerConfig.ConsulDeregisterAfterSeconds; + if (!ServerConfig.ConsulRegisterHub) + { + ZEN_INFO( + "Hub parent Consul registration skipped (consul-register-hub is false); " + "instance registration remains enabled"); + return; + } + consul::ServiceRegistrationInfo Info; Info.ServiceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId); Info.ServiceName = "zen-hub"; // Info.Address = "localhost"; // Let the consul agent figure out out external address // TODO: Info.BaseUri? 
Info.Port = static_cast<uint16_t>(EffectivePort); - Info.HealthEndpoint = "hub/health"; + Info.HealthEndpoint = "health"; Info.Tags = std::vector<std::pair<std::string, std::string>>{ std::make_pair("zen-hub", Info.ServiceId), std::make_pair("version", std::string(ZEN_CFG_VERSION)), @@ -696,6 +886,8 @@ ZenHubServer::Run() OnReady(); + StartSelfSession("zenhub"); + m_Http->Run(IsInteractiveMode); SetNewState(kShuttingDown); diff --git a/src/zenserver/hub/zenhubserver.h b/src/zenserver/hub/zenhubserver.h index b976c52b3..5e465bb14 100644 --- a/src/zenserver/hub/zenhubserver.h +++ b/src/zenserver/hub/zenhubserver.h @@ -6,6 +6,7 @@ #include "resourcemetrics.h" #include "zenserver.h" +#include <zencore/workthreadpool.h> #include <zenutil/consul.h> namespace cxxopts { @@ -20,6 +21,7 @@ namespace zen { class HttpApiService; class HttpFrontendService; class HttpHubService; +class HttpProxyHandler; struct ZenHubWatchdogConfig { @@ -35,21 +37,28 @@ struct ZenHubWatchdogConfig struct ZenHubServerConfig : public ZenServerConfig { - std::string UpstreamNotificationEndpoint; - std::string InstanceId; // For use in notifications - std::string ConsulEndpoint; // If set, enables Consul service registration - std::string ConsulTokenEnv; // Environment variable name to read a Consul token from; defaults to CONSUL_HTTP_TOKEN if empty - uint32_t ConsulHealthIntervalSeconds = 10; // Interval in seconds between Consul health checks - uint32_t ConsulDeregisterAfterSeconds = 30; // Seconds before Consul deregisters an unhealthy service - uint16_t HubBasePortNumber = 21000; - int HubInstanceLimit = 1000; - bool HubUseJobObject = true; - std::string HubInstanceHttpClass = "asio"; - uint32_t HubInstanceHttpThreadCount = 0; // Automatic - int HubInstanceCoreLimit = 0; // Automatic - std::filesystem::path HubInstanceConfigPath; // Path to Lua config file - std::string HydrationTargetSpecification; // hydration/dehydration target specification - std::filesystem::path HydrationTargetConfigPath; 
// path to JSON config file (mutually exclusive with HydrationTargetSpecification) + std::string UpstreamNotificationEndpoint; + std::string InstanceId; // For use in notifications + std::string ConsulEndpoint; // If set, enables Consul service registration + std::string ConsulTokenEnv; // Environment variable name to read a Consul token from; defaults to CONSUL_HTTP_TOKEN if empty + uint32_t ConsulHealthIntervalSeconds = 10; // Interval in seconds between Consul health checks + uint32_t ConsulDeregisterAfterSeconds = 30; // Seconds before Consul deregisters an unhealthy service + bool ConsulRegisterHub = true; // Whether to register the hub parent service with Consul (instance registration unaffected) + uint16_t HubBasePortNumber = 21000; + int HubInstanceLimit = 1000; + bool HubUseJobObject = true; + std::string HubInstanceHttpClass = "asio"; + std::string HubInstanceMalloc; + std::string HubInstanceTrace; + std::string HubInstanceTraceHost; + std::string HubInstanceTraceFile; + uint32_t HubInstanceHttpThreadCount = 0; // Automatic + uint32_t HubInstanceProvisionThreadCount = 0; // Synchronous provisioning + uint32_t HubHydrationThreadCount = 0; // Synchronous hydration/dehydration + int HubInstanceCoreLimit = 0; // Automatic + std::filesystem::path HubInstanceConfigPath; // Path to Lua config file + std::string HydrationTargetSpecification; // hydration/dehydration target specification + std::filesystem::path HydrationTargetConfigPath; // path to JSON config file (mutually exclusive with HydrationTargetSpecification) ZenHubWatchdogConfig WatchdogConfig; uint64_t HubProvisionDiskLimitBytes = 0; uint32_t HubProvisionDiskLimitPercent = 0; @@ -121,7 +130,10 @@ private: std::filesystem::path m_ContentRoot; bool m_DebugOptionForcedCrash = false; - std::unique_ptr<Hub> m_Hub; + std::unique_ptr<HttpProxyHandler> m_Proxy; + std::unique_ptr<WorkerThreadPool> m_ProvisionWorkerPool; + std::unique_ptr<WorkerThreadPool> m_HydrationWorkerPool; + std::unique_ptr<Hub> m_Hub; 
std::unique_ptr<HttpHubService> m_HubService; std::unique_ptr<HttpApiService> m_ApiService; diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index 00b7a67d7..d74d3956c 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -14,13 +14,13 @@ #include <zencore/memory/memorytrace.h> #include <zencore/memory/newdelete.h> #include <zencore/scopeguard.h> -#include <zencore/sentryintegration.h> #include <zencore/session.h> #include <zencore/string.h> #include <zencore/thread.h> #include <zencore/trace.h> #include <zentelemetry/otlptrace.h> #include <zenutil/config/commandlineoptions.h> +#include <zenutil/invocationhistory.h> #include <zenutil/service.h> #include "diag/logging.h" @@ -169,7 +169,12 @@ AppMain(int argc, char* argv[]) if (IsDir(ServerOptions.DataDir)) { ZEN_CONSOLE_INFO("Deleting files from '{}' ({})", ServerOptions.DataDir, DeleteReason); - DeleteDirectories(ServerOptions.DataDir); + std::error_code Ec; + DeleteDirectories(ServerOptions.DataDir, Ec); + if (Ec) + { + ZEN_WARN("could not fully clean '{}': {} (continuing anyway)", ServerOptions.DataDir, Ec.message()); + } } } @@ -250,7 +255,7 @@ test_main(int argc, char** argv) zen::MaximizeOpenFileCount(); zen::testing::TestRunner Runner; - Runner.ApplyCommandLine(argc, argv); + Runner.ApplyCommandLine(argc, argv, "server.*"); return Runner.Run(); } #endif @@ -260,6 +265,41 @@ main(int argc, char* argv[]) { zen::InstallCrashHandler(); + { + std::string_view ModeStr = "store"; + if (argc >= 2 && argv[1] != nullptr) + { + std::string_view A1 = argv[1]; + if (!A1.empty() && A1[0] != '-') + { + if (A1 == "hub") + { + ModeStr = "hub"; + } + else if (A1 == "store" || A1 == "storage") + { + ModeStr = "store"; + } + else if (A1 == "compute") + { + ModeStr = "compute"; + } + else if (A1 == "proxy") + { + ModeStr = "proxy"; + } + else if (A1 == "test") + { + ModeStr = "test"; + } + } + } + if (ModeStr != "test") + { + zen::LogInvocation("zenserver", ModeStr, argc, argv); + } + } + #if 
ZEN_PLATFORM_WINDOWS setlocale(LC_ALL, "en_us.UTF8"); #endif // ZEN_PLATFORM_WINDOWS diff --git a/src/zenserver/proxy/httptrafficinspector.cpp b/src/zenserver/proxy/httptrafficinspector.cpp index 74ecbfd48..913bd2c28 100644 --- a/src/zenserver/proxy/httptrafficinspector.cpp +++ b/src/zenserver/proxy/httptrafficinspector.cpp @@ -10,29 +10,33 @@ namespace zen { // clang-format off -http_parser_settings HttpTrafficInspector::s_RequestSettings{ - .on_message_begin = [](http_parser*) { return 0; }, - .on_url = [](http_parser* p, const char* Data, size_t Len) { return GetThis(p)->OnUrl(Data, Len); }, - .on_status = [](http_parser*, const char*, size_t) { return 0; }, - .on_header_field = [](http_parser* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderField(Data, Len); }, - .on_header_value = [](http_parser* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderValue(Data, Len); }, - .on_headers_complete = [](http_parser* p) { return GetThis(p)->OnHeadersComplete(); }, - .on_body = [](http_parser*, const char*, size_t) { return 0; }, - .on_message_complete = [](http_parser* p) { return GetThis(p)->OnMessageComplete(); }, - .on_chunk_header{}, - .on_chunk_complete{}}; - -http_parser_settings HttpTrafficInspector::s_ResponseSettings{ - .on_message_begin = [](http_parser*) { return 0; }, - .on_url = [](http_parser*, const char*, size_t) { return 0; }, - .on_status = [](http_parser*, const char*, size_t) { return 0; }, - .on_header_field = [](http_parser* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderField(Data, Len); }, - .on_header_value = [](http_parser* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderValue(Data, Len); }, - .on_headers_complete = [](http_parser* p) { return GetThis(p)->OnHeadersComplete(); }, - .on_body = [](http_parser*, const char*, size_t) { return 0; }, - .on_message_complete = [](http_parser* p) { return GetThis(p)->OnMessageComplete(); }, - .on_chunk_header{}, - .on_chunk_complete{}}; 
+llhttp_settings_t HttpTrafficInspector::s_RequestSettings = []() { + llhttp_settings_t S; + llhttp_settings_init(&S); + S.on_message_begin = [](llhttp_t*) { return 0; }; + S.on_url = [](llhttp_t* p, const char* Data, size_t Len) { return GetThis(p)->OnUrl(Data, Len); }; + S.on_status = [](llhttp_t*, const char*, size_t) { return 0; }; + S.on_header_field = [](llhttp_t* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderField(Data, Len); }; + S.on_header_value = [](llhttp_t* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderValue(Data, Len); }; + S.on_headers_complete = [](llhttp_t* p) { return GetThis(p)->OnHeadersComplete(); }; + S.on_body = [](llhttp_t*, const char*, size_t) { return 0; }; + S.on_message_complete = [](llhttp_t* p) { return GetThis(p)->OnMessageComplete(); }; + return S; +}(); + +llhttp_settings_t HttpTrafficInspector::s_ResponseSettings = []() { + llhttp_settings_t S; + llhttp_settings_init(&S); + S.on_message_begin = [](llhttp_t*) { return 0; }; + S.on_url = [](llhttp_t*, const char*, size_t) { return 0; }; + S.on_status = [](llhttp_t*, const char*, size_t) { return 0; }; + S.on_header_field = [](llhttp_t* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderField(Data, Len); }; + S.on_header_value = [](llhttp_t* p, const char* Data, size_t Len) { return GetThis(p)->OnHeaderValue(Data, Len); }; + S.on_headers_complete = [](llhttp_t* p) { return GetThis(p)->OnHeadersComplete(); }; + S.on_body = [](llhttp_t*, const char*, size_t) { return 0; }; + S.on_message_complete = [](llhttp_t* p) { return GetThis(p)->OnMessageComplete(); }; + return S; +}(); // clang-format on HttpTrafficInspector::HttpTrafficInspector(Direction Dir, std::string_view SessionLabel) @@ -40,7 +44,8 @@ HttpTrafficInspector::HttpTrafficInspector(Direction Dir, std::string_view Sessi , m_Direction(Dir) , m_SessionLabel(SessionLabel) { - http_parser_init(&m_Parser, Dir == Direction::Request ? 
HTTP_REQUEST : HTTP_RESPONSE); + llhttp_settings_t* Settings = (Dir == Direction::Request) ? &s_RequestSettings : &s_ResponseSettings; + llhttp_init(&m_Parser, Dir == Direction::Request ? HTTP_REQUEST : HTTP_RESPONSE, Settings); m_Parser.data = this; } @@ -52,11 +57,9 @@ HttpTrafficInspector::Inspect(const char* Data, size_t Length) return; } - http_parser_settings* Settings = (m_Direction == Direction::Request) ? &s_RequestSettings : &s_ResponseSettings; + llhttp_errno_t Err = llhttp_execute(&m_Parser, Data, Length); - size_t Parsed = http_parser_execute(&m_Parser, Settings, Data, Length); - - if (m_Parser.upgrade) + if (Err == HPE_PAUSED_UPGRADE) { if (m_Direction == Direction::Request) { @@ -72,15 +75,9 @@ HttpTrafficInspector::Inspect(const char* Data, size_t Length) return; } - http_errno Error = HTTP_PARSER_ERRNO(&m_Parser); - if (Error != HPE_OK) - { - ZEN_DEBUG("[{}] non-HTTP traffic detected ({}), disabling inspection", m_SessionLabel, http_errno_name(Error)); - m_Disabled = true; - } - else if (Parsed != Length) + if (Err != HPE_OK) { - ZEN_DEBUG("[{}] parser consumed {}/{} bytes, disabling inspection", m_SessionLabel, Parsed, Length); + ZEN_DEBUG("[{}] non-HTTP traffic detected ({}), disabling inspection", m_SessionLabel, llhttp_errno_name(Err)); m_Disabled = true; } } @@ -127,11 +124,11 @@ HttpTrafficInspector::OnHeadersComplete() { if (m_Direction == Direction::Request) { - m_Method = http_method_str(static_cast<http_method>(m_Parser.method)); + m_Method = llhttp_method_name(static_cast<llhttp_method_t>(llhttp_get_method(&m_Parser))); } else { - m_StatusCode = m_Parser.status_code; + m_StatusCode = static_cast<uint16_t>(llhttp_get_status_code(&m_Parser)); } return 0; } diff --git a/src/zenserver/proxy/httptrafficinspector.h b/src/zenserver/proxy/httptrafficinspector.h index f4af0e77e..8192632ba 100644 --- a/src/zenserver/proxy/httptrafficinspector.h +++ b/src/zenserver/proxy/httptrafficinspector.h @@ -6,7 +6,7 @@ #include <zencore/uid.h> 
ZEN_THIRD_PARTY_INCLUDES_START -#include <http_parser.h> +#include <llhttp.h> ZEN_THIRD_PARTY_INCLUDES_END #include <atomic> @@ -45,15 +45,15 @@ private: void ResetMessageState(); - static HttpTrafficInspector* GetThis(http_parser* Parser) { return static_cast<HttpTrafficInspector*>(Parser->data); } + static HttpTrafficInspector* GetThis(llhttp_t* Parser) { return static_cast<HttpTrafficInspector*>(Parser->data); } - static http_parser_settings s_RequestSettings; - static http_parser_settings s_ResponseSettings; + static llhttp_settings_t s_RequestSettings; + static llhttp_settings_t s_ResponseSettings; LoggerRef Log() { return m_Log; } LoggerRef m_Log; - http_parser m_Parser; + llhttp_t m_Parser; Direction m_Direction; std::string m_SessionLabel; bool m_Disabled = false; diff --git a/src/zenserver/proxy/zenproxyserver.cpp b/src/zenserver/proxy/zenproxyserver.cpp index 7e59a7b7e..ffa9a4295 100644 --- a/src/zenserver/proxy/zenproxyserver.cpp +++ b/src/zenserver/proxy/zenproxyserver.cpp @@ -257,7 +257,7 @@ ZenProxyServerConfigurator::ValidateOptions() for (const std::string& Raw : m_RawProxyMappings) { // The mode keyword "proxy" from argv[1] gets captured as a positional - // argument — skip it. + // argument - skip it. if (Raw == "proxy") { continue; @@ -304,7 +304,7 @@ ZenProxyServer::Initialize(const ZenProxyServerConfig& ServerConfig, ZenServerSt // worker threads don't exit prematurely between async operations. m_ProxyIoWorkGuard.emplace(m_ProxyIoContext.get_executor()); - // Start proxy I/O worker threads. Use a modest thread count — proxy work is + // Start proxy I/O worker threads. Use a modest thread count - proxy work is // I/O-bound so we don't need a thread per core, but having more than one // avoids head-of-line blocking when many connections are active. 
unsigned int ThreadCount = std::max(GetHardwareConcurrency() / 4, 4u); @@ -385,6 +385,8 @@ ZenProxyServer::Run() OnReady(); + StartSelfSession("zenproxy"); + m_Http->Run(IsInteractiveMode); SetNewState(kShuttingDown); @@ -422,15 +424,16 @@ ZenProxyServer::Cleanup() m_IoRunner.join(); } - m_ProxyStatsService.reset(); - m_FrontendService.reset(); - m_ApiService.reset(); - - ShutdownServices(); if (m_Http) { m_Http->Close(); } + + ShutdownServices(); + + m_ProxyStatsService.reset(); + m_FrontendService.reset(); + m_ApiService.reset(); } catch (const std::exception& Ex) { diff --git a/src/zenserver/sessions/httpsessions.cpp b/src/zenserver/sessions/httpsessions.cpp index fdf2e1f21..2276cb81a 100644 --- a/src/zenserver/sessions/httpsessions.cpp +++ b/src/zenserver/sessions/httpsessions.cpp @@ -377,7 +377,7 @@ HttpSessionsService::SessionLogRequest(HttpRouterRequest& Req) if (ServerRequest.RequestContentType() == HttpContentType::kText) { - // Raw text — split by newlines, one entry per line + // Raw text - split by newlines, one entry per line IoBuffer Payload = ServerRequest.ReadPayload(); std::string_view Text(reinterpret_cast<const char*>(Payload.GetData()), Payload.GetSize()); const DateTime Now = DateTime::Now(); @@ -417,13 +417,26 @@ HttpSessionsService::SessionLogRequest(HttpRouterRequest& Req) const DateTime Now = DateTime::Now(); auto AppendFromObject = [&](CbObjectView Obj) { - std::string Level(Obj["level"sv].AsString()); + CbFieldView LevelField = Obj["level"sv]; + std::string_view Level; + if (LevelField.IsString()) + { + Level = LevelField.AsString(); + } + else if (LevelField.IsInteger()) + { + int32_t LevelInt = LevelField.AsInt32(); + if (LevelInt >= 0 && LevelInt < logging::LogLevelCount) + { + Level = logging::ToString(static_cast<logging::LogLevel>(LevelInt)); + } + } std::string Message(Obj["message"sv].AsString()); CbObjectView DataView = Obj["data"sv].AsObjectView(); Session->AppendLog(SessionsService::LogEntry{ .Timestamp = Now, - .Level = 
std::move(Level), + .Level = std::string(Level), .Message = std::move(Message), .Data = CbObject::Clone(DataView), }); @@ -512,8 +525,9 @@ HttpSessionsService::SessionLogRequest(HttpRouterRequest& Req) // void -HttpSessionsService::OnWebSocketOpen(Ref<WebSocketConnection> Connection) +HttpSessionsService::OnWebSocketOpen(Ref<WebSocketConnection> Connection, std::string_view RelativeUri) { + ZEN_UNUSED(RelativeUri); ZEN_INFO("Sessions WebSocket client connected"); m_WsConnectionsLock.WithExclusiveLock([&] { m_WsConnections.push_back(std::move(Connection)); }); } diff --git a/src/zenserver/sessions/httpsessions.h b/src/zenserver/sessions/httpsessions.h index 86a23f835..6ebe61c8d 100644 --- a/src/zenserver/sessions/httpsessions.h +++ b/src/zenserver/sessions/httpsessions.h @@ -37,7 +37,7 @@ public: void SetSelfSessionId(const Oid& Id) { m_SelfSessionId = Id; } // IWebSocketHandler - void OnWebSocketOpen(Ref<WebSocketConnection> Connection) override; + void OnWebSocketOpen(Ref<WebSocketConnection> Connection, std::string_view RelativeUri) override; void OnWebSocketMessage(WebSocketConnection& Conn, const WebSocketMessage& Msg) override; void OnWebSocketClose(WebSocketConnection& Conn, uint16_t Code, std::string_view Reason) override; diff --git a/src/zenserver/sessions/inprocsessionlogsink.cpp b/src/zenserver/sessions/inprocsessionlogsink.cpp index 9982859b6..04c5f7312 100644 --- a/src/zenserver/sessions/inprocsessionlogsink.cpp +++ b/src/zenserver/sessions/inprocsessionlogsink.cpp @@ -29,7 +29,7 @@ InProcSessionLogSink::Log(const logging::LogMessage& Msg) SessionsService::LogEntry Entry{ .Timestamp = TimePointToDateTime(Msg.GetTime()), - .Level = std::string(logging::ToStringView(Msg.GetLevel())), + .Level = std::string(logging::ToString(Msg.GetLevel())), .Message = std::string(Msg.GetPayload()), }; diff --git a/src/zenserver/sessions/sessions.cpp b/src/zenserver/sessions/sessions.cpp index 1212ba5d8..9d4e3120c 100644 --- a/src/zenserver/sessions/sessions.cpp +++ 
b/src/zenserver/sessions/sessions.cpp @@ -129,7 +129,7 @@ SessionsService::~SessionsService() = default; bool SessionsService::RegisterSession(const Oid& SessionId, std::string AppName, std::string Mode, const Oid& JobId, CbObjectView Metadata) { - // Log outside the lock scope — InProcSessionLogSink calls back into + // Log outside the lock scope - InProcSessionLogSink calls back into // GetSession() which acquires m_Lock shared, so logging while holding // m_Lock exclusively would deadlock. { diff --git a/src/zenserver/sessions/sessions.h b/src/zenserver/sessions/sessions.h index 8f07bfc31..a84ca6506 100644 --- a/src/zenserver/sessions/sessions.h +++ b/src/zenserver/sessions/sessions.h @@ -8,10 +8,9 @@ #include <zencore/uid.h> ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/deque.h> #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END - -#include <deque> #include <optional> #include <string> #include <vector> @@ -83,11 +82,11 @@ public: CursorResult GetLogEntriesAfter(uint64_t AfterCursor) const; private: - SessionInfo m_Info; - Ref<SessionLog> m_Log; - mutable RwLock m_LogLock; - std::deque<LogEntry> m_LogEntries; - uint64_t m_TotalAppended = 0; // monotonically increasing counter + SessionInfo m_Info; + Ref<SessionLog> m_Log; + mutable RwLock m_LogLock; + eastl::deque<LogEntry> m_LogEntries; + uint64_t m_TotalAppended = 0; // monotonically increasing counter static constexpr uint32_t MaxLogEntries = 10000; }; diff --git a/src/zenserver/storage/admin/admin.cpp b/src/zenserver/storage/admin/admin.cpp index f1c2daea4..34d9e570e 100644 --- a/src/zenserver/storage/admin/admin.cpp +++ b/src/zenserver/storage/admin/admin.cpp @@ -720,7 +720,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, "logs", [this](HttpRouterRequest& Req) { CbObjectWriter Obj; - auto LogLevel = logging::ToStringView(logging::GetLogLevel()); + auto LogLevel = logging::ToString(logging::GetLogLevel()); Obj.AddString("loglevel", std::string_view(LogLevel.data(), LogLevel.size())); 
Obj.AddString("Logfile", PathToUtf8(m_LogPaths.AbsLogPath)); Obj.BeginObject("cache"); @@ -772,7 +772,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, if (std::string Param(Params.GetValue("loglevel")); Param.empty() == false) { logging::LogLevel NewLevel = logging::ParseLogLevelString(Param); - std::string_view LogLevel = logging::ToStringView(NewLevel); + std::string_view LogLevel = logging::ToString(NewLevel); if (LogLevel != Param) { return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, diff --git a/src/zenserver/storage/buildstore/httpbuildstore.cpp b/src/zenserver/storage/buildstore/httpbuildstore.cpp index bbbb0c37b..f935e2c6b 100644 --- a/src/zenserver/storage/buildstore/httpbuildstore.cpp +++ b/src/zenserver/storage/buildstore/httpbuildstore.cpp @@ -162,96 +162,81 @@ HttpBuildStoreService::GetBlobRequest(HttpRouterRequest& Req) fmt::format("Invalid blob hash '{}'", Hash)); } - std::vector<std::pair<uint64_t, uint64_t>> OffsetAndLengthPairs; + m_BuildStoreStats.BlobReadCount++; + IoBuffer Blob = m_BuildStore.GetBlob(BlobHash); + if (!Blob) + { + return ServerRequest.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, fmt::format("Blob {} not found", Hash)); + } + m_BuildStoreStats.BlobHitCount++; + if (ServerRequest.RequestVerb() == HttpVerb::kPost) { + if (ServerRequest.AcceptContentType() != HttpContentType::kCbPackage) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Accept type '{}' is not supported for blob {}, expected '{}'", + ToString(ServerRequest.AcceptContentType()), + Hash, + ToString(HttpContentType::kCbPackage))); + } + CbObject RangePayload = ServerRequest.ReadPayloadObject(); - if (RangePayload) + if (!RangePayload) { - CbArrayView RangesArray = RangePayload["ranges"sv].AsArrayView(); - OffsetAndLengthPairs.reserve(RangesArray.Num()); - for (CbFieldView FieldView : RangesArray) - { - CbObjectView 
RangeView = FieldView.AsObjectView(); - uint64_t RangeOffset = RangeView["offset"sv].AsUInt64(); - uint64_t RangeLength = RangeView["length"sv].AsUInt64(); - OffsetAndLengthPairs.push_back(std::make_pair(RangeOffset, RangeLength)); - } - if (OffsetAndLengthPairs.size() > MaxRangeCountPerRequestSupported) - { - return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, - HttpContentType::kText, - fmt::format("Number of ranges ({}) for blob request exceeds maximum range count {}", - OffsetAndLengthPairs.size(), - MaxRangeCountPerRequestSupported)); - } + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Missing payload for range request on blob {}", BlobHash)); } - if (OffsetAndLengthPairs.empty()) + + CbArrayView RangesArray = RangePayload["ranges"sv].AsArrayView(); + const uint64_t RangeCount = RangesArray.Num(); + if (RangeCount == 0) { m_BuildStoreStats.BadRequestCount++; return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, - "Fetching blob without ranges must be done with the GET verb"); + "POST request must include a non-empty 'ranges' array"); } - } - else - { - HttpRanges Ranges; - bool HasRange = ServerRequest.TryGetRanges(Ranges); - if (HasRange) + if (RangeCount > MaxRangeCountPerRequestSupported) { - if (Ranges.size() > 1) - { - // Only a single http range is supported, we have limited support for http multirange responses - m_BuildStoreStats.BadRequestCount++; - return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, - HttpContentType::kText, - fmt::format("Multiple ranges in blob request is only supported for {} accept type", - ToString(HttpContentType::kCbPackage))); - } - const HttpRange& FirstRange = Ranges.front(); - OffsetAndLengthPairs.push_back(std::make_pair<uint64_t, uint64_t>(FirstRange.Start, FirstRange.End - FirstRange.Start + 1)); + m_BuildStoreStats.BadRequestCount++; + return 
ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Range count {} exceeds maximum of {}", RangeCount, MaxRangeCountPerRequestSupported)); } - } - - m_BuildStoreStats.BlobReadCount++; - IoBuffer Blob = m_BuildStore.GetBlob(BlobHash); - if (!Blob) - { - return ServerRequest.WriteResponse(HttpResponseCode::NotFound, - HttpContentType::kText, - fmt::format("Blob with hash '{}' could not be found", Hash)); - } - m_BuildStoreStats.BlobHitCount++; - if (OffsetAndLengthPairs.empty()) - { - return ServerRequest.WriteResponse(HttpResponseCode::OK, Blob.GetContentType(), Blob); - } + const uint64_t BlobSize = Blob.GetSize(); + std::vector<IoBuffer> RangeBuffers; + RangeBuffers.reserve(RangeCount); - if (ServerRequest.AcceptContentType() == HttpContentType::kCbPackage) - { - const uint64_t BlobSize = Blob.GetSize(); + CbPackage ResponsePackage; + CbObjectWriter Writer; - CbPackage ResponsePackage; - std::vector<IoBuffer> RangeBuffers; - CbObjectWriter Writer; Writer.BeginArray("ranges"sv); - for (const std::pair<uint64_t, uint64_t>& Range : OffsetAndLengthPairs) + for (CbFieldView FieldView : RangesArray) { - const uint64_t MaxBlobSize = Range.first < BlobSize ? BlobSize - Range.first : 0; - const uint64_t RangeSize = Min(Range.second, MaxBlobSize); + CbObjectView RangeView = FieldView.AsObjectView(); + uint64_t RangeOffset = RangeView["offset"sv].AsUInt64(); + uint64_t RangeLength = RangeView["length"sv].AsUInt64(); + + const uint64_t MaxBlobSize = RangeOffset < BlobSize ? 
BlobSize - RangeOffset : 0; + const uint64_t RangeSize = Min(RangeLength, MaxBlobSize); Writer.BeginObject(); { - if (Range.first + RangeSize <= BlobSize) + if (RangeOffset + RangeSize <= BlobSize) { - RangeBuffers.push_back(IoBuffer(Blob, Range.first, RangeSize)); - Writer.AddInteger("offset"sv, Range.first); + RangeBuffers.push_back(IoBuffer(Blob, RangeOffset, RangeSize)); + Writer.AddInteger("offset"sv, RangeOffset); Writer.AddInteger("length"sv, RangeSize); } else { - Writer.AddInteger("offset"sv, Range.first); + Writer.AddInteger("offset"sv, RangeOffset); Writer.AddInteger("length"sv, 0); } } @@ -259,7 +244,7 @@ HttpBuildStoreService::GetBlobRequest(HttpRouterRequest& Req) } Writer.EndArray(); - CompositeBuffer Ranges(RangeBuffers); + CompositeBuffer Ranges(std::move(RangeBuffers)); CbAttachment PayloadAttachment(std::move(Ranges), BlobHash); Writer.AddAttachment("payload", PayloadAttachment); @@ -269,32 +254,21 @@ HttpBuildStoreService::GetBlobRequest(HttpRouterRequest& Req) ResponsePackage.SetObject(HeaderObject); CompositeBuffer RpcResponseBuffer = FormatPackageMessageBuffer(ResponsePackage); - uint64_t ResponseSize = RpcResponseBuffer.GetSize(); - ZEN_UNUSED(ResponseSize); return ServerRequest.WriteResponse(HttpResponseCode::OK, HttpContentType::kCbPackage, RpcResponseBuffer); } else { - if (OffsetAndLengthPairs.size() != 1) + HttpRanges RequestedRangeHeader; + bool HasRange = ServerRequest.TryGetRanges(RequestedRangeHeader); + if (HasRange) { - // Only a single http range is supported, we have limited support for http multirange responses - m_BuildStoreStats.BadRequestCount++; - return ServerRequest.WriteResponse( - HttpResponseCode::BadRequest, - HttpContentType::kText, - fmt::format("Multiple ranges in blob request is only supported for {} accept type", ToString(HttpContentType::kCbPackage))); + // Standard HTTP GET with Range header: framework handles 206, Content-Range, and 416 on OOB. 
+ return ServerRequest.WriteResponse(HttpContentType::kBinary, Blob, RequestedRangeHeader); } - - const std::pair<uint64_t, uint64_t>& OffsetAndLength = OffsetAndLengthPairs.front(); - const uint64_t BlobSize = Blob.GetSize(); - const uint64_t MaxBlobSize = OffsetAndLength.first < BlobSize ? BlobSize - OffsetAndLength.first : 0; - const uint64_t RangeSize = Min(OffsetAndLength.second, MaxBlobSize); - if (OffsetAndLength.first + RangeSize > BlobSize) + else { - return ServerRequest.WriteResponse(HttpResponseCode::NoContent); + return ServerRequest.WriteResponse(HttpResponseCode::OK, Blob.GetContentType(), Blob); } - Blob = IoBuffer(Blob, OffsetAndLength.first, RangeSize); - return ServerRequest.WriteResponse(HttpResponseCode::OK, ZenContentType::kBinary, Blob); } } diff --git a/src/zenserver/storage/cache/httpstructuredcache.cpp b/src/zenserver/storage/cache/httpstructuredcache.cpp index 8ad48225b..4d3673e70 100644 --- a/src/zenserver/storage/cache/httpstructuredcache.cpp +++ b/src/zenserver/storage/cache/httpstructuredcache.cpp @@ -437,19 +437,22 @@ HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) std::string RecordPath = UrlDecode(Params.GetValue("path")); - uint32_t ThreadCount = GetHardwareConcurrency(); + const uint32_t HardwareConcurrency = GetHardwareConcurrency(); + const uint32_t MaxThreadCount = std::max<uint32_t>(HardwareConcurrency, 16u); + uint32_t ThreadCount = HardwareConcurrency; if (auto Param = Params.GetValue("thread_count"); Param.empty() == false) { if (auto Value = ParseInt<uint64_t>(Param)) { - ThreadCount = gsl::narrow<uint32_t>(Value.value()); + ThreadCount = gsl::narrow_cast<uint32_t>(std::min<uint64_t>(Value.value(), MaxThreadCount)); } } + ThreadCount = std::clamp<uint32_t>(ThreadCount, 1u, MaxThreadCount); ZEN_INFO("initiating cache RPC replay using {} threads, from '{}'", ThreadCount, RecordPath); std::unique_ptr<cache::IRpcRequestReplayer> Replayer(cache::MakeDiskRequestReplayer(RecordPath, false)); - 
ReplayRequestRecorder(RequestContext, *Replayer, ThreadCount < 1 ? 1 : ThreadCount); + ReplayRequestRecorder(RequestContext, *Replayer, ThreadCount); ZEN_INFO("cache RPC replay COMPLETED"); @@ -557,7 +560,7 @@ HttpStructuredCacheService::HandleCacheRequest(HttpServerRequest& Request) break; default: m_CacheStats.BadRequestCount++; - break; + return Request.WriteResponse(HttpResponseCode::MethodNotAllowed); } } @@ -707,7 +710,8 @@ HttpStructuredCacheService::HandleCacheNamespaceRequest(HttpServerRequest& Reque break; default: - break; + m_CacheStats.BadRequestCount++; + return Request.WriteResponse(HttpResponseCode::MethodNotAllowed); } } @@ -797,7 +801,8 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, break; default: - break; + m_CacheStats.BadRequestCount++; + return Request.WriteResponse(HttpResponseCode::MethodNotAllowed); } } @@ -816,7 +821,8 @@ HttpStructuredCacheService::HandleCacheRecordRequest(HttpServerRequest& Request, break; default: - break; + m_CacheStats.BadRequestCount++; + return Request.WriteResponse(HttpResponseCode::MethodNotAllowed); } } @@ -1216,8 +1222,6 @@ HttpStructuredCacheService::HandlePutCacheRecord(HttpServerRequest& Request, con } auto WriteFailureResponse = [&Request](const ZenCacheStore::PutResult& PutResult) { - ZEN_UNUSED(PutResult); - HttpResponseCode ResponseCode = HttpResponseCode::InternalServerError; switch (PutResult.Status) { @@ -1231,7 +1235,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(HttpServerRequest& Request, con if (PutResult.Details) { - Request.WriteResponse(ResponseCode, PutResult.Details); + return Request.WriteResponse(ResponseCode, PutResult.Details); } return Request.WriteResponse(ResponseCode); }; @@ -1507,7 +1511,8 @@ HttpStructuredCacheService::HandleCacheChunkRequest(HttpServerRequest& Request, HandlePutCacheChunk(Request, Ref, PolicyFromUrl); break; default: - break; + m_CacheStats.BadRequestCount++; + return 
Request.WriteResponse(HttpResponseCode::MethodNotAllowed); } } diff --git a/src/zenserver/storage/objectstore/objectstore.cpp b/src/zenserver/storage/objectstore/objectstore.cpp index d6516fa1a..1115c1cd6 100644 --- a/src/zenserver/storage/objectstore/objectstore.cpp +++ b/src/zenserver/storage/objectstore/objectstore.cpp @@ -637,11 +637,7 @@ HttpObjectStoreService::GetObject(HttpRouterRequest& Request, const std::string_ } HttpRanges Ranges; - if (Request.ServerRequest().TryGetRanges(Ranges); Ranges.size() > 1) - { - // Only a single range is supported - return Request.ServerRequest().WriteResponse(HttpResponseCode::BadRequest); - } + Request.ServerRequest().TryGetRanges(Ranges); FileContents File; { @@ -665,42 +661,49 @@ HttpObjectStoreService::GetObject(HttpRouterRequest& Request, const std::string_ if (Ranges.empty()) { - const uint64_t TotalServed = m_TotalBytesServed.fetch_add(FileBuf.Size()) + FileBuf.Size(); - + const uint64_t TotalServed = m_TotalBytesServed.fetch_add(FileBuf.GetSize()) + FileBuf.GetSize(); ZEN_LOG_DEBUG(LogObj, "GET - '{}/{}' ({}) [OK] (Served: {})", BucketName, RelativeBucketPath, - NiceBytes(FileBuf.Size()), + NiceBytes(FileBuf.GetSize()), NiceBytes(TotalServed)); - - Request.ServerRequest().WriteResponse(HttpResponseCode::OK, HttpContentType::kBinary, FileBuf); } else { - const auto Range = Ranges[0]; - const uint64_t RangeSize = 1 + (Range.End - Range.Start); - const uint64_t TotalServed = m_TotalBytesServed.fetch_add(RangeSize) + RangeSize; - - ZEN_LOG_DEBUG(LogObj, - "GET - '{}/{}' (Range: {}-{}) ({}/{}) [OK] (Served: {})", - BucketName, - RelativeBucketPath, - Range.Start, - Range.End, - NiceBytes(RangeSize), - NiceBytes(FileBuf.Size()), - NiceBytes(TotalServed)); - - MemoryView RangeView = FileBuf.GetView().Mid(Range.Start, RangeSize); - if (RangeView.GetSize() != RangeSize) + const uint64_t TotalSize = FileBuf.GetSize(); + uint64_t ServedBytes = 0; + for (const HttpRange& Range : Ranges) { - return 
Request.ServerRequest().WriteResponse(HttpResponseCode::BadRequest); + const uint64_t RangeEnd = (Range.End != ~uint64_t(0)) ? Range.End : TotalSize - 1; + if (RangeEnd < TotalSize && Range.Start <= RangeEnd) + { + ServedBytes += 1 + (RangeEnd - Range.Start); + } + } + if (ServedBytes > 0) + { + const uint64_t TotalServed = m_TotalBytesServed.fetch_add(ServedBytes) + ServedBytes; + ZEN_LOG_DEBUG(LogObj, + "GET - '{}/{}' (Ranges: {}) ({}/{}) [OK] (Served: {})", + BucketName, + RelativeBucketPath, + Ranges.size(), + NiceBytes(ServedBytes), + NiceBytes(TotalSize), + NiceBytes(TotalServed)); + } + else + { + ZEN_LOG_DEBUG(LogObj, + "GET - '{}/{}' (Ranges: {}) [416] ({})", + BucketName, + RelativeBucketPath, + Ranges.size(), + NiceBytes(TotalSize)); } - - IoBuffer RangeBuf = IoBuffer(IoBuffer::Wrap, RangeView.GetData(), RangeView.GetSize()); - Request.ServerRequest().WriteResponse(HttpResponseCode::PartialContent, HttpContentType::kBinary, RangeBuf); } + Request.ServerRequest().WriteResponse(HttpContentType::kBinary, FileBuf, Ranges); } void diff --git a/src/zenserver/storage/projectstore/httpprojectstore.cpp b/src/zenserver/storage/projectstore/httpprojectstore.cpp index afd0d8f82..9844d02f0 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.cpp +++ b/src/zenserver/storage/projectstore/httpprojectstore.cpp @@ -16,9 +16,9 @@ #include <zenhttp/httpclientauth.h> #include <zenhttp/packageformat.h> #include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/builds/buildstorageresolve.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/jupiter/jupiterhost.h> -#include <zenremotestore/operationlogoutput.h> #include <zenremotestore/projectstore/buildsremoteprojectstore.h> #include <zenremotestore/projectstore/fileremoteprojectstore.h> #include <zenremotestore/projectstore/jupiterremoteprojectstore.h> @@ -279,7 +279,7 @@ namespace { { ZEN_MEMSCOPE(GetProjectHttpTag()); - auto Log = [InLog]() { return InLog; }; + 
ZEN_SCOPED_LOG(InLog); using namespace std::literals; @@ -566,11 +566,9 @@ namespace { .AllowResume = true, .RetryCount = 2}; - std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(Log())); - try { - ResolveResult = ResolveBuildStorage(*Output, + ResolveResult = ResolveBuildStorage(Log(), ClientSettings, Host, OverrideHost, @@ -636,11 +634,6 @@ namespace { return Result; } - static uint64_t GetMaxMemoryBufferSize(size_t MaxBlockSize, bool BoostWorkerMemory) - { - return BoostWorkerMemory ? (MaxBlockSize + 16u * 1024u) : 1024u * 1024u; - } - } // namespace ////////////////////////////////////////////////////////////////////////// @@ -787,22 +780,22 @@ HttpProjectService::HttpProjectService(CidStore& Store, HttpVerb::kPost); m_Router.RegisterRoute( - "details\\$", + "details$", [this](HttpRouterRequest& Req) { HandleDetailsRequest(Req); }, HttpVerb::kGet); m_Router.RegisterRoute( - "details\\$/{project}", + "details$/{project}", [this](HttpRouterRequest& Req) { HandleProjectDetailsRequest(Req); }, HttpVerb::kGet); m_Router.RegisterRoute( - "details\\$/{project}/{log}", + "details$/{project}/{log}", [this](HttpRouterRequest& Req) { HandleOplogDetailsRequest(Req); }, HttpVerb::kGet); m_Router.RegisterRoute( - "details\\$/{project}/{log}/{chunk}", + "details$/{project}/{log}/{chunk}", [this](HttpRouterRequest& Req) { HandleOplogOpDetailsRequest(Req); }, HttpVerb::kGet); @@ -1264,7 +1257,7 @@ HttpProjectService::HandleChunkInfoRequest(HttpRouterRequest& Req) const Oid Obj = Oid::FromHexString(ChunkId); - CbObject ResponsePayload = ProjectStore::GetChunkInfo(Log(), *Project, *FoundLog, Obj); + CbObject ResponsePayload = ProjectStore::GetChunkInfo(*Project, *FoundLog, Obj); if (ResponsePayload) { m_ProjectStats.ChunkHitCount++; @@ -1353,7 +1346,7 @@ HttpProjectService::HandleChunkByIdRequest(HttpRouterRequest& Req) HttpContentType AcceptType = HttpReq.AcceptContentType(); ProjectStore::GetChunkRangeResult Result = - ProjectStore::GetChunkRange(Log(), 
*Project, *FoundLog, Obj, Offset, Size, AcceptType, /*OptionalInOutModificationTag*/ nullptr); + ProjectStore::GetChunkRange(*Project, *FoundLog, Obj, Offset, Size, AcceptType, /*OptionalInOutModificationTag*/ nullptr); switch (Result.Error) { @@ -2691,6 +2684,7 @@ HttpProjectService::HandleOplogLoadRequest(HttpRouterRequest& Req) try { CbObject ContainerObject = BuildContainer( + Log(), m_CidStore, *Project, *Oplog, @@ -2891,6 +2885,7 @@ HttpProjectService::HandleRpcRequest(HttpRouterRequest& Req) try { LoadOplog(LoadOplogContext{ + .Log = Log(), .ChunkStore = m_CidStore, .RemoteStore = *RemoteStoreResult->Store, .OptionalCache = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->Cache.get() : nullptr, @@ -3016,7 +3011,8 @@ HttpProjectService::HandleRpcRequest(HttpRouterRequest& Req) try { - SaveOplog(m_CidStore, + SaveOplog(Log(), + m_CidStore, *ActualRemoteStore, *Project, *Oplog, diff --git a/src/zenserver/storage/storageconfig.cpp b/src/zenserver/storage/storageconfig.cpp index 0dbb45164..bb4f053e4 100644 --- a/src/zenserver/storage/storageconfig.cpp +++ b/src/zenserver/storage/storageconfig.cpp @@ -57,6 +57,12 @@ ZenStorageServerConfigurator::ValidateOptions() ZEN_WARN("'--gc-v2=false' is deprecated, reverting to '--gc-v2=true'"); ServerOptions.GcConfig.UseGCV2 = true; } + if (ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent > 100) + { + throw OptionParseException(fmt::format("'--buildstore-disksizelimit-percent' ('{}') is invalid, must be between 1 and 100.", + ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent), + {}); + } } class ZenStructuredCacheBucketsConfigOption : public LuaConfig::OptionValue @@ -382,6 +388,9 @@ ZenStorageServerConfigurator::AddConfigOptions(LuaConfig::Options& LuaOptions) ////// buildsstore LuaOptions.AddOption("server.buildstore.enabled"sv, ServerOptions.BuildStoreConfig.Enabled, "buildstore-enabled"sv); LuaOptions.AddOption("server.buildstore.disksizelimit"sv, 
ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit, "buildstore-disksizelimit"); + LuaOptions.AddOption("server.buildstore.disksizelimitpercent"sv, + ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent, + "buildstore-disksizelimit-percent"); ////// cache LuaOptions.AddOption("cache.enable"sv, ServerOptions.StructuredCacheConfig.Enabled); @@ -477,7 +486,7 @@ ZenStorageServerConfigurator::AddConfigOptions(LuaConfig::Options& LuaOptions) ServerOptions.GcConfig.CompactBlockUsageThresholdPercent, "gc-compactblock-threshold"sv); LuaOptions.AddOption("gc.verbose"sv, ServerOptions.GcConfig.Verbose, "gc-verbose"sv); - LuaOptions.AddOption("gc.single-threaded"sv, ServerOptions.GcConfig.SingleThreaded, "gc-single-threaded"sv); + LuaOptions.AddOption("gc.singlethreaded"sv, ServerOptions.GcConfig.SingleThreaded, "gc-single-threaded"sv); LuaOptions.AddOption("gc.cache.attachment.store"sv, ServerOptions.GcConfig.StoreCacheAttachmentMetaData, "gc-cache-attachment-store"); LuaOptions.AddOption("gc.projectstore.attachment.store"sv, ServerOptions.GcConfig.StoreProjectAttachmentMetaData, @@ -1035,6 +1044,13 @@ ZenStorageServerCmdLineOptions::AddBuildStoreOptions(cxxopts::Options& options, "Max number of bytes before build store entries get evicted. Default set to 1099511627776 (1TB week)", cxxopts::value<uint64_t>(ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit)->default_value("1099511627776"), ""); + options.add_option("buildstore", + "", + "buildstore-disksizelimit-percent", + "Max percentage (1-100) of total drive capacity (of --data-dir drive) before build store entries get evicted. " + "0 (default) disables this limit. 
When combined with --buildstore-disksizelimit, the lower value wins.", + cxxopts::value<uint32_t>(ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent)->default_value("0"), + ""); } void diff --git a/src/zenserver/storage/storageconfig.h b/src/zenserver/storage/storageconfig.h index 18af4f096..fec8fd70b 100644 --- a/src/zenserver/storage/storageconfig.h +++ b/src/zenserver/storage/storageconfig.h @@ -135,8 +135,9 @@ struct ZenProjectStoreConfig struct ZenBuildStoreConfig { - bool Enabled = false; - uint64_t MaxDiskSpaceLimit = 1u * 1024u * 1024u * 1024u * 1024u; // 1TB + bool Enabled = false; + uint64_t MaxDiskSpaceLimit = 1u * 1024u * 1024u * 1024u * 1024u; // 1TB + uint32_t MaxDiskSpaceLimitPercent = 0; }; struct ZenWorkspacesConfig diff --git a/src/zenserver/storage/upstream/upstreamcache.cpp b/src/zenserver/storage/upstream/upstreamcache.cpp index b26c57414..a516c452c 100644 --- a/src/zenserver/storage/upstream/upstreamcache.cpp +++ b/src/zenserver/storage/upstream/upstreamcache.cpp @@ -772,7 +772,7 @@ namespace detail { UpstreamEndpointInfo m_Info; UpstreamStatus m_Status; UpstreamEndpointStats m_Stats; - RefPtr<JupiterClient> m_Client; + Ref<JupiterClient> m_Client; const bool m_AllowRedirect = false; }; @@ -1446,7 +1446,7 @@ namespace detail { // Make sure we safely bump the refcount inside a scope lock RwLock::SharedLockScope _(m_ClientLock); ZEN_ASSERT(m_Client); - Ref<ZenStructuredCacheClient> ClientRef(m_Client); + Ref<ZenStructuredCacheClient> ClientRef(m_Client.Get()); _.ReleaseNow(); return ClientRef; } @@ -1485,15 +1485,15 @@ namespace detail { LoggerRef Log() { return m_Log; } - LoggerRef m_Log; - UpstreamEndpointInfo m_Info; - UpstreamStatus m_Status; - UpstreamEndpointStats m_Stats; - std::vector<ZenEndpoint> m_Endpoints; - std::chrono::milliseconds m_ConnectTimeout; - std::chrono::milliseconds m_Timeout; - RwLock m_ClientLock; - RefPtr<ZenStructuredCacheClient> m_Client; + LoggerRef m_Log; + UpstreamEndpointInfo m_Info; + UpstreamStatus 
m_Status; + UpstreamEndpointStats m_Stats; + std::vector<ZenEndpoint> m_Endpoints; + std::chrono::milliseconds m_ConnectTimeout; + std::chrono::milliseconds m_Timeout; + RwLock m_ClientLock; + Ref<ZenStructuredCacheClient> m_Client; }; } // namespace detail diff --git a/src/zenserver/storage/zenstorageserver.cpp b/src/zenserver/storage/zenstorageserver.cpp index 6b1da5f12..44291395a 100644 --- a/src/zenserver/storage/zenstorageserver.cpp +++ b/src/zenserver/storage/zenstorageserver.cpp @@ -37,8 +37,6 @@ #include <zenutil/sessionsclient.h> #include <zenutil/workerpools.h> #include <zenutil/zenserverprocess.h> -#include "sessions/inprocsessionlogsink.h" -#include "sessions/sessions.h" #if ZEN_PLATFORM_WINDOWS # include <zencore/windows.h> @@ -165,11 +163,6 @@ ZenStorageServer::RegisterServices() m_Http->RegisterService(*m_HttpWorkspacesService); } - if (m_HttpSessionsService) - { - m_Http->RegisterService(*m_HttpSessionsService); - } - m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatsService, m_StatusService); if (m_FrontendService) @@ -229,7 +222,7 @@ ZenStorageServer::InitializeServices(const ZenStorageServerConfig& ServerOptions m_ProjectStore = new ProjectStore(*m_CidStore, m_DataRoot / "projects", m_GcManager, ProjectStore::Configuration{}); m_HttpProjectService.reset(new HttpProjectService{*m_CidStore, - m_ProjectStore, + m_ProjectStore.Get(), m_StatusService, m_StatsService, *m_AuthMgr, @@ -253,16 +246,6 @@ ZenStorageServer::InitializeServices(const ZenStorageServerConfig& ServerOptions *m_Workspaces)); } - { - m_SessionsService = std::make_unique<SessionsService>(); - m_HttpSessionsService = std::make_unique<HttpSessionsService>(m_StatusService, m_StatsService, *m_SessionsService, m_IoContext); - m_HttpSessionsService->SetSelfSessionId(GetSessionId()); - - m_InProcSessionLogSink = logging::SinkPtr(new InProcSessionLogSink(*m_SessionsService)); - m_InProcSessionLogSink->SetLevel(logging::Info); - 
GetDefaultBroadcastSink()->AddSink(m_InProcSessionLogSink); - } - if (!ServerOptions.SessionsTargetUrl.empty()) { m_SessionsClient = std::make_unique<SessionsServiceClient>(SessionsServiceClient::Options{ @@ -283,7 +266,31 @@ ZenStorageServer::InitializeServices(const ZenStorageServerConfig& ServerOptions BuildStoreConfig BuildsCfg; BuildsCfg.RootDirectory = m_DataRoot / "builds"; BuildsCfg.MaxDiskSpaceLimit = ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit; - m_BuildStore = std::make_unique<BuildStore>(std::move(BuildsCfg), m_GcManager, *m_BuildCidStore); + + if (ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent > 0) + { + DiskSpace Space; + if (DiskSpaceInfo(m_DataRoot, Space) && Space.Total > 0) + { + uint64_t PercentLimit = Space.Total * ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent / 100; + BuildsCfg.MaxDiskSpaceLimit = ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit > 0 + ? std::min(ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit, PercentLimit) + : PercentLimit; + ZEN_INFO("buildstore disk limit: {}% of {} = {} (effective limit: {})", + ServerOptions.BuildStoreConfig.MaxDiskSpaceLimitPercent, + NiceBytes(Space.Total), + NiceBytes(PercentLimit), + NiceBytes(BuildsCfg.MaxDiskSpaceLimit)); + } + else + { + ZEN_WARN("buildstore-disksizelimit-percent: failed to query disk space for {}, using absolute limit {}", + m_DataRoot.string(), + NiceBytes(BuildsCfg.MaxDiskSpaceLimit)); + } + } + + m_BuildStore = std::make_unique<BuildStore>(std::move(BuildsCfg), m_GcManager, *m_BuildCidStore); } if (ServerOptions.StructuredCacheConfig.Enabled) @@ -325,13 +332,13 @@ ZenStorageServer::InitializeServices(const ZenStorageServerConfig& ServerOptions ZEN_OTEL_SPAN("InitializeComputeService"); m_HttpComputeService = - std::make_unique<compute::HttpComputeService>(*m_CidStore, m_StatsService, ServerOptions.DataDir / "functions"); + std::make_unique<compute::HttpComputeService>(*m_CidStore, *m_CidStore, m_StatsService, ServerOptions.DataDir / "functions"); } 
#endif #if ZEN_WITH_VFS m_VfsServiceImpl = std::make_unique<VfsServiceImpl>(); - m_VfsServiceImpl->AddService(Ref<ProjectStore>(m_ProjectStore)); + m_VfsServiceImpl->AddService(Ref<ProjectStore>(m_ProjectStore.Get())); m_VfsServiceImpl->AddService(Ref<ZenCacheStore>(m_CacheStore)); m_VfsService = std::make_unique<VfsService>(m_StatusService, m_VfsServiceImpl.get()); @@ -841,11 +848,11 @@ ZenStorageServer::Run() OnReady(); - m_SessionsService->RegisterSession(GetSessionId(), "zenserver", GetServerMode(), Oid::Zero, {}); + StartSelfSession("zenserver"); if (m_SessionsClient) { - (void)m_SessionsClient->Announce(); + m_SessionsClient->Announce(); EnqueueSessionAnnounceTimer(); m_SessionLogSink = m_SessionsClient->CreateLogSink(); @@ -891,11 +898,6 @@ ZenStorageServer::Cleanup() m_Http->Close(); } - if (m_InProcSessionLogSink) - { - GetDefaultBroadcastSink()->RemoveSink(m_InProcSessionLogSink); - m_InProcSessionLogSink = {}; - } if (m_SessionLogSink) { GetDefaultBroadcastSink()->RemoveSink(m_SessionLogSink); @@ -903,15 +905,9 @@ ZenStorageServer::Cleanup() } if (m_SessionsClient) { - (void)m_SessionsClient->Remove(); m_SessionsClient.reset(); } - if (m_SessionsService) - { - m_SessionsService->RemoveSession(GetSessionId()); - } - ShutdownServices(); if (m_JobQueue) @@ -943,8 +939,6 @@ ZenStorageServer::Cleanup() m_UpstreamCache.reset(); m_CacheStore = {}; - m_HttpSessionsService.reset(); - m_SessionsService.reset(); m_HttpWorkspacesService.reset(); m_Workspaces.reset(); m_HttpProjectService.reset(); @@ -1004,7 +998,7 @@ ZenStorageServer::EnqueueSessionAnnounceTimer() m_SessionAnnounceTimer.async_wait([this](const asio::error_code& Ec) { if (!Ec && m_SessionsClient) { - (void)m_SessionsClient->Announce(); + m_SessionsClient->Announce(); EnqueueSessionAnnounceTimer(); } }); diff --git a/src/zenserver/storage/zenstorageserver.h b/src/zenserver/storage/zenstorageserver.h index e3c6248e6..9fa46ba9b 100644 --- a/src/zenserver/storage/zenstorageserver.h +++ 
b/src/zenserver/storage/zenstorageserver.h @@ -20,7 +20,6 @@ #include "frontend/frontend.h" #include "objectstore/objectstore.h" #include "projectstore/httpprojectstore.h" -#include "sessions/httpsessions.h" #include "stats/statsreporter.h" #include "upstream/upstream.h" #include "vfs/vfsservice.h" @@ -81,13 +80,11 @@ private: HttpTestingService m_TestingService; #endif - RefPtr<ProjectStore> m_ProjectStore; + Ref<ProjectStore> m_ProjectStore; std::unique_ptr<VfsServiceImpl> m_VfsServiceImpl; std::unique_ptr<HttpProjectService> m_HttpProjectService; std::unique_ptr<Workspaces> m_Workspaces; std::unique_ptr<HttpWorkspacesService> m_HttpWorkspacesService; - std::unique_ptr<SessionsService> m_SessionsService; - std::unique_ptr<HttpSessionsService> m_HttpSessionsService; std::unique_ptr<UpstreamCache> m_UpstreamCache; std::unique_ptr<HttpUpstreamService> m_UpstreamService; std::unique_ptr<HttpStructuredCacheService> m_StructuredCacheService; @@ -100,7 +97,6 @@ private: std::unique_ptr<SessionsServiceClient> m_SessionsClient; logging::SinkPtr m_SessionLogSink; - logging::SinkPtr m_InProcSessionLogSink; asio::steady_timer m_SessionAnnounceTimer{m_IoContext}; void EnqueueSessionAnnounceTimer(); diff --git a/src/zenserver/xmake.lua b/src/zenserver/xmake.lua index c2c81e7aa..e93df413f 100644 --- a/src/zenserver/xmake.lua +++ b/src/zenserver/xmake.lua @@ -32,9 +32,8 @@ target("zenserver") add_deps("protozero", "asio", "cxxopts") add_deps("sol2") - add_packages("http_parser") + add_packages("llhttp") add_packages("json11") - add_packages("zlib") add_packages("lua") add_packages("consul") add_packages("minio") @@ -269,4 +268,5 @@ target("zenserver") end copy_if_newer(path.join(installdir, "bin", nomad_bin), path.join(target:targetdir(), nomad_bin), nomad_bin) end + end) diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp index 087b40d6a..6bf22eef8 100644 --- a/src/zenserver/zenserver.cpp +++ b/src/zenserver/zenserver.cpp @@ -13,6 +13,7 @@ #include 
<zencore/iobuffer.h> #include <zencore/jobqueue.h> #include <zencore/logging.h> +#include <zencore/logging/broadcastsink.h> #include <zencore/memory/fmalloc.h> #include <zencore/scopeguard.h> #include <zencore/sentryintegration.h> @@ -28,6 +29,9 @@ #include <zenhttp/security/passwordsecurityfilter.h> #include <zentelemetry/otlptrace.h> #include <zenutil/authutils.h> +#include <zenutil/config/commandlineoptions.h> +#include <zenutil/invocationhistory.h> +#include <zenutil/logging.h> #include <zenutil/service.h> #include <zenutil/workerpools.h> #include <zenutil/zenserverprocess.h> @@ -64,6 +68,9 @@ ZEN_THIRD_PARTY_INCLUDES_END #include "config/config.h" #include "diag/logging.h" +#include "sessions/httpsessions.h" +#include "sessions/inprocsessionlogsink.h" +#include "sessions/sessions.h" #include <zencore/memory/llm.h> @@ -225,6 +232,8 @@ ZenServerBase::Initialize(const ZenServerConfig& ServerOptions, ZenServerState:: LogSettingsSummary(ServerOptions); + InitializeSessions(); + return EffectiveBasePort; } @@ -233,6 +242,11 @@ ZenServerBase::Finalize() { m_StatsService.RegisterHandler("http", *m_Http); + if (m_HttpSessionsService) + { + m_Http->RegisterService(*m_HttpSessionsService); + } + m_Http->SetDefaultRedirect("/dashboard/"); // Register health service last so if we return "OK" for health it means all services have been properly initialized @@ -243,11 +257,49 @@ ZenServerBase::Finalize() void ZenServerBase::ShutdownServices() { - m_StatsService.UnregisterHandler("http", *m_Http); + if (m_InProcSessionLogSink) + { + GetDefaultBroadcastSink()->RemoveSink(m_InProcSessionLogSink); + m_InProcSessionLogSink = {}; + } + + if (m_SessionsService) + { + m_SessionsService->RemoveSession(GetSessionId()); + } + + m_HttpSessionsService.reset(); + m_SessionsService.reset(); + + if (m_Http) + { + m_StatsService.UnregisterHandler("http", *m_Http); + } m_StatsService.Shutdown(); } void +ZenServerBase::InitializeSessions() +{ + m_SessionsService = 
std::make_unique<SessionsService>(); + m_HttpSessionsService = std::make_unique<HttpSessionsService>(m_StatusService, m_StatsService, *m_SessionsService, m_IoContext); + m_HttpSessionsService->SetSelfSessionId(GetSessionId()); + + m_InProcSessionLogSink = logging::SinkPtr(new InProcSessionLogSink(*m_SessionsService)); + m_InProcSessionLogSink->SetLevel(logging::Info); + GetDefaultBroadcastSink()->AddSink(m_InProcSessionLogSink); +} + +void +ZenServerBase::StartSelfSession(std::string_view AppName) +{ + if (m_SessionsService) + { + m_SessionsService->RegisterSession(GetSessionId(), std::string(AppName), GetServerMode(), Oid::Zero, {}); + } +} + +void ZenServerBase::GetBuildOptions(StringBuilderBase& OutOptions, char Separator) const { ZEN_MEMSCOPE(GetZenserverTag()); @@ -614,6 +666,9 @@ ZenServerMain::Run() zen::SetCurrentThreadName("main"); #endif + std::string ScrubbedCmdLine = m_ServerOptions.CommandLine; + ScrubSensitiveValues(ScrubbedCmdLine); + #if ZEN_USE_SENTRY SentryIntegration Sentry; @@ -621,16 +676,25 @@ ZenServerMain::Run() { ZEN_OTEL_SPAN("SentryInit"); - std::string SentryDatabasePath = (m_ServerOptions.DataDir / ".sentry-native").string(); - std::string SentryAttachmentPath = m_ServerOptions.LoggingConfig.AbsLogFile.string(); + std::string SentryDatabasePath = (m_ServerOptions.DataDir / ".sentry-native").string(); + + std::vector<std::filesystem::path> AttachmentPaths; + if (!m_ServerOptions.LoggingConfig.AbsLogFile.empty()) + { + AttachmentPaths.push_back(m_ServerOptions.LoggingConfig.AbsLogFile); + } + if (std::filesystem::path HistoryPath = GetInvocationHistoryPath(); !HistoryPath.empty()) + { + AttachmentPaths.push_back(std::move(HistoryPath)); + } Sentry.Initialize({.DatabasePath = SentryDatabasePath, - .AttachmentsPath = SentryAttachmentPath, + .AttachmentPaths = std::move(AttachmentPaths), .Dsn = m_ServerOptions.SentryConfig.Dsn, .Environment = m_ServerOptions.SentryConfig.Environment, .AllowPII = m_ServerOptions.SentryConfig.AllowPII, .Debug 
= m_ServerOptions.SentryConfig.Debug}, - m_ServerOptions.CommandLine); + ScrubbedCmdLine); } #endif @@ -699,7 +763,7 @@ ZenServerMain::Run() // The entry's process failed to pick up our sponsor request after // multiple attempts. Before reclaiming the entry, verify that the // PID does not still belong to a zenserver process. If it does, the - // server is alive but unresponsive – fall back to the original error + // server is alive but unresponsive - fall back to the original error // path. If the PID is gone or belongs to a different executable the // entry is genuinely stale and safe to reclaim. const int StalePid = Entry->Pid.load(); @@ -717,7 +781,7 @@ ZenServerMain::Run() } ZEN_CONSOLE_WARN( "Failed to add sponsor to process on port {} (pid {}); " - "pid belongs to '{}' – assuming stale entry and reclaiming", + "pid belongs to '{}' - assuming stale entry and reclaiming", m_ServerOptions.BasePort, StalePid, ExeEc ? "<unknown>" : PidExePath.filename().string()); @@ -762,7 +826,7 @@ ZenServerMain::Run() InitializeLogging(); - ZEN_INFO("Command line: {}", m_ServerOptions.CommandLine); + ZEN_INFO("Command line: {}", ScrubbedCmdLine); #if ZEN_USE_SENTRY Sentry.LogStartupInformation(); diff --git a/src/zenserver/zenserver.h b/src/zenserver/zenserver.h index f5286e9ee..995ff054f 100644 --- a/src/zenserver/zenserver.h +++ b/src/zenserver/zenserver.h @@ -3,6 +3,7 @@ #pragma once #include <zencore/basicfile.h> +#include <zencore/logging/sink.h> #include <zencore/system.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> @@ -27,6 +28,8 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { +class HttpSessionsService; +class SessionsService; struct FLLMTag; extern const FLLMTag& GetZenserverTag(); @@ -57,6 +60,7 @@ protected: int Initialize(const ZenServerConfig& ServerOptions, ZenServerState::ZenServerEntry* ServerEntry); void Finalize(); void ShutdownServices(); + void StartSelfSession(std::string_view AppName); void GetBuildOptions(StringBuilderBase& OutOptions, 
char Separator = ',') const; static std::vector<std::pair<std::string_view, std::string>> BuildSettingsList(const ZenServerConfig& ServerConfig); void LogSettingsSummary(const ZenServerConfig& ServerConfig); @@ -104,6 +108,11 @@ protected: HttpStatusService m_StatusService; SystemMetricsTracker m_MetricsTracker; + // Sessions (shared by all derived servers) + std::unique_ptr<SessionsService> m_SessionsService; + std::unique_ptr<HttpSessionsService> m_HttpSessionsService; + logging::SinkPtr m_InProcSessionLogSink; + // Stats reporting StatsReporter m_StatsReporter; @@ -137,6 +146,7 @@ protected: virtual void HandleStatusRequest(HttpServerRequest& Request) override; private: + void InitializeSessions(); void InitializeSecuritySettings(const ZenServerConfig& ServerOptions); }; class ZenServerMain |