From 4d8fae7636ad45900f22253621b9f7d51d0b646e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 7 Apr 2026 16:53:55 +0200 Subject: incremental dehydrate (#921) - Feature: Incremental CAS-based hydration/dehydration replacing the previous full-copy approach - Feature: S3 hydration backend with multipart upload/download support - Feature: Configurable thread pools for hub instance provisioning and hydration `--hub-instance-provision-threads` defaults to `max(cpu_count / 4, 2)`. Set to 0 for synchronous operation. `--hub-hydration-threads` defaults to `max(cpu_count / 4, 2)`. Set to 0 for synchronous operation. - Improvement: Hub triggers GC on instance before deprovisioning to compact storage before dehydration - Improvement: GC status now reports pending triggers as running - Improvement: S3 client debug logging gated behind verbose mode to reduce log noise at default verbosity - Improvement: Hub dashboard Resources tile now shows total memory - Improvement: `filesystemutils` moved from `zenremotestore` to `zenutil` for broader reuse - Improvement: Hub uses separate provision and hydration worker pools to avoid deadlocks - Improvement: Hibernate/wake/deprovision on non-existent or already-in-target-state modules are idempotent - Improvement: `ScopedTemporaryDirectory` with empty path now creates a temporary directory instead of asserting --- src/zenserver/hub/storageserverinstance.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/zenserver/hub/storageserverinstance.cpp') diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index 0c9354990..6185a7f19 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -154,29 +154,43 @@ StorageServerInstance::WakeLocked() void StorageServerInstance::Hydrate() { + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; + HydrationConfig Config{.ServerStateDir = m_BaseDir, .TempDir = m_TempDir, .ModuleId = m_ModuleId, .TargetSpecification = m_Config.HydrationTargetSpecification, .Options = m_Config.HydrationOptions}; + if (m_Config.OptionalWorkerPool) + { + Config.Threading.emplace( + HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); + } std::unique_ptr Hydrator = CreateHydrator(Config); - - Hydrator->Hydrate(); + m_HydrationState = Hydrator->Hydrate(); } void StorageServerInstance::Dehydrate() { + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; + HydrationConfig Config{.ServerStateDir = m_BaseDir, .TempDir = m_TempDir, .ModuleId = m_ModuleId, .TargetSpecification = m_Config.HydrationTargetSpecification, .Options = m_Config.HydrationOptions}; + if (m_Config.OptionalWorkerPool) + { + Config.Threading.emplace( + HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); + } std::unique_ptr Hydrator = CreateHydrator(Config); - - Hydrator->Dehydrate(); + Hydrator->Dehydrate(m_HydrationState); } StorageServerInstance::SharedLockedPtr::SharedLockedPtr() : m_Lock(nullptr), m_Instance(nullptr) -- cgit v1.2.3 From 289d66d7b54f0560253a2a4eb27bf697ad62fa83 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 8 Apr 2026 13:51:46 +0200 Subject: hydration data obliteration (#923) - Feature: Hub obliterate operation deletes all local and backend hydration data for a module - Improvement: Hub dashboard adds obliterate button for individual, bulk, and by-name module deletion --- src/zenserver/hub/storageserverinstance.cpp | 71 ++++++++++++++++++----------- 1 file changed, 45 insertions(+), 26 deletions(-) (limited to 'src/zenserver/hub/storageserverinstance.cpp') diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index 6185a7f19..b31a64e56 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -16,8 +16,6 @@ StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironmen , m_ModuleId(ModuleId) , m_ServerInstance(RunEnvironment, ZenServerInstance::ServerMode::kStorageServer) { - m_BaseDir = RunEnvironment.CreateChildDir(ModuleId); - m_TempDir = Config.HydrationTempPath / ModuleId; } StorageServerInstance::~StorageServerInstance() @@ -31,7 +29,7 @@ StorageServerInstance::SpawnServerProcess() m_ServerInstance.ResetDeadProcess(); m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); - m_ServerInstance.SetDataDir(m_BaseDir); + m_ServerInstance.SetDataDir(m_Config.StateDir); #if ZEN_PLATFORM_WINDOWS m_ServerInstance.SetJobObject(m_JobObject); #endif @@ -77,7 +75,7 @@ StorageServerInstance::ProvisionLocked() return; } - ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_BaseDir); + ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_Config.StateDir); try { Hydrate(); @@ -87,7 +85,7 @@ StorageServerInstance::ProvisionLocked() { ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during provisioning. Reason: {}", m_ModuleId, - m_BaseDir, + m_Config.StateDir, Ex.what()); throw; } @@ -116,6 +114,22 @@ StorageServerInstance::DeprovisionLocked() } } +void +StorageServerInstance::ObliterateLocked() +{ + if (m_ServerInstance.IsRunning()) + { + // m_ServerInstance.Shutdown() never throws. + m_ServerInstance.Shutdown(); + } + + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr Hydrator = CreateHydrator(Config); + Hydrator->Obliterate(); +} + void StorageServerInstance::HibernateLocked() { @@ -146,7 +160,10 @@ StorageServerInstance::WakeLocked() } catch (const std::exception& Ex) { - ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", m_ModuleId, m_BaseDir, Ex.what()); + ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", + m_ModuleId, + m_Config.StateDir, + Ex.what()); throw; } } @@ -154,20 +171,9 @@ StorageServerInstance::WakeLocked() void StorageServerInstance::Hydrate() { - std::atomic AbortFlag{false}; - std::atomic PauseFlag{false}; - - HydrationConfig Config{.ServerStateDir = m_BaseDir, - .TempDir = m_TempDir, - .ModuleId = m_ModuleId, - .TargetSpecification = m_Config.HydrationTargetSpecification, - .Options = m_Config.HydrationOptions}; - if (m_Config.OptionalWorkerPool) - { - Config.Threading.emplace( - HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); - } - + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); std::unique_ptr Hydrator = CreateHydrator(Config); m_HydrationState = Hydrator->Hydrate(); } @@ -175,11 +181,18 @@ StorageServerInstance::Hydrate() void StorageServerInstance::Dehydrate() { - std::atomic AbortFlag{false}; - std::atomic PauseFlag{false}; + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr Hydrator = CreateHydrator(Config); + Hydrator->Dehydrate(m_HydrationState); +} - HydrationConfig Config{.ServerStateDir = m_BaseDir, - .TempDir = m_TempDir, +HydrationConfig +StorageServerInstance::MakeHydrationConfig(std::atomic& AbortFlag, std::atomic& PauseFlag) +{ + HydrationConfig Config{.ServerStateDir = m_Config.StateDir, + .TempDir = m_Config.TempDir, .ModuleId = m_ModuleId, .TargetSpecification = m_Config.HydrationTargetSpecification, .Options = m_Config.HydrationOptions}; @@ -189,8 +202,7 @@ StorageServerInstance::Dehydrate() HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); } - std::unique_ptr Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(m_HydrationState); + return Config; } StorageServerInstance::SharedLockedPtr::SharedLockedPtr() : m_Lock(nullptr), m_Instance(nullptr) @@ -358,6 +370,13 @@ StorageServerInstance::ExclusiveLockedPtr::Deprovision() m_Instance->DeprovisionLocked(); } +void +StorageServerInstance::ExclusiveLockedPtr::Obliterate() +{ + ZEN_ASSERT(m_Instance != nullptr); + m_Instance->ObliterateLocked(); +} + void StorageServerInstance::ExclusiveLockedPtr::Hibernate() { -- cgit v1.2.3 From 115c34868116b70874cb6f449c6e3d9e27a93713 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 13 Apr 2026 10:59:54 +0200 Subject: hub instance malloc trace (#946) `--hub-instance-malloc` selects the memory allocator for child instances `--hub-instance-trace` sets trace channels for child instances `--hub-instance-tracehost` sets the trace streaming host for child instances `--hub-instance-tracefile` sets the trace output file for child instances add {moduleid} and {port} placeholder support for tracefile --- src/zenserver/hub/storageserverinstance.cpp | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/zenserver/hub/storageserverinstance.cpp') diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index b31a64e56..af2c19113 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -48,6 +48,36 @@ StorageServerInstance::SpawnServerProcess() { AdditionalOptions << " --config=\"" << MakeSafeAbsolutePath(m_Config.ConfigPath).string() << "\""; } + if (!m_Config.Malloc.empty()) + { + AdditionalOptions << " --malloc=" << m_Config.Malloc; + } + if (!m_Config.Trace.empty()) + { + AdditionalOptions << " --trace=" << m_Config.Trace; + } + if (!m_Config.TraceHost.empty()) + { + AdditionalOptions << " --tracehost=" << m_Config.TraceHost; + } + if (!m_Config.TraceFile.empty()) + { + constexpr std::string_view ModuleIdPattern = "{moduleid}"; + constexpr std::string_view PortPattern = "{port}"; + + std::string ResolvedTraceFile = m_Config.TraceFile; + for (size_t Pos = ResolvedTraceFile.find(ModuleIdPattern); Pos != std::string::npos; + Pos = ResolvedTraceFile.find(ModuleIdPattern, Pos)) + { + ResolvedTraceFile.replace(Pos, ModuleIdPattern.length(), m_ModuleId); + } + std::string PortStr = fmt::format("{}", m_Config.BasePort); + for (size_t Pos = ResolvedTraceFile.find(PortPattern); Pos != std::string::npos; Pos = ResolvedTraceFile.find(PortPattern, Pos)) + { + ResolvedTraceFile.replace(Pos, PortPattern.length(), PortStr); + } + AdditionalOptions << " --tracefile=\"" << ResolvedTraceFile << "\""; + } m_ServerInstance.SpawnServerAndWaitUntilReady(m_Config.BasePort, AdditionalOptions.ToView()); ZEN_DEBUG("Storage server instance for module '{}' started, listening on port {}", m_ModuleId, m_Config.BasePort); -- cgit v1.2.3 From 6b59d3d37dcc6320929df2f0074f9a1cb506d1fd Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 21 Apr 2026 16:26:57 +0200 Subject: improved s3 hydration (#997) - Improvement: Hub shares a single S3 client and IMDS credential provider across all modules, reducing IMDS load and surviving transient IMDS blips during bulk provisioning - Improvement: Hub validates hydration config at startup; bad `--hub-hydration-target-spec` or `--hub-hydration-target-config` now fails `zen hub` at boot instead of per-module at first hydrate - Improvement: S3 hydration multipart chunk size configurable via `settings.chunk-size` (default 32 MiB) - Improvement: S3 client extracts `` and `` from XML error bodies (previously logged as ``) - Improvement: S3 client fails fast with a "no credentials available" error when AWS credentials are missing, instead of sending an unsigned request that S3 rejects with a generic 400 - Improvement: IMDS credential provider retries transient connection failures (up to 3 attempts with backoff) - Improvement: HTTP clients with `RetryCount > 0` also retry on `CURLE_COULDNT_CONNECT` --- src/zenserver/hub/storageserverinstance.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/zenserver/hub/storageserverinstance.cpp') diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index af2c19113..97edc5223 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -11,8 +11,12 @@ namespace zen { -StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironment, const Configuration& Config, std::string_view ModuleId) -: m_Config(Config) +StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironment, + HydrationBase& Hydration, + const Configuration& Config, + std::string_view ModuleId) +: m_Hydration(Hydration) +, m_Config(Config) , m_ModuleId(ModuleId) , m_ServerInstance(RunEnvironment, ZenServerInstance::ServerMode::kStorageServer) { @@ -156,7 +160,7 @@ StorageServerInstance::ObliterateLocked() std::atomic AbortFlag{false}; std::atomic PauseFlag{false}; HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); - std::unique_ptr Hydrator = CreateHydrator(Config); + std::unique_ptr Hydrator = m_Hydration.CreateHydrator(Config); Hydrator->Obliterate(); } @@ -204,7 +208,7 @@ StorageServerInstance::Hydrate() std::atomic AbortFlag{false}; std::atomic PauseFlag{false}; HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); - std::unique_ptr Hydrator = CreateHydrator(Config); + std::unique_ptr Hydrator = m_Hydration.CreateHydrator(Config); m_HydrationState = Hydrator->Hydrate(); } @@ -214,18 +218,14 @@ StorageServerInstance::Dehydrate() std::atomic AbortFlag{false}; std::atomic PauseFlag{false}; HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); - std::unique_ptr Hydrator = CreateHydrator(Config); + std::unique_ptr Hydrator = m_Hydration.CreateHydrator(Config); Hydrator->Dehydrate(m_HydrationState); } HydrationConfig StorageServerInstance::MakeHydrationConfig(std::atomic& AbortFlag, std::atomic& PauseFlag) { - HydrationConfig Config{.ServerStateDir = m_Config.StateDir, - .TempDir = m_Config.TempDir, - .ModuleId = m_ModuleId, - .TargetSpecification = m_Config.HydrationTargetSpecification, - .Options = m_Config.HydrationOptions}; + HydrationConfig Config{.ServerStateDir = m_Config.StateDir, .TempDir = m_Config.TempDir, .ModuleId = m_ModuleId}; if (m_Config.OptionalWorkerPool) { Config.Threading.emplace( -- cgit v1.2.3 From d9f113ec0f1c18bfcef91e8420650d99e6670a43 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 22 Apr 2026 14:59:28 +0200 Subject: hub execution stats (#1011) - Improvement: Hub hydration and dehydration completion logs now include per-phase wall time, bytes transferred, bits/s throughput, number of unique worker threads used, and the storage source/target URI - Improvement: Hub storage server instance lifecycle logs now report elapsed time for spawn and shutdown - Improvement: Hub deprovisioning now logs GC completion status and elapsed time; a GC that does not complete within the 5s deadline is logged as a warning and shutdown proceeds anyway --- src/zenserver/hub/storageserverinstance.cpp | 45 +++++++++++++++++------------ 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'src/zenserver/hub/storageserverinstance.cpp') diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index 97edc5223..9d477fb10 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace zen { @@ -29,6 +31,8 @@ StorageServerInstance::~StorageServerInstance() void StorageServerInstance::SpawnServerProcess() { + Stopwatch SpawnTimer; + ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); m_ServerInstance.ResetDeadProcess(); @@ -84,11 +88,29 @@ StorageServerInstance::SpawnServerProcess() } m_ServerInstance.SpawnServerAndWaitUntilReady(m_Config.BasePort, AdditionalOptions.ToView()); - ZEN_DEBUG("Storage server instance for module '{}' started, listening on port {}", m_ModuleId, m_Config.BasePort); + ZEN_INFO("Storage server instance for module '{}' started, listening on port {}, spawn took {}", + m_ModuleId, + m_Config.BasePort, + NiceLatencyNs(SpawnTimer.GetElapsedTimeUs() * 1000)); m_ServerInstance.EnableShutdownOnDestroy(); } +void +StorageServerInstance::ShutdownServerProcess() +{ + if (!m_ServerInstance.IsRunning()) + { + return; + } + Stopwatch ShutdownTimer; + // m_ServerInstance.Shutdown() never throws. + m_ServerInstance.Shutdown(); + ZEN_INFO("Storage server instance for module '{}' shut down, took {}", + m_ModuleId, + NiceLatencyNs(ShutdownTimer.GetElapsedTimeUs() * 1000)); +} + ProcessMetrics StorageServerInstance::GetProcessMetrics() const { @@ -128,11 +150,7 @@ StorageServerInstance::ProvisionLocked() void StorageServerInstance::DeprovisionLocked() { - if (m_ServerInstance.IsRunning()) - { - // m_ServerInstance.Shutdown() never throws. - m_ServerInstance.Shutdown(); - } + ShutdownServerProcess(); // Crashed or Hibernated: process already dead; skip Shutdown. // Dehydrate preserves instance state for future re-provisioning. Failure means saved state @@ -151,11 +169,7 @@ StorageServerInstance::DeprovisionLocked() void StorageServerInstance::ObliterateLocked() { - if (m_ServerInstance.IsRunning()) - { - // m_ServerInstance.Shutdown() never throws. - m_ServerInstance.Shutdown(); - } + ShutdownServerProcess(); std::atomic AbortFlag{false}; std::atomic PauseFlag{false}; @@ -168,14 +182,7 @@ void StorageServerInstance::HibernateLocked() { // Signal server to shut down, but keep data around for later wake - - if (!m_ServerInstance.IsRunning()) - { - return; - } - - // m_ServerInstance.Shutdown() never throws. - m_ServerInstance.Shutdown(); + ShutdownServerProcess(); } void -- cgit v1.2.3