From ee26e5af2ced0987fbdf666dc6bce7c2074e925f Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Fri, 20 Feb 2026 09:05:23 +0100 Subject: GC - fix handling of attachment ranges, http access token expiration, lock file retry logic (#766) * GC - fix handling of attachment ranges * fix trace/log strings * fix HTTP access token expiration time logic * added missing lock retry in zenserver startup --- src/zenserver/hub/zenhubserver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp index 7a4ba951d..d0a0db417 100644 --- a/src/zenserver/hub/zenhubserver.cpp +++ b/src/zenserver/hub/zenhubserver.cpp @@ -105,7 +105,7 @@ ZenHubServer::Initialize(const ZenHubServerConfig& ServerConfig, ZenServerState: void ZenHubServer::Cleanup() { - ZEN_TRACE_CPU("ZenStorageServer::Cleanup"); + ZEN_TRACE_CPU("ZenHubServer::Cleanup"); ZEN_INFO(ZEN_APP_NAME " cleaning up"); try { -- cgit v1.2.3 From 3c89c486338890ce39ddebe5be4722a09e85701a Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 13:23:52 +0100 Subject: Fix correctness and concurrency bugs found during code review zenstore fixes: - cas.cpp: GetFileCasResults Results param passed by value instead of reference (large chunk results were silently lost) - structuredcachestore.cpp: MissCount unconditionally incremented (counted hits as misses) - cacherpc.cpp: Wrong boolean in Incomplete response array (all entries marked incomplete) - cachedisklayer.cpp: sizeof(sizeof(...)) in two validation checks computed sizeof(size_t) instead of struct size - buildstore.cpp: Wrong hash tracked in GC key list (BlobHash pushed twice instead of MetadataHash) - buildstore.cpp: Removed duplicate m_LastAccessTimeUpdateCount increment in PutBlob zenserver fixes: - httpbuildstore.cpp: Reversed subtraction in HTTP range calculation (unsigned underflow) - hubservice.cpp: Deadlock in Provision() calling Wake() while holding m_Lock (extracted WakeLocked helper) - zipfs.cpp: Data race in GetFile() lazy initialization (added RwLock with shared/exclusive paths) Co-Authored-By: Claude Opus 4.6 --- src/zenserver/hub/hubservice.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index 4d9da3a57..a00446a75 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -151,6 +151,7 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } private: + void WakeLocked(); RwLock m_Lock; std::string m_ModuleId; std::atomic m_IsProvisioned{false}; @@ -211,7 +212,7 @@ StorageServerInstance::Provision() if (m_IsHibernated) { - Wake(); + WakeLocked(); } else { @@ -294,9 +295,14 @@ StorageServerInstance::Hibernate() void StorageServerInstance::Wake() { - // Start server in-place using existing data - RwLock::ExclusiveLockScope _(m_Lock); + WakeLocked(); +} + +void +StorageServerInstance::WakeLocked() +{ + // Start server in-place using existing data if (!m_IsHibernated) { -- cgit v1.2.3 From 075bac3ca870a1297e9f62230d56e63aec13a77d Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 13:36:44 +0100 Subject: Revert "Fix correctness and concurrency bugs found during code review" This reverts commit 3c89c486338890ce39ddebe5be4722a09e85701a. --- src/zenserver/hub/hubservice.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index a00446a75..4d9da3a57 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -151,7 +151,6 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } private: - void WakeLocked(); RwLock m_Lock; std::string m_ModuleId; std::atomic m_IsProvisioned{false}; @@ -212,7 +211,7 @@ StorageServerInstance::Provision() if (m_IsHibernated) { - WakeLocked(); + Wake(); } else { @@ -294,16 +293,11 @@ StorageServerInstance::Hibernate() void StorageServerInstance::Wake() -{ - RwLock::ExclusiveLockScope _(m_Lock); - WakeLocked(); -} - -void -StorageServerInstance::WakeLocked() { // Start server in-place using existing data + RwLock::ExclusiveLockScope _(m_Lock); + if (!m_IsHibernated) { ZEN_WARN("Attempted to wake storage server instance for module '{}' which is not hibernated", m_ModuleId); -- cgit v1.2.3 From 5c5e12d1f02bb7cc1f42750e47a2735dc933c194 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 14:56:57 +0100 Subject: Various bug fixes (#778) zencore fixes: - filesystem.cpp: ReadFile error reporting logic - compactbinaryvalue.h: CbValue::As*String error reporting logic zenhttp fixes: - httpasio BindAcceptor would `return 0;` in a function returning `std::string` (UB) - httpsys async workpool initialization race zenstore fixes: - cas.cpp: GetFileCasResults Results param passed by value instead of reference (large chunk results were silently lost) - structuredcachestore.cpp: MissCount unconditionally incremented (counted hits as misses) - cacherpc.cpp: Wrong boolean in Incomplete response array (all entries marked incomplete) - cachedisklayer.cpp: sizeof(sizeof(...)) in two validation checks computed sizeof(size_t) instead of struct size - buildstore.cpp: Wrong hash tracked in GC key list (BlobHash pushed twice instead of MetadataHash) - buildstore.cpp: Removed duplicate m_LastAccessTimeUpdateCount increment in PutBlob zenserver fixes: - httpbuildstore.cpp: Reversed subtraction in HTTP range calculation (unsigned underflow) - hubservice.cpp: Deadlock in Provision() calling Wake() while holding m_Lock (extracted WakeLocked helper) - zipfs.cpp: Data race in GetFile() lazy initialization (added RwLock with shared/exclusive paths) --- src/zenserver/hub/hubservice.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index 4d9da3a57..a00446a75 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -151,6 +151,7 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } private: + void WakeLocked(); RwLock m_Lock; std::string m_ModuleId; std::atomic m_IsProvisioned{false}; @@ -211,7 +212,7 @@ StorageServerInstance::Provision() if (m_IsHibernated) { - Wake(); + WakeLocked(); } else { @@ -294,9 +295,14 @@ StorageServerInstance::Hibernate() void StorageServerInstance::Wake() { - // Start server in-place using existing data - RwLock::ExclusiveLockScope _(m_Lock); + WakeLocked(); +} + +void +StorageServerInstance::WakeLocked() +{ + // Start server in-place using existing data if (!m_IsHibernated) { -- cgit v1.2.3 From f796ee9e650d5f73844f862ed51a6de6bb33c219 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Sat, 28 Feb 2026 15:36:50 +0100 Subject: subprocess tracking using Jobs on Windows/hub (#796) This change introduces job object support on Windows to be able to more accurately track and limit resource usage on storage instances created by the hub service. It also ensures that all child instances can be torn down reliably on exit. Also made it so hub tests no longer pop up console windows while running. --- src/zenserver/hub/hubservice.cpp | 49 +++++++++++++++++++++++++++++++++++++--- src/zenserver/hub/hubservice.h | 7 ++++++ 2 files changed, 53 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index a00446a75..bf0e294c5 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -150,6 +151,10 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } +#if ZEN_PLATFORM_WINDOWS + void SetJobObject(JobObject* InJobObject) { m_JobObject = InJobObject; } +#endif + private: void WakeLocked(); RwLock m_Lock; @@ -161,6 +166,9 @@ private: std::filesystem::path m_TempDir; std::filesystem::path m_HydrationPath; ResourceMetrics m_ResourceMetrics; +#if ZEN_PLATFORM_WINDOWS + JobObject* m_JobObject = nullptr; +#endif void SpawnServerProcess(); @@ -191,6 +199,9 @@ StorageServerInstance::SpawnServerProcess() m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); m_ServerInstance.SetDataDir(m_BaseDir); +#if ZEN_PLATFORM_WINDOWS + m_ServerInstance.SetJobObject(m_JobObject); +#endif const uint16_t BasePort = m_ServerInstance.SpawnServerAndWaitUntilReady(); ZEN_DEBUG("Storage server instance for module '{}' started, listening on port {}", m_ModuleId, BasePort); @@ -380,6 +391,21 @@ struct HttpHubService::Impl // flexibility, and to allow running multiple hubs on the same host if // necessary. m_RunEnvironment.SetNextPortNumber(21000); + +#if ZEN_PLATFORM_WINDOWS + if (m_UseJobObject) + { + m_JobObject.Initialize(); + if (m_JobObject.IsValid()) + { + ZEN_INFO("Job object initialized for hub service child process management"); + } + else + { + ZEN_WARN("Failed to initialize job object; child processes will not be auto-terminated on hub crash"); + } + } +#endif } void Cleanup() @@ -422,6 +448,12 @@ struct HttpHubService::Impl IsNewInstance = true; auto NewInstance = std::make_unique(m_RunEnvironment, ModuleId, m_FileHydrationPath, m_HydrationTempPath); +#if ZEN_PLATFORM_WINDOWS + if (m_JobObject.IsValid()) + { + NewInstance->SetJobObject(&m_JobObject); + } +#endif Instance = NewInstance.get(); m_Instances.emplace(std::string(ModuleId), std::move(NewInstance)); @@ -579,10 +611,15 @@ struct HttpHubService::Impl inline int GetInstanceLimit() { return m_InstanceLimit; } inline int GetMaxInstanceCount() { return m_MaxInstanceCount; } + bool m_UseJobObject = true; + private: - ZenServerEnvironment m_RunEnvironment; - std::filesystem::path m_FileHydrationPath; - std::filesystem::path m_HydrationTempPath; + ZenServerEnvironment m_RunEnvironment; + std::filesystem::path m_FileHydrationPath; + std::filesystem::path m_HydrationTempPath; +#if ZEN_PLATFORM_WINDOWS + JobObject m_JobObject; +#endif RwLock m_Lock; std::unordered_map> m_Instances; std::unordered_set m_DeprovisioningModules; @@ -817,6 +854,12 @@ HttpHubService::~HttpHubService() { } +void +HttpHubService::SetUseJobObject(bool Enable) +{ + m_Impl->m_UseJobObject = Enable; +} + const char* HttpHubService::BaseUri() const { diff --git a/src/zenserver/hub/hubservice.h b/src/zenserver/hub/hubservice.h index 1a5a8c57c..ef24bba69 100644 --- a/src/zenserver/hub/hubservice.h +++ b/src/zenserver/hub/hubservice.h @@ -28,6 +28,13 @@ public: void SetNotificationEndpoint(std::string_view UpstreamNotificationEndpoint, std::string_view InstanceId); + /** Enable or disable the use of a Windows Job Object for child process management. + * When enabled, all spawned child processes are assigned to a job object with + * JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, ensuring children are terminated if the hub + * crashes or is force-killed. Must be called before Initialize(). No-op on non-Windows. + */ + void SetUseJobObject(bool Enable); + private: HttpRequestRouter m_Router; -- cgit v1.2.3 From 0763d09a81e5a1d3df11763a7ec75e7860c9510a Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 4 Mar 2026 14:13:46 +0100 Subject: compute orchestration (#763) - Added local process runners for Linux/Wine, Mac with some sandboxing support - Horde & Nomad provisioning for development and testing - Client session queues with lifecycle management (active/draining/cancelled), automatic retry with configurable limits, and manual reschedule API - Improved web UI for orchestrator, compute, and hub dashboards with WebSocket push updates - Some security hardening - Improved scalability and `zen exec` command Still experimental - compute support is disabled by default --- src/zenserver/hub/hubservice.cpp | 2 +- src/zenserver/hub/zenhubserver.cpp | 7 +++++++ src/zenserver/hub/zenhubserver.h | 6 ++++-- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index bf0e294c5..a757cd594 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -845,7 +845,7 @@ HttpHubService::HttpHubService(std::filesystem::path HubBaseDir, std::filesystem Obj << "currentInstanceCount" << m_Impl->GetInstanceCount(); Obj << "maxInstanceCount" << m_Impl->GetMaxInstanceCount(); Obj << "instanceLimit" << m_Impl->GetInstanceLimit(); - Req.ServerRequest().WriteResponse(HttpResponseCode::OK); + Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); }, HttpVerb::kGet); } diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp index d0a0db417..c63c618df 100644 --- a/src/zenserver/hub/zenhubserver.cpp +++ b/src/zenserver/hub/zenhubserver.cpp @@ -143,6 +143,8 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) ZEN_INFO("instantiating hub service"); m_HubService = std::make_unique(ServerConfig.DataDir / "hub", ServerConfig.DataDir / "servers"); m_HubService->SetNotificationEndpoint(ServerConfig.UpstreamNotificationEndpoint, ServerConfig.InstanceId); + + m_FrontendService = std::make_unique(m_ContentRoot, m_StatusService); } void @@ -159,6 +161,11 @@ ZenHubServer::RegisterServices(const ZenHubServerConfig& ServerConfig) { m_Http->RegisterService(*m_ApiService); } + + if (m_FrontendService) + { + m_Http->RegisterService(*m_FrontendService); + } } void diff --git a/src/zenserver/hub/zenhubserver.h b/src/zenserver/hub/zenhubserver.h index ac14362f0..4c56fdce5 100644 --- a/src/zenserver/hub/zenhubserver.h +++ b/src/zenserver/hub/zenhubserver.h @@ -2,6 +2,7 @@ #pragma once +#include "frontend/frontend.h" #include "zenserver.h" namespace cxxopts { @@ -81,8 +82,9 @@ private: std::filesystem::path m_ContentRoot; bool m_DebugOptionForcedCrash = false; - std::unique_ptr m_HubService; - std::unique_ptr m_ApiService; + std::unique_ptr m_HubService; + std::unique_ptr m_ApiService; + std::unique_ptr m_FrontendService; void InitializeState(const ZenHubServerConfig& ServerConfig); void InitializeServices(const ZenHubServerConfig& ServerConfig); -- cgit v1.2.3 From 1e731796187ad73b2dee44b48fcecdd487616394 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Fri, 6 Mar 2026 10:11:51 +0100 Subject: Claude config, some bug fixes (#813) * Claude config updates * Bug fixes and hardening across `zencore` and `zenhttp`, identified via static analysis. ### zencore - **`ZEN_ASSERT` macro** -- extended to accept an optional string message literal; added `ZEN_ASSERT_MSG_` helper for message formatting. Callers needing runtime fmt-style formatting should use `ZEN_ASSERT_FORMAT`. - **`MpscQueue`** -- fixed `TypeCompatibleStorage` to use a properly-sized `char Storage[sizeof(T)]` array instead of a single `char`; corrected `Data()` to cast `&Storage` rather than `this`; switched cache-line alignment to a fixed constant to avoid GCC's `-Winterference-size` warning. Enabled previously-disabled tests. - **`StringBuilderImpl`** -- initialized `m_Base`/`m_CurPos`/`m_End` to `nullptr`. Fixed `StringCompare` return type (`bool` -> `int`). Fixed `ParseInt` to reject strings with trailing non-numeric characters. Removed deprecated `` include. - **`NiceNumGeneral`** -- replaced `powl()` with integer `IntPow()` to avoid floating-point precision issues. - **`RwLock::ExclusiveLockScope`** -- added move constructor/assignment; initialized `m_Lock` to `nullptr`. - **`Latch::AddCount`** -- fixed variable type (`std::atomic_ptrdiff_t` -> `std::ptrdiff_t` for the return value of `fetch_add`). - **`thread.cpp`** -- fixed Linux `pthread_setname_np` 16-byte name truncation; added null check before dereferencing in `Event::Close()`; fixed `NamedEvent::Close()` to call `close(Fd)` outside the lock region; added null guard in `NamedMutex` destructor; `Sleep()` now returns early for non-positive durations. - **`MD5Stream`** -- was entirely stubbed out (no-op); now correctly calls `MD5Init`/`MD5Update`/`MD5Final`. Fixed `ToHexString` to use the correct string length. Fixed forward declarations. Fixed tests to compare `compare() == 0`. - **`sentryintegration.cpp`** -- guard against null `filename`/`funcname` in spdlog message handler to prevent a crash in `fmt::format`. - **`jobqueue.cpp`** -- fixed lost job ID when `IdGenerator` wraps around zero; fixed raw `Job*` in `RunningJobs` map (potential use-after-free) to `RefPtr`; fixed range-loop copies; fixed format string typo. - **`trace.cpp`** -- suppress GCC false-positive warnings in third-party `trace.h` include. ### zenhttp - **WebSocket close race** (`wsasio`, `wshttpsys`, `httpwsclient`) -- `m_CloseSent` promoted from `bool` to `std::atomic`; close check changed to `exchange(true)` to eliminate the check-then-set data race. - **`wsframecodec.cpp`** -- reject WebSocket frames with payload > 256 MB to prevent OOM from malformed/malicious frames. - **`oidc.cpp`** -- URL-encode refresh token and client ID in token requests (`FormUrlEncode`); parse `end_session_endpoint` and `device_authorization_endpoint` from OIDC discovery document. - **`httpclientcommon.cpp`** -- propagate error code from `AppendData` when flushing the cache buffer. - **`httpclient.h`** -- initialize all uninitialized members (`ErrorCode`, `UploadedBytes`, `DownloadedBytes`, `ElapsedSeconds`, `MultipartBoundary` fields). - **`httpserver.h`** -- fix `operator=` return type for `HttpRpcHandler` (missing `&`). - **`packageformat.h`** -- fix `~0u` (32-bit truncation) to `~uint64_t(0)` for a `uint64_t` field. - **`httpparser`** -- initialize `m_RequestVerb` in both declaration and `ResetState()`. - **`httpplugin.cpp`** -- initialize `m_BasePort`; fix format string missing quotes around connection name. - **`httptracer.h`** -- move `#pragma once` before includes. - **`websocket.h`** -- initialize `WebSocketMessage::Opcode`. ### zenserver - **`hubservice.cpp`** -- fix two `ZEN_ASSERT` calls that incorrectly used fmt-style format args; converted to `ZEN_ASSERT_FORMAT`. --- src/zenserver/hub/hubservice.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index a757cd594..7b999ae20 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -4,6 +4,7 @@ #include "hydration.h" +#include #include #include #include @@ -195,7 +196,7 @@ StorageServerInstance::~StorageServerInstance() void StorageServerInstance::SpawnServerProcess() { - ZEN_ASSERT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); + ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); m_ServerInstance.SetDataDir(m_BaseDir); @@ -322,7 +323,7 @@ StorageServerInstance::WakeLocked() return; } - ZEN_ASSERT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); + ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); try { -- cgit v1.2.3 From b37b34ea6ad906f54e8104526e77ba66aed997da Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 9 Mar 2026 17:43:08 +0100 Subject: Dashboard overhaul, compute integration (#814) - **Frontend dashboard overhaul**: Unified compute/main dashboards into a single shared UI. Added new pages for cache, projects, metrics, sessions, info (build/runtime config, system stats). Added live-update via WebSockets with pause control, sortable detail tables, themed styling. Refactored compute/hub/orchestrator pages into modular JS. - **HTTP server fixes and stats**: Fixed http.sys local-only fallback when default port is in use, implemented root endpoint redirect for http.sys, fixed Linux/Mac port reuse. Added /stats endpoint exposing HTTP server metrics (bytes transferred, request rates). Added WebSocket stats tracking. - **OTEL/diagnostics hardening**: Improved OTLP HTTP exporter with better error handling and resilience. Extended diagnostics services configuration. - **Session management**: Added new sessions service with HTTP endpoints for registering, updating, querying, and removing sessions. Includes session log file support. This is still WIP. - **CLI subcommand support**: Added support for commands with subcommands in the zen CLI tool, with improved command dispatch. - **Misc**: Exposed CPU usage/hostname to frontend, fixed JS compact binary float32/float64 decoding, limited projects displayed on front page to 25 sorted by last access, added vscode:// link support. Also contains some fixes from TSAN analysis. --- src/zenserver/hub/zenhubserver.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/zenserver/hub') diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp index c63c618df..c6d2dc8d4 100644 --- a/src/zenserver/hub/zenhubserver.cpp +++ b/src/zenserver/hub/zenhubserver.cpp @@ -115,6 +115,8 @@ ZenHubServer::Cleanup() m_IoRunner.join(); } + ShutdownServices(); + if (m_Http) { m_Http->Close(); -- cgit v1.2.3