From 3c89c486338890ce39ddebe5be4722a09e85701a Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 13:23:52 +0100 Subject: Fix correctness and concurrency bugs found during code review zenstore fixes: - cas.cpp: GetFileCasResults Results param passed by value instead of reference (large chunk results were silently lost) - structuredcachestore.cpp: MissCount unconditionally incremented (counted hits as misses) - cacherpc.cpp: Wrong boolean in Incomplete response array (all entries marked incomplete) - cachedisklayer.cpp: sizeof(sizeof(...)) in two validation checks computed sizeof(size_t) instead of struct size - buildstore.cpp: Wrong hash tracked in GC key list (BlobHash pushed twice instead of MetadataHash) - buildstore.cpp: Removed duplicate m_LastAccessTimeUpdateCount increment in PutBlob zenserver fixes: - httpbuildstore.cpp: Reversed subtraction in HTTP range calculation (unsigned underflow) - hubservice.cpp: Deadlock in Provision() calling Wake() while holding m_Lock (extracted WakeLocked helper) - zipfs.cpp: Data race in GetFile() lazy initialization (added RwLock with shared/exclusive paths) Co-Authored-By: Claude Opus 4.6 --- src/zenserver/hub/hubservice.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub/hubservice.cpp') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index 4d9da3a57..a00446a75 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -151,6 +151,7 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } private: + void WakeLocked(); RwLock m_Lock; std::string m_ModuleId; std::atomic m_IsProvisioned{false}; @@ -211,7 +212,7 @@ StorageServerInstance::Provision() if (m_IsHibernated) { - Wake(); + WakeLocked(); } else { @@ -294,9 +295,14 @@ StorageServerInstance::Hibernate() void StorageServerInstance::Wake() { - // Start server in-place using existing data - RwLock::ExclusiveLockScope _(m_Lock); + WakeLocked(); +} + +void +StorageServerInstance::WakeLocked() +{ + // Start server in-place using existing data if (!m_IsHibernated) { -- cgit v1.2.3 From 075bac3ca870a1297e9f62230d56e63aec13a77d Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 13:36:44 +0100 Subject: Revert "Fix correctness and concurrency bugs found during code review" This reverts commit 3c89c486338890ce39ddebe5be4722a09e85701a. --- src/zenserver/hub/hubservice.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'src/zenserver/hub/hubservice.cpp') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index a00446a75..4d9da3a57 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -151,7 +151,6 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } private: - void WakeLocked(); RwLock m_Lock; std::string m_ModuleId; std::atomic m_IsProvisioned{false}; @@ -212,7 +211,7 @@ StorageServerInstance::Provision() if (m_IsHibernated) { - WakeLocked(); + Wake(); } else { @@ -294,16 +293,11 @@ StorageServerInstance::Hibernate() void StorageServerInstance::Wake() -{ - RwLock::ExclusiveLockScope _(m_Lock); - WakeLocked(); -} - -void -StorageServerInstance::WakeLocked() { // Start server in-place using existing data + RwLock::ExclusiveLockScope _(m_Lock); + if (!m_IsHibernated) { ZEN_WARN("Attempted to wake storage server instance for module '{}' which is not hibernated", m_ModuleId); -- cgit v1.2.3 From 5c5e12d1f02bb7cc1f42750e47a2735dc933c194 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 14:56:57 +0100 Subject: Various bug fixes (#778) zencore fixes: - filesystem.cpp: ReadFile error reporting logic - compactbinaryvalue.h: CbValue::As*String error reporting logic zenhttp fixes: - httpasio BindAcceptor would `return 0;` in a function returning `std::string` (UB) - httpsys async workpool initialization race zenstore fixes: - cas.cpp: GetFileCasResults Results param passed by value instead of reference (large chunk results were silently lost) - structuredcachestore.cpp: MissCount unconditionally incremented (counted hits as misses) - cacherpc.cpp: Wrong boolean in Incomplete response array (all entries marked incomplete) - cachedisklayer.cpp: sizeof(sizeof(...)) in two validation checks computed sizeof(size_t) instead of struct size - buildstore.cpp: Wrong hash tracked in GC key list (BlobHash pushed twice instead of MetadataHash) - buildstore.cpp: Removed duplicate m_LastAccessTimeUpdateCount increment in PutBlob zenserver fixes: - httpbuildstore.cpp: Reversed subtraction in HTTP range calculation (unsigned underflow) - hubservice.cpp: Deadlock in Provision() calling Wake() while holding m_Lock (extracted WakeLocked helper) - zipfs.cpp: Data race in GetFile() lazy initialization (added RwLock with shared/exclusive paths) --- src/zenserver/hub/hubservice.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub/hubservice.cpp') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index 4d9da3a57..a00446a75 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -151,6 +151,7 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } private: + void WakeLocked(); RwLock m_Lock; std::string m_ModuleId; std::atomic m_IsProvisioned{false}; @@ -211,7 +212,7 @@ StorageServerInstance::Provision() if (m_IsHibernated) { - Wake(); + WakeLocked(); } else { @@ -294,9 +295,14 @@ StorageServerInstance::Hibernate() void StorageServerInstance::Wake() { - // Start server in-place using existing data - RwLock::ExclusiveLockScope _(m_Lock); + WakeLocked(); +} + +void +StorageServerInstance::WakeLocked() +{ + // Start server in-place using existing data if (!m_IsHibernated) { -- cgit v1.2.3 From f796ee9e650d5f73844f862ed51a6de6bb33c219 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Sat, 28 Feb 2026 15:36:50 +0100 Subject: subprocess tracking using Jobs on Windows/hub (#796) This change introduces job object support on Windows to be able to more accurately track and limit resource usage on storage instances created by the hub service. It also ensures that all child instances can be torn down reliably on exit. Also made it so hub tests no longer pop up console windows while running. --- src/zenserver/hub/hubservice.cpp | 49 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) (limited to 'src/zenserver/hub/hubservice.cpp') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index a00446a75..bf0e294c5 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -150,6 +151,10 @@ struct StorageServerInstance inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); } +#if ZEN_PLATFORM_WINDOWS + void SetJobObject(JobObject* InJobObject) { m_JobObject = InJobObject; } +#endif + private: void WakeLocked(); RwLock m_Lock; @@ -161,6 +166,9 @@ private: std::filesystem::path m_TempDir; std::filesystem::path m_HydrationPath; ResourceMetrics m_ResourceMetrics; +#if ZEN_PLATFORM_WINDOWS + JobObject* m_JobObject = nullptr; +#endif void SpawnServerProcess(); @@ -191,6 +199,9 @@ StorageServerInstance::SpawnServerProcess() m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); m_ServerInstance.SetDataDir(m_BaseDir); +#if ZEN_PLATFORM_WINDOWS + m_ServerInstance.SetJobObject(m_JobObject); +#endif const uint16_t BasePort = m_ServerInstance.SpawnServerAndWaitUntilReady(); ZEN_DEBUG("Storage server instance for module '{}' started, listening on port {}", m_ModuleId, BasePort); @@ -380,6 +391,21 @@ struct HttpHubService::Impl // flexibility, and to allow running multiple hubs on the same host if // necessary. m_RunEnvironment.SetNextPortNumber(21000); + +#if ZEN_PLATFORM_WINDOWS + if (m_UseJobObject) + { + m_JobObject.Initialize(); + if (m_JobObject.IsValid()) + { + ZEN_INFO("Job object initialized for hub service child process management"); + } + else + { + ZEN_WARN("Failed to initialize job object; child processes will not be auto-terminated on hub crash"); + } + } +#endif } void Cleanup() @@ -422,6 +448,12 @@ struct HttpHubService::Impl IsNewInstance = true; auto NewInstance = std::make_unique(m_RunEnvironment, ModuleId, m_FileHydrationPath, m_HydrationTempPath); +#if ZEN_PLATFORM_WINDOWS + if (m_JobObject.IsValid()) + { + NewInstance->SetJobObject(&m_JobObject); + } +#endif Instance = NewInstance.get(); m_Instances.emplace(std::string(ModuleId), std::move(NewInstance)); @@ -579,10 +611,15 @@ struct HttpHubService::Impl inline int GetInstanceLimit() { return m_InstanceLimit; } inline int GetMaxInstanceCount() { return m_MaxInstanceCount; } + bool m_UseJobObject = true; + private: - ZenServerEnvironment m_RunEnvironment; - std::filesystem::path m_FileHydrationPath; - std::filesystem::path m_HydrationTempPath; + ZenServerEnvironment m_RunEnvironment; + std::filesystem::path m_FileHydrationPath; + std::filesystem::path m_HydrationTempPath; +#if ZEN_PLATFORM_WINDOWS + JobObject m_JobObject; +#endif RwLock m_Lock; std::unordered_map> m_Instances; std::unordered_set m_DeprovisioningModules; @@ -817,6 +854,12 @@ HttpHubService::~HttpHubService() { } +void +HttpHubService::SetUseJobObject(bool Enable) +{ + m_Impl->m_UseJobObject = Enable; +} + const char* HttpHubService::BaseUri() const { -- cgit v1.2.3 From 0763d09a81e5a1d3df11763a7ec75e7860c9510a Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 4 Mar 2026 14:13:46 +0100 Subject: compute orchestration (#763) - Added local process runners for Linux/Wine, Mac with some sandboxing support - Horde & Nomad provisioning for development and testing - Client session queues with lifecycle management (active/draining/cancelled), automatic retry with configurable limits, and manual reschedule API - Improved web UI for orchestrator, compute, and hub dashboards with WebSocket push updates - Some security hardening - Improved scalability and `zen exec` command Still experimental - compute support is disabled by default --- src/zenserver/hub/hubservice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/zenserver/hub/hubservice.cpp') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index bf0e294c5..a757cd594 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -845,7 +845,7 @@ HttpHubService::HttpHubService(std::filesystem::path HubBaseDir, std::filesystem Obj << "currentInstanceCount" << m_Impl->GetInstanceCount(); Obj << "maxInstanceCount" << m_Impl->GetMaxInstanceCount(); Obj << "instanceLimit" << m_Impl->GetInstanceLimit(); - Req.ServerRequest().WriteResponse(HttpResponseCode::OK); + Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); }, HttpVerb::kGet); } -- cgit v1.2.3 From 1e731796187ad73b2dee44b48fcecdd487616394 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Fri, 6 Mar 2026 10:11:51 +0100 Subject: Claude config, some bug fixes (#813) * Claude config updates * Bug fixes and hardening across `zencore` and `zenhttp`, identified via static analysis. ### zencore - **`ZEN_ASSERT` macro** -- extended to accept an optional string message literal; added `ZEN_ASSERT_MSG_` helper for message formatting. Callers needing runtime fmt-style formatting should use `ZEN_ASSERT_FORMAT`. - **`MpscQueue`** -- fixed `TypeCompatibleStorage` to use a properly-sized `char Storage[sizeof(T)]` array instead of a single `char`; corrected `Data()` to cast `&Storage` rather than `this`; switched cache-line alignment to a fixed constant to avoid GCC's `-Winterference-size` warning. Enabled previously-disabled tests. - **`StringBuilderImpl`** -- initialized `m_Base`/`m_CurPos`/`m_End` to `nullptr`. Fixed `StringCompare` return type (`bool` -> `int`). Fixed `ParseInt` to reject strings with trailing non-numeric characters. Removed deprecated `` include. - **`NiceNumGeneral`** -- replaced `powl()` with integer `IntPow()` to avoid floating-point precision issues. - **`RwLock::ExclusiveLockScope`** -- added move constructor/assignment; initialized `m_Lock` to `nullptr`. - **`Latch::AddCount`** -- fixed variable type (`std::atomic_ptrdiff_t` -> `std::ptrdiff_t` for the return value of `fetch_add`). - **`thread.cpp`** -- fixed Linux `pthread_setname_np` 16-byte name truncation; added null check before dereferencing in `Event::Close()`; fixed `NamedEvent::Close()` to call `close(Fd)` outside the lock region; added null guard in `NamedMutex` destructor; `Sleep()` now returns early for non-positive durations. - **`MD5Stream`** -- was entirely stubbed out (no-op); now correctly calls `MD5Init`/`MD5Update`/`MD5Final`. Fixed `ToHexString` to use the correct string length. Fixed forward declarations. Fixed tests to compare `compare() == 0`. - **`sentryintegration.cpp`** -- guard against null `filename`/`funcname` in spdlog message handler to prevent a crash in `fmt::format`. - **`jobqueue.cpp`** -- fixed lost job ID when `IdGenerator` wraps around zero; fixed raw `Job*` in `RunningJobs` map (potential use-after-free) to `RefPtr`; fixed range-loop copies; fixed format string typo. - **`trace.cpp`** -- suppress GCC false-positive warnings in third-party `trace.h` include. ### zenhttp - **WebSocket close race** (`wsasio`, `wshttpsys`, `httpwsclient`) -- `m_CloseSent` promoted from `bool` to `std::atomic`; close check changed to `exchange(true)` to eliminate the check-then-set data race. - **`wsframecodec.cpp`** -- reject WebSocket frames with payload > 256 MB to prevent OOM from malformed/malicious frames. - **`oidc.cpp`** -- URL-encode refresh token and client ID in token requests (`FormUrlEncode`); parse `end_session_endpoint` and `device_authorization_endpoint` from OIDC discovery document. - **`httpclientcommon.cpp`** -- propagate error code from `AppendData` when flushing the cache buffer. - **`httpclient.h`** -- initialize all uninitialized members (`ErrorCode`, `UploadedBytes`, `DownloadedBytes`, `ElapsedSeconds`, `MultipartBoundary` fields). - **`httpserver.h`** -- fix `operator=` return type for `HttpRpcHandler` (missing `&`). - **`packageformat.h`** -- fix `~0u` (32-bit truncation) to `~uint64_t(0)` for a `uint64_t` field. - **`httpparser`** -- initialize `m_RequestVerb` in both declaration and `ResetState()`. - **`httpplugin.cpp`** -- initialize `m_BasePort`; fix format string missing quotes around connection name. - **`httptracer.h`** -- move `#pragma once` before includes. - **`websocket.h`** -- initialize `WebSocketMessage::Opcode`. ### zenserver - **`hubservice.cpp`** -- fix two `ZEN_ASSERT` calls that incorrectly used fmt-style format args; converted to `ZEN_ASSERT_FORMAT`. --- src/zenserver/hub/hubservice.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/zenserver/hub/hubservice.cpp') diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp index a757cd594..7b999ae20 100644 --- a/src/zenserver/hub/hubservice.cpp +++ b/src/zenserver/hub/hubservice.cpp @@ -4,6 +4,7 @@ #include "hydration.h" +#include #include #include #include @@ -195,7 +196,7 @@ StorageServerInstance::~StorageServerInstance() void StorageServerInstance::SpawnServerProcess() { - ZEN_ASSERT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); + ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); m_ServerInstance.SetDataDir(m_BaseDir); @@ -322,7 +323,7 @@ StorageServerInstance::WakeLocked() return; } - ZEN_ASSERT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); + ZEN_ASSERT_FORMAT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); try { -- cgit v1.2.3