From 3c89c486338890ce39ddebe5be4722a09e85701a Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 13:23:52 +0100 Subject: Fix correctness and concurrency bugs found during code review zenstore fixes: - cas.cpp: GetFileCasResults Results param passed by value instead of reference (large chunk results were silently lost) - structuredcachestore.cpp: MissCount unconditionally incremented (counted hits as misses) - cacherpc.cpp: Wrong boolean in Incomplete response array (all entries marked incomplete) - cachedisklayer.cpp: sizeof(sizeof(...)) in two validation checks computed sizeof(size_t) instead of struct size - buildstore.cpp: Wrong hash tracked in GC key list (BlobHash pushed twice instead of MetadataHash) - buildstore.cpp: Removed duplicate m_LastAccessTimeUpdateCount increment in PutBlob zenserver fixes: - httpbuildstore.cpp: Reversed subtraction in HTTP range calculation (unsigned underflow) - hubservice.cpp: Deadlock in Provision() calling Wake() while holding m_Lock (extracted WakeLocked helper) - zipfs.cpp: Data race in GetFile() lazy initialization (added RwLock with shared/exclusive paths) Co-Authored-By: Claude Opus 4.6 --- src/zenstore/cache/cachedisklayer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/zenstore/cache/cachedisklayer.cpp') diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index ead7e4f3a..b73b3e6fb 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -626,7 +626,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B return false; } - const uint64_t ExpectedEntryCount = (FileSize - sizeof(sizeof(BucketMetaHeader))) / sizeof(ManifestData); + const uint64_t ExpectedEntryCount = (FileSize - sizeof(BucketMetaHeader)) / sizeof(ManifestData); if (Header.EntryCount > ExpectedEntryCount) { ZEN_WARN( @@ -1057,7 +1057,7 @@ ZenCacheDiskLayer::CacheBucket::ReadIndexFile(RwLock::ExclusiveLockScope&, const return 0; } - const uint64_t ExpectedEntryCount = (FileSize - sizeof(sizeof(cache::impl::CacheBucketIndexHeader))) / sizeof(DiskIndexEntry); + const uint64_t ExpectedEntryCount = (FileSize - sizeof(cache::impl::CacheBucketIndexHeader)) / sizeof(DiskIndexEntry); if (Header.EntryCount > ExpectedEntryCount) { return 0; -- cgit v1.2.3 From 075bac3ca870a1297e9f62230d56e63aec13a77d Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 13:36:44 +0100 Subject: Revert "Fix correctness and concurrency bugs found during code review" This reverts commit 3c89c486338890ce39ddebe5be4722a09e85701a. --- src/zenstore/cache/cachedisklayer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/zenstore/cache/cachedisklayer.cpp') diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index b73b3e6fb..ead7e4f3a 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -626,7 +626,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B return false; } - const uint64_t ExpectedEntryCount = (FileSize - sizeof(BucketMetaHeader)) / sizeof(ManifestData); + const uint64_t ExpectedEntryCount = (FileSize - sizeof(sizeof(BucketMetaHeader))) / sizeof(ManifestData); if (Header.EntryCount > ExpectedEntryCount) { ZEN_WARN( @@ -1057,7 +1057,7 @@ ZenCacheDiskLayer::CacheBucket::ReadIndexFile(RwLock::ExclusiveLockScope&, const return 0; } - const uint64_t ExpectedEntryCount = (FileSize - sizeof(cache::impl::CacheBucketIndexHeader)) / sizeof(DiskIndexEntry); + const uint64_t ExpectedEntryCount = (FileSize - sizeof(sizeof(cache::impl::CacheBucketIndexHeader))) / sizeof(DiskIndexEntry); if (Header.EntryCount > ExpectedEntryCount) { return 0; -- cgit v1.2.3 From 5c5e12d1f02bb7cc1f42750e47a2735dc933c194 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 24 Feb 2026 14:56:57 +0100 Subject: Various bug fixes (#778) zencore fixes: - filesystem.cpp: ReadFile error reporting logic - compactbinaryvalue.h: CbValue::As*String error reporting logic zenhttp fixes: - httpasio BindAcceptor would `return 0;` in a function returning `std::string` (UB) - httpsys async workpool initialization race zenstore fixes: - cas.cpp: GetFileCasResults Results param passed by value instead of reference (large chunk results were silently lost) - structuredcachestore.cpp: MissCount unconditionally incremented (counted hits as misses) - cacherpc.cpp: Wrong boolean in Incomplete response array (all entries marked incomplete) - cachedisklayer.cpp: sizeof(sizeof(...)) in two validation checks computed sizeof(size_t) instead of struct size - buildstore.cpp: Wrong hash tracked in GC key list (BlobHash pushed twice instead of MetadataHash) - buildstore.cpp: Removed duplicate m_LastAccessTimeUpdateCount increment in PutBlob zenserver fixes: - httpbuildstore.cpp: Reversed subtraction in HTTP range calculation (unsigned underflow) - hubservice.cpp: Deadlock in Provision() calling Wake() while holding m_Lock (extracted WakeLocked helper) - zipfs.cpp: Data race in GetFile() lazy initialization (added RwLock with shared/exclusive paths) --- src/zenstore/cache/cachedisklayer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/zenstore/cache/cachedisklayer.cpp') diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index ead7e4f3a..b73b3e6fb 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -626,7 +626,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B return false; } - const uint64_t ExpectedEntryCount = (FileSize - sizeof(sizeof(BucketMetaHeader))) / sizeof(ManifestData); + const uint64_t ExpectedEntryCount = (FileSize - sizeof(BucketMetaHeader)) / sizeof(ManifestData); if (Header.EntryCount > ExpectedEntryCount) { ZEN_WARN( @@ -1057,7 +1057,7 @@ ZenCacheDiskLayer::CacheBucket::ReadIndexFile(RwLock::ExclusiveLockScope&, const return 0; } - const uint64_t ExpectedEntryCount = (FileSize - sizeof(sizeof(cache::impl::CacheBucketIndexHeader))) / sizeof(DiskIndexEntry); + const uint64_t ExpectedEntryCount = (FileSize - sizeof(cache::impl::CacheBucketIndexHeader)) / sizeof(DiskIndexEntry); if (Header.EntryCount > ExpectedEntryCount) { return 0; -- cgit v1.2.3 From 5115b419cefd41e8d5cc465c8c7ae5140cde71d4 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Fri, 6 Mar 2026 12:39:06 +0100 Subject: zenstore bug-fixes from static analysis pass (#815) **Bug fixes across zenstore, zenremotestore, and related subsystems, primarily surfaced by static analysis.** ## Cache subsystem (cachedisklayer.cpp) - Fixed tombstone scoping bug: tombstone flag and missing entry were recorded outside the block where data was removed, causing non-missing entries to be incorrectly tombstoned - Fixed use-after-overwrite: `RemoveMemCachedData`/`RemoveMetaData` were called after `Payload` was overwritten on cache put, leaking stale data - Fixed incorrect retry sleep formula (`100 - (3 - RetriesLeft) * 100` always produced the same or negative value; corrected to `(3 - RetriesLeft) * 100`) - Fixed broken `break` missing from sidecar file read loop, causing reads past valid data - Fixed missing format argument in three `ZEN_WARN`/`ZEN_ERROR` log calls (format string had `{}` placeholders with no corresponding argument, or vice versa) - Fixed elapsed timer being accumulated inside the wrong scope in `HandleRpcGetCacheRecords` - Fixed test asserting against unserialized `RecordPolicy` instead of the deserialized `Loaded` copy - Initialized `AbortFlag`/`PauseFlag` atomics at declaration (UB if read before first write) ## Build store (buildstore.cpp / buildstore.h) - Fixed wrong variable used in warning log: used loop index `ResultIndex` instead of `Index`/`MetaLocationResultIndexes[Index]`, logging wrong hash values - Fixed `sizeof(AccessTimesHeader)` used instead of `sizeof(AccessTimeRecord)` when advancing write offset, corrupting the access times file if the sizes differ - Initialized `m_LastAccessTimeUpdateCount` atomic member (was uninitialized) - Changed map iteration loops to use `const auto&` to avoid unnecessary copies ## Project store (projectstore.cpp / projectstore.h) - Fixed wrong iterator dereferenced in `IterateChunks`: used `ChunkIt->second` (from a different map lookup) instead of `MetaIt->second` - Fixed wrong assert variable: `Sizes[Index]` should be `RawSizes[Index]` - Fixed `MakeTombstone`/`IsTombstone` inconsistency: `MakeTombstone` was zeroing `OpLsn` but `IsTombstone` checks `OpLsn.Number != 0`; tombstone creation now preserves `OpLsn` - Fixed uninitialized `InvalidEntries` counter - Fixed format string mismatch in warning log - Initialized `AbortFlag`/`PauseFlag` atomics; changed map iteration to `const auto&` ## Workspaces (workspaces.cpp) - Fixed missing alias registration when a workspace share is updated: alias was deleted but never re-inserted - Fixed integer overflow in range clamping: `(RequestedOffset + RequestedSize) > Size` could wrap; corrected to `RequestedSize > Size - RequestedOffset` - Changed map iteration loops to `const auto&` ## CAS subsystem (cas.cpp, caslog.cpp, compactcas.cpp, filecas.cpp) - Fixed `IterateChunks` passing original `Payload` buffer instead of the modified `Chunk` buffer (content type was set on the copy but the original was sent to the callback) - Fixed invalid `std::future::get()` call on default-constructed futures - Fixed sign-comparison in `CasLogFile::Replay` loop (`int i` vs `size_t`) - Changed `CasLogFile::IsValid` and `Open` to take `const std::filesystem::path&` instead of by value - Fixed format string in `~CasContainerStrategy` error log ## Remote store (zenremotestore) - Fixed `FolderContent::operator==` always returning true: loop variable `PathCount` was initialized to 0 instead of `Paths.size()` - Fixed `GetChunkIndexForRawHash` looking up from wrong map (`RawHashToSequenceIndex` instead of `ChunkHashToChunkIndex`) - Fixed double-counted `UniqueSequencesFound` stat (incremented in both branches of an if/else) - Fixed `RawSize` sentinel value truncation: `(uint32_t)-1` assigned to a `uint64_t` field; corrected to `(uint64_t)-1` - Initialized uninitialized atomic and struct members across `buildstorageoperations.h`, `chunkblock.h`, and `remoteprojectstore.h` --- src/zenstore/cache/cachedisklayer.cpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'src/zenstore/cache/cachedisklayer.cpp') diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index b73b3e6fb..d53f9f369 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -602,7 +602,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B if (FileSize < sizeof(BucketMetaHeader)) { - ZEN_WARN("Failed to read sidecar file '{}'. Minimum size {} expected, actual size: ", + ZEN_WARN("Failed to read sidecar file '{}'. Minimum size {} expected, actual size: {}", SidecarPath, sizeof(BucketMetaHeader), FileSize); @@ -654,6 +654,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B SidecarPath, sizeof(ManifestData), CurrentReadOffset); + break; } CurrentReadOffset += sizeof(ManifestData); @@ -1011,7 +1012,7 @@ ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(uint64_t LogPosi { // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficent but in // the end it will be the same result - ZEN_WARN("snapshot failed to clean log file '{}', reason: '{}'", LogPath, IndexPath, Ec.message()); + ZEN_WARN("snapshot failed to clean log file '{}', reason: '{}'", LogPath, Ec.message()); } m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); } @@ -1267,10 +1268,10 @@ ZenCacheDiskLayer::CacheBucket::InitializeIndexFromDisk(RwLock::ExclusiveLockSco { RemoveMemCachedData(IndexLock, Payload); RemoveMetaData(IndexLock, Payload); + Location.Flags |= DiskLocation::kTombStone; + MissingEntries.push_back(DiskIndexEntry{.Key = It.first, .Location = Location}); } } - Location.Flags |= DiskLocation::kTombStone; - MissingEntries.push_back(DiskIndexEntry{.Key = It.first, .Location = Location}); } ZEN_ASSERT(!MissingEntries.empty()); @@ -2812,7 +2813,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c m_BucketDir, Ec.message(), RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms + Sleep((3 - RetriesLeft) * 100); // Total 600 ms Ec.clear(); DataFile.MoveTemporaryIntoPlace(FsPath, Ec); RetriesLeft--; @@ -2866,11 +2867,12 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c { EntryIndex = It.value(); ZEN_ASSERT_SLOW(EntryIndex < PayloadIndex(m_AccessTimes.size())); - BucketPayload& Payload = m_Payloads[EntryIndex]; - uint64_t OldSize = Payload.Location.Size(); + BucketPayload& Payload = m_Payloads[EntryIndex]; + uint64_t OldSize = Payload.Location.Size(); + RemoveMemCachedData(IndexLock, Payload); + RemoveMetaData(IndexLock, Payload); Payload = BucketPayload{.Location = Loc}; m_AccessTimes[EntryIndex] = GcClock::TickCount(); - RemoveMemCachedData(IndexLock, Payload); m_StandaloneSize.fetch_sub(OldSize, std::memory_order::relaxed); } if ((Value.RawSize != 0 || Value.RawHash != IoHash::Zero) && Value.RawSize <= std::numeric_limits::max()) @@ -3521,7 +3523,7 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(const LoggerRef& Logger, } else { - ZEN_WARN("Cache record {} payload is malformed. Reason: ", RawHash, ToString(Error)); + ZEN_WARN("Cache record {} payload is malformed. Reason: {}", RawHash, ToString(Error)); } return false; }; @@ -4282,8 +4284,8 @@ ZenCacheDiskLayer::DiscoverBuckets() RwLock SyncLock; WorkerThreadPool& Pool = GetLargeWorkerPool(EWorkloadType::Burst); - std::atomic AbortFlag; - std::atomic PauseFlag; + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::DisableBacklog); try { @@ -4454,8 +4456,8 @@ ZenCacheDiskLayer::Flush() } { WorkerThreadPool& Pool = GetMediumWorkerPool(EWorkloadType::Burst); - std::atomic AbortFlag; - std::atomic PauseFlag; + std::atomic AbortFlag{false}; + std::atomic PauseFlag{false}; ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::DisableBacklog); try { @@ -4496,8 +4498,8 @@ ZenCacheDiskLayer::Scrub(ScrubContext& Ctx) RwLock::SharedLockScope _(m_Lock); - std::atomic Abort; - std::atomic Pause; + std::atomic Abort{false}; + std::atomic Pause{false}; ParallelWork Work(Abort, Pause, WorkerThreadPool::EMode::DisableBacklog); try -- cgit v1.2.3 From b37b34ea6ad906f54e8104526e77ba66aed997da Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 9 Mar 2026 17:43:08 +0100 Subject: Dashboard overhaul, compute integration (#814) - **Frontend dashboard overhaul**: Unified compute/main dashboards into a single shared UI. Added new pages for cache, projects, metrics, sessions, info (build/runtime config, system stats). Added live-update via WebSockets with pause control, sortable detail tables, themed styling. Refactored compute/hub/orchestrator pages into modular JS. - **HTTP server fixes and stats**: Fixed http.sys local-only fallback when default port is in use, implemented root endpoint redirect for http.sys, fixed Linux/Mac port reuse. Added /stats endpoint exposing HTTP server metrics (bytes transferred, request rates). Added WebSocket stats tracking. - **OTEL/diagnostics hardening**: Improved OTLP HTTP exporter with better error handling and resilience. Extended diagnostics services configuration. - **Session management**: Added new sessions service with HTTP endpoints for registering, updating, querying, and removing sessions. Includes session log file support. This is still WIP. - **CLI subcommand support**: Added support for commands with subcommands in the zen CLI tool, with improved command dispatch. - **Misc**: Exposed CPU usage/hostname to frontend, fixed JS compact binary float32/float64 decoding, limited projects displayed on front page to 25 sorted by last access, added vscode:// link support. Also contains some fixes from TSAN analysis. --- src/zenstore/cache/cachedisklayer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/zenstore/cache/cachedisklayer.cpp') diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index d53f9f369..4640309d9 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -4561,9 +4561,11 @@ ZenCacheDiskLayer::Stats() const ZenCacheDiskLayer::Info ZenCacheDiskLayer::GetInfo() const { - ZenCacheDiskLayer::Info Info = {.RootDir = m_RootDir, .Config = m_Configuration}; + ZenCacheDiskLayer::Info Info; + Info.RootDir = m_RootDir; { RwLock::SharedLockScope _(m_Lock); + Info.Config = m_Configuration; Info.BucketNames.reserve(m_Buckets.size()); for (auto& Kv : m_Buckets) { -- cgit v1.2.3