From d5e92ce97a55c39158329257dd80dc6a24f393ad Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Fri, 17 Apr 2026 16:36:10 +0200 Subject: zenbase hardening (#971) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A series of correctness and API hygiene fixes to the intrusive refcount primitives in `zenbase`, culminating in the removal of `RefPtr` in favour of a single unified `Ref` smart pointer. The changes are motivated by two pieces of latent UB sitting under every `Ref` / `TRefCounted` in the codebase, plus a handful of API footguns on the smart-pointer side (silent raw-pointer decay, missing converting moves, unconstrained conversions from unrelated types). ## Correctness fixes - **Strict-aliasing UB in atomic helpers** — `AtomicIncrement`/`Decrement`/`Add` took a `volatile uint32_t&` and reinterpret-cast it to `std::atomic<uint32_t>*`. The object was never constructed as a `std::atomic<uint32_t>`, so the access was type-punning UB. Fixed by changing `m_RefCount` to `std::atomic<uint32_t>` directly in `RefCounted`, `TRefCounted` and `IoBufferCore`. The helpers (and `zenbase/atomic.h`) are later removed entirely — the three callers now invoke `fetch_add`/`fetch_sub` directly. - **const_cast of non-mutable member** — `AddRef()` / `Release()` are `const` but mutated `m_RefCount` via `const_cast`. Since `m_RefCount` wasn't `mutable`, writing through the cast was UB for any `const`-qualified holder (e.g. a `static const` refcounted singleton). Fixed by marking `m_RefCount` `mutable` and dropping the `const_cast` in `AddRef`/`Release`. - **Public non-virtual `TRefCounted` destructor** — allowed `delete basePtr;` to slice past the CRTP `DeleteThis()` contract. Moved to `protected`. ## Memory-ordering cleanup - `AddRef` weakened from seq_cst to **relaxed** (a thread can only take a new reference via one it already holds; nothing needs to synchronize). 
- `Release` weakened from seq_cst to **acq_rel** (sufficient to order prior writes before the destructor, and make the decrement visible to observers). - Diagnostic `RefCount()` / `GetRefCount()` reads made **relaxed** and spelled out as explicit `.load()` — the returned value is stale the moment it's observed, so stronger ordering gives no guarantee. - No-op on x86 (`lock xadd` either way), but removes a full barrier on every `Ref` copy on ARM64 (Apple silicon / Windows-on-ARM). ## `RefPtr` / `Ref` unification Before this branch, `RefPtr` and `Ref` were subtly different in ways that made the safer of the two (`Ref`) harder to use and the looser one (`RefPtr`) dangerous: - `RefPtr::operator T*()` was implicit — `delete refPtr;` compiled silently (double-delete), and the raw pointer could outlive the temporary `RefPtr` it was extracted from. Made `explicit`, then removed entirely once call sites were migrated to `.Get()`. - `RefPtr(T*)` was implicit while `RefPtr(RefPtr&&)` was `explicit` — exactly the opposite of the safety intent. Reversed. - `RefPtr`'s converting move was unconstrained (any `RefPtr<U>` with an implicitly-convertible `U*` satisfied it, including `void*` and multiple-inheritance base offsets). Added a `DerivedFrom` constraint matching `Ref`. - `Ref<T>` was missing a converting move ctor / move-assignment from `Ref<U>` — upcasts of rvalues were going through `AddRef`+`Release` instead of a pointer steal. Added. - `Release()` and the non-move smart-pointer ops were not `noexcept`, despite being so in practice. Marked `noexcept` throughout. After all of the above, the two types were functionally identical. The final commit deletes `RefPtr` and rewrites the ~10 consumer files to use `Ref`. 
--- src/zencore/jobqueue.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src/zencore/jobqueue.cpp') diff --git a/src/zencore/jobqueue.cpp b/src/zencore/jobqueue.cpp index 3e58fb97d..a5a82717d 100644 --- a/src/zencore/jobqueue.cpp +++ b/src/zencore/jobqueue.cpp @@ -93,7 +93,7 @@ public: { NewJobId = IdGenerator.fetch_add(1); } - RefPtr NewJob(new Job()); + Ref NewJob(new Job()); NewJob->Queue = this; NewJob->Name = Name; NewJob->Callback = std::move(JobFunc); @@ -124,7 +124,7 @@ public: QueueLock.WithExclusiveLock([&]() { if (auto It = std::find_if(QueuedJobs.begin(), QueuedJobs.end(), - [NewJobId](const RefPtr& Job) { return Job->Id.Id == NewJobId; }); + [NewJobId](const Ref& Job) { return Job->Id.Id == NewJobId; }); It != QueuedJobs.end()) { QueuedJobs.erase(It); @@ -156,7 +156,7 @@ public: Result = true; return; } - if (auto It = std::find_if(QueuedJobs.begin(), QueuedJobs.end(), [&Id](const RefPtr& Job) { return Job->Id.Id == Id.Id; }); + if (auto It = std::find_if(QueuedJobs.begin(), QueuedJobs.end(), [&Id](const Ref& Job) { return Job->Id.Id == Id.Id; }); It != QueuedJobs.end()) { ZEN_DEBUG("Cancelling queued background job {}:'{}'", (*It)->Id.Id, (*It)->Name); @@ -301,7 +301,7 @@ public: AbortedJobs.erase(It); return; } - if (auto It = std::find_if(QueuedJobs.begin(), QueuedJobs.end(), [&Id](const RefPtr& Job) { return Job->Id.Id == Id.Id; }); + if (auto It = std::find_if(QueuedJobs.begin(), QueuedJobs.end(), [&Id](const Ref& Job) { return Job->Id.Id == Id.Id; }); It != QueuedJobs.end()) { Result = Convert(JobStatus::Queued, *(*It)); @@ -340,20 +340,20 @@ public: std::atomic_uint64_t IdGenerator = 1; - std::atomic_bool InitializedFlag = false; - RwLock QueueLock; - std::deque> QueuedJobs; - std::unordered_map> RunningJobs; - std::unordered_map> CompletedJobs; - std::unordered_map> AbortedJobs; + std::atomic_bool InitializedFlag = false; + RwLock QueueLock; + std::deque> QueuedJobs; + std::unordered_map> 
RunningJobs; + std::unordered_map> CompletedJobs; + std::unordered_map> AbortedJobs; WorkerThreadPool WorkerPool; Latch WorkerCounter; void Worker() { - int CurrentThreadId = GetCurrentThreadId(); - RefPtr CurrentJob; + int CurrentThreadId = GetCurrentThreadId(); + Ref CurrentJob; QueueLock.WithExclusiveLock([&]() { if (!QueuedJobs.empty()) { -- cgit v1.2.3 From 29736c4fed154233a8adbf93b29995c8e0b3ea78 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 20 Apr 2026 21:22:01 +0200 Subject: Use eastl::deque for queues with many small elements (#991) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch several deque-based queues from `std::deque` to `eastl::deque` to reduce per-element heap allocation overhead. MSVC's `std::deque` allocates one node per element for anything larger than ~16 bytes; `eastl::deque` groups 4, 8, or 32 elements per block depending on element size. Converted call sites: - `BlockingQueue` and `WorkerThreadPool` (generic — downstream callers benefit automatically) - Session log entry buffer (~10k-entry ring of large log records — 4 per block vs 1) - Job queue (`Ref<Job>` — 32 per block vs 2) - RPC recording request queue (large `QueuedRequest` struct — 4 per block vs 1) - StatsD client message queues (~32-byte buffers — 8 per block vs 1) --- src/zencore/jobqueue.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/zencore/jobqueue.cpp') diff --git a/src/zencore/jobqueue.cpp b/src/zencore/jobqueue.cpp index a5a82717d..40e4e2162 100644 --- a/src/zencore/jobqueue.cpp +++ b/src/zencore/jobqueue.cpp @@ -12,10 +12,9 @@ #endif // ZEN_WITH_TESTS ZEN_THIRD_PARTY_INCLUDES_START +#include #include ZEN_THIRD_PARTY_INCLUDES_END - -#include #include #include @@ -342,7 +341,7 @@ public: std::atomic_bool InitializedFlag = false; RwLock QueueLock; - std::deque> QueuedJobs; + eastl::deque> QueuedJobs; std::unordered_map> RunningJobs; std::unordered_map> CompletedJobs; std::unordered_map> 
AbortedJobs; -- cgit v1.2.3