From 4984e8cd5c38cf77c8cb978f75f808bce0577f2d Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 27 Nov 2025 16:05:56 +0100 Subject: automatic scrub on startup (#667) - Improvement: Deeper validation of data when scrub is activated (cas/cache/project) - Improvement: Enabled more multi threading when running scrub operations - Improvement: Added means to force a scrub operation at startup with a new release using ZEN_DATA_FORCE_SCRUB_VERSION variable in xmake.lua --- src/zenstore/include/zenstore/cache/cacheshared.h | 2 +- src/zenstore/include/zenstore/gc.h | 68 +++++++++++------------ src/zenstore/include/zenstore/projectstore.h | 1 + src/zenstore/include/zenstore/scrubcontext.h | 3 + 4 files changed, 39 insertions(+), 35 deletions(-) (limited to 'src/zenstore/include') diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index 8f40ae727..791720589 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -78,6 +78,6 @@ enum class PutStatus }; bool IsKnownBadBucketName(std::string_view BucketName); -bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer); +bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer&& Buffer); } // namespace zen diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index 5150ecd42..734d2e5a7 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -488,10 +488,10 @@ public: GcScheduler(GcManager& GcManager); ~GcScheduler(); - void Initialize(const GcSchedulerConfig& Config); - void Shutdown(); - GcSchedulerStatus Status() const { return static_cast(m_Status.load()); } - GcSchedulerState GetState() const; + void Initialize(const GcSchedulerConfig& Config); + void Shutdown(); + bool IsManualTriggerPresent() const; + GcSchedulerState GetState() const; struct TriggerGcParams { @@ -528,30 +528,31 @@ public: bool CancelGC(); private: - void SchedulerThread(); - bool ReclaimDiskReserve(); - bool PrepareDiskReserve(); - bool CollectGarbage(const GcClock::TimePoint& CacheExpireTime, - const GcClock::TimePoint& ProjectStoreExpireTime, - const GcClock::TimePoint& BuildStoreExpireTime, - bool Delete, - bool CollectSmallObjects, - bool SkipCid, - GcVersion UseGCVersion, - uint32_t CompactBlockUsageThresholdPercent, - bool Verbose, - bool SingleThreaded, - const IoHash& AttachmentRangeMin, - const IoHash& AttachmentRangeMax, - bool StoreCacheAttachmentMetaData, - bool StoreProjectAttachmentMetaData, - bool EnableValidation, - bool SilenceErrors); - void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice); - LoggerRef Log() { return m_Log; } - virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); } - DiskSpace CheckDiskSpace(); - void AppendGCLog(std::string_view Id, GcClock::TimePoint GcStartTime, const GcSettings& Settings, const GcResult& Result); + GcSchedulerStatus Status() const { return static_cast(m_Status.load()); } + void SchedulerThread(); + bool ReclaimDiskReserve(); + bool PrepareDiskReserve(); + bool CollectGarbage(const GcClock::TimePoint& CacheExpireTime, + const GcClock::TimePoint& ProjectStoreExpireTime, + const GcClock::TimePoint& BuildStoreExpireTime, + bool Delete, + bool CollectSmallObjects, + bool SkipCid, + GcVersion UseGCVersion, + uint32_t CompactBlockUsageThresholdPercent, + bool Verbose, + bool SingleThreaded, + const IoHash& AttachmentRangeMin, + const IoHash& AttachmentRangeMax, + bool StoreCacheAttachmentMetaData, + bool StoreProjectAttachmentMetaData, + bool EnableValidation, + bool SilenceErrors); + void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice); + LoggerRef Log() { return m_Log; } + virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); } + DiskSpace CheckDiskSpace(); + void AppendGCLog(std::string_view Id, GcClock::TimePoint GcStartTime, const GcSettings& Settings, const GcResult& Result); LoggerRef m_Log; GcManager& m_GcManager; @@ -571,18 +572,17 @@ private: std::optional m_LastLightweightGCV2Result; std::optional m_LastFullGCV2Result; - std::atomic_uint32_t m_Status{}; - std::thread m_GcThread; - mutable std::mutex m_GcMutex; - std::condition_variable m_GcSignal; + std::atomic_uint32_t m_Status{}; + std::thread m_GcThread; + mutable std::mutex m_GcMutex; + Event m_GcSignal; + std::optional m_TriggerGcParams; std::optional m_TriggerScrubParams; std::atomic_bool m_AreDiskWritesBlocked = false; TCasLogFile m_DiskUsageLog; DiskUsageWindow m_DiskUsageWindow; - - RwLock m_GcLogLock; }; void gc_forcelink(); diff --git a/src/zenstore/include/zenstore/projectstore.h b/src/zenstore/include/zenstore/projectstore.h index 258be5930..ad108f65b 100644 --- a/src/zenstore/include/zenstore/projectstore.h +++ b/src/zenstore/include/zenstore/projectstore.h @@ -133,6 +133,7 @@ public: void IterateOplog(std::function&& Fn, const Paging& EntryPaging); void IterateOplogWithKey(std::function&& Fn); void IterateOplogWithKey(std::function&& Fn, const Paging& EntryPaging); + void IterateOplogWithKeyRaw(std::function&& Handler); void IterateOplogLocked(std::function&& Fn, const Paging& EntryPaging); size_t GetOplogEntryCount() const; diff --git a/src/zenstore/include/zenstore/scrubcontext.h b/src/zenstore/include/zenstore/scrubcontext.h index 2f28cfec7..0562ca8c5 100644 --- a/src/zenstore/include/zenstore/scrubcontext.h +++ b/src/zenstore/include/zenstore/scrubcontext.h @@ -8,6 +8,7 @@ namespace zen { class WorkerThreadPool; +class CompositeBuffer; /** Context object for data scrubbing @@ -67,4 +68,6 @@ public: ~ScrubDeadlineExpiredException(); }; +bool ValidateCompressedBuffer(const CompositeBuffer& Buffer, const IoHash* OptionalExpectedHash); + } // namespace zen -- cgit v1.2.3