diff options
| author | Stefan Boberg <[email protected]> | 2023-05-16 21:35:39 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-05-16 21:35:39 +0200 |
| commit | 81b2757f917e34bb338fad7965ae8a74e160bee4 (patch) | |
| tree | 931ba100471a2369c62a6e41a1b4a7937ed31f6f /src/zenstore/include | |
| parent | added benchmark utility command `bench` (#298) (diff) | |
| download | zen-81b2757f917e34bb338fad7965ae8a74e160bee4.tar.xz zen-81b2757f917e34bb338fad7965ae8a74e160bee4.zip | |
Content scrubbing (#271)
Added a zen scrub command, which may be triggered via the zen CLI helper. It traverses storage and validates contents by content hash and/or by structure. If unexpected data is encountered, it is invalidated.
Diffstat (limited to 'src/zenstore/include')
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 40 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/scrubcontext.h | 45 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/zenstore.h | 2 |
3 files changed, 60 insertions, 27 deletions
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index 881936d0f..22b9bc284 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -22,9 +22,10 @@ class logger; namespace zen { -class HashKeySet; -class GcManager; class CidStore; +class GcManager; +class HashKeySet; +class ScrubContext; struct IoHash; struct DiskSpace; @@ -146,6 +147,7 @@ public: void RemoveGcStorage(GcStorage* Contributor); void CollectGarbage(GcContext& GcCtx); + void ScrubStorage(ScrubContext& GcCtx); GcStorageSize TotalStorageSize() const; @@ -226,29 +228,39 @@ public: bool TriggerGc(const TriggerGcParams& Params); + struct TriggerScrubParams + { + bool SkipGc = false; + std::chrono::seconds MaxTimeslice = std::chrono::seconds::max(); + }; + + bool TriggerScrub(const TriggerScrubParams& Params); + private: void SchedulerThread(); void CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, bool Delete, bool CollectSmallObjects); + void ScrubStorage(bool DoDelete, std::chrono::seconds TimeSlice); GcClock::TimePoint NextGcTime(GcClock::TimePoint CurrentTime); spdlog::logger& Log() { return m_Log; } virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); } void CheckDiskSpace(const DiskSpace& Space); - spdlog::logger& m_Log; - GcManager& m_GcManager; - GcSchedulerConfig m_Config; - GcClock::TimePoint m_LastGcTime{}; - GcClock::TimePoint m_LastGcExpireTime{}; - GcClock::TimePoint m_NextGcTime{}; - std::atomic_uint32_t m_Status{}; - std::thread m_GcThread; - std::mutex m_GcMutex; - std::condition_variable m_GcSignal; - std::optional<TriggerGcParams> m_TriggerGcParams; - std::atomic_bool m_AreDiskWritesBlocked = false; + spdlog::logger& m_Log; + GcManager& m_GcManager; + GcSchedulerConfig m_Config; + GcClock::TimePoint m_LastGcTime{}; + GcClock::TimePoint m_LastGcExpireTime{}; + GcClock::TimePoint m_NextGcTime{}; + std::atomic_uint32_t 
m_Status{}; + std::thread m_GcThread; + std::mutex m_GcMutex; + std::condition_variable m_GcSignal; + std::optional<TriggerGcParams> m_TriggerGcParams; + std::optional<TriggerScrubParams> m_TriggerScrubParams; + std::atomic_bool m_AreDiskWritesBlocked = false; TCasLogFile<DiskUsageWindow::DiskUsageEntry> m_DiskUsageLog; DiskUsageWindow m_DiskUsageWindow; diff --git a/src/zenstore/include/zenstore/scrubcontext.h b/src/zenstore/include/zenstore/scrubcontext.h index 8b8ebac3d..cefaf0888 100644 --- a/src/zenstore/include/zenstore/scrubcontext.h +++ b/src/zenstore/include/zenstore/scrubcontext.h @@ -7,38 +7,59 @@ namespace zen { +class WorkerThreadPool; + /** Context object for data scrubbing - * - * Data scrubbing is when we traverse stored data to validate it and - * optionally correct/recover + + Data scrubbing is when we traverse stored data to validate it and + optionally correct/recover */ class ScrubContext { public: - ScrubContext(); + ScrubContext(WorkerThreadPool& InWorkerThreadPool, + std::chrono::steady_clock::time_point Deadline = std::chrono::steady_clock::time_point::max()); ~ScrubContext(); - virtual void ReportBadCidChunks(std::span<IoHash> BadCasChunks) { m_BadCid.AddHashesToSet(BadCasChunks); } + void ReportBadCidChunks(std::span<IoHash> BadCasChunks); inline uint64_t ScrubTimestamp() const { return m_ScrubTime; } - inline bool RunRecovery() const { return m_Recover; } void ReportScrubbed(uint64_t ChunkCount, uint64_t ChunkBytes) { m_ChunkCount.fetch_add(ChunkCount); m_ByteCount.fetch_add(ChunkBytes); } + std::chrono::steady_clock::time_point GetDeadline() const { return m_Deadline; } + bool IsWithinDeadline() const; + void ThrowIfDeadlineExpired() const; + inline uint64_t ScrubbedChunks() const { return m_ChunkCount; } inline uint64_t ScrubbedBytes() const { return m_ByteCount; } - const HashKeySet BadCids() const { return m_BadCid; } + HashKeySet BadCids() const; + + inline bool RunRecovery() const { return m_Recover; } + inline void 
SetShouldDelete(bool DoDelete) { m_Recover = DoDelete; } + + inline WorkerThreadPool& ThreadPool() { return m_WorkerThreadPool; } private: - uint64_t m_ScrubTime = GetHifreqTimerValue(); - bool m_Recover = true; - std::atomic<uint64_t> m_ChunkCount{0}; - std::atomic<uint64_t> m_ByteCount{0}; - HashKeySet m_BadCid; + uint64_t m_ScrubTime = GetHifreqTimerValue(); + bool m_Recover = true; + std::atomic<uint64_t> m_ChunkCount{0}; + std::atomic<uint64_t> m_ByteCount{0}; + mutable RwLock m_Lock; + HashKeySet m_BadCid; + WorkerThreadPool& m_WorkerThreadPool; + std::chrono::steady_clock::time_point m_Deadline{}; +}; + +class ScrubDeadlineExpiredException : public std::runtime_error +{ +public: + ScrubDeadlineExpiredException(); + ~ScrubDeadlineExpiredException(); }; } // namespace zen diff --git a/src/zenstore/include/zenstore/zenstore.h b/src/zenstore/include/zenstore/zenstore.h index 46d62029d..29f3d2639 100644 --- a/src/zenstore/include/zenstore/zenstore.h +++ b/src/zenstore/include/zenstore/zenstore.h @@ -10,4 +10,4 @@ namespace zen { ZENSTORE_API void zenstore_forcelinktests(); -} +} // namespace zen |