diff options
| author | Stefan Boberg <[email protected]> | 2023-05-16 21:35:39 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-05-16 21:35:39 +0200 |
| commit | 81b2757f917e34bb338fad7965ae8a74e160bee4 (patch) | |
| tree | 931ba100471a2369c62a6e41a1b4a7937ed31f6f /src/zenstore/gc.cpp | |
| parent | added benchmark utility command `bench` (#298) (diff) | |
| download | zen-81b2757f917e34bb338fad7965ae8a74e160bee4.tar.xz zen-81b2757f917e34bb338fad7965ae8a74e160bee4.zip | |
Content scrubbing (#271)
Added a `zen scrub` command, which may be triggered via the zen CLI helper. It traverses storage and validates contents by content hash and/or by structure. If unexpected data is encountered, it is invalidated.
Diffstat (limited to 'src/zenstore/gc.cpp')
| -rw-r--r-- | src/zenstore/gc.cpp | 142 |
1 files changed, 135 insertions, 7 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index dc19a9a35..516a08f14 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -15,7 +15,9 @@ #include <zencore/testutils.h> #include <zencore/timer.h> #include <zencore/trace.h> +#include <zencore/workthreadpool.h> #include <zenstore/cidstore.h> +#include <zenstore/scrubcontext.h> #include "cas.h" @@ -378,6 +380,17 @@ GcManager::RemoveGcStorage(GcStorage* Storage) } void +GcManager::ScrubStorage(ScrubContext& GcCtx) +{ + RwLock::SharedLockScope _(m_Lock); + + for (GcStorage* Storage : m_GcStorage) + { + Storage->ScrubStorage(GcCtx); + } +} + +void GcManager::CollectGarbage(GcContext& GcCtx) { ZEN_TRACE_CPU("Gc::CollectGarbage"); @@ -435,6 +448,7 @@ GcManager::TotalStorageSize() const } ////////////////////////////////////////////////////////////////////////// + void DiskUsageWindow::KeepRange(GcClock::Tick StartTick, GcClock::Tick EndTick) { @@ -660,7 +674,9 @@ GcScheduler::TriggerGc(const GcScheduler::TriggerGcParams& Params) { m_TriggerGcParams = Params; uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle); - if (m_Status.compare_exchange_strong(IdleState, static_cast<uint32_t>(GcSchedulerStatus::kRunning))) + + if (m_Status.compare_exchange_strong(/* expected */ IdleState, + /* desired */ static_cast<uint32_t>(GcSchedulerStatus::kRunning))) { m_GcSignal.notify_one(); return true; @@ -671,6 +687,27 @@ GcScheduler::TriggerGc(const GcScheduler::TriggerGcParams& Params) return false; } +bool +GcScheduler::TriggerScrub(const TriggerScrubParams& Params) +{ + std::unique_lock Lock(m_GcMutex); + + if (static_cast<uint32_t>(GcSchedulerStatus::kIdle) == m_Status) + { + m_TriggerScrubParams = Params; + uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle); + + if (m_Status.compare_exchange_strong(/* expected */ IdleState, /* desired */ static_cast<uint32_t>(GcSchedulerStatus::kRunning))) + { + m_GcSignal.notify_one(); + + return true; + } + } + + return false; +} + void 
GcScheduler::CheckDiskSpace(const DiskSpace& Space) { @@ -697,6 +734,8 @@ GcScheduler::CheckDiskSpace(const DiskSpace& Space) void GcScheduler::SchedulerThread() { + SetCurrentThreadName("GcScheduler"); + std::chrono::seconds WaitTime{0}; for (;;) @@ -713,7 +752,7 @@ GcScheduler::SchedulerThread() break; } - if (!m_Config.Enabled) + if (!m_Config.Enabled && !m_TriggerScrubParams) { WaitTime = std::chrono::seconds::max(); continue; @@ -724,18 +763,23 @@ GcScheduler::SchedulerThread() continue; } - bool Delete = true; + bool DoGc = m_Config.Enabled; + bool DoScrubbing = false; + std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max(); + bool DoDelete = true; bool CollectSmallObjects = m_Config.CollectSmallObjects; std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration; std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration; uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit; GcClock::TimePoint Now = GcClock::Now(); + if (m_TriggerGcParams) { const auto TriggerParams = m_TriggerGcParams.value(); m_TriggerGcParams.reset(); CollectSmallObjects = TriggerParams.CollectSmallObjects; + if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max()) { MaxCacheDuration = TriggerParams.MaxCacheDuration; @@ -750,6 +794,29 @@ GcScheduler::SchedulerThread() } } + if (m_TriggerScrubParams) + { + DoScrubbing = true; + + if (m_TriggerScrubParams->SkipGc) + { + DoGc = false; + } + + ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice; + } + + if (DoScrubbing) + { + ScrubStorage(DoDelete, ScrubTimeslice); + m_TriggerScrubParams.reset(); + } + + if (!DoGc) + { + continue; + } + GcClock::TimePoint CacheExpireTime = MaxCacheDuration == GcClock::Duration::max() ? 
GcClock::TimePoint::min() : Now - MaxCacheDuration; GcClock::TimePoint ProjectStoreExpireTime = @@ -775,14 +842,15 @@ GcScheduler::SchedulerThread() const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval; std::vector<uint64_t> DiskDeltas; uint64_t MaxLoad = 0; + { const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count(); std::unique_lock Lock(m_GcMutex); m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime; - GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count(); - GcClock::Tick End = Now.time_since_epoch().count(); + const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count(); + const GcClock::Tick End = Now.time_since_epoch().count(); DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start, End, Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength), @@ -818,7 +886,7 @@ GcScheduler::SchedulerThread() } } - bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0; + const bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0; std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now()); @@ -858,7 +926,7 @@ GcScheduler::SchedulerThread() } } - CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, Delete, CollectSmallObjects); + CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects); uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning); if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle))) @@ -885,6 +953,36 @@ GcScheduler::NextGcTime(GcClock::TimePoint CurrentTime) } void +GcScheduler::ScrubStorage(bool DoDelete, std::chrono::seconds TimeSlice) +{ + const std::chrono::steady_clock::time_point TimeNow = std::chrono::steady_clock::now(); + 
std::chrono::steady_clock::time_point Deadline = TimeNow + TimeSlice; + // there really should be a saturating add in std::chrono + if (Deadline < TimeNow) + { + Deadline = std::chrono::steady_clock::time_point::max(); + } + + Stopwatch Timer; + ZEN_INFO("scrubbing STARTING (delete mode => {})", DoDelete); + + WorkerThreadPool ThreadPool{4, "scrubber"}; + ScrubContext Ctx{ThreadPool, Deadline}; + + try + { + Ctx.SetShouldDelete(DoDelete); + m_GcManager.ScrubStorage(Ctx); + } + catch (ScrubDeadlineExpiredException&) + { + ZEN_INFO("scrubbing deadline expired (top level), operation incomplete!"); + } + + ZEN_INFO("scrubbing DONE (in {})", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); +} + +void GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, bool Delete, @@ -1354,6 +1452,36 @@ TEST_CASE("gc.diskusagewindow") CHECK(Stats.FindTimepointThatRemoves(100000u, 1000)); } } + +TEST_CASE("scrub.basic") +{ + using namespace gc::impl; + + ScopedTemporaryDirectory TempDir; + + CidStoreConfiguration CasConfig; + CasConfig.RootDirectory = TempDir.Path() / "cas"; + + GcManager Gc; + CidStore CidStore(Gc); + + CidStore.Initialize(CasConfig); + + IoBuffer Chunk = CreateChunk(128); + auto CompressedChunk = Compress(Chunk); + + const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash()); + CHECK(InsertResult.New); + + GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); + GcCtx.CollectSmallObjects(true); + + CidStore.Flush(); + Gc.CollectGarbage(GcCtx); + + CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); +} + #endif void |