aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2023-05-16 21:35:39 +0200
committerGitHub <[email protected]>2023-05-16 21:35:39 +0200
commit81b2757f917e34bb338fad7965ae8a74e160bee4 (patch)
tree931ba100471a2369c62a6e41a1b4a7937ed31f6f /src/zenstore/gc.cpp
parentadded benchmark utility command `bench` (#298) (diff)
downloadzen-81b2757f917e34bb338fad7965ae8a74e160bee4.tar.xz
zen-81b2757f917e34bb338fad7965ae8a74e160bee4.zip
Content scrubbing (#271)
Added the `zen scrub` command, which can be triggered via the zen CLI helper. It traverses storage and validates contents by content hash and/or by structure. If unexpected data is encountered, it is invalidated.
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r--src/zenstore/gc.cpp142
1 files changed, 135 insertions, 7 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index dc19a9a35..516a08f14 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -15,7 +15,9 @@
#include <zencore/testutils.h>
#include <zencore/timer.h>
#include <zencore/trace.h>
+#include <zencore/workthreadpool.h>
#include <zenstore/cidstore.h>
+#include <zenstore/scrubcontext.h>
#include "cas.h"
@@ -378,6 +380,17 @@ GcManager::RemoveGcStorage(GcStorage* Storage)
}
+// Forwards a scrub request to every entry in m_GcStorage by calling
+// ScrubStorage(GcCtx) on each one in turn. The shared-lock scope object on
+// m_Lock lives until the function returns, i.e. across the whole pass.
void
+GcManager::ScrubStorage(ScrubContext& GcCtx)
+{
+    RwLock::SharedLockScope SharedLock(m_Lock);
+
+    for (GcStorage* Target : m_GcStorage)
+    {
+        Target->ScrubStorage(GcCtx);
+    }
+}
+
+void
GcManager::CollectGarbage(GcContext& GcCtx)
{
ZEN_TRACE_CPU("Gc::CollectGarbage");
@@ -435,6 +448,7 @@ GcManager::TotalStorageSize() const
}
//////////////////////////////////////////////////////////////////////////
+
void
DiskUsageWindow::KeepRange(GcClock::Tick StartTick, GcClock::Tick EndTick)
{
@@ -660,7 +674,9 @@ GcScheduler::TriggerGc(const GcScheduler::TriggerGcParams& Params)
{
m_TriggerGcParams = Params;
uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle);
- if (m_Status.compare_exchange_strong(IdleState, static_cast<uint32_t>(GcSchedulerStatus::kRunning)))
+
+ if (m_Status.compare_exchange_strong(/* expected */ IdleState,
+ /* desired */ static_cast<uint32_t>(GcSchedulerStatus::kRunning)))
{
m_GcSignal.notify_one();
return true;
@@ -671,6 +687,27 @@ GcScheduler::TriggerGc(const GcScheduler::TriggerGcParams& Params)
return false;
}
+// Requests a scrub pass from the scheduler.
+//
+// While holding m_GcMutex: if the scheduler status is idle, stores Params in
+// m_TriggerScrubParams, attempts to move the status from idle to running and,
+// on success, signals m_GcSignal.
+//
+// Returns true if this call performed the idle -> running transition, false
+// otherwise (the scheduler was not idle).
+bool
+GcScheduler::TriggerScrub(const TriggerScrubParams& Params)
+{
+    std::unique_lock Lock(m_GcMutex);
+
+    const uint32_t IdleValue    = static_cast<uint32_t>(GcSchedulerStatus::kIdle);
+    const uint32_t RunningValue = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
+
+    if (m_Status != IdleValue)
+    {
+        return false;
+    }
+
+    m_TriggerScrubParams = Params;
+
+    // compare_exchange_strong overwrites its first argument on failure, so it
+    // needs a mutable copy of the expected value.
+    uint32_t Expected = IdleValue;
+    if (!m_Status.compare_exchange_strong(/* expected */ Expected, /* desired */ RunningValue))
+    {
+        return false;
+    }
+
+    m_GcSignal.notify_one();
+    return true;
+}
+
void
GcScheduler::CheckDiskSpace(const DiskSpace& Space)
{
@@ -697,6 +734,8 @@ GcScheduler::CheckDiskSpace(const DiskSpace& Space)
void
GcScheduler::SchedulerThread()
{
+ SetCurrentThreadName("GcScheduler");
+
std::chrono::seconds WaitTime{0};
for (;;)
@@ -713,7 +752,7 @@ GcScheduler::SchedulerThread()
break;
}
- if (!m_Config.Enabled)
+ if (!m_Config.Enabled && !m_TriggerScrubParams)
{
WaitTime = std::chrono::seconds::max();
continue;
@@ -724,18 +763,23 @@ GcScheduler::SchedulerThread()
continue;
}
- bool Delete = true;
+ bool DoGc = m_Config.Enabled;
+ bool DoScrubbing = false;
+ std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max();
+ bool DoDelete = true;
bool CollectSmallObjects = m_Config.CollectSmallObjects;
std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration;
std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration;
uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit;
GcClock::TimePoint Now = GcClock::Now();
+
if (m_TriggerGcParams)
{
const auto TriggerParams = m_TriggerGcParams.value();
m_TriggerGcParams.reset();
CollectSmallObjects = TriggerParams.CollectSmallObjects;
+
if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max())
{
MaxCacheDuration = TriggerParams.MaxCacheDuration;
@@ -750,6 +794,29 @@ GcScheduler::SchedulerThread()
}
}
+ if (m_TriggerScrubParams)
+ {
+ DoScrubbing = true;
+
+ if (m_TriggerScrubParams->SkipGc)
+ {
+ DoGc = false;
+ }
+
+ ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice;
+ }
+
+ if (DoScrubbing)
+ {
+ ScrubStorage(DoDelete, ScrubTimeslice);
+ m_TriggerScrubParams.reset();
+ }
+
+ if (!DoGc)
+ {
+ continue;
+ }
+
GcClock::TimePoint CacheExpireTime =
MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
GcClock::TimePoint ProjectStoreExpireTime =
@@ -775,14 +842,15 @@ GcScheduler::SchedulerThread()
const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval;
std::vector<uint64_t> DiskDeltas;
uint64_t MaxLoad = 0;
+
{
const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count();
std::unique_lock Lock(m_GcMutex);
m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize});
m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize});
const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime;
- GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count();
- GcClock::Tick End = Now.time_since_epoch().count();
+ const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count();
+ const GcClock::Tick End = Now.time_since_epoch().count();
DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start,
End,
Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength),
@@ -818,7 +886,7 @@ GcScheduler::SchedulerThread()
}
}
- bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0;
+ const bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0;
std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now());
@@ -858,7 +926,7 @@ GcScheduler::SchedulerThread()
}
}
- CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, Delete, CollectSmallObjects);
+ CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects);
uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle)))
@@ -885,6 +953,36 @@ GcScheduler::NextGcTime(GcClock::TimePoint CurrentTime)
}
+// Runs one scrub pass over all storage known to m_GcManager.
+//
+// DoDelete   forwarded to ScrubContext::SetShouldDelete and echoed in the log.
+// TimeSlice  time budget for the pass, measured from the moment of the call.
+//            A value too large to be added to the current steady_clock time
+//            (e.g. std::chrono::seconds::max()) or a negative value yields an
+//            unbounded deadline (time_point::max()).
+//
+// A ScrubDeadlineExpiredException escaping the pass is caught and logged; the
+// pass is then reported as incomplete. Start and completion (with elapsed
+// time) are logged via ZEN_INFO.
void
+GcScheduler::ScrubStorage(bool DoDelete, std::chrono::seconds TimeSlice)
+{
+    using SteadyClock = std::chrono::steady_clock;
+
+    const SteadyClock::time_point TimeNow = SteadyClock::now();
+
+    // Saturating add. Computing TimeNow + TimeSlice directly overflows the
+    // signed tick count for large slices (undefined behaviour), so compare
+    // against the remaining headroom in whole seconds first.
+    // NOTE(review): assumes steady_clock::now() is not before the clock's epoch,
+    // otherwise the subtraction below could itself overflow.
+    const std::chrono::seconds Headroom = std::chrono::duration_cast<std::chrono::seconds>(SteadyClock::time_point::max() - TimeNow);
+
+    SteadyClock::time_point Deadline = SteadyClock::time_point::max();
+    // Negative slices keep meaning "no deadline", as the previous wrap-around check did.
+    if (TimeSlice >= std::chrono::seconds::zero() && TimeSlice < Headroom)
+    {
+        Deadline = TimeNow + TimeSlice;
+    }
+
+    Stopwatch Timer;
+    ZEN_INFO("scrubbing STARTING (delete mode => {})", DoDelete);
+
+    WorkerThreadPool ThreadPool{4, "scrubber"};
+    ScrubContext     Ctx{ThreadPool, Deadline};
+
+    try
+    {
+        Ctx.SetShouldDelete(DoDelete);
+        m_GcManager.ScrubStorage(Ctx);
+    }
+    catch (const ScrubDeadlineExpiredException&)
+    {
+        ZEN_INFO("scrubbing deadline expired (top level), operation incomplete!");
+    }
+
+    ZEN_INFO("scrubbing DONE (in {})", NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+}
+
+void
GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
const GcClock::TimePoint& ProjectStoreExpireTime,
bool Delete,
@@ -1354,6 +1452,36 @@ TEST_CASE("gc.diskusagewindow")
CHECK(Stats.FindTimepointThatRemoves(100000u, 1000));
}
}
+
+// NOTE(review): despite its name, this test does not run a scrub pass. It adds
+// one compressed chunk to a CidStore, runs GcManager::CollectGarbage with a
+// GcContext built from two time points 24 hours in the past (small-object
+// collection enabled), and checks that the chunk is gone afterwards.
+TEST_CASE("scrub.basic")
+{
+    using namespace gc::impl;
+
+    ScopedTemporaryDirectory ScratchDir;
+
+    CidStoreConfiguration StoreConfig;
+    StoreConfig.RootDirectory = ScratchDir.Path() / "cas";
+
+    GcManager Gc;
+    CidStore  Store(Gc);
+
+    Store.Initialize(StoreConfig);
+
+    IoBuffer RawChunk   = CreateChunk(128);
+    auto     Compressed = Compress(RawChunk);
+
+    const auto AddResult = Store.AddChunk(Compressed.GetCompressed().Flatten().AsIoBuffer(), Compressed.DecodeRawHash());
+    CHECK(AddResult.New);
+
+    GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
+    GcCtx.CollectSmallObjects(true);
+
+    Store.Flush();
+    Gc.CollectGarbage(GcCtx);
+
+    CHECK(!Store.ContainsChunk(Compressed.DecodeRawHash()));
+}
+
#endif
void