aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2023-08-01 08:50:26 +0200
committer GitHub <[email protected]> 2023-08-01 08:50:26 +0200
commit 43026eaf95d051982588c8f0dcf85edfc100d5af (patch)
tree a3ec796d06d29c2788ee16a9b33be82cf985d102 /src/zenstore/gc.cpp
parent removed unnecessary cpr reference (diff)
download zen-43026eaf95d051982588c8f0dcf85edfc100d5af.tar.xz
zen-43026eaf95d051982588c8f0dcf85edfc100d5af.zip
catch exceptions when scheduling GC and when writing GC scheduling state (#339)
* catch exceptions when scheduling GC and when writing GC scheduling state
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r-- src/zenstore/gc.cpp 285
1 files changed, 149 insertions, 136 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 516a08f14..2a599629a 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -763,176 +763,184 @@ GcScheduler::SchedulerThread()
continue;
}
- bool DoGc = m_Config.Enabled;
- bool DoScrubbing = false;
- std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max();
- bool DoDelete = true;
- bool CollectSmallObjects = m_Config.CollectSmallObjects;
- std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration;
- std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration;
- uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit;
- GcClock::TimePoint Now = GcClock::Now();
-
- if (m_TriggerGcParams)
+ try
{
- const auto TriggerParams = m_TriggerGcParams.value();
- m_TriggerGcParams.reset();
-
- CollectSmallObjects = TriggerParams.CollectSmallObjects;
-
- if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max())
- {
- MaxCacheDuration = TriggerParams.MaxCacheDuration;
- }
- if (TriggerParams.MaxProjectStoreDuration != std::chrono::seconds::max())
+ bool DoGc = m_Config.Enabled;
+ bool DoScrubbing = false;
+ std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max();
+ bool DoDelete = true;
+ bool CollectSmallObjects = m_Config.CollectSmallObjects;
+ std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration;
+ std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration;
+ uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit;
+ GcClock::TimePoint Now = GcClock::Now();
+
+ if (m_TriggerGcParams)
{
- MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration;
- }
- if (TriggerParams.DiskSizeSoftLimit != 0)
- {
- DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit;
- }
- }
+ const auto TriggerParams = m_TriggerGcParams.value();
+ m_TriggerGcParams.reset();
- if (m_TriggerScrubParams)
- {
- DoScrubbing = true;
+ CollectSmallObjects = TriggerParams.CollectSmallObjects;
- if (m_TriggerScrubParams->SkipGc)
- {
- DoGc = false;
+ if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max())
+ {
+ MaxCacheDuration = TriggerParams.MaxCacheDuration;
+ }
+ if (TriggerParams.MaxProjectStoreDuration != std::chrono::seconds::max())
+ {
+ MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration;
+ }
+ if (TriggerParams.DiskSizeSoftLimit != 0)
+ {
+ DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit;
+ }
}
- ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice;
- }
-
- if (DoScrubbing)
- {
- ScrubStorage(DoDelete, ScrubTimeslice);
- m_TriggerScrubParams.reset();
- }
-
- if (!DoGc)
- {
- continue;
- }
+ if (m_TriggerScrubParams)
+ {
+ DoScrubbing = true;
- GcClock::TimePoint CacheExpireTime =
- MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
- GcClock::TimePoint ProjectStoreExpireTime =
- MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration;
+ if (m_TriggerScrubParams->SkipGc)
+ {
+ DoGc = false;
+ }
- const GcStorageSize TotalSize = m_GcManager.TotalStorageSize();
+ ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice;
+ }
- if (Timeout && Status() == GcSchedulerStatus::kIdle)
- {
- std::error_code Ec;
- DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec);
- if (Ec)
+ if (DoScrubbing)
{
- m_AreDiskWritesBlocked.store(true);
- ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message());
+ ScrubStorage(DoDelete, ScrubTimeslice);
+ m_TriggerScrubParams.reset();
}
- else
+
+ if (!DoGc)
{
- CheckDiskSpace(Space);
+ continue;
}
- const int64_t PressureGraphLength = 30;
- const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval;
- std::vector<uint64_t> DiskDeltas;
- uint64_t MaxLoad = 0;
+ GcClock::TimePoint CacheExpireTime =
+ MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
+ GcClock::TimePoint ProjectStoreExpireTime =
+ MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration;
- {
- const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count();
- std::unique_lock Lock(m_GcMutex);
- m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize});
- m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize});
- const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime;
- const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count();
- const GcClock::Tick End = Now.time_since_epoch().count();
- DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start,
- End,
- Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength),
- MaxLoad);
- }
+ const GcStorageSize TotalSize = m_GcManager.TotalStorageSize();
- std::string LoadGraph;
- LoadGraph.resize(DiskDeltas.size(), '0');
- if (DiskDeltas.size() > 0 && MaxLoad > 0)
+ if (Timeout && Status() == GcSchedulerStatus::kIdle)
{
- char LoadIndicator[11] = "0123456789";
- for (size_t Index = 0; Index < DiskDeltas.size(); ++Index)
+ std::error_code Ec;
+ DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec);
+ if (Ec)
{
- size_t LoadIndex = (9 * DiskDeltas[Index] + MaxLoad - 1) / MaxLoad;
- LoadGraph[Index] = LoadIndicator[LoadIndex];
+ m_AreDiskWritesBlocked.store(true);
+ ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message());
+ }
+ else
+ {
+ CheckDiskSpace(Space);
}
- }
- uint64_t GcDiskSpaceGoal = 0;
- if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit)
- {
- GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit;
- std::unique_lock Lock(m_GcMutex);
- GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count());
- GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick);
- if (SizeBasedExpireTime > CacheExpireTime)
+ const int64_t PressureGraphLength = 30;
+ const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval;
+ std::vector<uint64_t> DiskDeltas;
+ uint64_t MaxLoad = 0;
+
{
- CacheExpireTime = SizeBasedExpireTime;
+ const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count();
+ std::unique_lock Lock(m_GcMutex);
+ m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize});
+ m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize});
+ const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime;
+ const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count();
+ const GcClock::Tick End = Now.time_since_epoch().count();
+ DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start,
+ End,
+ Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength),
+ MaxLoad);
}
- if (SizeBasedExpireTime > ProjectStoreExpireTime)
+
+ std::string LoadGraph;
+ LoadGraph.resize(DiskDeltas.size(), '0');
+ if (DiskDeltas.size() > 0 && MaxLoad > 0)
{
- ProjectStoreExpireTime = SizeBasedExpireTime;
+ char LoadIndicator[11] = "0123456789";
+ for (size_t Index = 0; Index < DiskDeltas.size(); ++Index)
+ {
+ size_t LoadIndex = (9 * DiskDeltas[Index] + MaxLoad - 1) / MaxLoad;
+ LoadGraph[Index] = LoadIndicator[LoadIndex];
+ }
}
- }
- const bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0;
+ uint64_t GcDiskSpaceGoal = 0;
+ if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit)
+ {
+ GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit;
+ std::unique_lock Lock(m_GcMutex);
+ GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count());
+ GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick);
+ if (SizeBasedExpireTime > CacheExpireTime)
+ {
+ CacheExpireTime = SizeBasedExpireTime;
+ }
+ if (SizeBasedExpireTime > ProjectStoreExpireTime)
+ {
+ ProjectStoreExpireTime = SizeBasedExpireTime;
+ }
+ }
- std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now());
+ const bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0;
- if (RemaingTime < std::chrono::seconds::zero())
- {
- RemaingTime = std::chrono::seconds::zero();
- }
+ std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now());
- bool TimeBasedGCTriggered = !DiskSpaceGCTriggered && RemaingTime.count() == 0;
- ZEN_INFO(
- "{} in use,{} {} of total {} free disk space, disk writes last {} per {} [{}], peak {}/s. {}",
- NiceBytes(TotalSize.DiskSize),
- DiskSizeSoftLimit == 0 ? "" : fmt::format(" {} soft limit,", NiceBytes(DiskSizeSoftLimit)),
- NiceBytes(Space.Free),
- NiceBytes(Space.Total),
- NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count())),
- NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count() / PressureGraphLength)),
- LoadGraph,
- NiceBytes(MaxLoad * uint64_t(std::chrono::seconds(1).count()) / uint64_t(std::chrono::seconds(LoadGraphTime).count())),
- DiskSpaceGCTriggered ? fmt::format("Disk use threshold triggered, trying to reclaim {}. ", NiceBytes(GcDiskSpaceGoal))
- : TimeBasedGCTriggered ? "GC schedule triggered."
- : m_NextGcTime == GcClock::TimePoint::max()
- ? ""
- : fmt::format("{} until next scheduled GC.", NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemaingTime).count()))));
-
- if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered)
- {
- WaitTime = m_Config.MonitorInterval < RemaingTime ? m_Config.MonitorInterval : RemaingTime;
- continue;
+ if (RemaingTime < std::chrono::seconds::zero())
+ {
+ RemaingTime = std::chrono::seconds::zero();
+ }
+
+ bool TimeBasedGCTriggered = !DiskSpaceGCTriggered && RemaingTime.count() == 0;
+ ZEN_INFO(
+ "{} in use,{} {} of total {} free disk space, disk writes last {} per {} [{}], peak {}/s. {}",
+ NiceBytes(TotalSize.DiskSize),
+ DiskSizeSoftLimit == 0 ? "" : fmt::format(" {} soft limit,", NiceBytes(DiskSizeSoftLimit)),
+ NiceBytes(Space.Free),
+ NiceBytes(Space.Total),
+ NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count())),
+ NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count() / PressureGraphLength)),
+ LoadGraph,
+ NiceBytes(MaxLoad * uint64_t(std::chrono::seconds(1).count()) / uint64_t(std::chrono::seconds(LoadGraphTime).count())),
+ DiskSpaceGCTriggered ? fmt::format("Disk use threshold triggered, trying to reclaim {}. ", NiceBytes(GcDiskSpaceGoal))
+ : TimeBasedGCTriggered ? "GC schedule triggered."
+ : m_NextGcTime == GcClock::TimePoint::max()
+ ? ""
+ : fmt::format("{} until next scheduled GC.",
+ NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemaingTime).count()))));
+
+ if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered)
+ {
+ WaitTime = m_Config.MonitorInterval < RemaingTime ? m_Config.MonitorInterval : RemaingTime;
+ continue;
+ }
+
+ WaitTime = m_Config.MonitorInterval;
+ uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle);
+ if (!m_Status.compare_exchange_strong(IdleState, static_cast<uint32_t>(GcSchedulerStatus::kRunning)))
+ {
+ continue;
+ }
}
- WaitTime = m_Config.MonitorInterval;
- uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle);
- if (!m_Status.compare_exchange_strong(IdleState, static_cast<uint32_t>(GcSchedulerStatus::kRunning)))
+ CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects);
+
+ uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
+ if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle)))
{
- continue;
+ ZEN_ASSERT(m_Status == static_cast<uint32_t>(GcSchedulerStatus::kStopped));
+ break;
}
}
-
- CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects);
-
- uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
- if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle)))
+ catch (std::exception& Ex)
{
- ZEN_ASSERT(m_Status == static_cast<uint32_t>(GcSchedulerStatus::kStopped));
- break;
+ ZEN_ERROR("scheduling garbage collection failed with: '{}'", Ex.what());
}
WaitTime = m_Config.MonitorInterval;
@@ -1014,6 +1022,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
m_LastGcTime = GcClock::Now();
m_NextGcTime = NextGcTime(m_LastGcTime);
+ try
{
const fs::path Path = m_Config.RootDirectory / "gc_state";
ZEN_DEBUG("saving scheduler state to '{}'", Path);
@@ -1022,6 +1031,10 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count());
SaveCompactBinaryObject(Path, SchedulerState.Save());
}
+ catch (std::exception& Ex)
+ {
+ ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what());
+ }
std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
if (Ec)