diff options
| author | Dan Engelbrecht <[email protected]> | 2023-08-01 08:50:26 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-08-01 08:50:26 +0200 |
| commit | 43026eaf95d051982588c8f0dcf85edfc100d5af (patch) | |
| tree | a3ec796d06d29c2788ee16a9b33be82cf985d102 /src/zenstore/gc.cpp | |
| parent | removed unnecessary cpr reference (diff) | |
| download | zen-43026eaf95d051982588c8f0dcf85edfc100d5af.tar.xz zen-43026eaf95d051982588c8f0dcf85edfc100d5af.zip | |
catch exceptions when scheduling GC and when writing GC scheduling state (#339)
* catch exceptions when scheduling GC and when writing GC scheduling state
Diffstat (limited to 'src/zenstore/gc.cpp')
| -rw-r--r-- | src/zenstore/gc.cpp | 285 |
1 file changed, 149 insertions, 136 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 516a08f14..2a599629a 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -763,176 +763,184 @@ GcScheduler::SchedulerThread() continue; } - bool DoGc = m_Config.Enabled; - bool DoScrubbing = false; - std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max(); - bool DoDelete = true; - bool CollectSmallObjects = m_Config.CollectSmallObjects; - std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration; - std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration; - uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit; - GcClock::TimePoint Now = GcClock::Now(); - - if (m_TriggerGcParams) + try { - const auto TriggerParams = m_TriggerGcParams.value(); - m_TriggerGcParams.reset(); - - CollectSmallObjects = TriggerParams.CollectSmallObjects; - - if (TriggerParams.MaxCacheDuration != std::chrono::seconds::max()) - { - MaxCacheDuration = TriggerParams.MaxCacheDuration; - } - if (TriggerParams.MaxProjectStoreDuration != std::chrono::seconds::max()) + bool DoGc = m_Config.Enabled; + bool DoScrubbing = false; + std::chrono::seconds ScrubTimeslice = std::chrono::seconds::max(); + bool DoDelete = true; + bool CollectSmallObjects = m_Config.CollectSmallObjects; + std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration; + std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration; + uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit; + GcClock::TimePoint Now = GcClock::Now(); + + if (m_TriggerGcParams) { - MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration; - } - if (TriggerParams.DiskSizeSoftLimit != 0) - { - DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit; - } - } + const auto TriggerParams = m_TriggerGcParams.value(); + m_TriggerGcParams.reset(); - if (m_TriggerScrubParams) - { - DoScrubbing = true; + CollectSmallObjects = TriggerParams.CollectSmallObjects; - if (m_TriggerScrubParams->SkipGc) - { - DoGc = false; + if 
(TriggerParams.MaxCacheDuration != std::chrono::seconds::max()) + { + MaxCacheDuration = TriggerParams.MaxCacheDuration; + } + if (TriggerParams.MaxProjectStoreDuration != std::chrono::seconds::max()) + { + MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration; + } + if (TriggerParams.DiskSizeSoftLimit != 0) + { + DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit; + } } - ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice; - } - - if (DoScrubbing) - { - ScrubStorage(DoDelete, ScrubTimeslice); - m_TriggerScrubParams.reset(); - } - - if (!DoGc) - { - continue; - } + if (m_TriggerScrubParams) + { + DoScrubbing = true; - GcClock::TimePoint CacheExpireTime = - MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration; - GcClock::TimePoint ProjectStoreExpireTime = - MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration; + if (m_TriggerScrubParams->SkipGc) + { + DoGc = false; + } - const GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); + ScrubTimeslice = m_TriggerScrubParams->MaxTimeslice; + } - if (Timeout && Status() == GcSchedulerStatus::kIdle) - { - std::error_code Ec; - DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); - if (Ec) + if (DoScrubbing) { - m_AreDiskWritesBlocked.store(true); - ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message()); + ScrubStorage(DoDelete, ScrubTimeslice); + m_TriggerScrubParams.reset(); } - else + + if (!DoGc) { - CheckDiskSpace(Space); + continue; } - const int64_t PressureGraphLength = 30; - const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval; - std::vector<uint64_t> DiskDeltas; - uint64_t MaxLoad = 0; + GcClock::TimePoint CacheExpireTime = + MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration; + GcClock::TimePoint ProjectStoreExpireTime = + MaxProjectStoreDuration == GcClock::Duration::max() ? 
GcClock::TimePoint::min() : Now - MaxProjectStoreDuration; - { - const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count(); - std::unique_lock Lock(m_GcMutex); - m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); - m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); - const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime; - const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count(); - const GcClock::Tick End = Now.time_since_epoch().count(); - DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start, - End, - Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength), - MaxLoad); - } + const GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); - std::string LoadGraph; - LoadGraph.resize(DiskDeltas.size(), '0'); - if (DiskDeltas.size() > 0 && MaxLoad > 0) + if (Timeout && Status() == GcSchedulerStatus::kIdle) { - char LoadIndicator[11] = "0123456789"; - for (size_t Index = 0; Index < DiskDeltas.size(); ++Index) + std::error_code Ec; + DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); + if (Ec) { - size_t LoadIndex = (9 * DiskDeltas[Index] + MaxLoad - 1) / MaxLoad; - LoadGraph[Index] = LoadIndicator[LoadIndex]; + m_AreDiskWritesBlocked.store(true); + ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message()); + } + else + { + CheckDiskSpace(Space); } - } - uint64_t GcDiskSpaceGoal = 0; - if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit) - { - GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit; - std::unique_lock Lock(m_GcMutex); - GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count()); - GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick); - if (SizeBasedExpireTime > CacheExpireTime) + const int64_t PressureGraphLength = 30; + const std::chrono::duration LoadGraphTime = PressureGraphLength * 
m_Config.MonitorInterval; + std::vector<uint64_t> DiskDeltas; + uint64_t MaxLoad = 0; + { - CacheExpireTime = SizeBasedExpireTime; + const GcClock::Tick EpochTickCount = GcClock::Now().time_since_epoch().count(); + std::unique_lock Lock(m_GcMutex); + m_DiskUsageWindow.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); + m_DiskUsageLog.Append({.SampleTime = EpochTickCount, .DiskUsage = TotalSize.DiskSize}); + const GcClock::TimePoint LoadGraphStartTime = Now - LoadGraphTime; + const GcClock::Tick Start = LoadGraphStartTime.time_since_epoch().count(); + const GcClock::Tick End = Now.time_since_epoch().count(); + DiskDeltas = m_DiskUsageWindow.GetDiskDeltas(Start, + End, + Max(1, (End - Start + PressureGraphLength - 1) / PressureGraphLength), + MaxLoad); } - if (SizeBasedExpireTime > ProjectStoreExpireTime) + + std::string LoadGraph; + LoadGraph.resize(DiskDeltas.size(), '0'); + if (DiskDeltas.size() > 0 && MaxLoad > 0) { - ProjectStoreExpireTime = SizeBasedExpireTime; + char LoadIndicator[11] = "0123456789"; + for (size_t Index = 0; Index < DiskDeltas.size(); ++Index) + { + size_t LoadIndex = (9 * DiskDeltas[Index] + MaxLoad - 1) / MaxLoad; + LoadGraph[Index] = LoadIndicator[LoadIndex]; + } } - } - const bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0; + uint64_t GcDiskSpaceGoal = 0; + if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit) + { + GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit; + std::unique_lock Lock(m_GcMutex); + GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count()); + GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick); + if (SizeBasedExpireTime > CacheExpireTime) + { + CacheExpireTime = SizeBasedExpireTime; + } + if (SizeBasedExpireTime > ProjectStoreExpireTime) + { + ProjectStoreExpireTime = SizeBasedExpireTime; + } + } - std::chrono::seconds RemaingTime = 
std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now()); + const bool DiskSpaceGCTriggered = GcDiskSpaceGoal > 0; - if (RemaingTime < std::chrono::seconds::zero()) - { - RemaingTime = std::chrono::seconds::zero(); - } + std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now()); - bool TimeBasedGCTriggered = !DiskSpaceGCTriggered && RemaingTime.count() == 0; - ZEN_INFO( - "{} in use,{} {} of total {} free disk space, disk writes last {} per {} [{}], peak {}/s. {}", - NiceBytes(TotalSize.DiskSize), - DiskSizeSoftLimit == 0 ? "" : fmt::format(" {} soft limit,", NiceBytes(DiskSizeSoftLimit)), - NiceBytes(Space.Free), - NiceBytes(Space.Total), - NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count())), - NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count() / PressureGraphLength)), - LoadGraph, - NiceBytes(MaxLoad * uint64_t(std::chrono::seconds(1).count()) / uint64_t(std::chrono::seconds(LoadGraphTime).count())), - DiskSpaceGCTriggered ? fmt::format("Disk use threshold triggered, trying to reclaim {}. ", NiceBytes(GcDiskSpaceGoal)) - : TimeBasedGCTriggered ? "GC schedule triggered." - : m_NextGcTime == GcClock::TimePoint::max() - ? "" - : fmt::format("{} until next scheduled GC.", NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemaingTime).count())))); - - if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered) - { - WaitTime = m_Config.MonitorInterval < RemaingTime ? m_Config.MonitorInterval : RemaingTime; - continue; + if (RemaingTime < std::chrono::seconds::zero()) + { + RemaingTime = std::chrono::seconds::zero(); + } + + bool TimeBasedGCTriggered = !DiskSpaceGCTriggered && RemaingTime.count() == 0; + ZEN_INFO( + "{} in use,{} {} of total {} free disk space, disk writes last {} per {} [{}], peak {}/s. {}", + NiceBytes(TotalSize.DiskSize), + DiskSizeSoftLimit == 0 ? 
"" : fmt::format(" {} soft limit,", NiceBytes(DiskSizeSoftLimit)), + NiceBytes(Space.Free), + NiceBytes(Space.Total), + NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count())), + NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(LoadGraphTime).count() / PressureGraphLength)), + LoadGraph, + NiceBytes(MaxLoad * uint64_t(std::chrono::seconds(1).count()) / uint64_t(std::chrono::seconds(LoadGraphTime).count())), + DiskSpaceGCTriggered ? fmt::format("Disk use threshold triggered, trying to reclaim {}. ", NiceBytes(GcDiskSpaceGoal)) + : TimeBasedGCTriggered ? "GC schedule triggered." + : m_NextGcTime == GcClock::TimePoint::max() + ? "" + : fmt::format("{} until next scheduled GC.", + NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(RemaingTime).count())))); + + if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered) + { + WaitTime = m_Config.MonitorInterval < RemaingTime ? m_Config.MonitorInterval : RemaingTime; + continue; + } + + WaitTime = m_Config.MonitorInterval; + uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle); + if (!m_Status.compare_exchange_strong(IdleState, static_cast<uint32_t>(GcSchedulerStatus::kRunning))) + { + continue; + } } - WaitTime = m_Config.MonitorInterval; - uint32_t IdleState = static_cast<uint32_t>(GcSchedulerStatus::kIdle); - if (!m_Status.compare_exchange_strong(IdleState, static_cast<uint32_t>(GcSchedulerStatus::kRunning))) + CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects); + + uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning); + if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle))) { - continue; + ZEN_ASSERT(m_Status == static_cast<uint32_t>(GcSchedulerStatus::kStopped)); + break; } } - - CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, CollectSmallObjects); - - uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning); - if 
(!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle))) + catch (std::exception& Ex) { - ZEN_ASSERT(m_Status == static_cast<uint32_t>(GcSchedulerStatus::kStopped)); - break; + ZEN_ERROR("scheduling garbage collection failed with: '{}'", Ex.what()); } WaitTime = m_Config.MonitorInterval; @@ -1014,6 +1022,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, m_LastGcTime = GcClock::Now(); m_NextGcTime = NextGcTime(m_LastGcTime); + try { const fs::path Path = m_Config.RootDirectory / "gc_state"; ZEN_DEBUG("saving scheduler state to '{}'", Path); @@ -1022,6 +1031,10 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count()); SaveCompactBinaryObject(Path, SchedulerState.Save()); } + catch (std::exception& Ex) + { + ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what()); + } std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); if (Ec) |