diff options
| author | Dan Engelbrecht <[email protected]> | 2025-10-23 14:57:34 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-10-23 14:57:34 +0200 |
| commit | aa3c00b9a7f19b1ebf61f5251348ab298cafd4a3 (patch) | |
| tree | f83b813b148e473b1d7c13f38585540721807e98 /src | |
| parent | add host discovery and zen cache support for oplog import (#601) (diff) | |
| download | zen-aa3c00b9a7f19b1ebf61f5251348ab298cafd4a3.tar.xz zen-aa3c00b9a7f19b1ebf61f5251348ab298cafd4a3.zip | |
if we are low on disk space, only run GC if it will remove any data (#603)
* if we are low on disk space, only run GC if it will remove any data
* make sure we don't treat bailing out of GC due to low disk space as a success, which caused a zero wait between GC passes
Diffstat (limited to 'src')
| -rw-r--r-- | src/zenstore/gc.cpp | 246 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 4 |
2 files changed, 160 insertions, 90 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 3fc795f9e..8c3d802c3 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -58,6 +58,8 @@ namespace fs = std::filesystem; ////////////////////////////////////////////////////////////////////////// namespace { + static std::filesystem::path GcDiskReserveFileName = "reserve.gc"; + std::error_code CreateGCReserve(const std::filesystem::path& Path, uint64_t Size) { if (Size == 0) @@ -1678,7 +1680,7 @@ DiskUsageWindow::GetDiskDeltas(GcClock::Tick StartTick, GcClock::Tick EndTick, G } GcClock::Tick -DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick) const +DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick, uint64_t& OutFoundAmount) const { ZEN_ASSERT(Amount > 0); uint64_t RemainingToFind = Amount; @@ -1688,17 +1690,29 @@ DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick const DiskUsageEntry& Entry = m_LogWindow[Offset]; if (Entry.SampleTime >= EndTick) { + OutFoundAmount = Amount - RemainingToFind; + if (OutFoundAmount > 0) + { + return m_LogWindow[Offset - 1].SampleTime + 1; + } return EndTick; } const DiskUsageEntry& PreviousEntry = m_LogWindow[Offset - 1]; uint64_t Delta = Entry.DiskUsage > PreviousEntry.DiskUsage ? 
Entry.DiskUsage - PreviousEntry.DiskUsage : 0; if (Delta >= RemainingToFind) { + OutFoundAmount = (Amount - RemainingToFind) + Delta; return m_LogWindow[Offset].SampleTime + 1; } RemainingToFind -= Delta; Offset++; } + OutFoundAmount = Amount - RemainingToFind; + if (OutFoundAmount > 0) + { + // Remove what we can if we found anything + return m_LogWindow[Offset - 1].SampleTime + 1; + } return EndTick; } @@ -1737,11 +1751,11 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config) CreateDirectories(Config.RootDirectory); - std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); + std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / GcDiskReserveFileName, m_Config.DiskReserveSize); if (Ec) { ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'", - m_Config.RootDirectory / "reserve.gc", + m_Config.RootDirectory / GcDiskReserveFileName, NiceBytes(m_Config.DiskReserveSize), Ec.message()); } @@ -2029,7 +2043,7 @@ GcScheduler::GetState() const if (Result.Config.DiskReserveSize != 0) { Ec.clear(); - Result.HasDiskReserve = IsFile(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec; + Result.HasDiskReserve = IsFile(Result.Config.RootDirectory / GcDiskReserveFileName, Ec) && !Ec; } if (Result.Status != GcSchedulerStatus::kRunning) @@ -2330,20 +2344,44 @@ GcScheduler::SchedulerThread() const uint64_t GcDiskSpaceRemoveGoal = Max(MaximumDiskUseGcSpaceGoal, MinimumFreeDiskGcSpaceGoal); std::unique_lock Lock(m_GcMutex); + uint64_t FoundAmount = 0; + GcClock::Tick NowTick = Now.time_since_epoch().count(); GcClock::Tick AgeTick = - m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceRemoveGoal, Now.time_since_epoch().count()); - GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick); - if (SizeBasedExpireTime > CacheExpireTime) - { - CacheExpireTime = SizeBasedExpireTime; - } - if (SizeBasedExpireTime > ProjectStoreExpireTime) + 
m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceRemoveGoal, Now.time_since_epoch().count(), FoundAmount); + if (AgeTick < NowTick) { - ProjectStoreExpireTime = SizeBasedExpireTime; + ZEN_ASSERT(FoundAmount > 0); + GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick); + if (SizeBasedExpireTime > CacheExpireTime) + { + CacheExpireTime = SizeBasedExpireTime; + } + if (SizeBasedExpireTime > ProjectStoreExpireTime) + { + ProjectStoreExpireTime = SizeBasedExpireTime; + } + if (SizeBasedExpireTime > BuildStoreExpireTime) + { + BuildStoreExpireTime = SizeBasedExpireTime; + } } - if (SizeBasedExpireTime > BuildStoreExpireTime) + else { - BuildStoreExpireTime = SizeBasedExpireTime; + if (HighDiskSpaceUsageGCTriggered) + { + ZEN_WARN("Used disk space {} is above {} but no data available to free", + NiceBytes(TotalSize.DiskSize), + NiceBytes(DiskSizeSoftLimit)); + HighDiskSpaceUsageGCTriggered = false; + } + if (LowDiskSpaceGCTriggered) + { + ZEN_WARN("Free disk space {} is below {} but no data available to free", + NiceBytes(Space.Free), + NiceBytes(MinimumFreeDiskSpaceToAllowWrites)); + LowDiskSpaceGCTriggered = false; + m_AreDiskWritesBlocked.store(true); + } } } @@ -2476,34 +2514,44 @@ GcScheduler::SchedulerThread() m_AttachmentPassIndex = NextAttachmentPassIndex; } - bool GcSuccess = CollectGarbage(CacheExpireTime, - ProjectStoreExpireTime, - BuildStoreExpireTime, - DoDelete, - CollectSmallObjects, - SkipCid, - UseGCVersion, - CompactBlockUsageThresholdPercent, - Verbose, - SingleThreaded, - AttachmentRangeMin, - AttachmentRangeMax, - StoreCacheAttachmentMetaData, - StoreProjectAttachmentMetaData, - EnableValidation, - SilenceErrors); - if (!GcSuccess) + if (PrepareDiskReserve()) { - SilenceErrors = true; - ZEN_INFO("gc going into error state - gc errors will be demoted to warnings until we recover"); + bool GcSuccess = CollectGarbage(CacheExpireTime, + ProjectStoreExpireTime, + BuildStoreExpireTime, + DoDelete, + CollectSmallObjects, + 
SkipCid, + UseGCVersion, + CompactBlockUsageThresholdPercent, + Verbose, + SingleThreaded, + AttachmentRangeMin, + AttachmentRangeMax, + StoreCacheAttachmentMetaData, + StoreProjectAttachmentMetaData, + EnableValidation, + SilenceErrors); + if (!GcSuccess) + { + SilenceErrors = true; + ZEN_INFO("gc going into error state - gc errors will be demoted to warnings until we recover"); + } + else if (SilenceErrors) + { + SilenceErrors = false; + ZEN_INFO("gc recovered from error state - gc errors will be will be reported as errors again"); + } + + WaitTime = std::chrono::seconds(0); } - else if (SilenceErrors) + else { - SilenceErrors = false; - ZEN_INFO("gc recovered from error state - gc errors will be will be reported as errors again"); + ZEN_WARN( + "Disk space is very low and we have no GC reserve, skipping GC as this requires at least some space to write to '{}'", + m_Config.RootDirectory); + WaitTime = m_Config.MonitorInterval; } - - WaitTime = std::chrono::seconds(0); } catch (const std::system_error& SystemError) { @@ -2582,6 +2630,63 @@ GcScheduler::ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds Time } bool +GcScheduler::ReclaimDiskReserve() +{ + const std::filesystem::path DiskReservePath = m_Config.RootDirectory / GcDiskReserveFileName; + std::error_code Ec = CreateGCReserve(DiskReservePath, m_Config.DiskReserveSize); + if (Ec) + { + ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'", + DiskReservePath, + NiceBytes(m_Config.DiskReserveSize), + Ec.message()); + return false; + } + return true; +} + +bool +GcScheduler::PrepareDiskReserve() +{ + try + { + static bool ForceFail = false; + if (ForceFail) + { + return false; + } + (void)ReclaimDiskReserve(); + CheckDiskSpace(); + + if (m_AreDiskWritesBlocked.load()) + { + // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC + uint64_t ReleasedSpace = 0; + const std::filesystem::path DiskReservePath = m_Config.RootDirectory / 
GcDiskReserveFileName; + if (IsFile(DiskReservePath)) + { + uint64_t GcReserveFileSize = FileSizeFromPath(DiskReservePath); + if (RemoveFile(DiskReservePath)) + { + ReleasedSpace = GcReserveFileSize; + } + } + + if (ReleasedSpace == 0) + { + return false; + } + } + return true; + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed to prepare for GC, reason: {}", Ex.what()); + return false; + } +} + +bool GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, const GcClock::TimePoint& BuildStoreExpireTime, @@ -2605,53 +2710,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, try { - const std::filesystem::path DiskReservePath = m_Config.RootDirectory / "reserve.gc"; - - auto ReclaimDiskReserve = [&]() { - std::error_code Ec = CreateGCReserve(DiskReservePath, m_Config.DiskReserveSize); - if (Ec) - { - ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'", - m_Config.RootDirectory / "reserve.gc", - NiceBytes(m_Config.DiskReserveSize), - Ec.message()); - } - }; - - ReclaimDiskReserve(); const auto _ = MakeGuard([&] { ReclaimDiskReserve(); }); - CheckDiskSpace(); - - if (m_AreDiskWritesBlocked.load()) - { - // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC - auto ClaimDiskReserve = [&]() -> uint64_t { - if (!IsFile(DiskReservePath)) - { - return 0; - } - uint64_t ReclaimedSize = FileSizeFromPath(DiskReservePath); - if (RemoveFile(DiskReservePath)) - { - return ReclaimedSize; - } - return 0; - }; - - uint64_t ReleasedSpace = ClaimDiskReserve(); - if (ReleasedSpace == 0) - { - ZEN_WARN( - "Disk space is very low and we have no GC reserve, skipping GC as this requires at least some space to write to '{}'", - m_Config.RootDirectory); - m_LastGcTime = GcClock::Now(); - m_LastLightweightGcTime = m_LastGcTime; - return true; // Treat this as a success as we don't want to silence any errors that happens when 
we are no longer low on - // disk space - } - } - { Stopwatch Timer; const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -2675,7 +2735,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .Verbose = Verbose, .SingleThread = SingleThreaded, .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, - .DiskReservePath = DiskReservePath, + .DiskReservePath = m_Config.RootDirectory / GcDiskReserveFileName, .AttachmentRangeMin = AttachmentRangeMin, .AttachmentRangeMax = AttachmentRangeMax, .StoreCacheAttachmentMetaData = StoreCacheAttachmentMetaData, @@ -3095,12 +3155,20 @@ TEST_CASE("gc.diskusagewindow") SUBCASE("Find size window") { DiskUsageWindow Empty; - CHECK(Empty.FindTimepointThatRemoves(15u, 10000) == 10000); - CHECK(Stats.FindTimepointThatRemoves(15u, 40) == 21); - CHECK(Stats.FindTimepointThatRemoves(15u, 20) == 20); - CHECK(Stats.FindTimepointThatRemoves(100000u, 50) == 50); - CHECK(Stats.FindTimepointThatRemoves(100000u, 1000)); + uint64_t FoundAmount = 0; + + CHECK(Empty.FindTimepointThatRemoves(15u, 10000, FoundAmount) == 10000); + CHECK(FoundAmount == 0); + + CHECK(Stats.FindTimepointThatRemoves(15u, 40, FoundAmount) == 21); + CHECK(FoundAmount == 20); + CHECK(Stats.FindTimepointThatRemoves(15u, 20, FoundAmount) == 11); + CHECK(FoundAmount == 10); + CHECK(Stats.FindTimepointThatRemoves(100000u, 50, FoundAmount) == 41); + CHECK(FoundAmount == 20); + CHECK(Stats.FindTimepointThatRemoves(100000u, 1000, FoundAmount) == 71); + CHECK(FoundAmount == 50); } } diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index 3223fba39..5150ecd42 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -476,7 +476,7 @@ public: GcClock::Tick EndTick, GcClock::Tick DeltaWidth, uint64_t& OutMaxDelta) const; - GcClock::Tick FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick) const; + 
GcClock::Tick FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick, uint64_t& OutFoundAmount) const; }; /** @@ -529,6 +529,8 @@ public: private: void SchedulerThread(); + bool ReclaimDiskReserve(); + bool PrepareDiskReserve(); bool CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, const GcClock::TimePoint& BuildStoreExpireTime, |