aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2025-10-23 14:57:34 +0200
committer GitHub Enterprise <[email protected]> 2025-10-23 14:57:34 +0200
commit aa3c00b9a7f19b1ebf61f5251348ab298cafd4a3 (patch)
tree f83b813b148e473b1d7c13f38585540721807e98 /src/zenstore/gc.cpp
parent add host discovery and zen cache support for oplog import (#601) (diff)
download zen-aa3c00b9a7f19b1ebf61f5251348ab298cafd4a3.tar.xz
zen-aa3c00b9a7f19b1ebf61f5251348ab298cafd4a3.zip
if we are low on disk space, only run GC if it will remove any data (#603)
* if we are low on disk space, only run GC if it will remove any data * make sure we don't treat bail of GC due to disk space as success causing 0 wait between GC passes
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r-- src/zenstore/gc.cpp 246
1 file changed, 157 insertions, 89 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 3fc795f9e..8c3d802c3 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -58,6 +58,8 @@ namespace fs = std::filesystem;
//////////////////////////////////////////////////////////////////////////
namespace {
+ static std::filesystem::path GcDiskReserveFileName = "reserve.gc";
+
std::error_code CreateGCReserve(const std::filesystem::path& Path, uint64_t Size)
{
if (Size == 0)
@@ -1678,7 +1680,7 @@ DiskUsageWindow::GetDiskDeltas(GcClock::Tick StartTick, GcClock::Tick EndTick, G
}
GcClock::Tick
-DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick) const
+DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick, uint64_t& OutFoundAmount) const
{
ZEN_ASSERT(Amount > 0);
uint64_t RemainingToFind = Amount;
@@ -1688,17 +1690,29 @@ DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick
const DiskUsageEntry& Entry = m_LogWindow[Offset];
if (Entry.SampleTime >= EndTick)
{
+ OutFoundAmount = Amount - RemainingToFind;
+ if (OutFoundAmount > 0)
+ {
+ return m_LogWindow[Offset - 1].SampleTime + 1;
+ }
return EndTick;
}
const DiskUsageEntry& PreviousEntry = m_LogWindow[Offset - 1];
uint64_t Delta = Entry.DiskUsage > PreviousEntry.DiskUsage ? Entry.DiskUsage - PreviousEntry.DiskUsage : 0;
if (Delta >= RemainingToFind)
{
+ OutFoundAmount = (Amount - RemainingToFind) + Delta;
return m_LogWindow[Offset].SampleTime + 1;
}
RemainingToFind -= Delta;
Offset++;
}
+ OutFoundAmount = Amount - RemainingToFind;
+ if (OutFoundAmount > 0)
+ {
+ // Remove what we can if we found anything
+ return m_LogWindow[Offset - 1].SampleTime + 1;
+ }
return EndTick;
}
@@ -1737,11 +1751,11 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
CreateDirectories(Config.RootDirectory);
- std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+ std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / GcDiskReserveFileName, m_Config.DiskReserveSize);
if (Ec)
{
ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'",
- m_Config.RootDirectory / "reserve.gc",
+ m_Config.RootDirectory / GcDiskReserveFileName,
NiceBytes(m_Config.DiskReserveSize),
Ec.message());
}
@@ -2029,7 +2043,7 @@ GcScheduler::GetState() const
if (Result.Config.DiskReserveSize != 0)
{
Ec.clear();
- Result.HasDiskReserve = IsFile(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec;
+ Result.HasDiskReserve = IsFile(Result.Config.RootDirectory / GcDiskReserveFileName, Ec) && !Ec;
}
if (Result.Status != GcSchedulerStatus::kRunning)
@@ -2330,20 +2344,44 @@ GcScheduler::SchedulerThread()
const uint64_t GcDiskSpaceRemoveGoal = Max(MaximumDiskUseGcSpaceGoal, MinimumFreeDiskGcSpaceGoal);
std::unique_lock Lock(m_GcMutex);
+ uint64_t FoundAmount = 0;
+ GcClock::Tick NowTick = Now.time_since_epoch().count();
GcClock::Tick AgeTick =
- m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceRemoveGoal, Now.time_since_epoch().count());
- GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick);
- if (SizeBasedExpireTime > CacheExpireTime)
- {
- CacheExpireTime = SizeBasedExpireTime;
- }
- if (SizeBasedExpireTime > ProjectStoreExpireTime)
+ m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceRemoveGoal, Now.time_since_epoch().count(), FoundAmount);
+ if (AgeTick < NowTick)
{
- ProjectStoreExpireTime = SizeBasedExpireTime;
+ ZEN_ASSERT(FoundAmount > 0);
+ GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick);
+ if (SizeBasedExpireTime > CacheExpireTime)
+ {
+ CacheExpireTime = SizeBasedExpireTime;
+ }
+ if (SizeBasedExpireTime > ProjectStoreExpireTime)
+ {
+ ProjectStoreExpireTime = SizeBasedExpireTime;
+ }
+ if (SizeBasedExpireTime > BuildStoreExpireTime)
+ {
+ BuildStoreExpireTime = SizeBasedExpireTime;
+ }
}
- if (SizeBasedExpireTime > BuildStoreExpireTime)
+ else
{
- BuildStoreExpireTime = SizeBasedExpireTime;
+ if (HighDiskSpaceUsageGCTriggered)
+ {
+ ZEN_WARN("Used disk space {} is above {} but no data available to free",
+ NiceBytes(TotalSize.DiskSize),
+ NiceBytes(DiskSizeSoftLimit));
+ HighDiskSpaceUsageGCTriggered = false;
+ }
+ if (LowDiskSpaceGCTriggered)
+ {
+ ZEN_WARN("Free disk space {} is below {} but no data available to free",
+ NiceBytes(Space.Free),
+ NiceBytes(MinimumFreeDiskSpaceToAllowWrites));
+ LowDiskSpaceGCTriggered = false;
+ m_AreDiskWritesBlocked.store(true);
+ }
}
}
@@ -2476,34 +2514,44 @@ GcScheduler::SchedulerThread()
m_AttachmentPassIndex = NextAttachmentPassIndex;
}
- bool GcSuccess = CollectGarbage(CacheExpireTime,
- ProjectStoreExpireTime,
- BuildStoreExpireTime,
- DoDelete,
- CollectSmallObjects,
- SkipCid,
- UseGCVersion,
- CompactBlockUsageThresholdPercent,
- Verbose,
- SingleThreaded,
- AttachmentRangeMin,
- AttachmentRangeMax,
- StoreCacheAttachmentMetaData,
- StoreProjectAttachmentMetaData,
- EnableValidation,
- SilenceErrors);
- if (!GcSuccess)
+ if (PrepareDiskReserve())
{
- SilenceErrors = true;
- ZEN_INFO("gc going into error state - gc errors will be demoted to warnings until we recover");
+ bool GcSuccess = CollectGarbage(CacheExpireTime,
+ ProjectStoreExpireTime,
+ BuildStoreExpireTime,
+ DoDelete,
+ CollectSmallObjects,
+ SkipCid,
+ UseGCVersion,
+ CompactBlockUsageThresholdPercent,
+ Verbose,
+ SingleThreaded,
+ AttachmentRangeMin,
+ AttachmentRangeMax,
+ StoreCacheAttachmentMetaData,
+ StoreProjectAttachmentMetaData,
+ EnableValidation,
+ SilenceErrors);
+ if (!GcSuccess)
+ {
+ SilenceErrors = true;
+ ZEN_INFO("gc going into error state - gc errors will be demoted to warnings until we recover");
+ }
+ else if (SilenceErrors)
+ {
+ SilenceErrors = false;
+ ZEN_INFO("gc recovered from error state - gc errors will be will be reported as errors again");
+ }
+
+ WaitTime = std::chrono::seconds(0);
}
- else if (SilenceErrors)
+ else
{
- SilenceErrors = false;
- ZEN_INFO("gc recovered from error state - gc errors will be will be reported as errors again");
+ ZEN_WARN(
+ "Disk space is very low and we have no GC reserve, skipping GC as this requires at least some space to write to '{}'",
+ m_Config.RootDirectory);
+ WaitTime = m_Config.MonitorInterval;
}
-
- WaitTime = std::chrono::seconds(0);
}
catch (const std::system_error& SystemError)
{
@@ -2582,6 +2630,63 @@ GcScheduler::ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds Time
}
bool
+GcScheduler::ReclaimDiskReserve()  // Calls CreateGCReserve for the reserve file under the configured root directory; returns true on success, false (after logging a warning) on error.
+{
+ const std::filesystem::path DiskReservePath = m_Config.RootDirectory / GcDiskReserveFileName;  // Full path of the reserve file: <RootDirectory>/<GcDiskReserveFileName>.
+ std::error_code Ec = CreateGCReserve(DiskReservePath, m_Config.DiskReserveSize);  // Requested size comes from the scheduler config.
+ if (Ec)  // A set error code means the reserve could not be created.
+ {
+ ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'",
+ DiskReservePath,
+ NiceBytes(m_Config.DiskReserveSize),
+ Ec.message());
+ return false;  // Failure is reported to the caller, which may choose to ignore it.
+ }
+ return true;
+}
+
+bool
+GcScheduler::PrepareDiskReserve()  // Returns true when GC may proceed; false when disk writes are blocked and no reserve-file space could be released, or when an exception was caught.
+{
+ try
+ {
+ static bool ForceFail = false;  // NOTE(review): never set to true in the visible code - looks like a debugging hook; confirm whether it should remain.
+ if (ForceFail)
+ {
+ return false;
+ }
+ (void)ReclaimDiskReserve();  // Result deliberately discarded; ReclaimDiskReserve already logs a warning when it fails.
+ CheckDiskSpace();  // Presumably refreshes m_AreDiskWritesBlocked, which is read right below - TODO confirm.
+
+ if (m_AreDiskWritesBlocked.load())
+ {
+ // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC
+ uint64_t ReleasedSpace = 0;  // Stays 0 unless the reserve file is successfully removed.
+ const std::filesystem::path DiskReservePath = m_Config.RootDirectory / GcDiskReserveFileName;
+ if (IsFile(DiskReservePath))
+ {
+ uint64_t GcReserveFileSize = FileSizeFromPath(DiskReservePath);  // Size is read before removal so the released amount can be recorded.
+ if (RemoveFile(DiskReservePath))
+ {
+ ReleasedSpace = GcReserveFileSize;
+ }
+ }
+
+ if (ReleasedSpace == 0)  // No reserve file, removal failed, or the file size was zero: nothing was released.
+ {
+ return false;
+ }
+ }
+ return true;  // Either writes are not blocked, or the reserve file was removed to release space.
+ }
+ catch (const std::exception& Ex)  // Any std::exception from the steps above is logged and turned into a false result.
+ {
+ ZEN_WARN("Failed to prepare for GC, reason: {}", Ex.what());
+ return false;
+ }
+}
+
+bool
GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
const GcClock::TimePoint& ProjectStoreExpireTime,
const GcClock::TimePoint& BuildStoreExpireTime,
@@ -2605,53 +2710,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
try
{
- const std::filesystem::path DiskReservePath = m_Config.RootDirectory / "reserve.gc";
-
- auto ReclaimDiskReserve = [&]() {
- std::error_code Ec = CreateGCReserve(DiskReservePath, m_Config.DiskReserveSize);
- if (Ec)
- {
- ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'",
- m_Config.RootDirectory / "reserve.gc",
- NiceBytes(m_Config.DiskReserveSize),
- Ec.message());
- }
- };
-
- ReclaimDiskReserve();
const auto _ = MakeGuard([&] { ReclaimDiskReserve(); });
- CheckDiskSpace();
-
- if (m_AreDiskWritesBlocked.load())
- {
- // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC
- auto ClaimDiskReserve = [&]() -> uint64_t {
- if (!IsFile(DiskReservePath))
- {
- return 0;
- }
- uint64_t ReclaimedSize = FileSizeFromPath(DiskReservePath);
- if (RemoveFile(DiskReservePath))
- {
- return ReclaimedSize;
- }
- return 0;
- };
-
- uint64_t ReleasedSpace = ClaimDiskReserve();
- if (ReleasedSpace == 0)
- {
- ZEN_WARN(
- "Disk space is very low and we have no GC reserve, skipping GC as this requires at least some space to write to '{}'",
- m_Config.RootDirectory);
- m_LastGcTime = GcClock::Now();
- m_LastLightweightGcTime = m_LastGcTime;
- return true; // Treat this as a success as we don't want to silence any errors that happens when we are no longer low on
- // disk space
- }
- }
-
{
Stopwatch Timer;
const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
@@ -2675,7 +2735,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
.Verbose = Verbose,
.SingleThread = SingleThreaded,
.CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
- .DiskReservePath = DiskReservePath,
+ .DiskReservePath = m_Config.RootDirectory / GcDiskReserveFileName,
.AttachmentRangeMin = AttachmentRangeMin,
.AttachmentRangeMax = AttachmentRangeMax,
.StoreCacheAttachmentMetaData = StoreCacheAttachmentMetaData,
@@ -3095,12 +3155,20 @@ TEST_CASE("gc.diskusagewindow")
SUBCASE("Find size window")
{
DiskUsageWindow Empty;
- CHECK(Empty.FindTimepointThatRemoves(15u, 10000) == 10000);
- CHECK(Stats.FindTimepointThatRemoves(15u, 40) == 21);
- CHECK(Stats.FindTimepointThatRemoves(15u, 20) == 20);
- CHECK(Stats.FindTimepointThatRemoves(100000u, 50) == 50);
- CHECK(Stats.FindTimepointThatRemoves(100000u, 1000));
+ uint64_t FoundAmount = 0;
+
+ CHECK(Empty.FindTimepointThatRemoves(15u, 10000, FoundAmount) == 10000);
+ CHECK(FoundAmount == 0);
+
+ CHECK(Stats.FindTimepointThatRemoves(15u, 40, FoundAmount) == 21);
+ CHECK(FoundAmount == 20);
+ CHECK(Stats.FindTimepointThatRemoves(15u, 20, FoundAmount) == 11);
+ CHECK(FoundAmount == 10);
+ CHECK(Stats.FindTimepointThatRemoves(100000u, 50, FoundAmount) == 41);
+ CHECK(FoundAmount == 20);
+ CHECK(Stats.FindTimepointThatRemoves(100000u, 1000, FoundAmount) == 71);
+ CHECK(FoundAmount == 50);
}
}