aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-09-25 10:21:53 +0200
committerGitHub Enterprise <[email protected]>2024-09-25 10:21:53 +0200
commite27a5da5dae33f958a4b809a9e20a0af33c24f90 (patch)
tree3f22bdba794108fa2a4a4d5d1fc308a986b3483b /src/zenstore/gc.cpp
parentexception safety when writing block (#168) (diff)
downloadzen-e27a5da5dae33f958a4b809a9e20a0af33c24f90.tar.xz
zen-e27a5da5dae33f958a4b809a9e20a0af33c24f90.zip
Add `gc-attachment-passes` option to zenserver (#167)
Added option `gc-attachment-passes` to zenserver. Cleaned up GCv2 start and stop logs and added an identifier to easily find the matching start and end of a GC pass in the log file. Fixed project store not properly sorting references found during the lock phase.
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r--src/zenstore/gc.cpp186
1 files changed, 150 insertions, 36 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 904619222..cde89421e 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -145,6 +145,28 @@ namespace {
return std::error_code{};
}
+ // Computes the inclusive IoHash range [OutMin, OutMax] handled by one attachment pass.
+ //
+ // The hash space is split into AttachmentPassCount slices keyed on the first hash byte
+ // (Hash[0]); every other byte of OutMin comes from IoHash::Zero and of OutMax from
+ // IoHash::Max.
+ //
+ // AttachmentPassIndex  - zero based index of the pass to compute; an index that is not
+ //                        below the pass count is treated as pass 0.
+ // AttachmentPassCount  - number of passes; 0 or 1 selects the full range. Only the first
+ //                        byte is partitioned, so counts above 256 are clamped to 256.
+ //                        Without the clamp `(256 * (Index + 1)) / Count` can evaluate to 0,
+ //                        making the `- 1` wrap around and the narrowing cast fail.
+ // OutMin / OutMax      - receive the lower / upper bound of the range.
+ //
+ // Returns the index of the pass that should run next, or 0 when this was the last pass
+ // (or when the range is not partitioned at all).
+ uint8_t ComputeAttachmentRange(uint16_t AttachmentPassIndex, uint16_t AttachmentPassCount, IoHash& OutMin, IoHash& OutMax)
+ {
+     if (AttachmentPassCount <= 1)
+     {
+         OutMin = IoHash::Zero;
+         OutMax = IoHash::Max;
+         return 0;
+     }
+
+     // One slice per possible value of the first hash byte is the finest split available.
+     constexpr uint16_t MaxPassCount = 256;
+     if (AttachmentPassCount > MaxPassCount)
+     {
+         AttachmentPassCount = MaxPassCount;
+     }
+     if (AttachmentPassIndex >= AttachmentPassCount)
+     {
+         AttachmentPassIndex = 0;
+     }
+
+     const uint32_t PassCount     = AttachmentPassCount;
+     const uint32_t NextPassIndex = AttachmentPassIndex + 1u;
+
+     // With PassCount <= 256 the quotient for NextPassIndex is at least 1, so the
+     // subtraction below cannot wrap, and both bounds fit in a single byte.
+     const uint32_t RangeBegin = (256u * AttachmentPassIndex) / PassCount;
+     const uint32_t RangeEnd   = ((256u * NextPassIndex) / PassCount) - 1u;
+
+     OutMin         = IoHash::Zero;
+     OutMin.Hash[0] = gsl::narrow<uint8_t>(RangeBegin);
+     OutMax         = IoHash::Max;
+     OutMax.Hash[0] = gsl::narrow<uint8_t>(RangeEnd);
+
+     // Wrap back to the first pass once the last slice has been handed out.
+     return gsl::narrow<uint8_t>((NextPassIndex == PassCount) ? 0u : NextPassIndex);
+ }
} // namespace
//////////////////////////////////////////////////////////////////////////
@@ -572,25 +594,38 @@ Sum(GcResult& Stat, bool Cancelled = false)
}
bool
-FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences)
+FilterReferences(GcCtx& Ctx, std::string_view Context, std::vector<IoHash>& InOutReferences)
{
if (InOutReferences.empty())
{
return false;
}
- if (Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero)
+
+ const bool Filter = Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero;
+
+ size_t TotalCount = InOutReferences.size();
+ size_t RemovedCount = 0;
+
+ Stopwatch Timer;
+ const auto _ = MakeGuard([&] {
+ if (!Ctx.Settings.Verbose)
+ {
+ return;
+ }
+ ZEN_INFO(
+ "GCV2: {}: {}sorted {} entries in {}",
+ Context,
+ Filter ? fmt::format("skipped {}% ({} out of {}) and ", (100 * RemovedCount) / TotalCount, RemovedCount, TotalCount) : ""sv,
+ TotalCount - RemovedCount,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+
+ if (Filter)
{
- size_t TotalCount = InOutReferences.size();
std::erase_if(InOutReferences, [&Ctx](const IoHash& Key) {
return ((Ctx.Settings.AttachmentRangeMax < Key) || (Key < Ctx.Settings.AttachmentRangeMin));
});
- size_t RemovedCount = TotalCount - InOutReferences.size();
- ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}",
- (100 * RemovedCount) / TotalCount,
- RemovedCount,
- TotalCount,
- Ctx.Settings.AttachmentRangeMin,
- Ctx.Settings.AttachmentRangeMax);
+ RemovedCount = TotalCount - InOutReferences.size();
}
if (InOutReferences.empty())
{
@@ -1502,6 +1537,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
m_LastGcTime = GcClock::Now();
m_LastLightweightGcTime = m_LastGcTime;
m_LastGcExpireTime = GcClock::TimePoint::min();
+ m_AttachmentPassIndex = 0;
if (CbObject SchedulerState = LoadCompactBinaryObject(Config.RootDirectory / "gc_state"))
{
@@ -1514,6 +1550,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
m_LastGcTime = GcClock::Now();
m_LastLightweightGcTime = m_LastGcTime;
}
+ m_AttachmentPassIndex = SchedulerState["AttachmentPassIndex"sv].AsUInt8();
}
m_DiskUsageLog.Open(m_Config.RootDirectory / "gc.dlog", CasLogFile::Mode::kWrite);
@@ -1646,14 +1683,13 @@ GcScheduler::CheckDiskSpace()
}
void
-GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Settings, const GcResult& Result)
+GcScheduler::AppendGCLog(std::string_view Id, GcClock::TimePoint StartTime, const GcSettings& Settings, const GcResult& Result)
{
try
{
std::vector<uint8_t> Blob;
{
CbObjectWriter Writer;
- std::string Id = fmt::format("{}", gsl::narrow<int64_t>(StartTime.time_since_epoch().count()));
Writer.BeginObject(Id);
{
Writer << "StartTime"sv << ToDateTime(StartTime);
@@ -1667,6 +1703,8 @@ GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Setting
Writer << "Verbose"sv << Settings.Verbose;
Writer << "SingleThread"sv << Settings.SingleThread;
Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent;
+ Writer << "AttachmentRangeMin"sv << Settings.AttachmentRangeMin;
+ Writer << "AttachmentRangeMax"sv << Settings.AttachmentRangeMin;
}
Writer.EndObject();
@@ -1781,6 +1819,9 @@ GcScheduler::GetState() const
Result.LastLightweightGCV2Result = m_LastLightweightGCV2Result;
Result.LastFullGCV2Result = m_LastFullGCV2Result;
+
+ Result.LastFullAttachmentRangeMin = m_LastFullAttachmentRangeMin;
+ Result.LastFullAttachmentRangeMax = m_LastFullAttachmentRangeMax;
}
Result.RemainingTimeUntilFullGc =
@@ -1860,6 +1901,8 @@ GcScheduler::SchedulerThread()
bool SingleThreaded = m_Config.SingleThreaded;
IoHash AttachmentRangeMin = IoHash::Zero;
IoHash AttachmentRangeMax = IoHash::Max;
+ uint8_t NextAttachmentPassIndex =
+ ComputeAttachmentRange(m_AttachmentPassIndex, m_Config.AttachmentPassCount, AttachmentRangeMin, AttachmentRangeMax);
bool DiskSpaceGCTriggered = false;
bool TimeBasedGCTriggered = false;
@@ -1898,9 +1941,13 @@ GcScheduler::SchedulerThread()
TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent);
Verbose = TriggerParams.Verbose.value_or(Verbose);
SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
- AttachmentRangeMin = TriggerParams.AttachmentRangeMin;
- AttachmentRangeMax = TriggerParams.AttachmentRangeMax;
- DoGc = true;
+ AttachmentRangeMin = TriggerParams.AttachmentRangeMin.value_or(AttachmentRangeMin);
+ AttachmentRangeMax = TriggerParams.AttachmentRangeMax.value_or(AttachmentRangeMax);
+ if (TriggerParams.AttachmentRangeMin.has_value() || TriggerParams.AttachmentRangeMax.has_value())
+ {
+ NextAttachmentPassIndex = m_AttachmentPassIndex;
+ }
+ DoGc = true;
}
if (m_TriggerScrubParams)
@@ -2110,6 +2157,11 @@ GcScheduler::SchedulerThread()
}
}
+ if (!SkipCid)
+ {
+ m_AttachmentPassIndex = NextAttachmentPassIndex;
+ }
+
bool GcSuccess = CollectGarbage(CacheExpireTime,
ProjectStoreExpireTime,
DoDelete,
@@ -2266,11 +2318,6 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
}
}
- ZEN_INFO("garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time {}",
- GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv,
- SkipCid ? "skip"sv : "include"sv,
- CacheExpireTime,
- ProjectStoreExpireTime);
{
Stopwatch Timer;
const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
@@ -2279,6 +2326,13 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
switch (UseGCVersion)
{
case GcVersion::kV1:
+ ZEN_INFO(
+ "GCV1: Garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time "
+ "{}",
+ GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv,
+ SkipCid ? "skip"sv : "include"sv,
+ CacheExpireTime,
+ ProjectStoreExpireTime);
Diff = m_GcManager.CollectGarbage(GcCtx);
if (SkipCid)
{
@@ -2291,6 +2345,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
break;
case GcVersion::kV2:
{
+ std::string GcId = Oid::NewOid().ToString();
+
const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
.ProjectStoreExpireTime = ProjectStoreExpireTime,
.CollectSmallObjects = CollectSmallObjects,
@@ -2303,44 +2359,60 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
.AttachmentRangeMin = AttachmentRangeMin,
.AttachmentRangeMax = AttachmentRangeMax};
+ auto AppendSettings = [](StringBuilderBase& SB, const GcSettings& Settings) {
+ SB.Append(
+ fmt::format(" GC small objects: {}\n", Settings.CollectSmallObjects ? "yes"sv : "no"sv));
+ SB.Append(fmt::format(" GC Cid store: {}\n", Settings.SkipCidDelete ? "no"sv : "yes"sv));
+ if (!Settings.SkipCidDelete &&
+ (Settings.AttachmentRangeMin != IoHash::Zero || Settings.AttachmentRangeMax != IoHash::Max))
+ {
+ SB.Append(fmt::format(" Attachment range: {}-{}\n",
+ Settings.AttachmentRangeMin,
+ Settings.AttachmentRangeMax));
+ }
+ SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime));
+ SB.Append(fmt::format(" Project store cutoff time: {}", Settings.ProjectStoreExpireTime));
+ };
+
+ {
+ ExtendableStringBuilder<256> SB;
+ SB.Append(fmt::format("STARTING '{}'\n", GcId));
+ AppendSettings(SB, Settings);
+ ZEN_INFO("GCV2: {}", SB.ToView());
+ }
+
GcClock::TimePoint GcStartTime = GcClock::Now();
GcResult Result = m_GcManager.CollectGarbage(Settings);
ExtendableStringBuilder<256> SB;
if (Result.WasCancelled)
{
- SB.Append(fmt::format("Cancelled after {}", NiceTimeSpanMs(Result.ElapsedMS.count())));
+ SB.Append(fmt::format("CANCELLED '{}' after {}", GcId, NiceTimeSpanMs(Result.ElapsedMS.count())));
}
else
{
- SB.Append(
- fmt::format("CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, IsDeleteMode: {}, "
- "SkipCidDelete: {}\n",
- Settings.CacheExpireTime,
- Settings.ProjectStoreExpireTime,
- Settings.CollectSmallObjects,
- Settings.IsDeleteMode,
- Settings.SkipCidDelete));
- SB.Append(fmt::format(" Found {} expired items out of {}, deleted {}.\n",
+ SB.Append(fmt::format("COMPLETED '{}' in {}\n", GcId, NiceTimeSpanMs(Result.ElapsedMS.count())));
+ AppendSettings(SB, Settings);
+ SB.Append("\n\n");
+ SB.Append(fmt::format(" Found {} expired items out of {}, deleted {}\n",
Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount,
Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount,
Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount));
if (!Settings.SkipCidDelete)
{
- SB.Append(fmt::format(" Found {} unreferenced Cid entries out of {}, deleted {}.\n",
+ SB.Append(fmt::format(" Found {} unreferenced Cid entries out of {}, deleted {}\n",
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount,
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount,
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount));
}
- SB.Append(fmt::format(" Freed {} on disk and {} of memory in {}.\n",
+ SB.Append(fmt::format(" Freed {} on disk and {} of memory\n",
NiceBytes(Result.CompactStoresStatSum.RemovedDisk),
- NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory),
- NiceTimeSpanMs(Result.ElapsedMS.count())));
+ NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory)));
}
ZEN_INFO("GCV2: {}", SB.ToView());
- AppendGCLog(GcStartTime, Settings, Result);
+ AppendGCLog(GcId, GcStartTime, Settings, Result);
if (SkipCid)
{
@@ -2348,7 +2420,9 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
}
else
{
- m_LastFullGCV2Result = Result;
+ m_LastFullGCV2Result = Result;
+ m_LastFullAttachmentRangeMin = AttachmentRangeMin;
+ m_LastFullAttachmentRangeMin = AttachmentRangeMax;
}
Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk;
Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory;
@@ -2393,6 +2467,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
CbObjectWriter SchedulerState;
SchedulerState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count());
+ SchedulerState << "AttachmentPassIndex"sv << m_AttachmentPassIndex;
+
SaveCompactBinaryObject(Path, SchedulerState.Save());
if (RetryCount > 0)
{
@@ -3041,6 +3117,44 @@ TEST_CASE("gc.keepunusedreferences")
}
}
+// Verifies ComputeAttachmentRange: the full range for pass counts of 0 or 1, the per-pass
+// slices and returned next index for partitioned runs, the wrap-around of an out-of-range
+// pass index, and a pass count that does not divide the 256 first-byte values evenly.
+TEST_CASE("gc.attachmentrange")
+{
+    IoHash AttachmentRangeMin = IoHash::Zero;
+    IoHash AttachmentRangeMax = IoHash::Zero;
+
+    // A pass count of 0 or 1 always yields the full range, whatever the index is
+    CHECK(ComputeAttachmentRange(0, 0, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax == IoHash::Max);
+
+    CHECK(ComputeAttachmentRange(1, 0, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax == IoHash::Max);
+
+    CHECK(ComputeAttachmentRange(1, 1, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax == IoHash::Max);
+
+    CHECK(ComputeAttachmentRange(0, 1, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax == IoHash::Max);
+
+    // Two passes split the range in half on the first hash byte
+    CHECK(ComputeAttachmentRange(0, 2, AttachmentRangeMin, AttachmentRangeMax) == 1);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax == IoHash::FromHexString("7fffffffffffffffffffffffffffffffffffffff"));
+
+    CHECK(ComputeAttachmentRange(1, 2, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin == IoHash::FromHexString("8000000000000000000000000000000000000000"));
+    CHECK(AttachmentRangeMax == IoHash::Max);
+
+    // A pass index that is not below the pass count restarts at the first pass
+    CHECK(ComputeAttachmentRange(2, 2, AttachmentRangeMin, AttachmentRangeMax) == 1);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax == IoHash::FromHexString("7fffffffffffffffffffffffffffffffffffffff"));
+
+    // Three passes: uneven split, slices must be contiguous and cover every first byte value
+    CHECK(ComputeAttachmentRange(0, 3, AttachmentRangeMin, AttachmentRangeMax) == 1);
+    CHECK(AttachmentRangeMin == IoHash::Zero);
+    CHECK(AttachmentRangeMax.Hash[0] == 0x54);
+
+    CHECK(ComputeAttachmentRange(1, 3, AttachmentRangeMin, AttachmentRangeMax) == 2);
+    CHECK(AttachmentRangeMin.Hash[0] == 0x55);
+    CHECK(AttachmentRangeMax.Hash[0] == 0xa9);
+
+    CHECK(ComputeAttachmentRange(2, 3, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin.Hash[0] == 0xaa);
+    CHECK(AttachmentRangeMax == IoHash::Max);
+
+    // 256 passes: each pass covers exactly one first byte value
+    CHECK(ComputeAttachmentRange(1, 256, AttachmentRangeMin, AttachmentRangeMax) == 2);
+    CHECK(AttachmentRangeMin == IoHash::FromHexString("0100000000000000000000000000000000000000"));
+    CHECK(AttachmentRangeMax == IoHash::FromHexString("01ffffffffffffffffffffffffffffffffffffff"));
+
+    CHECK(ComputeAttachmentRange(255, 256, AttachmentRangeMin, AttachmentRangeMax) == 0);
+    CHECK(AttachmentRangeMin == IoHash::FromHexString("ff00000000000000000000000000000000000000"));
+    CHECK(AttachmentRangeMax == IoHash::Max);
+}
+
#endif
void