diff options
| author | Dan Engelbrecht <[email protected]> | 2024-09-25 10:21:53 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-09-25 10:21:53 +0200 |
| commit | e27a5da5dae33f958a4b809a9e20a0af33c24f90 (patch) | |
| tree | 3f22bdba794108fa2a4a4d5d1fc308a986b3483b /src/zenstore/gc.cpp | |
| parent | exception safety when writing block (#168) (diff) | |
| download | zen-e27a5da5dae33f958a4b809a9e20a0af33c24f90.tar.xz zen-e27a5da5dae33f958a4b809a9e20a0af33c24f90.zip | |
Add `gc-attachment-passes` option to zenserver (#167)
Added option `gc-attachment-passes` to zenserver
Cleaned up GCv2 start and stop logs and added identifier to easily find matching start and end of a GC pass in log file
Fixed project store not properly sorting references found during lock phase
Diffstat (limited to 'src/zenstore/gc.cpp')
| -rw-r--r-- | src/zenstore/gc.cpp | 186 |
1 files changed, 150 insertions, 36 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 904619222..cde89421e 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -145,6 +145,28 @@ namespace { return std::error_code{}; } + uint8_t ComputeAttachmentRange(uint16_t AttachmentPassIndex, uint16_t AttachmentPassCount, IoHash& OutMin, IoHash& OutMax) + { + if (AttachmentPassCount <= 1) + { + OutMin = IoHash::Zero; + OutMax = IoHash::Max; + return 0; + } + if (AttachmentPassIndex >= AttachmentPassCount) + { + AttachmentPassIndex = 0; + } + + uint32_t RangeBegin = (256 * AttachmentPassIndex) / AttachmentPassCount; + AttachmentPassIndex++; + uint32_t RangeEnd = ((256 * AttachmentPassIndex) / AttachmentPassCount) - 1; + OutMin = IoHash::Zero; + OutMin.Hash[0] = gsl::narrow<uint8_t>(RangeBegin); + OutMax = IoHash::Max; + OutMax.Hash[0] = gsl::narrow<uint8_t>(RangeEnd); + return gsl::narrow<uint8_t>((AttachmentPassIndex == AttachmentPassCount) ? 0 : AttachmentPassIndex); + }; } // namespace ////////////////////////////////////////////////////////////////////////// @@ -572,25 +594,38 @@ Sum(GcResult& Stat, bool Cancelled = false) } bool -FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences) +FilterReferences(GcCtx& Ctx, std::string_view Context, std::vector<IoHash>& InOutReferences) { if (InOutReferences.empty()) { return false; } - if (Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero) + + const bool Filter = Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero; + + size_t TotalCount = InOutReferences.size(); + size_t RemovedCount = 0; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO( + "GCV2: {}: {}sorted {} entries in {}", + Context, + Filter ? 
fmt::format("skipped {}% ({} out of {}) and ", (100 * RemovedCount) / TotalCount, RemovedCount, TotalCount) : ""sv, + TotalCount - RemovedCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + if (Filter) { - size_t TotalCount = InOutReferences.size(); std::erase_if(InOutReferences, [&Ctx](const IoHash& Key) { return ((Ctx.Settings.AttachmentRangeMax < Key) || (Key < Ctx.Settings.AttachmentRangeMin)); }); - size_t RemovedCount = TotalCount - InOutReferences.size(); - ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}", - (100 * RemovedCount) / TotalCount, - RemovedCount, - TotalCount, - Ctx.Settings.AttachmentRangeMin, - Ctx.Settings.AttachmentRangeMax); + RemovedCount = TotalCount - InOutReferences.size(); } if (InOutReferences.empty()) { @@ -1502,6 +1537,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config) m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; m_LastGcExpireTime = GcClock::TimePoint::min(); + m_AttachmentPassIndex = 0; if (CbObject SchedulerState = LoadCompactBinaryObject(Config.RootDirectory / "gc_state")) { @@ -1514,6 +1550,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config) m_LastGcTime = GcClock::Now(); m_LastLightweightGcTime = m_LastGcTime; } + m_AttachmentPassIndex = SchedulerState["AttachmentPassIndex"sv].AsUInt8(); } m_DiskUsageLog.Open(m_Config.RootDirectory / "gc.dlog", CasLogFile::Mode::kWrite); @@ -1646,14 +1683,13 @@ GcScheduler::CheckDiskSpace() } void -GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Settings, const GcResult& Result) +GcScheduler::AppendGCLog(std::string_view Id, GcClock::TimePoint StartTime, const GcSettings& Settings, const GcResult& Result) { try { std::vector<uint8_t> Blob; { CbObjectWriter Writer; - std::string Id = fmt::format("{}", gsl::narrow<int64_t>(StartTime.time_since_epoch().count())); Writer.BeginObject(Id); { Writer << "StartTime"sv << ToDateTime(StartTime); @@ -1667,6 +1703,8 @@ 
GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Setting Writer << "Verbose"sv << Settings.Verbose; Writer << "SingleThread"sv << Settings.SingleThread; Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent; + Writer << "AttachmentRangeMin"sv << Settings.AttachmentRangeMin; + Writer << "AttachmentRangeMax"sv << Settings.AttachmentRangeMax; } Writer.EndObject(); @@ -1781,6 +1819,9 @@ GcScheduler::GetState() const Result.LastLightweightGCV2Result = m_LastLightweightGCV2Result; Result.LastFullGCV2Result = m_LastFullGCV2Result; + + Result.LastFullAttachmentRangeMin = m_LastFullAttachmentRangeMin; + Result.LastFullAttachmentRangeMax = m_LastFullAttachmentRangeMax; } Result.RemainingTimeUntilFullGc = @@ -1860,6 +1901,8 @@ GcScheduler::SchedulerThread() bool SingleThreaded = m_Config.SingleThreaded; IoHash AttachmentRangeMin = IoHash::Zero; IoHash AttachmentRangeMax = IoHash::Max; + uint8_t NextAttachmentPassIndex = + ComputeAttachmentRange(m_AttachmentPassIndex, m_Config.AttachmentPassCount, AttachmentRangeMin, AttachmentRangeMax); bool DiskSpaceGCTriggered = false; bool TimeBasedGCTriggered = false; @@ -1898,9 +1941,13 @@ GcScheduler::SchedulerThread() TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent); Verbose = TriggerParams.Verbose.value_or(Verbose); SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); - AttachmentRangeMin = TriggerParams.AttachmentRangeMin; - AttachmentRangeMax = TriggerParams.AttachmentRangeMax; - DoGc = true; + AttachmentRangeMin = TriggerParams.AttachmentRangeMin.value_or(AttachmentRangeMin); + AttachmentRangeMax = TriggerParams.AttachmentRangeMax.value_or(AttachmentRangeMax); + if (TriggerParams.AttachmentRangeMin.has_value() || TriggerParams.AttachmentRangeMax.has_value()) + { + NextAttachmentPassIndex = m_AttachmentPassIndex; + } + DoGc = true; } if (m_TriggerScrubParams) @@ -2110,6 +2157,11 @@ 
GcScheduler::SchedulerThread() } } + if (!SkipCid) + { + m_AttachmentPassIndex = NextAttachmentPassIndex; + } + bool GcSuccess = CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, DoDelete, @@ -2266,11 +2318,6 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, } } - ZEN_INFO("garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time {}", - GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv, - SkipCid ? "skip"sv : "include"sv, - CacheExpireTime, - ProjectStoreExpireTime); { Stopwatch Timer; const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -2279,6 +2326,13 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, switch (UseGCVersion) { case GcVersion::kV1: + ZEN_INFO( + "GCV1: Garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time " + "{}", + GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv, + SkipCid ? "skip"sv : "include"sv, + CacheExpireTime, + ProjectStoreExpireTime); Diff = m_GcManager.CollectGarbage(GcCtx); if (SkipCid) { @@ -2291,6 +2345,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, break; case GcVersion::kV2: { + std::string GcId = Oid::NewOid().ToString(); + const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, .ProjectStoreExpireTime = ProjectStoreExpireTime, .CollectSmallObjects = CollectSmallObjects, @@ -2303,44 +2359,60 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .AttachmentRangeMin = AttachmentRangeMin, .AttachmentRangeMax = AttachmentRangeMax}; + auto AppendSettings = [](StringBuilderBase& SB, const GcSettings& Settings) { + SB.Append( + fmt::format(" GC small objects: {}\n", Settings.CollectSmallObjects ? "yes"sv : "no"sv)); + SB.Append(fmt::format(" GC Cid store: {}\n", Settings.SkipCidDelete ? 
"no"sv : "yes"sv)); + if (!Settings.SkipCidDelete && + (Settings.AttachmentRangeMin != IoHash::Zero || Settings.AttachmentRangeMax != IoHash::Max)) + { + SB.Append(fmt::format(" Attachment range: {}-{}\n", + Settings.AttachmentRangeMin, + Settings.AttachmentRangeMax)); + } + SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime)); + SB.Append(fmt::format(" Project store cutoff time: {}", Settings.ProjectStoreExpireTime)); + }; + + { + ExtendableStringBuilder<256> SB; + SB.Append(fmt::format("STARTING '{}'\n", GcId)); + AppendSettings(SB, Settings); + ZEN_INFO("GCV2: {}", SB.ToView()); + } + GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); ExtendableStringBuilder<256> SB; if (Result.WasCancelled) { - SB.Append(fmt::format("Cancelled after {}", NiceTimeSpanMs(Result.ElapsedMS.count()))); + SB.Append(fmt::format("CANCELLED '{}' after {}", GcId, NiceTimeSpanMs(Result.ElapsedMS.count()))); } else { - SB.Append( - fmt::format("CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, IsDeleteMode: {}, " - "SkipCidDelete: {}\n", - Settings.CacheExpireTime, - Settings.ProjectStoreExpireTime, - Settings.CollectSmallObjects, - Settings.IsDeleteMode, - Settings.SkipCidDelete)); - SB.Append(fmt::format(" Found {} expired items out of {}, deleted {}.\n", + SB.Append(fmt::format("COMPLETED '{}' in {}\n", GcId, NiceTimeSpanMs(Result.ElapsedMS.count()))); + AppendSettings(SB, Settings); + SB.Append("\n\n"); + SB.Append(fmt::format(" Found {} expired items out of {}, deleted {}\n", Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount)); if (!Settings.SkipCidDelete) { - SB.Append(fmt::format(" Found {} unreferenced Cid entries out of {}, deleted {}.\n", + SB.Append(fmt::format(" Found {} unreferenced Cid entries out of {}, deleted {}\n", 
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount)); } - SB.Append(fmt::format(" Freed {} on disk and {} of memory in {}.\n", + SB.Append(fmt::format(" Freed {} on disk and {} of memory\n", NiceBytes(Result.CompactStoresStatSum.RemovedDisk), - NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory), - NiceTimeSpanMs(Result.ElapsedMS.count()))); + NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory))); } ZEN_INFO("GCV2: {}", SB.ToView()); - AppendGCLog(GcStartTime, Settings, Result); + AppendGCLog(GcId, GcStartTime, Settings, Result); if (SkipCid) { @@ -2348,7 +2420,9 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, } else { - m_LastFullGCV2Result = Result; + m_LastFullGCV2Result = Result; + m_LastFullAttachmentRangeMin = AttachmentRangeMin; + m_LastFullAttachmentRangeMax = AttachmentRangeMax; } Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk; Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory; @@ -2393,6 +2467,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, CbObjectWriter SchedulerState; SchedulerState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count()); SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count()); + SchedulerState << "AttachmentPassIndex"sv << m_AttachmentPassIndex; + SaveCompactBinaryObject(Path, SchedulerState.Save()); if (RetryCount > 0) { @@ -3041,6 +3117,44 @@ TEST_CASE("gc.keepunusedreferences") } } +TEST_CASE("gc.attachmentrange") +{ + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Zero; + + CHECK(ComputeAttachmentRange(0, 0, AttachmentRangeMin, AttachmentRangeMax) == 0); + CHECK(AttachmentRangeMin == IoHash::Zero); + CHECK(AttachmentRangeMax == 
IoHash::Max); + + CHECK(ComputeAttachmentRange(1, 0, AttachmentRangeMin, AttachmentRangeMax) == 0); + CHECK(AttachmentRangeMin == IoHash::Zero); + CHECK(AttachmentRangeMax == IoHash::Max); + + CHECK(ComputeAttachmentRange(1, 1, AttachmentRangeMin, AttachmentRangeMax) == 0); + CHECK(AttachmentRangeMin == IoHash::Zero); + CHECK(AttachmentRangeMax == IoHash::Max); + + CHECK(ComputeAttachmentRange(0, 1, AttachmentRangeMin, AttachmentRangeMax) == 0); + CHECK(AttachmentRangeMin == IoHash::Zero); + CHECK(AttachmentRangeMax == IoHash::Max); + + CHECK(ComputeAttachmentRange(0, 2, AttachmentRangeMin, AttachmentRangeMax) == 1); + CHECK(AttachmentRangeMin == IoHash::Zero); + CHECK(AttachmentRangeMax == IoHash::FromHexString("7fffffffffffffffffffffffffffffffffffffff")); + + CHECK(ComputeAttachmentRange(1, 2, AttachmentRangeMin, AttachmentRangeMax) == 0); + CHECK(AttachmentRangeMin == IoHash::FromHexString("8000000000000000000000000000000000000000")); + CHECK(AttachmentRangeMax == IoHash::Max); + + CHECK(ComputeAttachmentRange(1, 256, AttachmentRangeMin, AttachmentRangeMax) == 2); + CHECK(AttachmentRangeMin == IoHash::FromHexString("0100000000000000000000000000000000000000")); + CHECK(AttachmentRangeMax == IoHash::FromHexString("01ffffffffffffffffffffffffffffffffffffff")); + + CHECK(ComputeAttachmentRange(255, 256, AttachmentRangeMin, AttachmentRangeMax) == 0); + CHECK(AttachmentRangeMin == IoHash::FromHexString("ff00000000000000000000000000000000000000")); + CHECK(AttachmentRangeMax == IoHash::Max); +} + #endif void |