diff options
| author | Dan Engelbrecht <[email protected]> | 2024-09-23 19:19:40 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-09-23 19:19:40 +0200 |
| commit | bc9e590727211d803cce7be84c1cbc026179b841 (patch) | |
| tree | 96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src/zenstore/gc.cpp | |
| parent | made fmt formatter format function const (#162) (diff) | |
| download | zen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz zen-bc9e590727211d803cce7be84c1cbc026179b841.zip | |
gc unused refactor (#165)
* optimize IoHash and OId comparisons
* refactor filtering of unused references
* add attachment filtering to gc
Diffstat (limited to 'src/zenstore/gc.cpp')
| -rw-r--r-- | src/zenstore/gc.cpp | 258 |
1 files changed, 242 insertions, 16 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 28bdd2f42..904619222 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -571,6 +571,95 @@ Sum(GcResult& Stat, bool Cancelled = false) return Stat; } +bool +FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences) +{ + if (InOutReferences.empty()) + { + return false; + } + if (Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero) + { + size_t TotalCount = InOutReferences.size(); + std::erase_if(InOutReferences, [&Ctx](const IoHash& Key) { + return ((Ctx.Settings.AttachmentRangeMax < Key) || (Key < Ctx.Settings.AttachmentRangeMin)); + }); + size_t RemovedCount = TotalCount - InOutReferences.size(); + ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}", + (100 * RemovedCount) / TotalCount, + RemovedCount, + TotalCount, + Ctx.Settings.AttachmentRangeMin, + Ctx.Settings.AttachmentRangeMax); + } + if (InOutReferences.empty()) + { + return false; + } + std::sort(InOutReferences.begin(), InOutReferences.end()); + auto NewEnd = std::unique(InOutReferences.begin(), InOutReferences.end()); + InOutReferences.erase(NewEnd, InOutReferences.end()); + return true; +} + +std::span<IoHash> +KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences) +{ + if (SortedUsedReferences.empty()) + { + return SortedReferences; + } + if (SortedReferences.empty()) + { + return {}; + } + + const IoHash* ReferencesRead = &SortedReferences[0]; + IoHash* ReferencesWrite = &SortedReferences[0]; + const IoHash* ReferencesEnd = ReferencesRead + SortedReferences.size(); + + const IoHash* UsedReferencesRead = &SortedUsedReferences[0]; + const IoHash* UsedReferencesReadEnd = UsedReferencesRead + SortedUsedReferences.size(); + + while (ReferencesRead != ReferencesEnd && UsedReferencesRead != UsedReferencesReadEnd) + { + const IoHash& Reference = *ReferencesRead; + const IoHash& 
UsedReference = *UsedReferencesRead; + if (Reference == UsedReference) + { + // Skip it + ReferencesRead++; + UsedReferencesRead++; + } + else if (Reference < UsedReference) + { + // Keep it + if (ReferencesRead > ReferencesWrite) + { + *ReferencesWrite = Reference; + } + ReferencesWrite++; + ReferencesRead++; + } + else + { + // Skip it + UsedReferencesRead++; + } + } + + size_t Remaining = std::distance(ReferencesRead, ReferencesEnd); + if (Remaining > 0) + { + if (ReferencesRead != ReferencesWrite) + { + memcpy(ReferencesWrite, ReferencesRead, sizeof(IoHash::Hash) * Remaining); + } + ReferencesWrite += Remaining; + } + return SortedReferences.subspan(0, (size_t)std::distance(&SortedReferences[0], ReferencesWrite)); +} + void GcManager::AddGcReferencer(GcReferencer& Referencer) { @@ -987,18 +1076,19 @@ GcManager::CollectGarbage(const GcSettings& Settings) return Sum(Result, true); } { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> { + std::span<IoHash> UnusedCids(References); + ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero); for (const auto& It : ReferenceCheckers) { GcReferenceChecker* ReferenceChecker = It.first.get(); - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); + UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids); if (UnusedCids.empty()) { return {}; } } - return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + return UnusedCids; }; // checking all Cids agains references in cache @@ -1768,6 +1858,8 @@ GcScheduler::SchedulerThread() uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent; bool Verbose = m_Config.Verbose; bool SingleThreaded = m_Config.SingleThreaded; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash 
AttachmentRangeMax = IoHash::Max; bool DiskSpaceGCTriggered = false; bool TimeBasedGCTriggered = false; @@ -1804,9 +1896,11 @@ GcScheduler::SchedulerThread() UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion); CompactBlockUsageThresholdPercent = TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent); - Verbose = TriggerParams.Verbose.value_or(Verbose); - SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); - DoGc = true; + Verbose = TriggerParams.Verbose.value_or(Verbose); + SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); + AttachmentRangeMin = TriggerParams.AttachmentRangeMin; + AttachmentRangeMax = TriggerParams.AttachmentRangeMax; + DoGc = true; } if (m_TriggerScrubParams) @@ -2025,6 +2119,8 @@ GcScheduler::SchedulerThread() CompactBlockUsageThresholdPercent, Verbose, SingleThreaded, + AttachmentRangeMin, + AttachmentRangeMax, SilenceErrors); if (!GcSuccess) { @@ -2124,6 +2220,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, uint32_t CompactBlockUsageThresholdPercent, bool Verbose, bool SingleThreaded, + const IoHash& AttachmentRangeMin, + const IoHash& AttachmentRangeMax, bool SilenceErrors) { ZEN_TRACE_CPU("GcScheduler::CollectGarbage"); @@ -2193,15 +2291,18 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, break; case GcVersion::kV2: { - const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, - .ProjectStoreExpireTime = ProjectStoreExpireTime, - .CollectSmallObjects = CollectSmallObjects, - .IsDeleteMode = Delete, - .SkipCidDelete = SkipCid, - .Verbose = Verbose, - .SingleThread = SingleThreaded, - .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, - .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; + const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, + .ProjectStoreExpireTime = ProjectStoreExpireTime, + .CollectSmallObjects = CollectSmallObjects, + .IsDeleteMode 
= Delete, + .SkipCidDelete = SkipCid, + .Verbose = Verbose, + .SingleThread = SingleThreaded, + .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, + .DiskReservePath = m_Config.RootDirectory / "reserve.gc", + .AttachmentRangeMin = AttachmentRangeMin, + .AttachmentRangeMax = AttachmentRangeMax}; + GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); @@ -2815,6 +2916,131 @@ TEST_CASE("scrub.basic") CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); } +TEST_CASE("gc.keepunusedreferences") +{ + const IoHash Hashes[] = {IoHash::FromHexString("177030568fdd461bf4fe5ddbf4d463e514e8178e"), + IoHash::FromHexString("372d795bb907a15cab15ab3917854bfef7e7af2c"), + IoHash::FromHexString("75ab3917854bfef7e72d795bb907a15cab1af2c3"), + IoHash::FromHexString("ab3917854bfef7e7af2c372d795bb907a15cab15"), + IoHash::FromHexString("d1df59fcab06793a5f2c372d795bb907a15cab15")}; + { + std::vector<IoHash> UsedReferences; + std::vector<IoHash> References; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> UsedReferences{}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 5); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } 
+ + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[2], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[1]); + CHECK(UnusedReferences[1] == Hashes[3]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[3]); + CHECK(UnusedReferences[1] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 1); + CHECK(UnusedReferences[0] == Hashes[3]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[1], Hashes[3]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 3); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0]}; + std::vector<IoHash> 
References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[1]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[3]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[1]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[3]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[3]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + CHECK(UnusedReferences[2] == Hashes[2]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + CHECK(UnusedReferences[2] == Hashes[2]); + CHECK(UnusedReferences[3] == Hashes[3]); + } +} + #endif void |