diff options
| author | Dan Engelbrecht <[email protected]> | 2024-09-23 19:19:40 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-09-23 19:19:40 +0200 |
| commit | bc9e590727211d803cce7be84c1cbc026179b841 (patch) | |
| tree | 96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src/zenstore | |
| parent | made fmt formatter format function const (#162) (diff) | |
| download | zen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz zen-bc9e590727211d803cce7be84c1cbc026179b841.zip | |
gc unused refactor (#165)
* optimize IoHash and OId comparisons
* refactor filtering of unused references
* add attachment filtering to gc
Diffstat (limited to 'src/zenstore')
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 54 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 24 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 14 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 14 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 258 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 24 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 21 |
7 files changed, 324 insertions, 85 deletions
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 63f6d708a..417b63fb4 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -1262,7 +1262,9 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept } if (m_TrackedReferences && HashKeyAndReferences.size() > 1) { - m_TrackedReferences->insert(HashKeyAndReferences.begin() + 1, HashKeyAndReferences.end()); + m_TrackedReferences->insert(m_TrackedReferences->end(), + HashKeyAndReferences.begin() + 1, + HashKeyAndReferences.end()); } if (auto It = m_Index.find(HashKey); It != m_Index.end()) { @@ -2963,7 +2965,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c } if (m_TrackedReferences) { - m_TrackedReferences->insert(References.begin(), References.end()); + m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end()); } PayloadIndex EntryIndex = {}; @@ -3130,7 +3132,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, } if (m_TrackedReferences) { - m_TrackedReferences->insert(References.begin(), References.end()); + m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end()); } if (auto It = m_Index.find(HashKey); It != m_Index.end()) { @@ -3782,17 +3784,19 @@ public: } ZEN_INFO("GCV2: cachebucket [PRECACHE] '{}': found {} references in {}", m_CacheBucket.m_BucketDir, - m_References.size(), + m_PrecachedReferences.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<HashSet>(); }); + m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<std::vector<IoHash>>(); }); - bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_References); + bool Continue = m_CacheBucket.GetReferences(Ctx, 
/*StateIsAlreadyLocked*/ false, m_PrecachedReferences); if (!Continue) { m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences.reset(); }); + return; } + FilterReferences(Ctx, m_PrecachedReferences); } virtual void UpdateLockedState(GcCtx& Ctx) override @@ -3809,32 +3813,32 @@ public: } ZEN_INFO("GCV2: cachebucket [LOCKSTATE] '{}': found {} references in {}", m_CacheBucket.m_BucketDir, - m_References.size(), + m_PrecachedReferences.size() + m_AddedReferences.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); if (Ctx.IsCancelledFlag.load()) { - m_References = {}; + m_PrecachedReferences = {}; m_CacheBucket.m_TrackedReferences.reset(); return; } ZEN_ASSERT(m_CacheBucket.m_TrackedReferences); - HashSet& AddedReferences(*m_CacheBucket.m_TrackedReferences); - m_References.reserve(m_References.size() + AddedReferences.size()); - m_References.insert(m_References.end(), AddedReferences.begin(), AddedReferences.end()); - AddedReferences = {}; + m_AddedReferences = std::move(*m_CacheBucket.m_TrackedReferences); + FilterReferences(Ctx, m_AddedReferences); } - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override { - ZEN_TRACE_CPU("Z$::Bucket::RemoveUsedReferencesFromSet"); + ZEN_TRACE_CPU("Z$::Bucket::GetUnusedReferences"); auto Log = [&Ctx]() { return Ctx.Logger; }; - size_t InitialCount = IoCids.size(); + const size_t InitialCount = IoCids.size(); + size_t UsedCount = InitialCount; + Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) @@ -3843,24 +3847,20 @@ public: } ZEN_INFO("GCV2: cachebucket [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}", m_CacheBucket.m_BucketDir, - InitialCount - IoCids.size(), + UsedCount, InitialCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - for (const IoHash& ReferenceHash : m_References) - { - if (IoCids.erase(ReferenceHash) == 1) - { 
- if (IoCids.empty()) - { - return; - } - } - } + std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_PrecachedReferences, IoCids); + UnusedReferences = KeepUnusedReferences(m_AddedReferences, UnusedReferences); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; } + CacheBucket& m_CacheBucket; - std::vector<IoHash> m_References; + std::vector<IoHash> m_PrecachedReferences; + std::vector<IoHash> m_AddedReferences; }; std::vector<GcReferenceChecker*> diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index 7794d025f..9f1bcb41a 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -1189,15 +1189,18 @@ public: break; } } + FilterReferences(Ctx, m_References); } - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override { - ZEN_TRACE_CPU("Z$::RemoveUsedReferencesFromSet"); + ZEN_TRACE_CPU("Z$::GetUnusedReferences"); auto Log = [&Ctx]() { return Ctx.Logger; }; - size_t InitialCount = IoCids.size(); + const size_t InitialCount = IoCids.size(); + size_t UsedCount = InitialCount; + Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) @@ -1206,21 +1209,14 @@ public: } ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}", "projectstore", - InitialCount - IoCids.size(), + UsedCount, InitialCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - for (const IoHash& ReferenceHash : m_References) - { - if (IoCids.erase(ReferenceHash) == 1) - { - if (IoCids.empty()) - { - return; - } - } - } + std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; } private: diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 
15f80d4cf..e0a7900f1 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -867,9 +867,9 @@ public: NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); - Stats.CheckedCount = m_Cids.size(); - Stats.FoundCount = UnusedCids.size(); + std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { @@ -967,7 +967,11 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) CidsToCheck.push_back(It.first); } } - return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); + if (FilterReferences(Ctx, CidsToCheck)) + { + return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); + } + return nullptr; } void @@ -2018,7 +2022,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType) HashKeySet Deleted; GcStats Stats; GcStoreCompactor* Compactor = - Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::vector<IoHash> { + Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> { std::vector<IoHash> Unreferenced; HashKeySet Retain; Retain.AddHashesToSet(KeepHashes); diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 733140e50..7bd17ee88 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1653,9 +1653,9 @@ public: NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); - Stats.CheckedCount = m_Cids.size(); - Stats.FoundCount = UnusedCids.size(); + std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { // Nothing to collect @@ -1699,7 +1699,7 @@ public: } } - return new FileCasStoreCompactor(m_FileCasStrategy, std::move(UnusedCids)); + return new 
FileCasStoreCompactor(m_FileCasStrategy, std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end())); } private: @@ -1745,7 +1745,11 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) CidsToCheck.push_back(It.first); } } - return new FileCasReferencePruner(*this, std::move(CidsToCheck)); + if (FilterReferences(Ctx, CidsToCheck)) + { + return new FileCasReferencePruner(*this, std::move(CidsToCheck)); + } + return nullptr; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 28bdd2f42..904619222 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -571,6 +571,95 @@ Sum(GcResult& Stat, bool Cancelled = false) return Stat; } +bool +FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences) +{ + if (InOutReferences.empty()) + { + return false; + } + if (Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero) + { + size_t TotalCount = InOutReferences.size(); + std::erase_if(InOutReferences, [&Ctx](const IoHash& Key) { + return ((Ctx.Settings.AttachmentRangeMax < Key) || (Key < Ctx.Settings.AttachmentRangeMin)); + }); + size_t RemovedCount = TotalCount - InOutReferences.size(); + ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}", + (100 * RemovedCount) / TotalCount, + RemovedCount, + TotalCount, + Ctx.Settings.AttachmentRangeMin, + Ctx.Settings.AttachmentRangeMax); + } + if (InOutReferences.empty()) + { + return false; + } + std::sort(InOutReferences.begin(), InOutReferences.end()); + auto NewEnd = std::unique(InOutReferences.begin(), InOutReferences.end()); + InOutReferences.erase(NewEnd, InOutReferences.end()); + return true; +} + +std::span<IoHash> +KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences) +{ + if (SortedUsedReferences.empty()) + { + return SortedReferences; + } + if 
(SortedReferences.empty()) + { + return {}; + } + + const IoHash* ReferencesRead = &SortedReferences[0]; + IoHash* ReferencesWrite = &SortedReferences[0]; + const IoHash* ReferencesEnd = ReferencesRead + SortedReferences.size(); + + const IoHash* UsedReferencesRead = &SortedUsedReferences[0]; + const IoHash* UsedReferencesReadEnd = UsedReferencesRead + SortedUsedReferences.size(); + + while (ReferencesRead != ReferencesEnd && UsedReferencesRead != UsedReferencesReadEnd) + { + const IoHash& Reference = *ReferencesRead; + const IoHash& UsedReference = *UsedReferencesRead; + if (Reference == UsedReference) + { + // Skip it + ReferencesRead++; + UsedReferencesRead++; + } + else if (Reference < UsedReference) + { + // Keep it + if (ReferencesRead > ReferencesWrite) + { + *ReferencesWrite = Reference; + } + ReferencesWrite++; + ReferencesRead++; + } + else + { + // Skip it + UsedReferencesRead++; + } + } + + size_t Remaining = std::distance(ReferencesRead, ReferencesEnd); + if (Remaining > 0) + { + if (ReferencesRead != ReferencesWrite) + { + memcpy(ReferencesWrite, ReferencesRead, sizeof(IoHash::Hash) * Remaining); + } + ReferencesWrite += Remaining; + } + return SortedReferences.subspan(0, (size_t)std::distance(&SortedReferences[0], ReferencesWrite)); +} + void GcManager::AddGcReferencer(GcReferencer& Referencer) { @@ -987,18 +1076,19 @@ GcManager::CollectGarbage(const GcSettings& Settings) return Sum(Result, true); } { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> { + std::span<IoHash> UnusedCids(References); + ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero); for (const auto& It : ReferenceCheckers) { GcReferenceChecker* ReferenceChecker = It.first.get(); - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, 
UnusedCids); + UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids); if (UnusedCids.empty()) { return {}; } } - return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + return UnusedCids; }; // checking all Cids agains references in cache @@ -1768,6 +1858,8 @@ GcScheduler::SchedulerThread() uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent; bool Verbose = m_Config.Verbose; bool SingleThreaded = m_Config.SingleThreaded; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; bool DiskSpaceGCTriggered = false; bool TimeBasedGCTriggered = false; @@ -1804,9 +1896,11 @@ GcScheduler::SchedulerThread() UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion); CompactBlockUsageThresholdPercent = TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent); - Verbose = TriggerParams.Verbose.value_or(Verbose); - SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); - DoGc = true; + Verbose = TriggerParams.Verbose.value_or(Verbose); + SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); + AttachmentRangeMin = TriggerParams.AttachmentRangeMin; + AttachmentRangeMax = TriggerParams.AttachmentRangeMax; + DoGc = true; } if (m_TriggerScrubParams) @@ -2025,6 +2119,8 @@ GcScheduler::SchedulerThread() CompactBlockUsageThresholdPercent, Verbose, SingleThreaded, + AttachmentRangeMin, + AttachmentRangeMax, SilenceErrors); if (!GcSuccess) { @@ -2124,6 +2220,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, uint32_t CompactBlockUsageThresholdPercent, bool Verbose, bool SingleThreaded, + const IoHash& AttachmentRangeMin, + const IoHash& AttachmentRangeMax, bool SilenceErrors) { ZEN_TRACE_CPU("GcScheduler::CollectGarbage"); @@ -2193,15 +2291,18 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, break; case GcVersion::kV2: { - const GcSettings Settings = {.CacheExpireTime = 
CacheExpireTime, - .ProjectStoreExpireTime = ProjectStoreExpireTime, - .CollectSmallObjects = CollectSmallObjects, - .IsDeleteMode = Delete, - .SkipCidDelete = SkipCid, - .Verbose = Verbose, - .SingleThread = SingleThreaded, - .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, - .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; + const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, + .ProjectStoreExpireTime = ProjectStoreExpireTime, + .CollectSmallObjects = CollectSmallObjects, + .IsDeleteMode = Delete, + .SkipCidDelete = SkipCid, + .Verbose = Verbose, + .SingleThread = SingleThreaded, + .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, + .DiskReservePath = m_Config.RootDirectory / "reserve.gc", + .AttachmentRangeMin = AttachmentRangeMin, + .AttachmentRangeMax = AttachmentRangeMax}; + GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); @@ -2815,6 +2916,131 @@ TEST_CASE("scrub.basic") CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); } +TEST_CASE("gc.keepunusedreferences") +{ + const IoHash Hashes[] = {IoHash::FromHexString("177030568fdd461bf4fe5ddbf4d463e514e8178e"), + IoHash::FromHexString("372d795bb907a15cab15ab3917854bfef7e7af2c"), + IoHash::FromHexString("75ab3917854bfef7e72d795bb907a15cab1af2c3"), + IoHash::FromHexString("ab3917854bfef7e7af2c372d795bb907a15cab15"), + IoHash::FromHexString("d1df59fcab06793a5f2c372d795bb907a15cab15")}; + { + std::vector<IoHash> UsedReferences; + std::vector<IoHash> References; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> 
UsedReferences{}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 5); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[2], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[1]); + CHECK(UnusedReferences[1] == Hashes[3]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[3]); + CHECK(UnusedReferences[1] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + 
CHECK(UnusedReferences.size() == 1); + CHECK(UnusedReferences[0] == Hashes[3]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[1], Hashes[3]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 3); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[1]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[3]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[1]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[3]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[3]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + CHECK(UnusedReferences[2] == Hashes[2]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + 
std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + CHECK(UnusedReferences[2] == Hashes[2]); + CHECK(UnusedReferences[3] == Hashes[3]); + } +} + #endif void diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index a735893a1..8e31d3222 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -343,18 +343,18 @@ public: metrics::RequestStats m_PutOps; metrics::RequestStats m_GetOps; - mutable RwLock m_IndexLock; - IndexMap m_Index; - std::vector<AccessTime> m_AccessTimes; - std::vector<BucketPayload> m_Payloads; - std::vector<BucketMetaData> m_MetaDatas; - std::vector<MetaDataIndex> m_FreeMetaDatas; - std::vector<MemCacheData> m_MemCachedPayloads; - std::vector<MemCachedIndex> m_FreeMemCachedPayloads; - std::unique_ptr<HashSet> m_TrackedCacheKeys; - std::unique_ptr<HashSet> m_TrackedReferences; - std::atomic_uint64_t m_StandaloneSize{}; - std::atomic_uint64_t m_MemCachedSize{}; + mutable RwLock m_IndexLock; + IndexMap m_Index; + std::vector<AccessTime> m_AccessTimes; + std::vector<BucketPayload> m_Payloads; + std::vector<BucketMetaData> m_MetaDatas; + std::vector<MetaDataIndex> m_FreeMetaDatas; + std::vector<MemCacheData> m_MemCachedPayloads; + std::vector<MemCachedIndex> m_FreeMemCachedPayloads; + std::unique_ptr<HashSet> m_TrackedCacheKeys; + std::unique_ptr<std::vector<IoHash>> m_TrackedReferences; + std::atomic_uint64_t m_StandaloneSize{}; + std::atomic_uint64_t m_MemCachedSize{}; virtual std::string GetGcName(GcCtx& Ctx) override; virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index a8c5c0219..3f2f5448d 100644 --- 
a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -64,6 +64,8 @@ struct GcSettings 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less // usage than CompactBlockUsageThresholdPercent std::filesystem::path DiskReservePath; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; }; struct GcCompactStoreStats @@ -166,10 +168,10 @@ public: /** * @brief An interface to check if a set of Cids are referenced * - * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times. + * Instance will be deleted after GetUnusedReferences has been called 0-n times. * * During construction of the GcReferenceChecker the world is not stopped and this is a good - * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly. + * place to do caching to be able to execute LockState and GetUnusedReferences quickly. */ class GcReferenceChecker { @@ -188,16 +190,19 @@ public: // *IMPORTANT* Do *not* take any locks (shared or exclusive) in this code. // This is because we need to acquire the locks in an ordered manner and not end up in a deadlock due to other code // trying to get exclusive locks halfway through our execution. - // Called once before any calls to RemoveUsedReferencesFromSet. + // Called once before any calls to GetUnusedReferences. // The implementation should be as fast as possible as UpdateLockedState is part of a stop the world (from changes) // until all instances of GcReferenceChecker UpdateLockedState are completed virtual void UpdateLockedState(GcCtx& Ctx) = 0; // Go through IoCids and see which ones are referenced. 
If it is the reference must be removed from IoCids // This function should use pre-cached information on what is referenced as we are in stop the world mode - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0; + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) = 0; }; +std::span<IoHash> KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences); +bool FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences); + /** * @brief An interface to implement a lock for Stop The World (from writing new data) * @@ -209,7 +214,7 @@ public: virtual ~GcReferenceLocker() = default; // Take all the locks needed to execute UpdateLockedState for the all the GcReferenceChecker in your domain - // Once all the GcReferenceChecker has executed UpdateLockedState and RemoveUsedReferencesFromSet for all + // Once all the GcReferenceChecker has executed UpdateLockedState and GetUnusedReferences for all // domains has completed, the locks will be disposed and writes are allowed once again virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) = 0; }; @@ -245,7 +250,7 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; - typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; + typedef std::function<std::span<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; // Check a set of references to see if they are in use. 
// Use the GetUnusedReferences input function to check if references are used and update any pointers @@ -520,6 +525,8 @@ public: std::optional<uint32_t> CompactBlockUsageThresholdPercent; std::optional<bool> Verbose; std::optional<bool> SingleThreaded; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; }; bool TriggerGc(const TriggerGcParams& Params); @@ -547,6 +554,8 @@ private: uint32_t CompactBlockUsageThresholdPercent, bool Verbose, bool SingleThreaded, + const IoHash& AttachmentRangeMin, + const IoHash& AttachmentRangeMax, bool SilenceErrors); void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice); LoggerRef Log() { return m_Log; } |