diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-30 18:29:09 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-30 18:29:09 +0100 |
| commit | cbdda104ada38108700f9da5b192867d83074119 (patch) | |
| tree | 98c04b344e041c156fdc1a5c393672bef743be34 /src/zenstore/gc.cpp | |
| parent | fix changelog (diff) | |
| download | zen-cbdda104ada38108700f9da5b192867d83074119.tar.xz zen-cbdda104ada38108700f9da5b192867d83074119.zip | |
individual gc stats (#506)
- Feature: New parameter for endpoint `admin/gc` (GET) `details=true` which gives detailed stats on the GC operation when using GC V2
- Feature: New options for zen command `gc-status`
- `--details` that enables the detailed output from the last GC operation when using GC V2
Diffstat (limited to 'src/zenstore/gc.cpp')
| -rw-r--r-- | src/zenstore/gc.cpp | 631 |
1 files changed, 366 insertions, 265 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index e09f46063..4d146c16c 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -330,6 +330,47 @@ GcManager::~GcManager() //////// Begin New GC WIP void +GcResult::Sum() +{ + for (std::pair<std::string, GcReferencerStats>& Referencer : ReferencerStats) + { + GcReferencerStats& SubStat = Referencer.second; + ReferencerStat.Count += SubStat.Count; + ReferencerStat.Expired += SubStat.Expired; + ReferencerStat.Deleted += SubStat.Deleted; + ReferencerStat.RemovedDisk += SubStat.RemovedDisk; + ReferencerStat.RemovedMemory += SubStat.RemovedMemory; + SubStat.ElapsedMS = SubStat.RemoveExpiredDataMS + SubStat.CreateReferenceCheckersMS + SubStat.LockStateMS; + + ReferencerStat.RemoveExpiredDataMS += SubStat.RemoveExpiredDataMS; + ReferencerStat.CreateReferenceCheckersMS += SubStat.CreateReferenceCheckersMS; + ReferencerStat.LockStateMS += SubStat.LockStateMS; + ReferencerStat.ElapsedMS += SubStat.ElapsedMS; + + RemovedDisk += SubStat.RemovedDisk; + RemovedMemory += SubStat.RemovedMemory; + } + for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : ReferenceStoreStats) + { + GcReferenceStoreStats& SubStat = ReferenceStore.second; + ReferenceStoreStat.Count += SubStat.Count; + ReferenceStoreStat.Pruned += SubStat.Pruned; + ReferenceStoreStat.Compacted += SubStat.Compacted; + ReferenceStoreStat.RemovedDisk += SubStat.RemovedDisk; + ReferenceStoreStat.RemovedMemory += SubStat.RemovedMemory; + SubStat.ElapsedMS = SubStat.CreateReferencePrunerMS + SubStat.RemoveUnreferencedDataMS + SubStat.CompactReferenceStoreMS; + + ReferenceStoreStat.CreateReferencePrunerMS += SubStat.CreateReferencePrunerMS; + ReferenceStoreStat.RemoveUnreferencedDataMS += SubStat.RemoveUnreferencedDataMS; + ReferenceStoreStat.CompactReferenceStoreMS += SubStat.CompactReferenceStoreMS; + ReferenceStoreStat.ElapsedMS += SubStat.ElapsedMS; + + RemovedDisk += SubStat.RemovedDisk; + RemovedMemory += SubStat.RemovedMemory; + } +} + +void 
GcManager::AddGcReferencer(GcReferencer& Referencer) { RwLock::ExclusiveLockScope _(m_Lock); @@ -358,245 +399,270 @@ GcManager::RemoveGcReferenceStore(GcReferenceStore& ReferenceStore) GcResult GcManager::CollectGarbage(const GcSettings& Settings) { - GcCtx Ctx{.Settings = Settings}; - - Stopwatch TotalTimer; - auto __ = MakeGuard([&]() { - ZEN_INFO( - "GC: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries out of {}, " - "freed " - "{} on disk and {} of memory in {}", - Ctx.ExpiredItems.load(), - Ctx.Items.load(), - Ctx.DeletedItems.load(), - Ctx.ExpiredItems.load(), - Ctx.PrunedReferences.load(), - Ctx.References.load(), - Ctx.CompactedReferences.load(), - Ctx.PrunedReferences.load(), - NiceBytes(Ctx.RemovedDiskSpace.load()), - NiceBytes(Ctx.RemovedMemory.load()), - NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); - }); - - RwLock::SharedLockScope GcLock(m_Lock); - - static const bool SingleThread = -#if ZEN_BUILD_DEBUG - true -#else - false -#endif - ; - WorkerThreadPool ThreadPool(SingleThread ? 
0 : 8); - - if (!m_GcReferencers.empty()) - { - Latch WorkLeft(1); - // First remove any cache keys that may own references - Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("GC: Removed expired data in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) }); - for (GcReferencer* Owner : m_GcReferencers) - { - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Owner, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - Owner->RemoveExpiredData(Ctx); - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } + GcCtx Ctx{.Settings = Settings}; + GcResult Result; - if (Ctx.Settings.SkipCidDelete) { - return GcResult{.Items = Ctx.Items.load(), - .ExpiredItems = Ctx.ExpiredItems.load(), - .DeletedItems = Ctx.DeletedItems.load(), - .References = Ctx.References.load(), - .PrunedReferences = Ctx.PrunedReferences.load(), - .CompactedReferences = Ctx.CompactedReferences.load(), - .RemovedDiskSpace = Ctx.RemovedDiskSpace.load(), - .RemovedMemory = Ctx.RemovedMemory.load()}; - } + Stopwatch TotalTimer; + auto __ = MakeGuard([&]() { Result.ElapsedMS = std::chrono::milliseconds(TotalTimer.GetElapsedTimeMs()); }); - std::vector<std::unique_ptr<GcReferencePruner>> ReferencePruners; - if (!m_GcReferenceStores.empty()) - { - ReferencePruners.reserve(m_GcReferenceStores.size()); - Latch WorkLeft(1); - RwLock ReferencePrunersLock; - // Easy to go wide, CreateReferencePruner is usually not very heavy but big data sets change that - Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("GC: Created Cid pruners in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) }); - for (GcReferenceStore* CidStore : m_GcReferenceStores) - { - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, CidStore, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The CidStore will pick a list of CId entries to check, returning a collector - std::unique_ptr<GcReferencePruner> 
ReferencePruner(CidStore->CreateReferencePruner(Ctx)); - if (ReferencePruner) - { - RwLock::ExclusiveLockScope __(ReferencePrunersLock); - ReferencePruners.emplace_back(std::move(ReferencePruner)); - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } + RwLock::SharedLockScope GcLock(m_Lock); - std::vector<std::unique_ptr<GcReferenceChecker>> ReferenceCheckers; - if (!m_GcReferencers.empty()) - { - ReferenceCheckers.reserve(m_GcReferencers.size()); - Latch WorkLeft(1); - RwLock ReferenceCheckersLock; - Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("GC: Created Cid checkers in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) }); - // Easy to go wide, CreateReferenceCheckers is potentially heavy - // Lock all reference owners from changing the reference data and get access to check for referenced data - for (GcReferencer* Referencer : m_GcReferencers) - { - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, &ReferenceCheckersLock, &ReferenceCheckers]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives - std::vector<GcReferenceChecker*> Checkers = Referencer->CreateReferenceCheckers(Ctx); - try - { - if (!Checkers.empty()) - { - RwLock::ExclusiveLockScope __(ReferenceCheckersLock); - for (auto& Checker : Checkers) - { - ReferenceCheckers.emplace_back(std::unique_ptr<GcReferenceChecker>(Checker)); - Checker = nullptr; - } - } - } - catch (std::exception&) - { - while (!Checkers.empty()) - { - delete Checkers.back(); - Checkers.pop_back(); - } - throw; - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - Stopwatch LockStateTimer; - if (!ReferenceCheckers.empty()) - { - // Easy to go wide, locking all references checkers so we hafve a stead state of which references are used - // From this point we have block all writes to all References (DiskBucket/ProjectStore) until we do delete 
the ReferenceCheckers - Latch WorkLeft(1); - - Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("GC: Locked Cid checkers in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) }); - for (std::unique_ptr<GcReferenceChecker>& ReferenceChecker : ReferenceCheckers) - { - GcReferenceChecker* Checker = ReferenceChecker.get(); - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Checker, &WorkLeft, &ReferenceCheckers]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - Checker->LockState(Ctx); - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - std::vector<std::unique_ptr<GcReferenceStoreCompactor>> ReferenceStoreCompactors; - ReferenceStoreCompactors.reserve(ReferencePruners.size()); - if (!ReferencePruners.empty()) - { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); - for (const std::unique_ptr<GcReferenceChecker>& ReferenceChecker : ReferenceCheckers) - { - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); - if (UnusedCids.empty()) - { - return {}; - } - } - return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); - }; - - // Easy to go wide, checking all Cids agains references in cache - // Ask stores to remove data that the ReferenceCheckers says are not references - this should be a lightweight operation that - // only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors - - Latch WorkLeft(1); - RwLock ReferenceStoreCompactorsLock; - - Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("GC: Pruned unreferenced Cid data in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) }); - for (std::unique_ptr<GcReferencePruner>& ReferencePruner : ReferencePruners) - { - GcReferencePruner* Pruner = ReferencePruner.get(); - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork( - [&Ctx, Pruner, &WorkLeft, &GetUnusedReferences, &ReferenceStoreCompactorsLock, 
&ReferenceStoreCompactors]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. - std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor(Pruner->RemoveUnreferencedData(Ctx, GetUnusedReferences)); - if (ReferenceCompactor) - { - RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock); - ReferenceStoreCompactors.emplace_back(std::move(ReferenceCompactor)); - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed - ReferenceCheckers.clear(); - ZEN_INFO("GC: Writes blocked for {}", NiceTimeSpanMs(LockStateTimer.GetElapsedTimeMs())) - - // Let go of the pruners - ReferencePruners.clear(); - - if (!ReferenceStoreCompactors.empty()) - { - Latch WorkLeft(1); - - // Easy to go wide - // Remove the stuff we deemed unreferenced from disk - may be heavy operation - Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("GC: Compacted Cid stores in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) }); - for (std::unique_ptr<GcReferenceStoreCompactor>& StoreCompactor : ReferenceStoreCompactors) - { - GcReferenceStoreCompactor* Compactor = StoreCompactor.get(); - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Compactor, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. 
- Compactor->CompactReferenceStore(Ctx); - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - ReferenceStoreCompactors.clear(); - - return GcResult{.Items = Ctx.Items.load(), - .ExpiredItems = Ctx.ExpiredItems.load(), - .DeletedItems = Ctx.DeletedItems.load(), - .References = Ctx.References.load(), - .PrunedReferences = Ctx.PrunedReferences.load(), - .CompactedReferences = Ctx.CompactedReferences.load(), - .RemovedDiskSpace = Ctx.RemovedDiskSpace.load(), - .RemovedMemory = Ctx.RemovedMemory.load()}; + static const bool SingleThread = +#if ZEN_BUILD_DEBUG + true +#else + false +#endif + ; + +#define SCOPED_TIMER(closure) \ + Stopwatch $Timer##__LINE__; \ + auto $Guard##__LINE = MakeGuard([&, &Timer = $Timer##__LINE__]() { closure }) + + Result.ReferencerStats.resize(m_GcReferencers.size()); + Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); + + WorkerThreadPool ThreadPool(SingleThread ? 0 : 8); + + ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); + if (!m_GcReferencers.empty()) + { + Latch WorkLeft(1); + // First remove any cache keys that may own references + SCOPED_TIMER(Result.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) + { + GcReferencer* Owner = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, Owner, &Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + Stats.first = Owner->GetGcName(Ctx); + SCOPED_TIMER(Stats.second.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Owner->RemoveExpiredData(Ctx, Stats.second); + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + if (Ctx.Settings.SkipCidDelete) + { + Result.Sum(); + return Result; + } + + ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", 
m_GcReferenceStores.size()); + std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; + if (!m_GcReferenceStores.empty()) + { + ReferencePruners.reserve(m_GcReferenceStores.size()); + Latch WorkLeft(1); + RwLock ReferencePrunersLock; + // CreateReferencePruner is usually not very heavy but big data sets change that + SCOPED_TIMER(Result.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) + { + GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; + std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + Stats.first = ReferenceStore->GetGcName(Ctx); + std::unique_ptr<GcReferencePruner> ReferencePruner; + { + SCOPED_TIMER(Stats.second.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // The ReferenceStore will pick a list of CId entries to check, returning a collector + ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); + } + if (ReferencePruner) + { + RwLock::ExclusiveLockScope __(ReferencePrunersLock); + ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); + std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; + if (!m_GcReferencers.empty()) + { + ReferenceCheckers.reserve(m_GcReferencers.size()); + Latch WorkLeft(1); + RwLock ReferenceCheckersLock; + SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // Lock all reference owners from changing the 
reference data and get access to check for referenced data + for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) + { + GcReferencer* Referencer = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives + std::vector<GcReferenceChecker*> Checkers; + { + SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checkers = Referencer->CreateReferenceCheckers(Ctx); + } + try + { + if (!Checkers.empty()) + { + RwLock::ExclusiveLockScope __(ReferenceCheckersLock); + for (auto& Checker : Checkers) + { + ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); + Checker = nullptr; + } + } + } + catch (std::exception&) + { + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + throw; + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + std::unordered_map<std::unique_ptr<GcReferenceStoreCompactor>, size_t> ReferenceStoreCompactors; + ReferenceStoreCompactors.reserve(ReferencePruners.size()); + + ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); + { + SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); + ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); + if (!ReferenceCheckers.empty()) + { + // Locking all references checkers so we have a steady state of which references are used + // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until + // we delete the ReferenceCheckers + Latch WorkLeft(1); + + 
SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferenceCheckers) + { + GcReferenceChecker* Checker = It.first.get(); + size_t Index = It.second; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checker->LockState(Ctx); + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); + if (!ReferencePruners.empty()) + { + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { + HashSet UnusedCids(References.begin(), References.end()); + for (const auto& It : ReferenceCheckers) + { + GcReferenceChecker* ReferenceChecker = It.first.get(); + ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); + if (UnusedCids.empty()) + { + return {}; + } + } + return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + }; + + // checking all Cids agains references in cache + // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight operation + // that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors + + Latch WorkLeft(1); + RwLock ReferenceStoreCompactorsLock; + + SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferencePruners) + { + GcReferencePruner* Pruner = It.second.get(); + size_t Index = It.first; + GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, + Pruner, + &Stats, + &WorkLeft, + Index, + &GetUnusedReferences, 
+ &ReferenceStoreCompactorsLock, + &ReferenceStoreCompactors]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. + std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor; + { + SCOPED_TIMER(Stats.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + ReferenceCompactor = + std::unique_ptr<GcReferenceStoreCompactor>(Pruner->RemoveUnreferencedData(Ctx, Stats, GetUnusedReferences)); + } + if (ReferenceCompactor) + { + RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock); + ReferenceStoreCompactors.insert_or_assign(std::move(ReferenceCompactor), Index); + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed + ReferenceCheckers.clear(); + } + + // Let go of the pruners + ReferencePruners.clear(); + + ZEN_INFO("GCV2: Compacting reference stores for {} reference store compactors", ReferenceStoreCompactors.size()); + if (!ReferenceStoreCompactors.empty()) + { + Latch WorkLeft(1); + + // Remove the stuff we deemed unreferenced from disk - may be heavy operation + SCOPED_TIMER(Result.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferenceStoreCompactors) + { + GcReferenceStoreCompactor* Compactor = It.first.get(); + size_t Index = It.second; + GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, Compactor, &Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. 
+ SCOPED_TIMER(Stats.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Compactor->CompactReferenceStore(Ctx, Stats); + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + ReferenceStoreCompactors.clear(); + + ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); + } + + Result.Sum(); + return Result; +#undef SCOPED_TIMER } //////// End New GC WIP @@ -998,15 +1064,6 @@ GcScheduler::GetState() const GcSchedulerState Result{.Status = Status(), .Config = m_Config, .AreDiskWritesBlocked = m_AreDiskWritesBlocked.load()}; - { - std::unique_lock Lock(m_GcMutex); - Result.LastFullGcTime = m_LastGcTime; - Result.LastFullGCDiff = m_LastFullGCDiff; - Result.LastFullGcDuration = m_LastFullGcDuration; - Result.LastLightweightGcTime = m_LastLightweightGcTime; - Result.LastLightweightGCDiff = m_LastLightweightGCDiff; - Result.LastLightweightGcDuration = m_LastLightweightGcDuration; - } std::error_code Ec; DiskSpace Space = DiskSpaceInfo(Result.Config.RootDirectory, Ec); if (!Ec) @@ -1026,30 +1083,40 @@ GcScheduler::GetState() const Result.HasDiskReserve = std::filesystem::is_regular_file(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec; } - GcClock::TimePoint CacheExpireTime = - Result.Config.MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - Result.Config.MaxCacheDuration; - GcClock::TimePoint ProjectStoreExpireTime = Result.Config.MaxProjectStoreDuration == GcClock::Duration::max() - ? 
GcClock::TimePoint::min() - : Now - Result.Config.MaxProjectStoreDuration; + if (Result.Status != GcSchedulerStatus::kRunning) + { + { + std::unique_lock Lock(m_GcMutex); + Result.LastFullGcTime = m_LastGcTime; + Result.LastFullGCDiff = m_LastFullGCDiff; + Result.LastFullGcDuration = m_LastFullGcDuration; + Result.LastLightweightGcTime = m_LastLightweightGcTime; + Result.LastLightweightGCDiff = m_LastLightweightGCDiff; + Result.LastLightweightGcDuration = m_LastLightweightGcDuration; + + Result.LastLightweightGCV2Result = m_LastLightweightGCV2Result; + Result.LastFullGCV2Result = m_LastFullGCV2Result; + } - Result.RemainingTimeUntilFullGc = - Result.Config.Interval.count() == 0 - ? std::chrono::seconds::max() - : std::chrono::duration_cast<std::chrono::seconds>(Result.LastFullGcTime + Result.Config.Interval - Now); + Result.RemainingTimeUntilFullGc = + Result.Config.Interval.count() == 0 + ? std::chrono::seconds::max() + : std::chrono::duration_cast<std::chrono::seconds>(Result.LastFullGcTime + Result.Config.Interval - Now); - if (Result.RemainingTimeUntilFullGc < std::chrono::seconds::zero()) - { - Result.RemainingTimeUntilFullGc = std::chrono::seconds::zero(); - } + if (Result.RemainingTimeUntilFullGc < std::chrono::seconds::zero()) + { + Result.RemainingTimeUntilFullGc = std::chrono::seconds::zero(); + } - Result.RemainingTimeUntilLightweightGc = - Result.Config.LightweightInterval.count() == 0 - ? std::chrono::seconds::max() - : std::chrono::duration_cast<std::chrono::seconds>(Result.LastLightweightGcTime + Result.Config.LightweightInterval - Now); + Result.RemainingTimeUntilLightweightGc = + Result.Config.LightweightInterval.count() == 0 + ? 
std::chrono::seconds::max() + : std::chrono::duration_cast<std::chrono::seconds>(Result.LastLightweightGcTime + Result.Config.LightweightInterval - Now); - if (Result.RemainingTimeUntilLightweightGc < std::chrono::seconds::zero()) - { - Result.RemainingTimeUntilLightweightGc = std::chrono::seconds::zero(); + if (Result.RemainingTimeUntilLightweightGc < std::chrono::seconds::zero()) + { + Result.RemainingTimeUntilLightweightGc = std::chrono::seconds::zero(); + } } return Result; @@ -1474,6 +1541,14 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, { case GcVersion::kV1: Diff = m_GcManager.CollectGarbage(GcCtx); + if (SkipCid) + { + m_LastLightweightGCV2Result.reset(); + } + else + { + m_LastFullGCV2Result.reset(); + } break; case GcVersion::kV2: { @@ -1482,7 +1557,33 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .CollectSmallObjects = CollectSmallObjects, .IsDeleteMode = Delete, .SkipCidDelete = SkipCid}); - Diff.DiskSize = Result.RemovedDiskSpace; + + ZEN_INFO( + "GCV2: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries " + "out of {}, " + "freed " + "{} on disk and {} of memory in {}", + Result.ReferencerStat.Expired, + Result.ReferencerStat.Count, + Result.ReferencerStat.Deleted, + Result.ReferencerStat.Expired, + Result.ReferenceStoreStat.Pruned, + Result.ReferenceStoreStat.Count, + Result.ReferenceStoreStat.Compacted, + Result.ReferenceStoreStat.Pruned, + NiceBytes(Result.RemovedDisk), + NiceBytes(Result.RemovedMemory), + NiceTimeSpanMs(Result.ElapsedMS.count())); + + if (SkipCid) + { + m_LastLightweightGCV2Result = Result; + } + else + { + m_LastFullGCV2Result = Result; + } + Diff.DiskSize = Result.RemovedDisk; Diff.MemorySize = Result.RemovedMemory; } break; |