diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/gc.cpp | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving out the actual disk work from deleting items in the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make compacting of stores non-parallel so it does not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/gc.cpp')
| -rw-r--r-- | src/zenstore/gc.cpp | 821 |
1 files changed, 425 insertions, 396 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 7f9ca5236..b78b23350 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -173,166 +173,6 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) ////////////////////////////////////////////////////////////////////////// -void -WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) -{ - if (Stats.Count == 0) - { - return; - } - Writer << "Count" << Stats.Count; - Writer << "Expired" << Stats.Expired; - Writer << "Deleted" << Stats.Deleted; - - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory); - } - else - { - Writer << "RemovedDiskBytes" << Stats.RemovedDisk; - Writer << "RemovedMemoryBytes" << Stats.RemovedMemory; - } - - if (HumanReadable) - { - Writer << "RemoveExpiredData" << NiceTimeSpanMs(Stats.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Stats.CreateReferenceCheckersMS.count()); - Writer << "LockState" << NiceTimeSpanMs(Stats.LockStateMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count()); - } - else - { - Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Stats.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Stats.CreateReferenceCheckersMS.count()); - Writer << "LockStateMS" << gsl::narrow<int64_t>(Stats.LockStateMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count()); - } -}; - -void -WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) -{ - if (Stats.Count == 0) - { - return; - } - Writer << "Count" << Stats.Count; - Writer << "Pruned" << Stats.Pruned; - Writer << "Compacted" << Stats.Compacted; - - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory); 
- } - else - { - Writer << "RemovedDiskBytes" << Stats.RemovedDisk; - Writer << "RemovedMemoryBytes" << Stats.RemovedMemory; - } - - if (HumanReadable) - { - Writer << "CreateReferencePruner" << NiceTimeSpanMs(Stats.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Stats.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStore" << NiceTimeSpanMs(Stats.CompactReferenceStoreMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count()); - } - else - { - Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Stats.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Stats.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Stats.CompactReferenceStoreMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count()); - } -}; - -void -WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) -{ - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Result.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Result.RemovedMemory); - Writer << "WriteBlock" << NiceTimeSpanMs(Result.WriteBlockMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Result.ElapsedMS.count()); - } - else - { - Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.RemovedDisk); - Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.RemovedMemory); - Writer << "WriteBlockMS" << gsl::narrow<int64_t>(Result.WriteBlockMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Result.ElapsedMS.count()); - } - - if (!IncludeDetails) - { - return; - } - - if (HumanReadable) - { - Writer << "RemoveExpiredData" << NiceTimeSpanMs(Result.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count()); - Writer << "LockState" << NiceTimeSpanMs(Result.LockStateMS.count()); - - Writer << 
"CreateReferencePruner" << NiceTimeSpanMs(Result.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStore" << NiceTimeSpanMs(Result.CompactReferenceStoreMS.count()); - } - else - { - Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Result.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Result.CreateReferenceCheckersMS.count()); - Writer << "LockStateMS" << gsl::narrow<int64_t>(Result.LockStateMS.count()); - - Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Result.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Result.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Result.CompactReferenceStoreMS.count()); - } - - Writer.BeginObject("ReferencerStats"); - { - WriteReferencerStats(Writer, Result.ReferencerStat, HumanReadable); - } - Writer.EndObject(); - - Writer.BeginObject("ReferenceStoreStats"); - { - WriteReferenceStoreStats(Writer, Result.ReferenceStoreStat, HumanReadable); - } - Writer.EndObject(); - - if (!Result.ReferencerStats.empty()) - { - Writer.BeginArray("Referencers"); - { - for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats) - { - Writer.BeginObject(); - Writer << "Name" << It.first; - WriteReferencerStats(Writer, It.second, HumanReadable); - Writer.EndObject(); - } - } - Writer.EndArray(); - } - if (!Result.ReferenceStoreStats.empty()) - { - Writer.BeginArray("ReferenceStores"); - for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats) - { - Writer.BeginObject(); - Writer << "Name" << It.first; - WriteReferenceStoreStats(Writer, It.second, HumanReadable); - Writer.EndObject(); - } - Writer.EndArray(); - } -}; - struct GcContext::GcState { using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>; @@ 
-490,44 +330,235 @@ GcManager::~GcManager() //////// Begin GC V2 void -GcResult::Sum() +WriteGcStats(CbObjectWriter& Writer, const GcStats& Stats, bool HumanReadable) { - for (std::pair<std::string, GcReferencerStats>& Referencer : ReferencerStats) + Writer << "Checked" << Stats.CheckedCount; + Writer << "Found" << Stats.FoundCount; + Writer << "Deleted" << Stats.DeletedCount; + if (HumanReadable) { - GcReferencerStats& SubStat = Referencer.second; - ReferencerStat.Count += SubStat.Count; - ReferencerStat.Expired += SubStat.Expired; - ReferencerStat.Deleted += SubStat.Deleted; - ReferencerStat.RemovedDisk += SubStat.RemovedDisk; - ReferencerStat.RemovedMemory += SubStat.RemovedMemory; - SubStat.ElapsedMS = SubStat.RemoveExpiredDataMS + SubStat.CreateReferenceCheckersMS + SubStat.LockStateMS; + Writer << "FreedMemory" << NiceBytes(Stats.FreedMemory); + } + else + { + Writer << "FreedMemoryBytes" << Stats.FreedMemory; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +} - ReferencerStat.RemoveExpiredDataMS += SubStat.RemoveExpiredDataMS; - ReferencerStat.CreateReferenceCheckersMS += SubStat.CreateReferenceCheckersMS; - ReferencerStat.LockStateMS += SubStat.LockStateMS; - ReferencerStat.ElapsedMS += SubStat.ElapsedMS; +void +WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats, bool HumanReadable) +{ + if (HumanReadable) + { + Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); + } + else + { + Writer << "RemovedDiskBytes" << Stats.RemovedDisk; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +} - RemovedDisk += SubStat.RemovedDisk; - RemovedMemory += SubStat.RemovedMemory; +void +WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) +{ + if (Stats.RemoveExpiredDataStats.CheckedCount == 0) + { + return; } - for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : ReferenceStoreStats) + Writer.BeginObject("RemoveExpired"); { - GcReferenceStoreStats& SubStat = 
ReferenceStore.second; - ReferenceStoreStat.Count += SubStat.Count; - ReferenceStoreStat.Pruned += SubStat.Pruned; - ReferenceStoreStat.Compacted += SubStat.Compacted; - ReferenceStoreStat.RemovedDisk += SubStat.RemovedDisk; - ReferenceStoreStat.RemovedMemory += SubStat.RemovedMemory; - SubStat.ElapsedMS = SubStat.CreateReferencePrunerMS + SubStat.RemoveUnreferencedDataMS + SubStat.CompactReferenceStoreMS; + WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); + } + Writer.EndObject(); + + Writer << "CreateReferenceCheckers" << ToTimeSpan(Stats.CreateReferenceCheckersMS); + Writer << "LockState" << ToTimeSpan(Stats.LockStateMS); + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void +WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) +{ + if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0) + { + return; + } + Writer.BeginObject("RemoveUnreferenced"); + { + WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); + } + Writer.EndObject(); + + Writer << "CreateReferencePruners" << ToTimeSpan(Stats.CreateReferencePrunersMS); + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void +WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) +{ + if (!IncludeDetails) + { + if (HumanReadable) + { + Writer << "RemovedDisk" << NiceBytes(Result.CompactStoresStatSum.RemovedDisk); + Writer << "FreedMemory" << NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); + } + else + { + Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.CompactStoresStatSum.RemovedDisk); + Writer << "RemovedMemoryBytes" << 
gsl::narrow<int64_t>(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); + } + Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); + Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); + return; + } + + Writer.BeginObject("Referencer"); + { + WriteReferencerStats(Writer, Result.ReferencerStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("ReferenceStore"); + { + WriteReferenceStoreStats(Writer, Result.ReferenceStoreStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Result.CompactStoresStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS); + Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS); + Writer << "LockState" << ToTimeSpan(Result.LockStateMS); + + Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS); + Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS); + Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS); + Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); + Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); + + if (!Result.ReferencerStats.empty()) + { + Writer.BeginArray("Referencers"); + { + for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferencerStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + } + Writer.EndArray(); + } + if (!Result.ReferenceStoreStats.empty()) + { + Writer.BeginArray("ReferenceStores"); + for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferenceStoreStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + Writer.EndArray(); + } +}; + +void +Add(GcCompactStoreStats& Sum, const GcCompactStoreStats& 
Sub) +{ + Sum.RemovedDisk += Sub.RemovedDisk; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Add(GcStats& Sum, const GcStats& Sub) +{ + Sum.CheckedCount += Sub.CheckedCount; + Sum.FoundCount += Sub.FoundCount; + Sum.DeletedCount += Sub.DeletedCount; + Sum.FreedMemory += Sub.FreedMemory; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcReferencerStats& Stat) +{ + Stat.ElapsedMS = + Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS + Stat.LockStateMS; +} + +void +Add(GcReferencerStats& Sum, const GcReferencerStats& Sub) +{ + Add(Sum.RemoveExpiredDataStats, Sub.RemoveExpiredDataStats); + Add(Sum.CompactStoreStats, Sub.CompactStoreStats); + + Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS; + Sum.LockStateMS += Sub.LockStateMS; + + Sum.ElapsedMS += Sub.ElapsedMS; +} - ReferenceStoreStat.CreateReferencePrunerMS += SubStat.CreateReferencePrunerMS; - ReferenceStoreStat.RemoveUnreferencedDataMS += SubStat.RemoveUnreferencedDataMS; - ReferenceStoreStat.CompactReferenceStoreMS += SubStat.CompactReferenceStoreMS; - ReferenceStoreStat.ElapsedMS += SubStat.ElapsedMS; +void +Sum(GcReferenceStoreStats& Stat) +{ + Stat.ElapsedMS = Stat.RemoveUnreferencedDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferencePrunersMS; +} - RemovedDisk += SubStat.RemovedDisk; - RemovedMemory += SubStat.RemovedMemory; +void +Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub) +{ + Add(Sum.RemoveUnreferencedDataStats, Sub.RemoveUnreferencedDataStats); + Add(Sum.CompactStoreStats, Sub.CompactStoreStats); + + Sum.CreateReferencePrunersMS += Sub.CreateReferencePrunersMS; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcResult& Stat) +{ + for (std::pair<std::string, GcReferencerStats>& Referencer : Stat.ReferencerStats) + { + GcReferencerStats& SubStat = Referencer.second; + Sum(SubStat); + Add(Stat.ReferencerStatSum, SubStat); + } + for (std::pair<std::string, GcReferenceStoreStats>& 
ReferenceStore : Stat.ReferenceStoreStats) + { + GcReferenceStoreStats& SubStat = ReferenceStore.second; + Sum(SubStat); + Add(Stat.ReferenceStoreStatSum, SubStat); } + + Sum(Stat.ReferencerStatSum); + Sum(Stat.ReferenceStoreStatSum); + + Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats); + Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats); } void @@ -581,7 +612,9 @@ GcManager::CollectGarbage(const GcSettings& Settings) Result.ReferencerStats.resize(m_GcReferencers.size()); - WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2"); + std::unordered_map<std::unique_ptr<GcStoreCompactor>, GcCompactStoreStats*> StoreCompactors; + RwLock StoreCompactorsLock; + WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2"); ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); if (!m_GcReferencers.empty()) @@ -594,52 +627,15 @@ GcManager::CollectGarbage(const GcSettings& Settings) GcReferencer* Owner = m_GcReferencers[Index]; std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Owner, &Stats, &WorkLeft]() { + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Owner, &Stats, &StoreCompactorsLock, &StoreCompactors]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); Stats.first = Owner->GetGcName(Ctx); - SCOPED_TIMER(Stats.second.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Owner->RemoveExpiredData(Ctx, Stats.second); - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - if (Ctx.Settings.SkipCidDelete) - { - Result.Sum(); - return Result; - } - - Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); - - ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); - std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; - if (!m_GcReferenceStores.empty()) - { - 
ReferencePruners.reserve(m_GcReferenceStores.size()); - Latch WorkLeft(1); - RwLock ReferencePrunersLock; - // CreateReferencePruner is usually not very heavy but big data sets change that - SCOPED_TIMER(Result.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) - { - GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; - std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - Stats.first = ReferenceStore->GetGcName(Ctx); - std::unique_ptr<GcReferencePruner> ReferencePruner; - { - SCOPED_TIMER(Stats.second.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - // The ReferenceStore will pick a list of CId entries to check, returning a collector - ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); - } - if (ReferencePruner) + SCOPED_TIMER(Stats.second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + std::unique_ptr<GcStoreCompactor> StoreCompactor(Owner->RemoveExpiredData(Ctx, Stats.second.RemoveExpiredDataStats)); + if (StoreCompactor) { - RwLock::ExclusiveLockScope __(ReferencePrunersLock); - ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.second.CompactStoreStats); } }); } @@ -647,180 +643,217 @@ GcManager::CollectGarbage(const GcSettings& Settings) WorkLeft.Wait(); } - ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); - std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; - 
if (!m_GcReferencers.empty()) + if (!Ctx.Settings.SkipCidDelete) { - ReferenceCheckers.reserve(m_GcReferencers.size()); - Latch WorkLeft(1); - RwLock ReferenceCheckersLock; - SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - // Lock all reference owners from changing the reference data and get access to check for referenced data - for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) + Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); + + ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); + std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; + if (!m_GcReferenceStores.empty()) { - GcReferencer* Referencer = m_GcReferencers[Index]; - std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives - std::vector<GcReferenceChecker*> Checkers; - { - SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checkers = Referencer->CreateReferenceCheckers(Ctx); - } - try - { - if (!Checkers.empty()) + ReferencePruners.reserve(m_GcReferenceStores.size()); + Latch WorkLeft(1); + RwLock ReferencePrunersLock; + // CreateReferencePruner is usually not very heavy but big data sets change that + SCOPED_TIMER(Result.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) + { + GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; + std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; + 
WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + Stats.first = ReferenceStore->GetGcName(Ctx); + std::unique_ptr<GcReferencePruner> ReferencePruner; { - RwLock::ExclusiveLockScope __(ReferenceCheckersLock); - for (auto& Checker : Checkers) - { - ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); - Checker = nullptr; - } + SCOPED_TIMER(Stats.second.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // The ReferenceStore will pick a list of CId entries to check, returning a collector + ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); } - } - catch (std::exception&) - { - while (!Checkers.empty()) + if (ReferencePruner) { - delete Checkers.back(); - Checkers.pop_back(); + RwLock::ExclusiveLockScope __(ReferencePrunersLock); + ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); } - throw; - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - std::unordered_map<std::unique_ptr<GcReferenceStoreCompactor>, size_t> ReferenceStoreCompactors; - ReferenceStoreCompactors.reserve(ReferencePruners.size()); - - ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); - { - SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); - ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); - if (!ReferenceCheckers.empty()) - { - // Locking all references checkers so we have a steady state of which references are used - // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until - // we delete the ReferenceCheckers - Latch WorkLeft(1); - - SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - 
for (auto& It : ReferenceCheckers) - { - GcReferenceChecker* Checker = It.first.get(); - size_t Index = It.second; - std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checker->LockState(Ctx); }); } WorkLeft.CountDown(); WorkLeft.Wait(); } - ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); if (!ReferencePruners.empty()) { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); - for (const auto& It : ReferenceCheckers) + ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); + std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; + if (!m_GcReferencers.empty()) + { + ReferenceCheckers.reserve(m_GcReferencers.size()); + Latch WorkLeft(1); + RwLock ReferenceCheckersLock; + SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // Lock all reference owners from changing the reference data and get access to check for referenced data + for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) { - GcReferenceChecker* ReferenceChecker = It.first.get(); - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); - if (UnusedCids.empty()) + GcReferencer* Referencer = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // The Referencer will 
create a reference checker that guarrantees that the references do not change as long as + // it lives + std::vector<GcReferenceChecker*> Checkers; + { + SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checkers = Referencer->CreateReferenceCheckers(Ctx); + } + try + { + if (!Checkers.empty()) + { + RwLock::ExclusiveLockScope __(ReferenceCheckersLock); + for (auto& Checker : Checkers) + { + ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); + Checker = nullptr; + } + } + } + catch (std::exception&) + { + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + throw; + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); + { + SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); + ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); + if (!ReferenceCheckers.empty()) + { + // Locking all references checkers so we have a steady state of which references are used + // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until + // we delete the ReferenceCheckers + Latch WorkLeft(1); + + SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferenceCheckers) { - return {}; + GcReferenceChecker* Checker = It.first.get(); + size_t Index = It.second; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checker->LockState(Ctx); + }); } + WorkLeft.CountDown(); + WorkLeft.Wait(); } - return 
std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); - }; - // checking all Cids agains references in cache - // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight operation - // that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors + ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); + { + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { + HashSet UnusedCids(References.begin(), References.end()); + for (const auto& It : ReferenceCheckers) + { + GcReferenceChecker* ReferenceChecker = It.first.get(); + ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); + if (UnusedCids.empty()) + { + return {}; + } + } + return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + }; - Latch WorkLeft(1); - RwLock ReferenceStoreCompactorsLock; + // checking all Cids agains references in cache + // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight + // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors - SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferencePruners) - { - GcReferencePruner* Pruner = It.second.get(); - size_t Index = It.first; - GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, - Pruner, - &Stats, - &WorkLeft, - Index, - &GetUnusedReferences, - &ReferenceStoreCompactorsLock, - &ReferenceStoreCompactors]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. 
- std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor; - { - SCOPED_TIMER(Stats.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - ReferenceCompactor = - std::unique_ptr<GcReferenceStoreCompactor>(Pruner->RemoveUnreferencedData(Ctx, Stats, GetUnusedReferences)); - } - if (ReferenceCompactor) + Latch WorkLeft(1); + + SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferencePruners) { - RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock); - ReferenceStoreCompactors.insert_or_assign(std::move(ReferenceCompactor), Index); + GcReferencePruner* Pruner = It.second.get(); + size_t Index = It.first; + GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork( + [&Ctx, Pruner, &Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced + // or not. 
+ std::unique_ptr<GcStoreCompactor> StoreCompactor; + { + SCOPED_TIMER(Stats.RemoveUnreferencedDataStats.ElapsedMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + StoreCompactor = std::unique_ptr<GcStoreCompactor>( + Pruner->RemoveUnreferencedData(Ctx, Stats.RemoveUnreferencedDataStats, GetUnusedReferences)); + } + if (StoreCompactor) + { + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.CompactStoreStats); + } + }); } - }); + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed + ReferenceCheckers.clear(); + ReferencePruners.clear(); } - WorkLeft.CountDown(); - WorkLeft.Wait(); } - // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed - ReferenceCheckers.clear(); } - // Let go of the pruners - ReferencePruners.clear(); - - ZEN_INFO("GCV2: Compacting reference stores for {} reference store compactors", ReferenceStoreCompactors.size()); - if (!ReferenceStoreCompactors.empty()) + ZEN_INFO("GCV2: Compacting reference stores for {} store compactors", StoreCompactors.size()); + if (!StoreCompactors.empty()) { - Latch WorkLeft(1); - + auto ClaimDiskReserve = [&]() -> uint64_t { + if (!std::filesystem::is_regular_file(Settings.DiskReservePath)) + { + return 0; + } + uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath); + if (std::filesystem::remove(Settings.DiskReservePath)) + { + return ReclaimedSize; + } + return 0; + }; // Remove the stuff we deemed unreferenced from disk - may be heavy operation - SCOPED_TIMER(Result.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferenceStoreCompactors) + // Don't do in parallel, we don't want to steal CPU/Disk from regular operation + SCOPED_TIMER(Result.CompactStoresMS = 
std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : StoreCompactors) { - GcReferenceStoreCompactor* Compactor = It.first.get(); - size_t Index = It.second; - GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Compactor, &Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + GcStoreCompactor* Compactor = It.first.get(); + GcCompactStoreStats& Stats = *It.second; + { // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. - SCOPED_TIMER(Stats.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Compactor->CompactReferenceStore(Ctx, Stats); - }); + SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve); + } } - WorkLeft.CountDown(); - WorkLeft.Wait(); + StoreCompactors.clear(); } - ReferenceStoreCompactors.clear(); - ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); } - Result.Sum(); + Sum(Result); return Result; } @@ -1227,22 +1260,17 @@ GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Setting std::string Id = fmt::format("{}", gsl::narrow<int64_t>(StartTime.time_since_epoch().count())); Writer.BeginObject(Id); { - Writer << "StartTimeSec"sv - << gsl::narrow<int64_t>(std::chrono::duration_cast<std::chrono::seconds>(StartTime.time_since_epoch()).count()); + Writer << "StartTime"sv << ToDateTime(StartTime); Writer.BeginObject("Settings"sv); { - Writer << "CacheExpireTimeSec"sv - << gsl::narrow<int64_t>( - std::chrono::duration_cast<std::chrono::seconds>(Settings.CacheExpireTime.time_since_epoch()).count()); - Writer << "ProjectStoreExpireTimeSec"sv - << gsl::narrow<int64_t>( - std::chrono::duration_cast<std::chrono::seconds>(Settings.ProjectStoreExpireTime.time_since_epoch()) - .count()); + Writer << 
"CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime); + Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime); Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects; Writer << "IsDeleteMode"sv << Settings.IsDeleteMode; Writer << "SkipCidDelete"sv << Settings.SkipCidDelete; Writer << "Verbose"sv << Settings.Verbose; Writer << "SingleThread"sv << Settings.SingleThread; + Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent; } Writer.EndObject(); @@ -1817,26 +1845,27 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .ProjectStoreExpireTime = ProjectStoreExpireTime, .CollectSmallObjects = CollectSmallObjects, .IsDeleteMode = Delete, - .SkipCidDelete = SkipCid}; + .SkipCidDelete = SkipCid, + .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); ZEN_INFO( - "GCV2: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries " - "out of {}, " - "freed " - "{} on disk and {} of memory in {}. CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " + "GCV2: Found {} expired items out of {}, deleted {}. " + "Found {} unreferenced Cid entries out of {}, deleted {}. " + "Freed {} on disk and {} of memory in {}. 
" + "CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " "IsDeleteMode: {}, SkipCidDelete: {}", - Result.ReferencerStat.Expired, - Result.ReferencerStat.Count, - Result.ReferencerStat.Deleted, - Result.ReferencerStat.Expired, - Result.ReferenceStoreStat.Pruned, - Result.ReferenceStoreStat.Count, - Result.ReferenceStoreStat.Compacted, - Result.ReferenceStoreStat.Pruned, - NiceBytes(Result.RemovedDisk), - NiceBytes(Result.RemovedMemory), + Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount, + Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount, + Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount, + + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount, + + NiceBytes(Result.CompactStoresStatSum.RemovedDisk), + NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory), NiceTimeSpanMs(Result.ElapsedMS.count()), Settings.CacheExpireTime, Settings.ProjectStoreExpireTime, @@ -1854,8 +1883,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, { m_LastFullGCV2Result = Result; } - Diff.DiskSize = Result.RemovedDisk; - Diff.MemorySize = Result.RemovedMemory; + Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk; + Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory; } break; } |