aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-11-21 15:06:25 +0100
committerGitHub <[email protected]>2023-11-21 15:06:25 +0100
commit05178f7c18a48b21b9e260de282a86b91df26955 (patch)
tree25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/gc.cpp
parentzen run command (#552) (diff)
downloadzen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz
zen-05178f7c18a48b21b9e260de282a86b91df26955.zip
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving out the actual disk work from deleting items in the index. - Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats - Make Compacting of stores non-parallel to not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r--src/zenstore/gc.cpp821
1 files changed, 425 insertions, 396 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 7f9ca5236..b78b23350 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -173,166 +173,6 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object)
//////////////////////////////////////////////////////////////////////////
-void
-WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable)
-{
- if (Stats.Count == 0)
- {
- return;
- }
- Writer << "Count" << Stats.Count;
- Writer << "Expired" << Stats.Expired;
- Writer << "Deleted" << Stats.Deleted;
-
- if (HumanReadable)
- {
- Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk);
- Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory);
- }
- else
- {
- Writer << "RemovedDiskBytes" << Stats.RemovedDisk;
- Writer << "RemovedMemoryBytes" << Stats.RemovedMemory;
- }
-
- if (HumanReadable)
- {
- Writer << "RemoveExpiredData" << NiceTimeSpanMs(Stats.RemoveExpiredDataMS.count());
- Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Stats.CreateReferenceCheckersMS.count());
- Writer << "LockState" << NiceTimeSpanMs(Stats.LockStateMS.count());
- Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count());
- }
- else
- {
- Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Stats.RemoveExpiredDataMS.count());
- Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Stats.CreateReferenceCheckersMS.count());
- Writer << "LockStateMS" << gsl::narrow<int64_t>(Stats.LockStateMS.count());
- Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count());
- }
-};
-
-void
-WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable)
-{
- if (Stats.Count == 0)
- {
- return;
- }
- Writer << "Count" << Stats.Count;
- Writer << "Pruned" << Stats.Pruned;
- Writer << "Compacted" << Stats.Compacted;
-
- if (HumanReadable)
- {
- Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk);
- Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory);
- }
- else
- {
- Writer << "RemovedDiskBytes" << Stats.RemovedDisk;
- Writer << "RemovedMemoryBytes" << Stats.RemovedMemory;
- }
-
- if (HumanReadable)
- {
- Writer << "CreateReferencePruner" << NiceTimeSpanMs(Stats.CreateReferencePrunerMS.count());
- Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Stats.RemoveUnreferencedDataMS.count());
- Writer << "CompactReferenceStore" << NiceTimeSpanMs(Stats.CompactReferenceStoreMS.count());
- Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count());
- }
- else
- {
- Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Stats.CreateReferencePrunerMS.count());
- Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Stats.RemoveUnreferencedDataMS.count());
- Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Stats.CompactReferenceStoreMS.count());
- Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count());
- }
-};
-
-void
-WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails)
-{
- if (HumanReadable)
- {
- Writer << "RemovedDisk" << NiceBytes(Result.RemovedDisk);
- Writer << "RemovedMemory" << NiceBytes(Result.RemovedMemory);
- Writer << "WriteBlock" << NiceTimeSpanMs(Result.WriteBlockMS.count());
- Writer << "Elapsed" << NiceTimeSpanMs(Result.ElapsedMS.count());
- }
- else
- {
- Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.RemovedDisk);
- Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.RemovedMemory);
- Writer << "WriteBlockMS" << gsl::narrow<int64_t>(Result.WriteBlockMS.count());
- Writer << "ElapsedMS" << gsl::narrow<int64_t>(Result.ElapsedMS.count());
- }
-
- if (!IncludeDetails)
- {
- return;
- }
-
- if (HumanReadable)
- {
- Writer << "RemoveExpiredData" << NiceTimeSpanMs(Result.RemoveExpiredDataMS.count());
- Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count());
- Writer << "LockState" << NiceTimeSpanMs(Result.LockStateMS.count());
-
- Writer << "CreateReferencePruner" << NiceTimeSpanMs(Result.CreateReferencePrunerMS.count());
- Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count());
- Writer << "CompactReferenceStore" << NiceTimeSpanMs(Result.CompactReferenceStoreMS.count());
- }
- else
- {
- Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Result.RemoveExpiredDataMS.count());
- Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Result.CreateReferenceCheckersMS.count());
- Writer << "LockStateMS" << gsl::narrow<int64_t>(Result.LockStateMS.count());
-
- Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Result.CreateReferencePrunerMS.count());
- Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Result.RemoveUnreferencedDataMS.count());
- Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Result.CompactReferenceStoreMS.count());
- }
-
- Writer.BeginObject("ReferencerStats");
- {
- WriteReferencerStats(Writer, Result.ReferencerStat, HumanReadable);
- }
- Writer.EndObject();
-
- Writer.BeginObject("ReferenceStoreStats");
- {
- WriteReferenceStoreStats(Writer, Result.ReferenceStoreStat, HumanReadable);
- }
- Writer.EndObject();
-
- if (!Result.ReferencerStats.empty())
- {
- Writer.BeginArray("Referencers");
- {
- for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats)
- {
- Writer.BeginObject();
- Writer << "Name" << It.first;
- WriteReferencerStats(Writer, It.second, HumanReadable);
- Writer.EndObject();
- }
- }
- Writer.EndArray();
- }
- if (!Result.ReferenceStoreStats.empty())
- {
- Writer.BeginArray("ReferenceStores");
- for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats)
- {
- Writer.BeginObject();
- Writer << "Name" << It.first;
- WriteReferenceStoreStats(Writer, It.second, HumanReadable);
- Writer.EndObject();
- }
- Writer.EndArray();
- }
-};
-
struct GcContext::GcState
{
using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>;
@@ -490,44 +330,235 @@ GcManager::~GcManager()
//////// Begin GC V2
void
-GcResult::Sum()
+WriteGcStats(CbObjectWriter& Writer, const GcStats& Stats, bool HumanReadable)
{
- for (std::pair<std::string, GcReferencerStats>& Referencer : ReferencerStats)
+ Writer << "Checked" << Stats.CheckedCount;
+ Writer << "Found" << Stats.FoundCount;
+ Writer << "Deleted" << Stats.DeletedCount;
+ if (HumanReadable)
{
- GcReferencerStats& SubStat = Referencer.second;
- ReferencerStat.Count += SubStat.Count;
- ReferencerStat.Expired += SubStat.Expired;
- ReferencerStat.Deleted += SubStat.Deleted;
- ReferencerStat.RemovedDisk += SubStat.RemovedDisk;
- ReferencerStat.RemovedMemory += SubStat.RemovedMemory;
- SubStat.ElapsedMS = SubStat.RemoveExpiredDataMS + SubStat.CreateReferenceCheckersMS + SubStat.LockStateMS;
+ Writer << "FreedMemory" << NiceBytes(Stats.FreedMemory);
+ }
+ else
+ {
+ Writer << "FreedMemoryBytes" << Stats.FreedMemory;
+ }
+ Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS);
+}
- ReferencerStat.RemoveExpiredDataMS += SubStat.RemoveExpiredDataMS;
- ReferencerStat.CreateReferenceCheckersMS += SubStat.CreateReferenceCheckersMS;
- ReferencerStat.LockStateMS += SubStat.LockStateMS;
- ReferencerStat.ElapsedMS += SubStat.ElapsedMS;
+void
+WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats, bool HumanReadable)
+{
+ if (HumanReadable)
+ {
+ Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk);
+ }
+ else
+ {
+ Writer << "RemovedDiskBytes" << Stats.RemovedDisk;
+ }
+ Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS);
+}
- RemovedDisk += SubStat.RemovedDisk;
- RemovedMemory += SubStat.RemovedMemory;
+void
+WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable)
+{
+ if (Stats.RemoveExpiredDataStats.CheckedCount == 0)
+ {
+ return;
}
- for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : ReferenceStoreStats)
+ Writer.BeginObject("RemoveExpired");
{
- GcReferenceStoreStats& SubStat = ReferenceStore.second;
- ReferenceStoreStat.Count += SubStat.Count;
- ReferenceStoreStat.Pruned += SubStat.Pruned;
- ReferenceStoreStat.Compacted += SubStat.Compacted;
- ReferenceStoreStat.RemovedDisk += SubStat.RemovedDisk;
- ReferenceStoreStat.RemovedMemory += SubStat.RemovedMemory;
- SubStat.ElapsedMS = SubStat.CreateReferencePrunerMS + SubStat.RemoveUnreferencedDataMS + SubStat.CompactReferenceStoreMS;
+ WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer.BeginObject("Compact");
+ {
+ WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer << "CreateReferenceCheckers" << ToTimeSpan(Stats.CreateReferenceCheckersMS);
+ Writer << "LockState" << ToTimeSpan(Stats.LockStateMS);
+ Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS);
+};
+
+void
+WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable)
+{
+ if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0)
+ {
+ return;
+ }
+ Writer.BeginObject("RemoveUnreferenced");
+ {
+ WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer.BeginObject("Compact");
+ {
+ WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer << "CreateReferencePruners" << ToTimeSpan(Stats.CreateReferencePrunersMS);
+ Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS);
+};
+
+void
+WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails)
+{
+ if (!IncludeDetails)
+ {
+ if (HumanReadable)
+ {
+ Writer << "RemovedDisk" << NiceBytes(Result.CompactStoresStatSum.RemovedDisk);
+ Writer << "FreedMemory" << NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory);
+ }
+ else
+ {
+ Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.CompactStoresStatSum.RemovedDisk);
+ Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory);
+ }
+ Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS);
+ Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS);
+ return;
+ }
+
+ Writer.BeginObject("Referencer");
+ {
+ WriteReferencerStats(Writer, Result.ReferencerStatSum, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer.BeginObject("ReferenceStore");
+ {
+ WriteReferenceStoreStats(Writer, Result.ReferenceStoreStatSum, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer.BeginObject("Compact");
+ {
+ WriteCompactStoreStats(Writer, Result.CompactStoresStatSum, HumanReadable);
+ }
+ Writer.EndObject();
+
+ Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS);
+ Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS);
+ Writer << "LockState" << ToTimeSpan(Result.LockStateMS);
+
+ Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS);
+ Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS);
+ Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS);
+ Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS);
+ Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS);
+
+ if (!Result.ReferencerStats.empty())
+ {
+ Writer.BeginArray("Referencers");
+ {
+ for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats)
+ {
+ Writer.BeginObject();
+ Writer << "Name" << It.first;
+ WriteReferencerStats(Writer, It.second, HumanReadable);
+ Writer.EndObject();
+ }
+ }
+ Writer.EndArray();
+ }
+ if (!Result.ReferenceStoreStats.empty())
+ {
+ Writer.BeginArray("ReferenceStores");
+ for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats)
+ {
+ Writer.BeginObject();
+ Writer << "Name" << It.first;
+ WriteReferenceStoreStats(Writer, It.second, HumanReadable);
+ Writer.EndObject();
+ }
+ Writer.EndArray();
+ }
+};
+
+void
+Add(GcCompactStoreStats& Sum, const GcCompactStoreStats& Sub)
+{
+ Sum.RemovedDisk += Sub.RemovedDisk;
+
+ Sum.ElapsedMS += Sub.ElapsedMS;
+}
+
+void
+Add(GcStats& Sum, const GcStats& Sub)
+{
+ Sum.CheckedCount += Sub.CheckedCount;
+ Sum.FoundCount += Sub.FoundCount;
+ Sum.DeletedCount += Sub.DeletedCount;
+ Sum.FreedMemory += Sub.FreedMemory;
+
+ Sum.ElapsedMS += Sub.ElapsedMS;
+}
+
+void
+Sum(GcReferencerStats& Stat)
+{
+ Stat.ElapsedMS =
+ Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS + Stat.LockStateMS;
+}
+
+void
+Add(GcReferencerStats& Sum, const GcReferencerStats& Sub)
+{
+ Add(Sum.RemoveExpiredDataStats, Sub.RemoveExpiredDataStats);
+ Add(Sum.CompactStoreStats, Sub.CompactStoreStats);
+
+ Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS;
+ Sum.LockStateMS += Sub.LockStateMS;
+
+ Sum.ElapsedMS += Sub.ElapsedMS;
+}
- ReferenceStoreStat.CreateReferencePrunerMS += SubStat.CreateReferencePrunerMS;
- ReferenceStoreStat.RemoveUnreferencedDataMS += SubStat.RemoveUnreferencedDataMS;
- ReferenceStoreStat.CompactReferenceStoreMS += SubStat.CompactReferenceStoreMS;
- ReferenceStoreStat.ElapsedMS += SubStat.ElapsedMS;
+void
+Sum(GcReferenceStoreStats& Stat)
+{
+ Stat.ElapsedMS = Stat.RemoveUnreferencedDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferencePrunersMS;
+}
- RemovedDisk += SubStat.RemovedDisk;
- RemovedMemory += SubStat.RemovedMemory;
+void
+Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub)
+{
+ Add(Sum.RemoveUnreferencedDataStats, Sub.RemoveUnreferencedDataStats);
+ Add(Sum.CompactStoreStats, Sub.CompactStoreStats);
+
+ Sum.CreateReferencePrunersMS += Sub.CreateReferencePrunersMS;
+
+ Sum.ElapsedMS += Sub.ElapsedMS;
+}
+
+void
+Sum(GcResult& Stat)
+{
+ for (std::pair<std::string, GcReferencerStats>& Referencer : Stat.ReferencerStats)
+ {
+ GcReferencerStats& SubStat = Referencer.second;
+ Sum(SubStat);
+ Add(Stat.ReferencerStatSum, SubStat);
+ }
+ for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : Stat.ReferenceStoreStats)
+ {
+ GcReferenceStoreStats& SubStat = ReferenceStore.second;
+ Sum(SubStat);
+ Add(Stat.ReferenceStoreStatSum, SubStat);
}
+
+ Sum(Stat.ReferencerStatSum);
+ Sum(Stat.ReferenceStoreStatSum);
+
+ Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats);
+ Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats);
}
void
@@ -581,7 +612,9 @@ GcManager::CollectGarbage(const GcSettings& Settings)
Result.ReferencerStats.resize(m_GcReferencers.size());
- WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2");
+ std::unordered_map<std::unique_ptr<GcStoreCompactor>, GcCompactStoreStats*> StoreCompactors;
+ RwLock StoreCompactorsLock;
+ WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2");
ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size());
if (!m_GcReferencers.empty())
@@ -594,52 +627,15 @@ GcManager::CollectGarbage(const GcSettings& Settings)
GcReferencer* Owner = m_GcReferencers[Index];
std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, Owner, &Stats, &WorkLeft]() {
+ ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Owner, &Stats, &StoreCompactorsLock, &StoreCompactors]() {
auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
Stats.first = Owner->GetGcName(Ctx);
- SCOPED_TIMER(Stats.second.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Owner->RemoveExpiredData(Ctx, Stats.second);
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
-
- if (Ctx.Settings.SkipCidDelete)
- {
- Result.Sum();
- return Result;
- }
-
- Result.ReferenceStoreStats.resize(m_GcReferenceStores.size());
-
- ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size());
- std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners;
- if (!m_GcReferenceStores.empty())
- {
- ReferencePruners.reserve(m_GcReferenceStores.size());
- Latch WorkLeft(1);
- RwLock ReferencePrunersLock;
- // CreateReferencePruner is usually not very heavy but big data sets change that
- SCOPED_TIMER(Result.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++)
- {
- GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index];
- std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index];
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- Stats.first = ReferenceStore->GetGcName(Ctx);
- std::unique_ptr<GcReferencePruner> ReferencePruner;
- {
- SCOPED_TIMER(Stats.second.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- // The ReferenceStore will pick a list of CId entries to check, returning a collector
- ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second));
- }
- if (ReferencePruner)
+ SCOPED_TIMER(Stats.second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ std::unique_ptr<GcStoreCompactor> StoreCompactor(Owner->RemoveExpiredData(Ctx, Stats.second.RemoveExpiredDataStats));
+ if (StoreCompactor)
{
- RwLock::ExclusiveLockScope __(ReferencePrunersLock);
- ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner));
+ RwLock::ExclusiveLockScope __(StoreCompactorsLock);
+ StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.second.CompactStoreStats);
}
});
}
@@ -647,180 +643,217 @@ GcManager::CollectGarbage(const GcSettings& Settings)
WorkLeft.Wait();
}
- ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size());
- std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers;
- if (!m_GcReferencers.empty())
+ if (!Ctx.Settings.SkipCidDelete)
{
- ReferenceCheckers.reserve(m_GcReferencers.size());
- Latch WorkLeft(1);
- RwLock ReferenceCheckersLock;
- SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- // Lock all reference owners from changing the reference data and get access to check for referenced data
- for (size_t Index = 0; Index < m_GcReferencers.size(); Index++)
+ Result.ReferenceStoreStats.resize(m_GcReferenceStores.size());
+
+ ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size());
+ std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners;
+ if (!m_GcReferenceStores.empty())
{
- GcReferencer* Referencer = m_GcReferencers[Index];
- std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives
- std::vector<GcReferenceChecker*> Checkers;
- {
- SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Checkers = Referencer->CreateReferenceCheckers(Ctx);
- }
- try
- {
- if (!Checkers.empty())
+ ReferencePruners.reserve(m_GcReferenceStores.size());
+ Latch WorkLeft(1);
+ RwLock ReferencePrunersLock;
+ // CreateReferencePruner is usually not very heavy but big data sets change that
+ SCOPED_TIMER(Result.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++)
+ {
+ GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index];
+ std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ Stats.first = ReferenceStore->GetGcName(Ctx);
+ std::unique_ptr<GcReferencePruner> ReferencePruner;
{
- RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
- for (auto& Checker : Checkers)
- {
- ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index);
- Checker = nullptr;
- }
+ SCOPED_TIMER(Stats.second.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ // The ReferenceStore will pick a list of CId entries to check, returning a collector
+ ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second));
}
- }
- catch (std::exception&)
- {
- while (!Checkers.empty())
+ if (ReferencePruner)
{
- delete Checkers.back();
- Checkers.pop_back();
+ RwLock::ExclusiveLockScope __(ReferencePrunersLock);
+ ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner));
}
- throw;
- }
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
-
- std::unordered_map<std::unique_ptr<GcReferenceStoreCompactor>, size_t> ReferenceStoreCompactors;
- ReferenceStoreCompactors.reserve(ReferencePruners.size());
-
- ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size());
- {
- SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS);
- ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS)));
- if (!ReferenceCheckers.empty())
- {
- // Locking all references checkers so we have a steady state of which references are used
- // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until
- // we delete the ReferenceCheckers
- Latch WorkLeft(1);
-
- SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- for (auto& It : ReferenceCheckers)
- {
- GcReferenceChecker* Checker = It.first.get();
- size_t Index = It.second;
- std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Checker->LockState(Ctx);
});
}
WorkLeft.CountDown();
WorkLeft.Wait();
}
- ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size());
if (!ReferencePruners.empty())
{
- const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
- HashSet UnusedCids(References.begin(), References.end());
- for (const auto& It : ReferenceCheckers)
+ ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size());
+ std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers;
+ if (!m_GcReferencers.empty())
+ {
+ ReferenceCheckers.reserve(m_GcReferencers.size());
+ Latch WorkLeft(1);
+ RwLock ReferenceCheckersLock;
+ SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ // Lock all reference owners from changing the reference data and get access to check for referenced data
+ for (size_t Index = 0; Index < m_GcReferencers.size(); Index++)
{
- GcReferenceChecker* ReferenceChecker = It.first.get();
- ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
- if (UnusedCids.empty())
+ GcReferencer* Referencer = m_GcReferencers[Index];
+ std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // The Referencer will create a reference checker that guarantees that the references do not change as long as
+ // it lives
+ std::vector<GcReferenceChecker*> Checkers;
+ {
+ SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checkers = Referencer->CreateReferenceCheckers(Ctx);
+ }
+ try
+ {
+ if (!Checkers.empty())
+ {
+ RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
+ for (auto& Checker : Checkers)
+ {
+ ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index);
+ Checker = nullptr;
+ }
+ }
+ }
+ catch (std::exception&)
+ {
+ while (!Checkers.empty())
+ {
+ delete Checkers.back();
+ Checkers.pop_back();
+ }
+ throw;
+ }
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+
+ ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size());
+ {
+ SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS);
+ ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS)));
+ if (!ReferenceCheckers.empty())
+ {
+ // Locking all references checkers so we have a steady state of which references are used
+ // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until
+ // we delete the ReferenceCheckers
+ Latch WorkLeft(1);
+
+ SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (auto& It : ReferenceCheckers)
{
- return {};
+ GcReferenceChecker* Checker = It.first.get();
+ size_t Index = It.second;
+ std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checker->LockState(Ctx);
+ });
}
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
}
- return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
- };
- // checking all Cids agains references in cache
- // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight operation
- // that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors
+ ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size());
+ {
+ const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
+ HashSet UnusedCids(References.begin(), References.end());
+ for (const auto& It : ReferenceCheckers)
+ {
+ GcReferenceChecker* ReferenceChecker = It.first.get();
+ ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
+ if (UnusedCids.empty())
+ {
+ return {};
+ }
+ }
+ return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
+ };
- Latch WorkLeft(1);
- RwLock ReferenceStoreCompactorsLock;
+ // checking all Cids against references in cache
+ // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight
+ // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors
- SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- for (auto& It : ReferencePruners)
- {
- GcReferencePruner* Pruner = It.second.get();
- size_t Index = It.first;
- GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second;
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx,
- Pruner,
- &Stats,
- &WorkLeft,
- Index,
- &GetUnusedReferences,
- &ReferenceStoreCompactorsLock,
- &ReferenceStoreCompactors]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not.
- std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor;
- {
- SCOPED_TIMER(Stats.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- ReferenceCompactor =
- std::unique_ptr<GcReferenceStoreCompactor>(Pruner->RemoveUnreferencedData(Ctx, Stats, GetUnusedReferences));
- }
- if (ReferenceCompactor)
+ Latch WorkLeft(1);
+
+ SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (auto& It : ReferencePruners)
{
- RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock);
- ReferenceStoreCompactors.insert_or_assign(std::move(ReferenceCompactor), Index);
+ GcReferencePruner* Pruner = It.second.get();
+ size_t Index = It.first;
+ GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second;
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork(
+ [&Ctx, Pruner, &Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced
+ // or not.
+ std::unique_ptr<GcStoreCompactor> StoreCompactor;
+ {
+ SCOPED_TIMER(Stats.RemoveUnreferencedDataStats.ElapsedMS =
+ std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ StoreCompactor = std::unique_ptr<GcStoreCompactor>(
+ Pruner->RemoveUnreferencedData(Ctx, Stats.RemoveUnreferencedDataStats, GetUnusedReferences));
+ }
+ if (StoreCompactor)
+ {
+ RwLock::ExclusiveLockScope __(StoreCompactorsLock);
+ StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.CompactStoreStats);
+ }
+ });
}
- });
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+ // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed
+ ReferenceCheckers.clear();
+ ReferencePruners.clear();
}
- WorkLeft.CountDown();
- WorkLeft.Wait();
}
- // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed
- ReferenceCheckers.clear();
}
- // Let go of the pruners
- ReferencePruners.clear();
-
- ZEN_INFO("GCV2: Compacting reference stores for {} reference store compactors", ReferenceStoreCompactors.size());
- if (!ReferenceStoreCompactors.empty())
+ ZEN_INFO("GCV2: Compacting reference stores for {} store compactors", StoreCompactors.size());
+ if (!StoreCompactors.empty())
{
- Latch WorkLeft(1);
-
+ auto ClaimDiskReserve = [&]() -> uint64_t {
+ if (!std::filesystem::is_regular_file(Settings.DiskReservePath))
+ {
+ return 0;
+ }
+ uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath);
+ if (std::filesystem::remove(Settings.DiskReservePath))
+ {
+ return ReclaimedSize;
+ }
+ return 0;
+ };
// Remove the stuff we deemed unreferenced from disk - may be heavy operation
- SCOPED_TIMER(Result.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- for (auto& It : ReferenceStoreCompactors)
+ // Don't do in parallel, we don't want to steal CPU/Disk from regular operation
+ SCOPED_TIMER(Result.CompactStoresMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (auto& It : StoreCompactors)
{
- GcReferenceStoreCompactor* Compactor = It.first.get();
- size_t Index = It.second;
- GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second;
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, Compactor, &Stats, &WorkLeft]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ GcStoreCompactor* Compactor = It.first.get();
+ GcCompactStoreStats& Stats = *It.second;
+ {
// Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not.
- SCOPED_TIMER(Stats.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Compactor->CompactReferenceStore(Ctx, Stats);
- });
+ SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve);
+ }
}
- WorkLeft.CountDown();
- WorkLeft.Wait();
+ StoreCompactors.clear();
}
- ReferenceStoreCompactors.clear();
-
ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()));
}
- Result.Sum();
+ Sum(Result);
return Result;
}
@@ -1227,22 +1260,17 @@ GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Setting
std::string Id = fmt::format("{}", gsl::narrow<int64_t>(StartTime.time_since_epoch().count()));
Writer.BeginObject(Id);
{
- Writer << "StartTimeSec"sv
- << gsl::narrow<int64_t>(std::chrono::duration_cast<std::chrono::seconds>(StartTime.time_since_epoch()).count());
+ Writer << "StartTime"sv << ToDateTime(StartTime);
Writer.BeginObject("Settings"sv);
{
- Writer << "CacheExpireTimeSec"sv
- << gsl::narrow<int64_t>(
- std::chrono::duration_cast<std::chrono::seconds>(Settings.CacheExpireTime.time_since_epoch()).count());
- Writer << "ProjectStoreExpireTimeSec"sv
- << gsl::narrow<int64_t>(
- std::chrono::duration_cast<std::chrono::seconds>(Settings.ProjectStoreExpireTime.time_since_epoch())
- .count());
+ Writer << "CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime);
+ Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime);
Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects;
Writer << "IsDeleteMode"sv << Settings.IsDeleteMode;
Writer << "SkipCidDelete"sv << Settings.SkipCidDelete;
Writer << "Verbose"sv << Settings.Verbose;
Writer << "SingleThread"sv << Settings.SingleThread;
+ Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent;
}
Writer.EndObject();
@@ -1817,26 +1845,27 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
.ProjectStoreExpireTime = ProjectStoreExpireTime,
.CollectSmallObjects = CollectSmallObjects,
.IsDeleteMode = Delete,
- .SkipCidDelete = SkipCid};
+ .SkipCidDelete = SkipCid,
+ .DiskReservePath = m_Config.RootDirectory / "reserve.gc"};
GcClock::TimePoint GcStartTime = GcClock::Now();
GcResult Result = m_GcManager.CollectGarbage(Settings);
ZEN_INFO(
- "GCV2: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries "
- "out of {}, "
- "freed "
- "{} on disk and {} of memory in {}. CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, "
+ "GCV2: Found {} expired items out of {}, deleted {}. "
+ "Found {} unreferenced Cid entries out of {}, deleted {}. "
+ "Freed {} on disk and {} of memory in {}. "
+ "CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, "
"IsDeleteMode: {}, SkipCidDelete: {}",
- Result.ReferencerStat.Expired,
- Result.ReferencerStat.Count,
- Result.ReferencerStat.Deleted,
- Result.ReferencerStat.Expired,
- Result.ReferenceStoreStat.Pruned,
- Result.ReferenceStoreStat.Count,
- Result.ReferenceStoreStat.Compacted,
- Result.ReferenceStoreStat.Pruned,
- NiceBytes(Result.RemovedDisk),
- NiceBytes(Result.RemovedMemory),
+ Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount,
+ Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount,
+ Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount,
+
+ Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount,
+ Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount,
+ Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount,
+
+ NiceBytes(Result.CompactStoresStatSum.RemovedDisk),
+ NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory),
NiceTimeSpanMs(Result.ElapsedMS.count()),
Settings.CacheExpireTime,
Settings.ProjectStoreExpireTime,
@@ -1854,8 +1883,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
{
m_LastFullGCV2Result = Result;
}
- Diff.DiskSize = Result.RemovedDisk;
- Diff.MemorySize = Result.RemovedMemory;
+ Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk;
+ Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory;
}
break;
}