aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-30 18:29:09 +0100
committerGitHub <[email protected]>2023-10-30 18:29:09 +0100
commitcbdda104ada38108700f9da5b192867d83074119 (patch)
tree98c04b344e041c156fdc1a5c393672bef743be34 /src/zenstore/gc.cpp
parentfix changelog (diff)
downloadzen-cbdda104ada38108700f9da5b192867d83074119.tar.xz
zen-cbdda104ada38108700f9da5b192867d83074119.zip
individual gc stats (#506)
- Feature: New parameter for endpoint `admin/gc` (GET) `details=true` which gives detailed stats on the GC operation when using GC V2 - Feature: New option for zen command `gc-status` - `--details` that enables the detailed output from the last GC operation when using GC V2
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r--src/zenstore/gc.cpp631
1 files changed, 366 insertions, 265 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index e09f46063..4d146c16c 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -330,6 +330,47 @@ GcManager::~GcManager()
//////// Begin New GC WIP
void
+GcResult::Sum()
+{
+ for (std::pair<std::string, GcReferencerStats>& Referencer : ReferencerStats)
+ {
+ GcReferencerStats& SubStat = Referencer.second;
+ ReferencerStat.Count += SubStat.Count;
+ ReferencerStat.Expired += SubStat.Expired;
+ ReferencerStat.Deleted += SubStat.Deleted;
+ ReferencerStat.RemovedDisk += SubStat.RemovedDisk;
+ ReferencerStat.RemovedMemory += SubStat.RemovedMemory;
+ SubStat.ElapsedMS = SubStat.RemoveExpiredDataMS + SubStat.CreateReferenceCheckersMS + SubStat.LockStateMS;
+
+ ReferencerStat.RemoveExpiredDataMS += SubStat.RemoveExpiredDataMS;
+ ReferencerStat.CreateReferenceCheckersMS += SubStat.CreateReferenceCheckersMS;
+ ReferencerStat.LockStateMS += SubStat.LockStateMS;
+ ReferencerStat.ElapsedMS += SubStat.ElapsedMS;
+
+ RemovedDisk += SubStat.RemovedDisk;
+ RemovedMemory += SubStat.RemovedMemory;
+ }
+ for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : ReferenceStoreStats)
+ {
+ GcReferenceStoreStats& SubStat = ReferenceStore.second;
+ ReferenceStoreStat.Count += SubStat.Count;
+ ReferenceStoreStat.Pruned += SubStat.Pruned;
+ ReferenceStoreStat.Compacted += SubStat.Compacted;
+ ReferenceStoreStat.RemovedDisk += SubStat.RemovedDisk;
+ ReferenceStoreStat.RemovedMemory += SubStat.RemovedMemory;
+ SubStat.ElapsedMS = SubStat.CreateReferencePrunerMS + SubStat.RemoveUnreferencedDataMS + SubStat.CompactReferenceStoreMS;
+
+ ReferenceStoreStat.CreateReferencePrunerMS += SubStat.CreateReferencePrunerMS;
+ ReferenceStoreStat.RemoveUnreferencedDataMS += SubStat.RemoveUnreferencedDataMS;
+ ReferenceStoreStat.CompactReferenceStoreMS += SubStat.CompactReferenceStoreMS;
+ ReferenceStoreStat.ElapsedMS += SubStat.ElapsedMS;
+
+ RemovedDisk += SubStat.RemovedDisk;
+ RemovedMemory += SubStat.RemovedMemory;
+ }
+}
+
+void
GcManager::AddGcReferencer(GcReferencer& Referencer)
{
RwLock::ExclusiveLockScope _(m_Lock);
@@ -358,245 +399,270 @@ GcManager::RemoveGcReferenceStore(GcReferenceStore& ReferenceStore)
GcResult
GcManager::CollectGarbage(const GcSettings& Settings)
{
- GcCtx Ctx{.Settings = Settings};
-
- Stopwatch TotalTimer;
- auto __ = MakeGuard([&]() {
- ZEN_INFO(
- "GC: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries out of {}, "
- "freed "
- "{} on disk and {} of memory in {}",
- Ctx.ExpiredItems.load(),
- Ctx.Items.load(),
- Ctx.DeletedItems.load(),
- Ctx.ExpiredItems.load(),
- Ctx.PrunedReferences.load(),
- Ctx.References.load(),
- Ctx.CompactedReferences.load(),
- Ctx.PrunedReferences.load(),
- NiceBytes(Ctx.RemovedDiskSpace.load()),
- NiceBytes(Ctx.RemovedMemory.load()),
- NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()));
- });
-
- RwLock::SharedLockScope GcLock(m_Lock);
-
- static const bool SingleThread =
-#if ZEN_BUILD_DEBUG
- true
-#else
- false
-#endif
- ;
- WorkerThreadPool ThreadPool(SingleThread ? 0 : 8);
-
- if (!m_GcReferencers.empty())
- {
- Latch WorkLeft(1);
- // First remove any cache keys that may own references
- Stopwatch Timer;
- auto _ = MakeGuard([&]() { ZEN_INFO("GC: Removed expired data in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) });
- for (GcReferencer* Owner : m_GcReferencers)
- {
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, Owner, &WorkLeft]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- Owner->RemoveExpiredData(Ctx);
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
+ GcCtx Ctx{.Settings = Settings};
+ GcResult Result;
- if (Ctx.Settings.SkipCidDelete)
{
- return GcResult{.Items = Ctx.Items.load(),
- .ExpiredItems = Ctx.ExpiredItems.load(),
- .DeletedItems = Ctx.DeletedItems.load(),
- .References = Ctx.References.load(),
- .PrunedReferences = Ctx.PrunedReferences.load(),
- .CompactedReferences = Ctx.CompactedReferences.load(),
- .RemovedDiskSpace = Ctx.RemovedDiskSpace.load(),
- .RemovedMemory = Ctx.RemovedMemory.load()};
- }
+ Stopwatch TotalTimer;
+ auto __ = MakeGuard([&]() { Result.ElapsedMS = std::chrono::milliseconds(TotalTimer.GetElapsedTimeMs()); });
- std::vector<std::unique_ptr<GcReferencePruner>> ReferencePruners;
- if (!m_GcReferenceStores.empty())
- {
- ReferencePruners.reserve(m_GcReferenceStores.size());
- Latch WorkLeft(1);
- RwLock ReferencePrunersLock;
- // Easy to go wide, CreateReferencePruner is usually not very heavy but big data sets change that
- Stopwatch Timer;
- auto _ = MakeGuard([&]() { ZEN_INFO("GC: Created Cid pruners in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) });
- for (GcReferenceStore* CidStore : m_GcReferenceStores)
- {
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, CidStore, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // The CidStore will pick a list of CId entries to check, returning a collector
- std::unique_ptr<GcReferencePruner> ReferencePruner(CidStore->CreateReferencePruner(Ctx));
- if (ReferencePruner)
- {
- RwLock::ExclusiveLockScope __(ReferencePrunersLock);
- ReferencePruners.emplace_back(std::move(ReferencePruner));
- }
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
+ RwLock::SharedLockScope GcLock(m_Lock);
- std::vector<std::unique_ptr<GcReferenceChecker>> ReferenceCheckers;
- if (!m_GcReferencers.empty())
- {
- ReferenceCheckers.reserve(m_GcReferencers.size());
- Latch WorkLeft(1);
- RwLock ReferenceCheckersLock;
- Stopwatch Timer;
- auto _ = MakeGuard([&]() { ZEN_INFO("GC: Created Cid checkers in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) });
- // Easy to go wide, CreateReferenceCheckers is potentially heavy
- // Lock all reference owners from changing the reference data and get access to check for referenced data
- for (GcReferencer* Referencer : m_GcReferencers)
- {
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, &ReferenceCheckersLock, &ReferenceCheckers]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives
- std::vector<GcReferenceChecker*> Checkers = Referencer->CreateReferenceCheckers(Ctx);
- try
- {
- if (!Checkers.empty())
- {
- RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
- for (auto& Checker : Checkers)
- {
- ReferenceCheckers.emplace_back(std::unique_ptr<GcReferenceChecker>(Checker));
- Checker = nullptr;
- }
- }
- }
- catch (std::exception&)
- {
- while (!Checkers.empty())
- {
- delete Checkers.back();
- Checkers.pop_back();
- }
- throw;
- }
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
-
- Stopwatch LockStateTimer;
- if (!ReferenceCheckers.empty())
- {
- // Easy to go wide, locking all references checkers so we hafve a stead state of which references are used
- // From this point we have block all writes to all References (DiskBucket/ProjectStore) until we do delete the ReferenceCheckers
- Latch WorkLeft(1);
-
- Stopwatch Timer;
- auto _ = MakeGuard([&]() { ZEN_INFO("GC: Locked Cid checkers in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) });
- for (std::unique_ptr<GcReferenceChecker>& ReferenceChecker : ReferenceCheckers)
- {
- GcReferenceChecker* Checker = ReferenceChecker.get();
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, Checker, &WorkLeft, &ReferenceCheckers]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- Checker->LockState(Ctx);
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
-
- std::vector<std::unique_ptr<GcReferenceStoreCompactor>> ReferenceStoreCompactors;
- ReferenceStoreCompactors.reserve(ReferencePruners.size());
- if (!ReferencePruners.empty())
- {
- const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
- HashSet UnusedCids(References.begin(), References.end());
- for (const std::unique_ptr<GcReferenceChecker>& ReferenceChecker : ReferenceCheckers)
- {
- ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
- if (UnusedCids.empty())
- {
- return {};
- }
- }
- return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
- };
-
- // Easy to go wide, checking all Cids agains references in cache
- // Ask stores to remove data that the ReferenceCheckers says are not references - this should be a lightweight operation that
- // only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors
-
- Latch WorkLeft(1);
- RwLock ReferenceStoreCompactorsLock;
-
- Stopwatch Timer;
- auto _ = MakeGuard([&]() { ZEN_INFO("GC: Pruned unreferenced Cid data in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) });
- for (std::unique_ptr<GcReferencePruner>& ReferencePruner : ReferencePruners)
- {
- GcReferencePruner* Pruner = ReferencePruner.get();
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork(
- [&Ctx, Pruner, &WorkLeft, &GetUnusedReferences, &ReferenceStoreCompactorsLock, &ReferenceStoreCompactors]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not.
- std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor(Pruner->RemoveUnreferencedData(Ctx, GetUnusedReferences));
- if (ReferenceCompactor)
- {
- RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock);
- ReferenceStoreCompactors.emplace_back(std::move(ReferenceCompactor));
- }
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
- // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed
- ReferenceCheckers.clear();
- ZEN_INFO("GC: Writes blocked for {}", NiceTimeSpanMs(LockStateTimer.GetElapsedTimeMs()))
-
- // Let go of the pruners
- ReferencePruners.clear();
-
- if (!ReferenceStoreCompactors.empty())
- {
- Latch WorkLeft(1);
-
- // Easy to go wide
- // Remove the stuff we deemed unreferenced from disk - may be heavy operation
- Stopwatch Timer;
- auto _ = MakeGuard([&]() { ZEN_INFO("GC: Compacted Cid stores in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())) });
- for (std::unique_ptr<GcReferenceStoreCompactor>& StoreCompactor : ReferenceStoreCompactors)
- {
- GcReferenceStoreCompactor* Compactor = StoreCompactor.get();
- WorkLeft.AddCount(1);
- ThreadPool.ScheduleWork([&Ctx, Compactor, &WorkLeft]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not.
- Compactor->CompactReferenceStore(Ctx);
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
- }
-
- ReferenceStoreCompactors.clear();
-
- return GcResult{.Items = Ctx.Items.load(),
- .ExpiredItems = Ctx.ExpiredItems.load(),
- .DeletedItems = Ctx.DeletedItems.load(),
- .References = Ctx.References.load(),
- .PrunedReferences = Ctx.PrunedReferences.load(),
- .CompactedReferences = Ctx.CompactedReferences.load(),
- .RemovedDiskSpace = Ctx.RemovedDiskSpace.load(),
- .RemovedMemory = Ctx.RemovedMemory.load()};
+ static const bool SingleThread =
+#if ZEN_BUILD_DEBUG
+ true
+#else
+ false
+#endif
+ ;
+
+#define SCOPED_TIMER(closure) \
+ Stopwatch $Timer##__LINE__; \
+ auto $Guard##__LINE = MakeGuard([&, &Timer = $Timer##__LINE__]() { closure })
+
+ Result.ReferencerStats.resize(m_GcReferencers.size());
+ Result.ReferenceStoreStats.resize(m_GcReferenceStores.size());
+
+ WorkerThreadPool ThreadPool(SingleThread ? 0 : 8);
+
+ ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size());
+ if (!m_GcReferencers.empty())
+ {
+ Latch WorkLeft(1);
+ // First remove any cache keys that may own references
+ SCOPED_TIMER(Result.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (size_t Index = 0; Index < m_GcReferencers.size(); Index++)
+ {
+ GcReferencer* Owner = m_GcReferencers[Index];
+ std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, Owner, &Stats, &WorkLeft]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ Stats.first = Owner->GetGcName(Ctx);
+ SCOPED_TIMER(Stats.second.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Owner->RemoveExpiredData(Ctx, Stats.second);
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+
+ if (Ctx.Settings.SkipCidDelete)
+ {
+ Result.Sum();
+ return Result;
+ }
+
+ ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size());
+ std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners;
+ if (!m_GcReferenceStores.empty())
+ {
+ ReferencePruners.reserve(m_GcReferenceStores.size());
+ Latch WorkLeft(1);
+ RwLock ReferencePrunersLock;
+ // CreateReferencePruner is usually not very heavy but big data sets change that
+ SCOPED_TIMER(Result.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++)
+ {
+ GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index];
+ std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ Stats.first = ReferenceStore->GetGcName(Ctx);
+ std::unique_ptr<GcReferencePruner> ReferencePruner;
+ {
+ SCOPED_TIMER(Stats.second.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ // The ReferenceStore will pick a list of CId entries to check, returning a collector
+ ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second));
+ }
+ if (ReferencePruner)
+ {
+ RwLock::ExclusiveLockScope __(ReferencePrunersLock);
+ ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner));
+ }
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+
+ ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size());
+ std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers;
+ if (!m_GcReferencers.empty())
+ {
+ ReferenceCheckers.reserve(m_GcReferencers.size());
+ Latch WorkLeft(1);
+ RwLock ReferenceCheckersLock;
+ SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ // Lock all reference owners from changing the reference data and get access to check for referenced data
+ for (size_t Index = 0; Index < m_GcReferencers.size(); Index++)
+ {
+ GcReferencer* Referencer = m_GcReferencers[Index];
+ std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // The Referencer will create a reference checker that guarantees that the references do not change as long as it lives
+ std::vector<GcReferenceChecker*> Checkers;
+ {
+ SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checkers = Referencer->CreateReferenceCheckers(Ctx);
+ }
+ try
+ {
+ if (!Checkers.empty())
+ {
+ RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
+ for (auto& Checker : Checkers)
+ {
+ ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index);
+ Checker = nullptr;
+ }
+ }
+ }
+ catch (std::exception&)
+ {
+ while (!Checkers.empty())
+ {
+ delete Checkers.back();
+ Checkers.pop_back();
+ }
+ throw;
+ }
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+
+ std::unordered_map<std::unique_ptr<GcReferenceStoreCompactor>, size_t> ReferenceStoreCompactors;
+ ReferenceStoreCompactors.reserve(ReferencePruners.size());
+
+ ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size());
+ {
+ SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS);
+ ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS)));
+ if (!ReferenceCheckers.empty())
+ {
+ // Locking all references checkers so we have a steady state of which references are used
+ // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until
+ // we delete the ReferenceCheckers
+ Latch WorkLeft(1);
+
+ SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (auto& It : ReferenceCheckers)
+ {
+ GcReferenceChecker* Checker = It.first.get();
+ size_t Index = It.second;
+ std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checker->LockState(Ctx);
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+
+ ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size());
+ if (!ReferencePruners.empty())
+ {
+ const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
+ HashSet UnusedCids(References.begin(), References.end());
+ for (const auto& It : ReferenceCheckers)
+ {
+ GcReferenceChecker* ReferenceChecker = It.first.get();
+ ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
+ if (UnusedCids.empty())
+ {
+ return {};
+ }
+ }
+ return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
+ };
+
+ // checking all Cids against references in cache
+ // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight operation
+ // that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors
+
+ Latch WorkLeft(1);
+ RwLock ReferenceStoreCompactorsLock;
+
+ SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (auto& It : ReferencePruners)
+ {
+ GcReferencePruner* Pruner = It.second.get();
+ size_t Index = It.first;
+ GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second;
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx,
+ Pruner,
+ &Stats,
+ &WorkLeft,
+ Index,
+ &GetUnusedReferences,
+ &ReferenceStoreCompactorsLock,
+ &ReferenceStoreCompactors]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not.
+ std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor;
+ {
+ SCOPED_TIMER(Stats.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ ReferenceCompactor =
+ std::unique_ptr<GcReferenceStoreCompactor>(Pruner->RemoveUnreferencedData(Ctx, Stats, GetUnusedReferences));
+ }
+ if (ReferenceCompactor)
+ {
+ RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock);
+ ReferenceStoreCompactors.insert_or_assign(std::move(ReferenceCompactor), Index);
+ }
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+ // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed
+ ReferenceCheckers.clear();
+ }
+
+ // Let go of the pruners
+ ReferencePruners.clear();
+
+ ZEN_INFO("GCV2: Compacting reference stores for {} reference store compactors", ReferenceStoreCompactors.size());
+ if (!ReferenceStoreCompactors.empty())
+ {
+ Latch WorkLeft(1);
+
+ // Remove the stuff we deemed unreferenced from disk - may be heavy operation
+ SCOPED_TIMER(Result.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ for (auto& It : ReferenceStoreCompactors)
+ {
+ GcReferenceStoreCompactor* Compactor = It.first.get();
+ size_t Index = It.second;
+ GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second;
+ WorkLeft.AddCount(1);
+ ThreadPool.ScheduleWork([&Ctx, Compactor, &Stats, &WorkLeft]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // Remove from disk the data that was deemed unreferenced for this reference store.
+ SCOPED_TIMER(Stats.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Compactor->CompactReferenceStore(Ctx, Stats);
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+
+ ReferenceStoreCompactors.clear();
+
+ ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()));
+ }
+
+ Result.Sum();
+ return Result;
+#undef SCOPED_TIMER
}
//////// End New GC WIP
@@ -998,15 +1064,6 @@ GcScheduler::GetState() const
GcSchedulerState Result{.Status = Status(), .Config = m_Config, .AreDiskWritesBlocked = m_AreDiskWritesBlocked.load()};
- {
- std::unique_lock Lock(m_GcMutex);
- Result.LastFullGcTime = m_LastGcTime;
- Result.LastFullGCDiff = m_LastFullGCDiff;
- Result.LastFullGcDuration = m_LastFullGcDuration;
- Result.LastLightweightGcTime = m_LastLightweightGcTime;
- Result.LastLightweightGCDiff = m_LastLightweightGCDiff;
- Result.LastLightweightGcDuration = m_LastLightweightGcDuration;
- }
std::error_code Ec;
DiskSpace Space = DiskSpaceInfo(Result.Config.RootDirectory, Ec);
if (!Ec)
@@ -1026,30 +1083,40 @@ GcScheduler::GetState() const
Result.HasDiskReserve = std::filesystem::is_regular_file(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec;
}
- GcClock::TimePoint CacheExpireTime =
- Result.Config.MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - Result.Config.MaxCacheDuration;
- GcClock::TimePoint ProjectStoreExpireTime = Result.Config.MaxProjectStoreDuration == GcClock::Duration::max()
- ? GcClock::TimePoint::min()
- : Now - Result.Config.MaxProjectStoreDuration;
+ if (Result.Status != GcSchedulerStatus::kRunning)
+ {
+ {
+ std::unique_lock Lock(m_GcMutex);
+ Result.LastFullGcTime = m_LastGcTime;
+ Result.LastFullGCDiff = m_LastFullGCDiff;
+ Result.LastFullGcDuration = m_LastFullGcDuration;
+ Result.LastLightweightGcTime = m_LastLightweightGcTime;
+ Result.LastLightweightGCDiff = m_LastLightweightGCDiff;
+ Result.LastLightweightGcDuration = m_LastLightweightGcDuration;
+
+ Result.LastLightweightGCV2Result = m_LastLightweightGCV2Result;
+ Result.LastFullGCV2Result = m_LastFullGCV2Result;
+ }
- Result.RemainingTimeUntilFullGc =
- Result.Config.Interval.count() == 0
- ? std::chrono::seconds::max()
- : std::chrono::duration_cast<std::chrono::seconds>(Result.LastFullGcTime + Result.Config.Interval - Now);
+ Result.RemainingTimeUntilFullGc =
+ Result.Config.Interval.count() == 0
+ ? std::chrono::seconds::max()
+ : std::chrono::duration_cast<std::chrono::seconds>(Result.LastFullGcTime + Result.Config.Interval - Now);
- if (Result.RemainingTimeUntilFullGc < std::chrono::seconds::zero())
- {
- Result.RemainingTimeUntilFullGc = std::chrono::seconds::zero();
- }
+ if (Result.RemainingTimeUntilFullGc < std::chrono::seconds::zero())
+ {
+ Result.RemainingTimeUntilFullGc = std::chrono::seconds::zero();
+ }
- Result.RemainingTimeUntilLightweightGc =
- Result.Config.LightweightInterval.count() == 0
- ? std::chrono::seconds::max()
- : std::chrono::duration_cast<std::chrono::seconds>(Result.LastLightweightGcTime + Result.Config.LightweightInterval - Now);
+ Result.RemainingTimeUntilLightweightGc =
+ Result.Config.LightweightInterval.count() == 0
+ ? std::chrono::seconds::max()
+ : std::chrono::duration_cast<std::chrono::seconds>(Result.LastLightweightGcTime + Result.Config.LightweightInterval - Now);
- if (Result.RemainingTimeUntilLightweightGc < std::chrono::seconds::zero())
- {
- Result.RemainingTimeUntilLightweightGc = std::chrono::seconds::zero();
+ if (Result.RemainingTimeUntilLightweightGc < std::chrono::seconds::zero())
+ {
+ Result.RemainingTimeUntilLightweightGc = std::chrono::seconds::zero();
+ }
}
return Result;
@@ -1474,6 +1541,14 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
{
case GcVersion::kV1:
Diff = m_GcManager.CollectGarbage(GcCtx);
+ if (SkipCid)
+ {
+ m_LastLightweightGCV2Result.reset();
+ }
+ else
+ {
+ m_LastFullGCV2Result.reset();
+ }
break;
case GcVersion::kV2:
{
@@ -1482,7 +1557,33 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
.CollectSmallObjects = CollectSmallObjects,
.IsDeleteMode = Delete,
.SkipCidDelete = SkipCid});
- Diff.DiskSize = Result.RemovedDiskSpace;
+
+ ZEN_INFO(
+ "GCV2: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries "
+ "out of {}, "
+ "freed "
+ "{} on disk and {} of memory in {}",
+ Result.ReferencerStat.Expired,
+ Result.ReferencerStat.Count,
+ Result.ReferencerStat.Deleted,
+ Result.ReferencerStat.Expired,
+ Result.ReferenceStoreStat.Pruned,
+ Result.ReferenceStoreStat.Count,
+ Result.ReferenceStoreStat.Compacted,
+ Result.ReferenceStoreStat.Pruned,
+ NiceBytes(Result.RemovedDisk),
+ NiceBytes(Result.RemovedMemory),
+ NiceTimeSpanMs(Result.ElapsedMS.count()));
+
+ if (SkipCid)
+ {
+ m_LastLightweightGCV2Result = Result;
+ }
+ else
+ {
+ m_LastFullGCV2Result = Result;
+ }
+ Diff.DiskSize = Result.RemovedDisk;
Diff.MemorySize = Result.RemovedMemory;
}
break;