aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-11-15 10:06:39 +0100
committerGitHub Enterprise <[email protected]>2024-11-15 10:06:39 +0100
commitaca6f56fde841454b13ed18136008b0ffe946aed (patch)
tree3770efa6c789b45de8ea3ec426da7a77e7813775 /src/zenstore
parentfixed some issues with ZenServerInstance::SpawnServer (#218) (diff)
downloadzen-aca6f56fde841454b13ed18136008b0ffe946aed.tar.xz
zen-aca6f56fde841454b13ed18136008b0ffe946aed.zip
oplog prep gc fix (#216)
- Added option gc-validation to zenserver that checks for missing references in all oplogs after a full GC. Enabled by default. - Feature: Added option gc-validation to the zen gc command to control reference validation. Enabled by default. - Added more details to the post-GC log. - Fixed a race condition in oplog writes which could cause used attachments to be incorrectly removed by GC.
Diffstat (limited to 'src/zenstore')
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp23
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp53
-rw-r--r--src/zenstore/gc.cpp600
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h10
-rw-r--r--src/zenstore/include/zenstore/cache/structuredcachestore.h10
-rw-r--r--src/zenstore/include/zenstore/gc.h68
6 files changed, 525 insertions, 239 deletions
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index 4aafb9828..93b639a51 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -3454,6 +3454,12 @@ ZenCacheDiskLayer::CacheBucket::CreateReferenceCheckers(GcCtx& Ctx)
return {new DiskBucketReferenceChecker(*this)};
}
+std::vector<GcReferenceValidator*>
+ZenCacheDiskLayer::CacheBucket::CreateReferenceValidators(GcCtx& /*Ctx*/)
+{
+ return {};
+}
+
void
ZenCacheDiskLayer::CacheBucket::CompactState(RwLock::ExclusiveLockScope&,
std::vector<BucketPayload>& Payloads,
@@ -3594,12 +3600,10 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket)
CacheBucket* Result = Bucket.get();
m_Buckets.emplace(BucketName, std::move(Bucket));
- m_UpdateCaptureLock.WithExclusiveLock([&]() {
- if (m_CapturedBuckets)
- {
- m_CapturedBuckets->push_back(std::string(BucketName));
- }
- });
+ if (m_CapturedBuckets)
+ {
+ m_CapturedBuckets->push_back(std::string(BucketName));
+ }
return Result;
}
@@ -4176,7 +4180,7 @@ ZenCacheDiskLayer::GetGcReferencerLocks()
void
ZenCacheDiskLayer::EnableUpdateCapture()
{
- m_UpdateCaptureLock.WithExclusiveLock([&]() {
+ m_Lock.WithExclusiveLock([&]() {
if (m_UpdateCaptureRefCounter == 0)
{
ZEN_ASSERT(!m_CapturedBuckets);
@@ -4193,7 +4197,7 @@ ZenCacheDiskLayer::EnableUpdateCapture()
void
ZenCacheDiskLayer::DisableUpdateCapture()
{
- m_UpdateCaptureLock.WithExclusiveLock([&]() {
+ m_Lock.WithExclusiveLock([&]() {
ZEN_ASSERT(m_CapturedBuckets);
ZEN_ASSERT(m_UpdateCaptureRefCounter > 0);
m_UpdateCaptureRefCounter--;
@@ -4205,9 +4209,8 @@ ZenCacheDiskLayer::DisableUpdateCapture()
}
std::vector<std::string>
-ZenCacheDiskLayer::GetCapturedBuckets()
+ZenCacheDiskLayer::GetCapturedBucketsLocked()
{
- RwLock::SharedLockScope _(m_UpdateCaptureLock);
if (m_CapturedBuckets)
{
return *m_CapturedBuckets;
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index 578929198..512f1d7f2 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -364,6 +364,7 @@ ZenCacheNamespace::EnableUpdateCapture()
{
m_DiskLayer.EnableUpdateCapture();
}
+
void
ZenCacheNamespace::DisableUpdateCapture()
{
@@ -850,12 +851,10 @@ ZenCacheStore::GetNamespace(std::string_view Namespace)
m_BasePath / fmt::format("{}{}", NamespaceDiskPrefix, Namespace),
m_Configuration.NamespaceConfig));
- m_UpdateCaptureLock.WithExclusiveLock([&]() {
- if (m_CapturedNamespaces)
- {
- m_CapturedNamespaces->push_back(std::string(Namespace));
- }
- });
+ if (m_CapturedNamespaces)
+ {
+ m_CapturedNamespaces->push_back(std::string(Namespace));
+ }
return NewNamespace.first->second.get();
}
@@ -1039,7 +1038,8 @@ ZenCacheStore::LockState(GcCtx& Ctx)
void
ZenCacheStore::EnableUpdateCapture()
{
- m_UpdateCaptureLock.WithExclusiveLock([&]() {
+ std::vector<ZenCacheNamespace*> Namespaces;
+ m_NamespacesLock.WithExclusiveLock([&]() {
if (m_UpdateCaptureRefCounter == 0)
{
ZEN_ASSERT(!m_CapturedNamespaces);
@@ -1050,21 +1050,24 @@ ZenCacheStore::EnableUpdateCapture()
ZEN_ASSERT(m_CapturedNamespaces);
}
m_UpdateCaptureRefCounter++;
+ Namespaces.reserve(m_Namespaces.size());
+ for (auto& NamespaceIt : m_Namespaces)
+ {
+ Namespaces.push_back(NamespaceIt.second.get());
+ }
});
- for (auto& NamespaceIt : m_Namespaces)
+
+ for (ZenCacheNamespace* Namespace : Namespaces)
{
- NamespaceIt.second->EnableUpdateCapture();
+ Namespace->EnableUpdateCapture();
}
}
void
ZenCacheStore::DisableUpdateCapture()
{
- for (auto& NamespaceIt : m_Namespaces)
- {
- NamespaceIt.second->DisableUpdateCapture();
- }
- m_UpdateCaptureLock.WithExclusiveLock([&]() {
+ std::vector<ZenCacheNamespace*> Namespaces;
+ m_NamespacesLock.WithExclusiveLock([&]() {
ZEN_ASSERT(m_CapturedNamespaces);
ZEN_ASSERT(m_UpdateCaptureRefCounter > 0);
m_UpdateCaptureRefCounter--;
@@ -1072,13 +1075,21 @@ ZenCacheStore::DisableUpdateCapture()
{
m_CapturedNamespaces.reset();
}
+ Namespaces.reserve(m_Namespaces.size());
+ for (auto& NamespaceIt : m_Namespaces)
+ {
+ Namespaces.push_back(NamespaceIt.second.get());
+ }
});
+ for (ZenCacheNamespace* Namespace : Namespaces)
+ {
+ Namespace->DisableUpdateCapture();
+ }
}
std::vector<std::string>
-ZenCacheStore::GetCapturedNamespaces()
+ZenCacheStore::GetCapturedNamespacesLocked()
{
- RwLock::SharedLockScope _(m_UpdateCaptureLock);
if (m_CapturedNamespaces)
{
return *m_CapturedNamespaces;
@@ -1149,7 +1160,7 @@ public:
AddedBuckets.size());
});
- std::vector<std::string> AddedNamespaces = m_CacheStore.GetCapturedNamespaces();
+ std::vector<std::string> AddedNamespaces = m_CacheStore.GetCapturedNamespacesLocked();
for (const std::string& AddedNamespace : AddedNamespaces)
{
@@ -1165,7 +1176,7 @@ public:
for (auto& NamepaceKV : m_CacheStore.m_Namespaces)
{
ZenCacheNamespace& Namespace = *NamepaceKV.second;
- std::vector<std::string> NamespaceAddedBuckets = Namespace.m_DiskLayer.GetCapturedBuckets();
+ std::vector<std::string> NamespaceAddedBuckets = Namespace.m_DiskLayer.GetCapturedBucketsLocked();
for (const std::string& AddedBucketName : NamespaceAddedBuckets)
{
if (auto It = Namespace.m_DiskLayer.m_Buckets.find(AddedBucketName); It != Namespace.m_DiskLayer.m_Buckets.end())
@@ -1244,6 +1255,12 @@ ZenCacheStore::CreateReferenceCheckers(GcCtx& Ctx)
return Checkers;
}
+std::vector<GcReferenceValidator*>
+ZenCacheStore::CreateReferenceValidators(GcCtx& /*Ctx*/)
+{
+ return {};
+}
+
//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index be8fc0148..b2b574799 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -240,15 +240,15 @@ WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats,
void
WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable)
{
- if (Stats.RemoveExpiredDataStats.CheckedCount == 0)
+ Writer << "CreateReferenceValidators" << ToTimeSpan(Stats.CreateReferenceValidatorsMS);
+ if (Stats.RemoveExpiredDataStats.CheckedCount != 0)
{
- return;
- }
- Writer.BeginObject("RemoveExpired");
- {
- WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable);
+ Writer.BeginObject("RemoveExpired");
+ {
+ WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable);
+ }
+ Writer.EndObject();
}
- Writer.EndObject();
Writer.BeginObject("Compact");
{
@@ -265,15 +265,14 @@ WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, boo
void
WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable)
{
- if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0)
- {
- return;
- }
- Writer.BeginObject("RemoveUnreferenced");
+ if (Stats.RemoveUnreferencedDataStats.CheckedCount != 0)
{
- WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable);
+ Writer.BeginObject("RemoveUnreferenced");
+ {
+ WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable);
+ }
+ Writer.EndObject();
}
- Writer.EndObject();
Writer.BeginObject("Compact");
{
@@ -286,6 +285,21 @@ WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& St
};
void
+WriteReferenceValidatorStats(CbObjectWriter& Writer, const GcReferenceValidatorStats& Stats, bool /*HumanReadable*/)
+{
+ Writer << "Checked" << Stats.CheckedCount;
+ Writer << "Missing" << Stats.MissingCount;
+ if (Stats.MissingCount > 0)
+ {
+ Writer << "MissingChunks" << Stats.MissingChunks;
+ Writer << "MissingFiles" << Stats.MissingFiles;
+ Writer << "MissingMetas" << Stats.MissingMetas;
+ Writer << "MissingAttachments" << Stats.MissingAttachments;
+ }
+ Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS);
+};
+
+void
WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails)
{
if (!IncludeDetails)
@@ -324,8 +338,15 @@ WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable
}
Writer.EndObject();
+ Writer.BeginObject("ReferenceValidator");
+ {
+ WriteReferenceValidatorStats(Writer, Result.ReferenceValidatorStatSum, HumanReadable);
+ }
+ Writer.EndObject();
+
Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS);
Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS);
+ Writer << "CreateReferenceValidators" << ToTimeSpan(Result.CreateReferenceValidatorsMS);
Writer << "PreCacheState" << ToTimeSpan(Result.PreCacheStateMS);
Writer << "LockState" << ToTimeSpan(Result.LockStateMS);
Writer << "UpdateLockedState" << ToTimeSpan(Result.UpdateLockedStateMS);
@@ -333,6 +354,7 @@ WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable
Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS);
Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS);
Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS);
+ Writer << "Validate" << ToTimeSpan(Result.ValidateReferencersMS);
Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS);
Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS);
@@ -362,6 +384,18 @@ WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable
}
Writer.EndArray();
}
+ if (!Result.ReferenceValidatorStats.empty())
+ {
+ Writer.BeginArray("ReferenceValidators");
+ for (const std::pair<std::string, GcReferenceValidatorStats>& It : Result.ReferenceValidatorStats)
+ {
+ Writer.BeginObject();
+ Writer << "Name" << It.first;
+ WriteReferenceValidatorStats(Writer, It.second, HumanReadable);
+ Writer.EndObject();
+ }
+ Writer.EndArray();
+ }
};
void
@@ -387,7 +421,7 @@ void
Sum(GcReferencerStats& Stat)
{
Stat.ElapsedMS = Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS +
- Stat.PreCacheStateMS + Stat.UpdateLockedStateMS;
+ Stat.CreateReferenceValidatorsMS + Stat.PreCacheStateMS + Stat.UpdateLockedStateMS;
}
void
@@ -397,6 +431,7 @@ Add(GcReferencerStats& Sum, const GcReferencerStats& Sub)
Add(Sum.CompactStoreStats, Sub.CompactStoreStats);
Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS;
+ Sum.CreateReferenceValidatorsMS += Sub.CreateReferenceValidatorsMS;
Sum.PreCacheStateMS += Sub.PreCacheStateMS;
Sum.UpdateLockedStateMS += Sub.UpdateLockedStateMS;
@@ -420,6 +455,23 @@ Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub)
Sum.ElapsedMS += Sub.ElapsedMS;
}
+void
+Add(GcReferenceValidatorStats& Sum, const GcReferenceValidatorStats& Sub)
+{
+ Sum.CheckedCount += Sub.CheckedCount;
+ Sum.MissingChunks += Sub.MissingChunks;
+ Sum.MissingFiles += Sub.MissingFiles;
+ Sum.MissingMetas += Sub.MissingMetas;
+ Sum.MissingAttachments += Sub.MissingAttachments;
+ Sum.ElapsedMS += Sub.ElapsedMS;
+}
+
+void
+Sum(GcReferenceValidatorStats& Stat)
+{
+ Stat.MissingCount = Stat.MissingChunks + Stat.MissingFiles + Stat.MissingMetas + Stat.MissingAttachments;
+}
+
GcResult&
Sum(GcResult& Stat, bool Cancelled = false)
{
@@ -435,9 +487,16 @@ Sum(GcResult& Stat, bool Cancelled = false)
Sum(SubStat);
Add(Stat.ReferenceStoreStatSum, SubStat);
}
+ for (std::pair<std::string, GcReferenceValidatorStats>& ReferenceValidator : Stat.ReferenceValidatorStats)
+ {
+ GcReferenceValidatorStats& SubStat = ReferenceValidator.second;
+ Sum(SubStat);
+ Add(Stat.ReferenceValidatorStatSum, SubStat);
+ }
Sum(Stat.ReferencerStatSum);
Sum(Stat.ReferenceStoreStatSum);
+ Sum(Stat.ReferenceValidatorStatSum);
Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats);
Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats);
@@ -630,18 +689,21 @@ GcManager::CollectGarbage(const GcSettings& Settings)
Result.ReferencerStats.resize(m_GcReferencers.size());
+ std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers;
std::unordered_map<std::unique_ptr<GcStoreCompactor>, GcCompactStoreStats*> StoreCompactors;
RwLock StoreCompactorsLock;
+ std::unordered_map<std::unique_ptr<GcReferenceValidator>, size_t> ReferenceValidators;
+ RwLock ReferenceValidatorsLock;
WorkerThreadPool& PreCachePhaseThreadPool =
Settings.SingleThread ? GetSyncWorkerPool() : GetSmallWorkerPool(EWorkloadType::Background);
- ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size());
if (!m_GcReferencers.empty())
{
if (CheckGCCancel())
{
return Sum(Result, true);
}
+ ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size());
ZEN_TRACE_CPU("GcV2::RemoveExpiredData");
Latch WorkLeft(1);
@@ -697,9 +759,11 @@ GcManager::CollectGarbage(const GcSettings& Settings)
return Sum(Result, true);
}
+ ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size());
+ ZEN_TRACE_CPU("GcV2::CreateReferencePruners");
+
Result.ReferenceStoreStats.resize(m_GcReferenceStores.size());
- ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size());
std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners;
if (!m_GcReferenceStores.empty())
{
@@ -771,23 +835,100 @@ GcManager::CollectGarbage(const GcSettings& Settings)
}
ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size());
- std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers;
- if (!m_GcReferencers.empty())
{
ZEN_TRACE_CPU("GcV2::CreateReferenceCheckers");
- ReferenceCheckers.reserve(m_GcReferencers.size());
- Latch WorkLeft(1);
- RwLock ReferenceCheckersLock;
+ if (!m_GcReferencers.empty())
{
- SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
+ ZEN_TRACE_CPU("GcV2::CreateReferenceCheckers");
+
+ ReferenceCheckers.reserve(m_GcReferencers.size());
+ Latch WorkLeft(1);
+ RwLock ReferenceCheckersLock;
+ {
+ SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
+ if (Ctx.Settings.Verbose) {
+ ZEN_INFO("GCV2: Created {} reference checkers using {} referencers in {}",
+ ReferenceCheckers.size(),
+ m_GcReferencers.size(),
+ NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count()));
+ });
+ // Lock all reference owners from changing the reference data and get access to check for referenced data
+ for (size_t Index = 0; Index < m_GcReferencers.size(); Index++)
+ {
+ if (CheckGCCancel())
+ {
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ return Sum(Result, true);
+ }
+
+ GcReferencer* Referencer = m_GcReferencers[Index];
+ std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ PreCachePhaseThreadPool.ScheduleWork(
+ [this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // The Referencer will create a reference checker that guarantees that the references do not change
+ // as long as it lives
+ std::vector<GcReferenceChecker*> Checkers;
+ try
+ {
+ {
+ SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS =
+ std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checkers = Referencer->CreateReferenceCheckers(Ctx);
+ }
+ if (!Checkers.empty())
+ {
+ RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
+ for (auto& Checker : Checkers)
+ {
+ ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index);
+ Checker = nullptr;
+ }
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ while (!Checkers.empty())
+ {
+ delete Checkers.back();
+ Checkers.pop_back();
+ }
+ }
+ });
+ }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ }
+ }
+ }
+
+ if (CheckGCCancel())
+ {
+ return Sum(Result, true);
+ }
+
+ if (!m_GcReferencers.empty() && Settings.EnableValidation)
+ {
+ ZEN_INFO("GCV2: Creating reference validators from {} referencers", m_GcReferencers.size());
+ ZEN_TRACE_CPU("GcV2::CreateReferenceValidators");
+
+ ReferenceValidators.reserve(m_GcReferencers.size());
+ Latch WorkLeft(1);
+ {
+ SCOPED_TIMER(Result.CreateReferenceValidatorsMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
if (Ctx.Settings.Verbose) {
- ZEN_INFO("GCV2: Created {} reference checkers using {} referencers in {}",
- ReferenceCheckers.size(),
+ ZEN_INFO("GCV2: Created {} reference validators using {} referencers in {}",
+ ReferenceValidators.size(),
m_GcReferencers.size(),
- NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count()));
+ NiceTimeSpanMs(Result.CreateReferenceValidatorsMS.count()));
});
- // Lock all reference owners from changing the reference data and get access to check for referenced data
for (size_t Index = 0; Index < m_GcReferencers.size(); Index++)
{
if (CheckGCCancel())
@@ -797,100 +938,106 @@ GcManager::CollectGarbage(const GcSettings& Settings)
return Sum(Result, true);
}
- GcReferencer* Referencer = m_GcReferencers[Index];
- std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
+ GcReferencer* Referencer = m_GcReferencers[Index];
+ std::pair<std::string, GcReferencerStats>* ReferemcerStats = &Result.ReferencerStats[Index];
WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork(
- [this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // The Referencer will create a reference checker that guarantees that the references do not change as
- // long as it lives
- std::vector<GcReferenceChecker*> Checkers;
- try
+ PreCachePhaseThreadPool.ScheduleWork([this,
+ &Ctx,
+ &WorkLeft,
+ Referencer,
+ Index,
+ Result = &Result,
+ ReferemcerStats,
+ &ReferenceValidatorsLock,
+ &ReferenceValidators]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ std::vector<GcReferenceValidator*> Validators;
+ try
+ {
{
- {
- SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS =
- std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Checkers = Referencer->CreateReferenceCheckers(Ctx);
- }
- if (!Checkers.empty())
- {
- RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
- for (auto& Checker : Checkers)
- {
- ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index);
- Checker = nullptr;
- }
- }
+ SCOPED_TIMER(ReferemcerStats->second.CreateReferenceValidatorsMS =
+ std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Validators = Referencer->CreateReferenceValidators(Ctx);
}
- catch (const std::exception& Ex)
+ if (!Validators.empty())
{
- ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
- Referencer->GetGcName(Ctx),
- Ex.what());
- SetCancelGC(true);
- while (!Checkers.empty())
+ RwLock::ExclusiveLockScope __(ReferenceValidatorsLock);
+ for (auto& ReferenceValidator : Validators)
{
- delete Checkers.back();
- Checkers.pop_back();
+ size_t ReferencesStatsIndex = Result->ReferenceValidatorStats.size();
+ Result->ReferenceValidatorStats.push_back({ReferenceValidator->GetGcName(Ctx), {}});
+ ReferenceValidators.insert_or_assign(std::unique_ptr<GcReferenceValidator>(ReferenceValidator),
+ ReferencesStatsIndex);
+ ReferenceValidator = nullptr;
}
}
- });
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ while (!Validators.empty())
+ {
+ delete Validators.back();
+ Validators.pop_back();
+ }
+ }
+ });
}
WorkLeft.CountDown();
WorkLeft.Wait();
}
}
+ if (!ReferenceCheckers.empty())
{
- ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size());
- if (!ReferenceCheckers.empty())
+ if (CheckGCCancel())
{
- if (CheckGCCancel())
- {
- return Sum(Result, true);
- }
- ZEN_TRACE_CPU("GcV2::PreCache");
+ return Sum(Result, true);
+ }
+ ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size());
+ ZEN_TRACE_CPU("GcV2::PreCache");
- Latch WorkLeft(1);
+ Latch WorkLeft(1);
+ {
+ SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
+ if (Ctx.Settings.Verbose) {
+ ZEN_INFO("GCV2: Precached state using {} reference checkers in {}",
+ ReferenceCheckers.size(),
+ NiceTimeSpanMs(Result.PreCacheStateMS.count()));
+ });
+ for (auto& It : ReferenceCheckers)
{
- SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
- if (Ctx.Settings.Verbose) {
- ZEN_INFO("GCV2: Precached state using {} reference checkers in {}",
- ReferenceCheckers.size(),
- NiceTimeSpanMs(Result.PreCacheStateMS.count()));
- });
- for (auto& It : ReferenceCheckers)
+ if (CheckGCCancel())
{
- if (CheckGCCancel())
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ return Sum(Result, true);
+ }
+
+ GcReferenceChecker* Checker = It.first.get();
+ size_t Index = It.second;
+ std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
+ WorkLeft.AddCount(1);
+ PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ try
{
- WorkLeft.CountDown();
- WorkLeft.Wait();
- return Sum(Result, true);
+ SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checker->PreCache(Ctx);
}
-
- GcReferenceChecker* Checker = It.first.get();
- size_t Index = It.second;
- std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
- WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- try
- {
- SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Checker->PreCache(Ctx);
- }
- catch (const std::exception& Ex)
- {
- ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what());
- SetCancelGC(true);
- }
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
+ });
}
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
}
}
@@ -898,8 +1045,7 @@ GcManager::CollectGarbage(const GcSettings& Settings)
Settings.SingleThread ? GetSyncWorkerPool() : GetMediumWorkerPool(EWorkloadType::Background);
std::vector<RwLock::SharedLockScope> LockerScopes;
- SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS);
- ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS)));
+ SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS););
{
if (!ReferenceCheckers.empty())
{
@@ -984,100 +1130,98 @@ GcManager::CollectGarbage(const GcSettings& Settings)
}
}
}
+
+ if (CheckGCCancel())
{
- ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size());
- if (CheckGCCancel())
- {
- return Sum(Result, true);
- }
- {
- const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> {
- std::span<IoHash> UnusedCids(References);
- ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero);
- for (const auto& It : ReferenceCheckers)
+ return Sum(Result, true);
+ }
+ ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size());
+ {
+ ZEN_TRACE_CPU("GcV2::RemoveUnreferencedData");
+
+ const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> {
+ std::span<IoHash> UnusedCids(References);
+ ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero);
+ for (const auto& It : ReferenceCheckers)
+ {
+ GcReferenceChecker* ReferenceChecker = It.first.get();
+ UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids);
+ if (UnusedCids.empty())
{
- GcReferenceChecker* ReferenceChecker = It.first.get();
- UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids);
- if (UnusedCids.empty())
- {
- return {};
- }
+ return {};
}
- return UnusedCids;
- };
-
- // checking all Cids agains references in cache
- // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight
- // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors
+ }
+ return UnusedCids;
+ };
- ZEN_TRACE_CPU("GcV2::RemoveUnreferencedData");
+ // Checking all Cids against references in cache
+ // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight
+ // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors
- Latch WorkLeft(1);
+ Latch WorkLeft(1);
+ {
+ SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
+ if (Ctx.Settings.Verbose) {
+ ZEN_INFO("GCV2: Removed unused data using {} pruners in {}",
+ ReferencePruners.size(),
+ NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count()));
+ });
+ for (auto& It : ReferencePruners)
{
- SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
- if (Ctx.Settings.Verbose) {
- ZEN_INFO("GCV2: Removed unused data using {} pruners in {}",
- ReferencePruners.size(),
- NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count()));
- });
- for (auto& It : ReferencePruners)
+ if (CheckGCCancel())
{
- if (CheckGCCancel())
- {
- WorkLeft.CountDown();
- WorkLeft.Wait();
- return Sum(Result, true);
- }
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
+ return Sum(Result, true);
+ }
- GcReferencePruner* Pruner = It.second.get();
- size_t Index = It.first;
- GcReferenceStoreStats* Stats = &Result.ReferenceStoreStats[Index].second;
- WorkLeft.AddCount(1);
- LockedPhaseThreadPool.ScheduleWork(
- [this, &Ctx, Pruner, Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() {
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are
- // referenced or not.
- try
+ GcReferencePruner* Pruner = It.second.get();
+ size_t Index = It.first;
+ GcReferenceStoreStats* Stats = &Result.ReferenceStoreStats[Index].second;
+ WorkLeft.AddCount(1);
+ LockedPhaseThreadPool.ScheduleWork(
+ [this, &Ctx, Pruner, Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() {
+ auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
+ // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are
+ // referenced or not.
+ try
+ {
+ std::unique_ptr<GcStoreCompactor> StoreCompactor;
{
- std::unique_ptr<GcStoreCompactor> StoreCompactor;
- {
- SCOPED_TIMER(Stats->RemoveUnreferencedDataStats.ElapsedMS =
- std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- StoreCompactor = std::unique_ptr<GcStoreCompactor>(
- Pruner->RemoveUnreferencedData(Ctx,
- Stats->RemoveUnreferencedDataStats,
- GetUnusedReferences));
- }
- if (StoreCompactor)
- {
- RwLock::ExclusiveLockScope __(StoreCompactorsLock);
- StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats);
- }
+ SCOPED_TIMER(Stats->RemoveUnreferencedDataStats.ElapsedMS =
+ std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ StoreCompactor = std::unique_ptr<GcStoreCompactor>(
+ Pruner->RemoveUnreferencedData(Ctx,
+ Stats->RemoveUnreferencedDataStats,
+ GetUnusedReferences));
}
- catch (const std::exception& Ex)
+ if (StoreCompactor)
{
- ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'",
- Pruner->GetGcName(Ctx),
- Ex.what());
- SetCancelGC(true);
+ RwLock::ExclusiveLockScope __(StoreCompactorsLock);
+ StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats);
}
- });
- }
- WorkLeft.CountDown();
- WorkLeft.Wait();
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'",
+ Pruner->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ }
+ });
}
+ WorkLeft.CountDown();
+ WorkLeft.Wait();
}
- // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed
- LockerScopes.clear();
- ReferenceCheckers.clear();
- ReferencePruners.clear();
}
+ // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed
+ LockerScopes.clear();
+ ReferenceCheckers.clear();
+ ReferencePruners.clear();
}
}
- ZEN_INFO("GCV2: Compacting using {} store compactors", StoreCompactors.size());
if (!StoreCompactors.empty())
{
if (CheckGCCancel())
@@ -1085,6 +1229,7 @@ GcManager::CollectGarbage(const GcSettings& Settings)
return Sum(Result, true);
}
+ ZEN_INFO("GCV2: Compacting using {} store compactors", StoreCompactors.size());
ZEN_TRACE_CPU("GcV2::CompactStores");
auto ClaimDiskReserve = [&]() -> uint64_t {
@@ -1129,6 +1274,47 @@ GcManager::CollectGarbage(const GcSettings& Settings)
}
StoreCompactors.clear();
}
+
+ if (!ReferenceValidators.empty())
+ {
+ if (CheckGCCancel())
+ {
+ return Sum(Result, true);
+ }
+
+ ZEN_INFO("GCV2: Validating using {} reference validators", ReferenceValidators.size());
+ ZEN_TRACE_CPU("GcV2::ValidateReferences");
+
+ // Run every reference validator created earlier; each one records its result in its own stats entry
+ // Validators are invoked one at a time on this thread, they are not scheduled on a worker pool
+ {
+ SCOPED_TIMER(Result.ValidateReferencersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) {
+ ZEN_INFO("GCV2: Validated using {} reference validators in {}", ReferenceValidators.size(), NiceTimeSpanMs(Result.ValidateReferencersMS.count()));
+ });
+ for (auto& It : ReferenceValidators)
+ {
+ if (CheckGCCancel())
+ {
+ return Sum(Result, true);
+ }
+
+ GcReferenceValidator* ReferenceValidator = It.first.get();
+ GcReferenceValidatorStats& Stats = Result.ReferenceValidatorStats[It.second].second;
+ try
+ {
+ // Time the Validate call; Stats is the per-validator entry indexed by the value stored in the map
+ SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ ReferenceValidator->Validate(Ctx, Stats);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
+ }
+ }
+ ReferenceValidators.clear();
+ }
}
return Sum(Result);
@@ -1521,6 +1707,7 @@ GcScheduler::AppendGCLog(std::string_view Id, GcClock::TimePoint StartTime, cons
Writer << "AttachmentRangeMax"sv << Settings.AttachmentRangeMin;
Writer << "ForceStoreCacheAttachmentMetaData"sv << Settings.StoreCacheAttachmentMetaData;
Writer << "ForceStoreProjectAttachmentMetaData"sv << Settings.StoreProjectAttachmentMetaData;
+ Writer << "EnableValidation"sv << Settings.EnableValidation;
}
Writer.EndObject();
@@ -1719,6 +1906,7 @@ GcScheduler::SchedulerThread()
IoHash AttachmentRangeMax = IoHash::Max;
bool StoreCacheAttachmentMetaData = m_Config.StoreCacheAttachmentMetaData;
bool StoreProjectAttachmentMetaData = m_Config.StoreProjectAttachmentMetaData;
+ bool EnableValidation = m_Config.EnableValidation;
uint8_t NextAttachmentPassIndex =
ComputeAttachmentRange(m_AttachmentPassIndex, m_Config.AttachmentPassCount, AttachmentRangeMin, AttachmentRangeMax);
@@ -1774,6 +1962,10 @@ GcScheduler::SchedulerThread()
{
StoreProjectAttachmentMetaData = TriggerParams.StoreProjectAttachmentMetaData.value();
}
+ if (TriggerParams.EnableValidation.has_value())
+ {
+ EnableValidation = TriggerParams.EnableValidation.value();
+ }
DoGc = true;
}
@@ -2002,6 +2194,7 @@ GcScheduler::SchedulerThread()
AttachmentRangeMax,
StoreCacheAttachmentMetaData,
StoreProjectAttachmentMetaData,
+ EnableValidation,
SilenceErrors);
if (!GcSuccess)
{
@@ -2105,6 +2298,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
const IoHash& AttachmentRangeMax,
bool StoreCacheAttachmentMetaData,
bool StoreProjectAttachmentMetaData,
+ bool EnableValidation,
bool SilenceErrors)
{
ZEN_TRACE_CPU("GcScheduler::CollectGarbage");
@@ -2184,25 +2378,26 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
.AttachmentRangeMin = AttachmentRangeMin,
.AttachmentRangeMax = AttachmentRangeMax,
.StoreCacheAttachmentMetaData = StoreCacheAttachmentMetaData,
- .StoreProjectAttachmentMetaData = StoreProjectAttachmentMetaData};
+ .StoreProjectAttachmentMetaData = StoreProjectAttachmentMetaData,
+ .EnableValidation = EnableValidation};
auto AppendSettings = [](StringBuilderBase& SB, const GcSettings& Settings) {
SB.Append(
- fmt::format(" GC small objects: {}\n", Settings.CollectSmallObjects ? "yes"sv : "no"sv));
- SB.Append(fmt::format(" GC Cid store: {}\n", Settings.SkipCidDelete ? "no"sv : "yes"sv));
+ fmt::format(" GC small objects: {}\n", Settings.CollectSmallObjects ? "yes"sv : "no"sv));
+ SB.Append(fmt::format(" GC Cid store: {}\n", Settings.SkipCidDelete ? "no"sv : "yes"sv));
if (!Settings.SkipCidDelete)
{
if (Settings.AttachmentRangeMin != IoHash::Zero || Settings.AttachmentRangeMax != IoHash::Max)
{
- SB.Append(fmt::format(" Attachment range: {}-{}\n",
+ SB.Append(fmt::format(" Attachment range: {}-{}\n",
Settings.AttachmentRangeMin,
Settings.AttachmentRangeMax));
}
- SB.Append(fmt::format(" Cache attachment meta: {}\n", Settings.StoreCacheAttachmentMetaData));
- SB.Append(fmt::format(" Project attachment meta: {}\n", Settings.StoreProjectAttachmentMetaData));
+ SB.Append(fmt::format(" Cache attachment meta: {}\n", Settings.StoreCacheAttachmentMetaData));
+ SB.Append(fmt::format(" Project attachment meta: {}\n", Settings.StoreProjectAttachmentMetaData));
+ SB.Append(fmt::format(" Enable validation: {}\n", Settings.EnableValidation));
}
- SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime));
- SB.Append(fmt::format(" Project store cutoff time: {}", Settings.ProjectStoreExpireTime));
+ SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime));
};
{
@@ -2224,7 +2419,36 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
{
SB.Append(fmt::format("COMPLETED '{}' in {}\n", GcId, NiceTimeSpanMs(Result.ElapsedMS.count())));
AppendSettings(SB, Settings);
- SB.Append("\n\n");
+ SB.Append("\n");
+ SB.Append(
+ fmt::format(" Remove Expired Data: {}\n", NiceTimeSpanMs(Result.RemoveExpiredDataMS.count())));
+ if (!Settings.SkipCidDelete)
+ {
+ SB.Append(fmt::format(" Create Reference Pruners: {}\n",
+ NiceTimeSpanMs(Result.CreateReferencePrunersMS.count())));
+ SB.Append(fmt::format(" Create Reference Checkers: {}\n",
+ NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count())));
+ if (EnableValidation)
+ {
+ SB.Append(fmt::format(" Create Reference Validators: {}\n",
+ NiceTimeSpanMs(Result.CreateReferenceValidatorsMS.count())));
+ }
+ SB.Append(
+ fmt::format(" Precache State: {}\n", NiceTimeSpanMs(Result.PreCacheStateMS.count())));
+ SB.Append(
+ fmt::format(" Writes blocked: {}\n", NiceTimeSpanMs(Result.WriteBlockMS.count())));
+ SB.Append(
+ fmt::format(" Lock State: {}\n", NiceTimeSpanMs(Result.LockStateMS.count())));
+ SB.Append(fmt::format(" Update Lock State: {}\n",
+ NiceTimeSpanMs(Result.UpdateLockedStateMS.count())));
+ SB.Append(fmt::format(" Remove Unreferenced: {}\n",
+ NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count())));
+ SB.Append(
+ fmt::format(" Compacting Stores: {}\n", NiceTimeSpanMs(Result.CompactStoresMS.count())));
+ SB.Append(fmt::format(" Reference Validation: {}\n",
+ NiceTimeSpanMs(Result.ValidateReferencersMS.count())));
+ SB.Append("\n");
+ }
SB.Append(fmt::format(" Found {} expired items out of {}, deleted {}\n",
Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount,
Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount,
@@ -2235,6 +2459,12 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount,
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount,
Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount));
+ if (EnableValidation)
+ {
+ SB.Append(fmt::format(" Validated {} items and found {} missing references\n",
+ Result.ReferenceValidatorStatSum.CheckedCount,
+ Result.ReferenceValidatorStatSum.MissingCount));
+ }
}
SB.Append(fmt::format(" Freed {} on disk and {} of memory\n",
NiceBytes(Result.CompactStoresStatSum.RemovedDisk),
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index 4b7cf6101..711b96c8f 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -199,7 +199,7 @@ public:
void EnableUpdateCapture();
void DisableUpdateCapture();
- std::vector<std::string> GetCapturedBuckets();
+ std::vector<std::string> GetCapturedBucketsLocked();
#if ZEN_WITH_TESTS
void SetAccessTime(std::string_view Bucket, const IoHash& HashKey, GcClock::TimePoint Time);
@@ -367,9 +367,10 @@ public:
std::atomic_uint64_t m_StandaloneSize{};
std::atomic_uint64_t m_MemCachedSize{};
- virtual std::string GetGcName(GcCtx& Ctx) override;
- virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
- virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override;
+ virtual std::string GetGcName(GcCtx& Ctx) override;
+ virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
+ virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override;
+ virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override;
void BuildPath(PathBuilderBase& Path, const IoHash& HashKey) const;
void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value, std::span<IoHash> References);
@@ -491,7 +492,6 @@ private:
mutable RwLock m_Lock;
std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets;
std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets;
- mutable RwLock m_UpdateCaptureLock;
uint32_t m_UpdateCaptureRefCounter = 0;
std::unique_ptr<std::vector<std::string>> m_CapturedBuckets;
diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h
index dcdca71c6..82fec9b0e 100644
--- a/src/zenstore/include/zenstore/cache/structuredcachestore.h
+++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h
@@ -279,13 +279,14 @@ public:
virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) override;
- virtual std::string GetGcName(GcCtx& Ctx) override;
- virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
- virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override;
+ virtual std::string GetGcName(GcCtx& Ctx) override;
+ virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
+ virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override;
+ virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override;
void EnableUpdateCapture();
void DisableUpdateCapture();
- std::vector<std::string> GetCapturedNamespaces();
+ std::vector<std::string> GetCapturedNamespacesLocked();
bool GetContentStats(std::string_view Namespace, std::string_view BucketName, CacheContentStats& OutContentStats) const;
@@ -301,7 +302,6 @@ private:
mutable RwLock m_NamespacesLock;
NamespaceMap m_Namespaces;
std::vector<std::unique_ptr<ZenCacheNamespace>> m_DroppedNamespaces;
- mutable RwLock m_UpdateCaptureLock;
uint32_t m_UpdateCaptureRefCounter = 0;
std::unique_ptr<std::vector<std::string>> m_CapturedNamespaces;
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index e191a0930..3daae0a93 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -68,6 +68,7 @@ struct GcSettings
IoHash AttachmentRangeMax = IoHash::Max;
bool StoreCacheAttachmentMetaData = false;
bool StoreProjectAttachmentMetaData = false;
+ bool EnableValidation = true;
};
struct GcCompactStoreStats
@@ -76,6 +77,18 @@ struct GcCompactStoreStats
std::chrono::milliseconds ElapsedMS = {};
};
+struct GcReferenceValidatorStats
+{
+ std::uint64_t CheckedCount = 0;
+ std::uint64_t MissingChunks = 0;
+ std::uint64_t MissingFiles = 0;
+ std::uint64_t MissingMetas = 0;
+ std::uint64_t MissingAttachments = 0;
+
+ std::uint64_t MissingCount = 0;
+ std::chrono::milliseconds ElapsedMS = {};
+};
+
struct GcStats
{
std::uint64_t CheckedCount = 0;
@@ -90,10 +103,11 @@ struct GcReferencerStats
GcStats RemoveExpiredDataStats;
GcCompactStoreStats CompactStoreStats;
- std::chrono::milliseconds CreateReferenceCheckersMS = {};
- std::chrono::milliseconds PreCacheStateMS = {};
- std::chrono::milliseconds UpdateLockedStateMS = {};
- std::chrono::milliseconds ElapsedMS = {};
+ std::chrono::milliseconds CreateReferenceCheckersMS = {};
+ std::chrono::milliseconds CreateReferenceValidatorsMS = {};
+ std::chrono::milliseconds PreCacheStateMS = {};
+ std::chrono::milliseconds UpdateLockedStateMS = {};
+ std::chrono::milliseconds ElapsedMS = {};
};
struct GcReferenceStoreStats
@@ -107,23 +121,27 @@ struct GcReferenceStoreStats
struct GcResult
{
- std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats;
- std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats;
+ std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats;
+ std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats;
+ std::vector<std::pair<std::string, GcReferenceValidatorStats>> ReferenceValidatorStats;
- GcReferencerStats ReferencerStatSum;
- GcReferenceStoreStats ReferenceStoreStatSum;
- GcCompactStoreStats CompactStoresStatSum;
+ GcReferencerStats ReferencerStatSum;
+ GcReferenceStoreStats ReferenceStoreStatSum;
+ GcCompactStoreStats CompactStoresStatSum;
+ GcReferenceValidatorStats ReferenceValidatorStatSum;
// Wall times, not sum of each
- std::chrono::milliseconds RemoveExpiredDataMS = {};
- std::chrono::milliseconds CreateReferenceCheckersMS = {};
- std::chrono::milliseconds PreCacheStateMS = {};
- std::chrono::milliseconds LockStateMS = {};
- std::chrono::milliseconds UpdateLockedStateMS = {};
+ std::chrono::milliseconds RemoveExpiredDataMS = {};
+ std::chrono::milliseconds CreateReferenceCheckersMS = {};
+ std::chrono::milliseconds CreateReferenceValidatorsMS = {};
+ std::chrono::milliseconds PreCacheStateMS = {};
+ std::chrono::milliseconds LockStateMS = {};
+ std::chrono::milliseconds UpdateLockedStateMS = {};
std::chrono::milliseconds CreateReferencePrunersMS = {};
std::chrono::milliseconds RemoveUnreferencedDataMS = {};
std::chrono::milliseconds CompactStoresMS = {};
+ std::chrono::milliseconds ValidateReferencersMS = {};
std::chrono::milliseconds WriteBlockMS = {};
@@ -167,6 +185,17 @@ public:
virtual std::string GetGcName(GcCtx& Ctx) = 0;
};
+class GcReferenceValidator
+{
+public:
+ virtual ~GcReferenceValidator() = default;
+
+ virtual std::string GetGcName(GcCtx&) = 0;
+
+ // Validate that GC did not remove anything needed by this reference checker
+ virtual void Validate(GcCtx& Ctx, GcReferenceValidatorStats& Stats) = 0;
+};
+
/**
* @brief An interface to check if a set of Cids are referenced
*
@@ -240,6 +269,10 @@ public:
// Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of
// returned instances
virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) = 0;
+
+ // Create 0-n GcReferenceValidator for this GcReferencer. Caller will manage lifetime of
+ // returned instances
+ virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) = 0;
};
/**
@@ -392,6 +425,7 @@ struct GcSchedulerConfig
uint16_t AttachmentPassCount = 1;
bool StoreCacheAttachmentMetaData = false;
bool StoreProjectAttachmentMetaData = false;
+ bool EnableValidation = true;
};
struct GcSchedulerState
@@ -471,8 +505,9 @@ public:
std::optional<bool> SingleThreaded;
std::optional<IoHash> AttachmentRangeMin;
std::optional<IoHash> AttachmentRangeMax;
- std::optional<bool> StoreCacheAttachmentMetaData = false;
- std::optional<bool> StoreProjectAttachmentMetaData = false;
+ std::optional<bool> StoreCacheAttachmentMetaData;
+ std::optional<bool> StoreProjectAttachmentMetaData;
+ std::optional<bool> EnableValidation;
};
bool TriggerGc(const TriggerGcParams& Params);
@@ -504,6 +539,7 @@ private:
const IoHash& AttachmentRangeMax,
bool StoreCacheAttachmentMetaData,
bool StoreProjectAttachmentMetaData,
+ bool EnableValidation,
bool SilenceErrors);
void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice);
LoggerRef Log() { return m_Log; }