aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-09-23 19:19:40 +0200
committerGitHub Enterprise <[email protected]>2024-09-23 19:19:40 +0200
commitbc9e590727211d803cce7be84c1cbc026179b841 (patch)
tree96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src/zenstore
parentmade fmt formatter format function const (#162) (diff)
downloadzen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz
zen-bc9e590727211d803cce7be84c1cbc026179b841.zip
gc unused refactor (#165)
* optimize IoHash and OId comparisons * refactor filtering of unused references * add attachment filtering to gc
Diffstat (limited to 'src/zenstore')
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp54
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp24
-rw-r--r--src/zenstore/compactcas.cpp14
-rw-r--r--src/zenstore/filecas.cpp14
-rw-r--r--src/zenstore/gc.cpp258
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h24
-rw-r--r--src/zenstore/include/zenstore/gc.h21
7 files changed, 324 insertions, 85 deletions
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index 63f6d708a..417b63fb4 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -1262,7 +1262,9 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept
}
if (m_TrackedReferences && HashKeyAndReferences.size() > 1)
{
- m_TrackedReferences->insert(HashKeyAndReferences.begin() + 1, HashKeyAndReferences.end());
+ m_TrackedReferences->insert(m_TrackedReferences->end(),
+ HashKeyAndReferences.begin() + 1,
+ HashKeyAndReferences.end());
}
if (auto It = m_Index.find(HashKey); It != m_Index.end())
{
@@ -2963,7 +2965,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c
}
if (m_TrackedReferences)
{
- m_TrackedReferences->insert(References.begin(), References.end());
+ m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end());
}
PayloadIndex EntryIndex = {};
@@ -3130,7 +3132,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey,
}
if (m_TrackedReferences)
{
- m_TrackedReferences->insert(References.begin(), References.end());
+ m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end());
}
if (auto It = m_Index.find(HashKey); It != m_Index.end())
{
@@ -3782,17 +3784,19 @@ public:
}
ZEN_INFO("GCV2: cachebucket [PRECACHE] '{}': found {} references in {}",
m_CacheBucket.m_BucketDir,
- m_References.size(),
+ m_PrecachedReferences.size(),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<HashSet>(); });
+ m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<std::vector<IoHash>>(); });
- bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_References);
+ bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_PrecachedReferences);
if (!Continue)
{
m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences.reset(); });
+ return;
}
+ FilterReferences(Ctx, m_PrecachedReferences);
}
virtual void UpdateLockedState(GcCtx& Ctx) override
@@ -3809,32 +3813,32 @@ public:
}
ZEN_INFO("GCV2: cachebucket [LOCKSTATE] '{}': found {} references in {}",
m_CacheBucket.m_BucketDir,
- m_References.size(),
+ m_PrecachedReferences.size() + m_AddedReferences.size(),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
if (Ctx.IsCancelledFlag.load())
{
- m_References = {};
+ m_PrecachedReferences = {};
m_CacheBucket.m_TrackedReferences.reset();
return;
}
ZEN_ASSERT(m_CacheBucket.m_TrackedReferences);
- HashSet& AddedReferences(*m_CacheBucket.m_TrackedReferences);
- m_References.reserve(m_References.size() + AddedReferences.size());
- m_References.insert(m_References.end(), AddedReferences.begin(), AddedReferences.end());
- AddedReferences = {};
+ m_AddedReferences = std::move(*m_CacheBucket.m_TrackedReferences);
+ FilterReferences(Ctx, m_AddedReferences);
}
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
{
- ZEN_TRACE_CPU("Z$::Bucket::RemoveUsedReferencesFromSet");
+ ZEN_TRACE_CPU("Z$::Bucket::GetUnusedReferences");
auto Log = [&Ctx]() { return Ctx.Logger; };
- size_t InitialCount = IoCids.size();
+ const size_t InitialCount = IoCids.size();
+ size_t UsedCount = InitialCount;
+
Stopwatch Timer;
const auto _ = MakeGuard([&] {
if (!Ctx.Settings.Verbose)
@@ -3843,24 +3847,20 @@ public:
}
ZEN_INFO("GCV2: cachebucket [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}",
m_CacheBucket.m_BucketDir,
- InitialCount - IoCids.size(),
+ UsedCount,
InitialCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- for (const IoHash& ReferenceHash : m_References)
- {
- if (IoCids.erase(ReferenceHash) == 1)
- {
- if (IoCids.empty())
- {
- return;
- }
- }
- }
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_PrecachedReferences, IoCids);
+ UnusedReferences = KeepUnusedReferences(m_AddedReferences, UnusedReferences);
+ UsedCount = IoCids.size() - UnusedReferences.size();
+ return UnusedReferences;
}
+
CacheBucket& m_CacheBucket;
- std::vector<IoHash> m_References;
+ std::vector<IoHash> m_PrecachedReferences;
+ std::vector<IoHash> m_AddedReferences;
};
std::vector<GcReferenceChecker*>
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index 7794d025f..9f1bcb41a 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -1189,15 +1189,18 @@ public:
break;
}
}
+ FilterReferences(Ctx, m_References);
}
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
{
- ZEN_TRACE_CPU("Z$::RemoveUsedReferencesFromSet");
+ ZEN_TRACE_CPU("Z$::GetUnusedReferences");
auto Log = [&Ctx]() { return Ctx.Logger; };
- size_t InitialCount = IoCids.size();
+ const size_t InitialCount = IoCids.size();
+ size_t UsedCount = InitialCount;
+
Stopwatch Timer;
const auto _ = MakeGuard([&] {
if (!Ctx.Settings.Verbose)
@@ -1206,21 +1209,14 @@ public:
}
ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}",
"projectstore",
- InitialCount - IoCids.size(),
+ UsedCount,
InitialCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- for (const IoHash& ReferenceHash : m_References)
- {
- if (IoCids.erase(ReferenceHash) == 1)
- {
- if (IoCids.empty())
- {
- return;
- }
- }
- }
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids);
+ UsedCount = IoCids.size() - UnusedReferences.size();
+ return UnusedReferences;
}
private:
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 15f80d4cf..e0a7900f1 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -867,9 +867,9 @@ public:
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
- Stats.CheckedCount = m_Cids.size();
- Stats.FoundCount = UnusedCids.size();
+ std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+ Stats.CheckedCount = m_Cids.size();
+ Stats.FoundCount = UnusedCids.size();
if (UnusedCids.empty())
{
@@ -967,7 +967,11 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&)
CidsToCheck.push_back(It.first);
}
}
- return new CasContainerReferencePruner(*this, std::move(CidsToCheck));
+ if (FilterReferences(Ctx, CidsToCheck))
+ {
+ return new CasContainerReferencePruner(*this, std::move(CidsToCheck));
+ }
+ return nullptr;
}
void
@@ -2018,7 +2022,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType)
HashKeySet Deleted;
GcStats Stats;
GcStoreCompactor* Compactor =
- Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::vector<IoHash> {
+ Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> {
std::vector<IoHash> Unreferenced;
HashKeySet Retain;
Retain.AddHashesToSet(KeepHashes);
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 733140e50..7bd17ee88 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1653,9 +1653,9 @@ public:
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
- Stats.CheckedCount = m_Cids.size();
- Stats.FoundCount = UnusedCids.size();
+ std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+ Stats.CheckedCount = m_Cids.size();
+ Stats.FoundCount = UnusedCids.size();
if (UnusedCids.empty())
{
// Nothing to collect
@@ -1699,7 +1699,7 @@ public:
}
}
- return new FileCasStoreCompactor(m_FileCasStrategy, std::move(UnusedCids));
+ return new FileCasStoreCompactor(m_FileCasStrategy, std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()));
}
private:
@@ -1745,7 +1745,11 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&)
CidsToCheck.push_back(It.first);
}
}
- return new FileCasReferencePruner(*this, std::move(CidsToCheck));
+ if (FilterReferences(Ctx, CidsToCheck))
+ {
+ return new FileCasReferencePruner(*this, std::move(CidsToCheck));
+ }
+ return nullptr;
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 28bdd2f42..904619222 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -571,6 +571,95 @@ Sum(GcResult& Stat, bool Cancelled = false)
return Stat;
}
+// Prepares a list of references for use by the garbage collector.
+//
+// If the GC settings narrow the attachment range (AttachmentRangeMin/AttachmentRangeMax differ from
+// IoHash::Zero/IoHash::Max), every hash that falls outside the inclusive range
+// [AttachmentRangeMin, AttachmentRangeMax] is removed and the amount removed is logged.
+// The surviving hashes are then sorted and de-duplicated in place.
+//
+// Returns true when InOutReferences still contains at least one hash, false when it was empty
+// on entry or became empty through range filtering.
+bool
+FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences)
+{
+    // Range filtering only applies to a non-empty list and only when the range is actually restricted
+    if (!InOutReferences.empty() &&
+        (Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero))
+    {
+        const size_t CountBefore    = InOutReferences.size();
+        const auto   IsOutsideRange = [&Ctx](const IoHash& Key) {
+            return ((Ctx.Settings.AttachmentRangeMax < Key) || (Key < Ctx.Settings.AttachmentRangeMin));
+        };
+        std::erase_if(InOutReferences, IsOutsideRange);
+        const size_t SkippedCount = CountBefore - InOutReferences.size();
+        ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}",
+                 (100 * SkippedCount) / CountBefore,
+                 SkippedCount,
+                 CountBefore,
+                 Ctx.Settings.AttachmentRangeMin,
+                 Ctx.Settings.AttachmentRangeMax);
+    }
+
+    // Covers both "empty on entry" and "everything was filtered out"
+    if (InOutReferences.empty())
+    {
+        return false;
+    }
+
+    // Sorted + unique so the list can be fed to merge-style lookups
+    std::sort(InOutReferences.begin(), InOutReferences.end());
+    InOutReferences.erase(std::unique(InOutReferences.begin(), InOutReferences.end()), InOutReferences.end());
+    return true;
+}
+
+// Removes from SortedReferences every hash that also occurs in SortedUsedReferences and returns the
+// hashes that are left (the "unused" ones).
+//
+// Both inputs are consumed in a single merge-style pass, so both are expected to be sorted ascending
+// according to IoHash operator<. Each used hash cancels at most one equal entry in SortedReferences,
+// so the inputs are expected to be free of duplicates as well.
+//
+// The storage behind SortedReferences is modified in place: surviving hashes are compacted to the
+// front in their original order. Elements past the end of the returned span are not cleared.
+//
+// Returns the leading sub-span of SortedReferences holding the unused hashes. If SortedUsedReferences
+// is empty SortedReferences is returned untouched; if SortedReferences is empty an empty span is returned.
+std::span<IoHash>
+KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences)
+{
+    if (SortedUsedReferences.empty())
+    {
+        // Nothing is in use, every candidate stays
+        return SortedReferences;
+    }
+    if (SortedReferences.empty())
+    {
+        return {};
+    }
+
+    const IoHash* ReferencesRead  = &SortedReferences[0];
+    IoHash*       ReferencesWrite = &SortedReferences[0];
+    const IoHash* ReferencesEnd   = ReferencesRead + SortedReferences.size();
+
+    const IoHash* UsedReferencesRead    = &SortedUsedReferences[0];
+    const IoHash* UsedReferencesReadEnd = UsedReferencesRead + SortedUsedReferences.size();
+
+    while (ReferencesRead != ReferencesEnd && UsedReferencesRead != UsedReferencesReadEnd)
+    {
+        const IoHash& Reference     = *ReferencesRead;
+        const IoHash& UsedReference = *UsedReferencesRead;
+        if (Reference == UsedReference)
+        {
+            // In use - drop it from the output and advance both cursors
+            ReferencesRead++;
+            UsedReferencesRead++;
+        }
+        else if (Reference < UsedReference)
+        {
+            // Not in use - keep it, shifting it down over the gap left by dropped entries (if any)
+            if (ReferencesRead > ReferencesWrite)
+            {
+                *ReferencesWrite = Reference;
+            }
+            ReferencesWrite++;
+            ReferencesRead++;
+        }
+        else
+        {
+            // Used hash that is not among the candidates - nothing to drop
+            UsedReferencesRead++;
+        }
+    }
+
+    // Whatever has not been read yet is larger than every used hash and therefore unused
+    size_t Remaining = std::distance(ReferencesRead, ReferencesEnd);
+    if (Remaining > 0)
+    {
+        if (ReferencesRead != ReferencesWrite)
+        {
+            // Source and destination are ranges of the same array and overlap whenever more entries
+            // remain than were dropped. memcpy is undefined for overlapping ranges, memmove is not.
+            memmove(ReferencesWrite, ReferencesRead, sizeof(IoHash::Hash) * Remaining);
+        }
+        ReferencesWrite += Remaining;
+    }
+    return SortedReferences.subspan(0, (size_t)std::distance(&SortedReferences[0], ReferencesWrite));
+}
+
void
GcManager::AddGcReferencer(GcReferencer& Referencer)
{
@@ -987,18 +1076,19 @@ GcManager::CollectGarbage(const GcSettings& Settings)
return Sum(Result, true);
}
{
- const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
- HashSet UnusedCids(References.begin(), References.end());
+ const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> {
+ std::span<IoHash> UnusedCids(References);
+ ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero);
for (const auto& It : ReferenceCheckers)
{
GcReferenceChecker* ReferenceChecker = It.first.get();
- ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
+ UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids);
if (UnusedCids.empty())
{
return {};
}
}
- return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
+ return UnusedCids;
};
// checking all Cids agains references in cache
@@ -1768,6 +1858,8 @@ GcScheduler::SchedulerThread()
uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent;
bool Verbose = m_Config.Verbose;
bool SingleThreaded = m_Config.SingleThreaded;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
bool DiskSpaceGCTriggered = false;
bool TimeBasedGCTriggered = false;
@@ -1804,9 +1896,11 @@ GcScheduler::SchedulerThread()
UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion);
CompactBlockUsageThresholdPercent =
TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent);
- Verbose = TriggerParams.Verbose.value_or(Verbose);
- SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
- DoGc = true;
+ Verbose = TriggerParams.Verbose.value_or(Verbose);
+ SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
+ AttachmentRangeMin = TriggerParams.AttachmentRangeMin;
+ AttachmentRangeMax = TriggerParams.AttachmentRangeMax;
+ DoGc = true;
}
if (m_TriggerScrubParams)
@@ -2025,6 +2119,8 @@ GcScheduler::SchedulerThread()
CompactBlockUsageThresholdPercent,
Verbose,
SingleThreaded,
+ AttachmentRangeMin,
+ AttachmentRangeMax,
SilenceErrors);
if (!GcSuccess)
{
@@ -2124,6 +2220,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
uint32_t CompactBlockUsageThresholdPercent,
bool Verbose,
bool SingleThreaded,
+ const IoHash& AttachmentRangeMin,
+ const IoHash& AttachmentRangeMax,
bool SilenceErrors)
{
ZEN_TRACE_CPU("GcScheduler::CollectGarbage");
@@ -2193,15 +2291,18 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
break;
case GcVersion::kV2:
{
- const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
- .ProjectStoreExpireTime = ProjectStoreExpireTime,
- .CollectSmallObjects = CollectSmallObjects,
- .IsDeleteMode = Delete,
- .SkipCidDelete = SkipCid,
- .Verbose = Verbose,
- .SingleThread = SingleThreaded,
- .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
- .DiskReservePath = m_Config.RootDirectory / "reserve.gc"};
+ const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
+ .ProjectStoreExpireTime = ProjectStoreExpireTime,
+ .CollectSmallObjects = CollectSmallObjects,
+ .IsDeleteMode = Delete,
+ .SkipCidDelete = SkipCid,
+ .Verbose = Verbose,
+ .SingleThread = SingleThreaded,
+ .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
+ .DiskReservePath = m_Config.RootDirectory / "reserve.gc",
+ .AttachmentRangeMin = AttachmentRangeMin,
+ .AttachmentRangeMax = AttachmentRangeMax};
+
GcClock::TimePoint GcStartTime = GcClock::Now();
GcResult Result = m_GcManager.CollectGarbage(Settings);
@@ -2815,6 +2916,131 @@ TEST_CASE("scrub.basic")
CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash()));
}
+// Exercises KeepUnusedReferences with five fixed hashes (listed in ascending hex order) and
+// different combinations of "used" hashes, checking both the number of hashes returned and their order.
+TEST_CASE("gc.keepunusedreferences")
+{
+ const IoHash Hashes[] = {IoHash::FromHexString("177030568fdd461bf4fe5ddbf4d463e514e8178e"),
+ IoHash::FromHexString("372d795bb907a15cab15ab3917854bfef7e7af2c"),
+ IoHash::FromHexString("75ab3917854bfef7e72d795bb907a15cab1af2c3"),
+ IoHash::FromHexString("ab3917854bfef7e7af2c372d795bb907a15cab15"),
+ IoHash::FromHexString("d1df59fcab06793a5f2c372d795bb907a15cab15")};
+ // Both lists empty -> nothing is returned
+ {
+ std::vector<IoHash> UsedReferences;
+ std::vector<IoHash> References;
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.empty());
+ }
+
+ // No candidates to check -> nothing is returned regardless of what is used
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::vector<IoHash> References;
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.empty());
+ }
+
+ // Nothing is used -> all five candidates are returned
+ {
+ std::vector<IoHash> UsedReferences{};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 5);
+ }
+
+ // Every candidate is used -> nothing is returned
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.empty());
+ }
+
+ // Every other candidate is used -> the two in between remain
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[2], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 2);
+ CHECK(UnusedReferences[0] == Hashes[1]);
+ CHECK(UnusedReferences[1] == Hashes[3]);
+ }
+
+ // The last three are used -> the first two remain
+ {
+ std::vector<IoHash> UsedReferences{Hashes[2], Hashes[3], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 2);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[1]);
+ }
+
+ // The first three are used -> the last two remain (moved to the front of the result)
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 2);
+ CHECK(UnusedReferences[0] == Hashes[3]);
+ CHECK(UnusedReferences[1] == Hashes[4]);
+ }
+
+ // All but the fourth are used -> only the fourth remains
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 1);
+ CHECK(UnusedReferences[0] == Hashes[3]);
+ }
+
+ // Second and fourth are used -> first, third and fifth remain
+ {
+ std::vector<IoHash> UsedReferences{Hashes[1], Hashes[3]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 3);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[2]);
+ CHECK(UnusedReferences[2] == Hashes[4]);
+ }
+
+ // Only the first is used -> the remaining four are shifted down by one
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[1]);
+ CHECK(UnusedReferences[1] == Hashes[2]);
+ CHECK(UnusedReferences[2] == Hashes[3]);
+ CHECK(UnusedReferences[3] == Hashes[4]);
+ }
+
+ // Only the second is used
+ {
+ std::vector<IoHash> UsedReferences{Hashes[1]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[2]);
+ CHECK(UnusedReferences[2] == Hashes[3]);
+ CHECK(UnusedReferences[3] == Hashes[4]);
+ }
+
+ // Only the fourth is used
+ {
+ std::vector<IoHash> UsedReferences{Hashes[3]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[1]);
+ CHECK(UnusedReferences[2] == Hashes[2]);
+ CHECK(UnusedReferences[3] == Hashes[4]);
+ }
+
+ // Only the last is used -> the first four remain in place
+ {
+ std::vector<IoHash> UsedReferences{Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[1]);
+ CHECK(UnusedReferences[2] == Hashes[2]);
+ CHECK(UnusedReferences[3] == Hashes[3]);
+ }
+}
+
#endif
void
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index a735893a1..8e31d3222 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -343,18 +343,18 @@ public:
metrics::RequestStats m_PutOps;
metrics::RequestStats m_GetOps;
- mutable RwLock m_IndexLock;
- IndexMap m_Index;
- std::vector<AccessTime> m_AccessTimes;
- std::vector<BucketPayload> m_Payloads;
- std::vector<BucketMetaData> m_MetaDatas;
- std::vector<MetaDataIndex> m_FreeMetaDatas;
- std::vector<MemCacheData> m_MemCachedPayloads;
- std::vector<MemCachedIndex> m_FreeMemCachedPayloads;
- std::unique_ptr<HashSet> m_TrackedCacheKeys;
- std::unique_ptr<HashSet> m_TrackedReferences;
- std::atomic_uint64_t m_StandaloneSize{};
- std::atomic_uint64_t m_MemCachedSize{};
+ mutable RwLock m_IndexLock;
+ IndexMap m_Index;
+ std::vector<AccessTime> m_AccessTimes;
+ std::vector<BucketPayload> m_Payloads;
+ std::vector<BucketMetaData> m_MetaDatas;
+ std::vector<MetaDataIndex> m_FreeMetaDatas;
+ std::vector<MemCacheData> m_MemCachedPayloads;
+ std::vector<MemCachedIndex> m_FreeMemCachedPayloads;
+ std::unique_ptr<HashSet> m_TrackedCacheKeys;
+ std::unique_ptr<std::vector<IoHash>> m_TrackedReferences;
+ std::atomic_uint64_t m_StandaloneSize{};
+ std::atomic_uint64_t m_MemCachedSize{};
virtual std::string GetGcName(GcCtx& Ctx) override;
virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index a8c5c0219..3f2f5448d 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -64,6 +64,8 @@ struct GcSettings
90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less
// usage than CompactBlockUsageThresholdPercent
std::filesystem::path DiskReservePath;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
};
struct GcCompactStoreStats
@@ -166,10 +168,10 @@ public:
/**
* @brief An interface to check if a set of Cids are referenced
*
- * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times.
+ * Instance will be deleted after GetUnusedReferences has been called 0-n times.
*
* During construction of the GcReferenceChecker the world is not stopped and this is a good
- * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly.
+ * place to do caching to be able to execute LockState and GetUnusedReferences quickly.
*/
class GcReferenceChecker
{
@@ -188,16 +190,19 @@ public:
// *IMPORTANT* Do *not* take any locks (shared or exclusive) in this code.
// This is because we need to acquire the locks in an ordered manner and not end up in a deadlock due to other code
// trying to get exclusive locks halfway through our execution.
- // Called once before any calls to RemoveUsedReferencesFromSet.
+ // Called once before any calls to GetUnusedReferences.
// The implementation should be as fast as possible as UpdateLockedState is part of a stop the world (from changes)
// until all instances of GcReferenceChecker UpdateLockedState are completed
virtual void UpdateLockedState(GcCtx& Ctx) = 0;
// Go through IoCids and see which ones are referenced. If it is the reference must be removed from IoCids
// This function should use pre-cached information on what is referenced as we are in stop the world mode
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0;
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) = 0;
};
+// Compacts the hashes of SortedReferences that do not occur in SortedUsedReferences to the front of
+// SortedReferences (in place) and returns a span over them. Both inputs are expected to be sorted ascending.
+std::span<IoHash> KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences);
+// Drops hashes outside the attachment range given by Ctx.Settings (when one is set), then sorts and
+// de-duplicates InOutReferences in place. Returns false if no references remain, true otherwise.
+bool FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences);
+
/**
* @brief An interface to implement a lock for Stop The World (from writing new data)
*
@@ -209,7 +214,7 @@ public:
virtual ~GcReferenceLocker() = default;
// Take all the locks needed to execute UpdateLockedState for the all the GcReferenceChecker in your domain
- // Once all the GcReferenceChecker has executed UpdateLockedState and RemoveUsedReferencesFromSet for all
+ // Once all the GcReferenceChecker has executed UpdateLockedState and GetUnusedReferences for all
// domains has completed, the locks will be disposed and writes are allowed once again
virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) = 0;
};
@@ -245,7 +250,7 @@ public:
virtual std::string GetGcName(GcCtx& Ctx) = 0;
- typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
+ typedef std::function<std::span<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
// Check a set of references to see if they are in use.
// Use the GetUnusedReferences input function to check if references are used and update any pointers
@@ -520,6 +525,8 @@ public:
std::optional<uint32_t> CompactBlockUsageThresholdPercent;
std::optional<bool> Verbose;
std::optional<bool> SingleThreaded;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
};
bool TriggerGc(const TriggerGcParams& Params);
@@ -547,6 +554,8 @@ private:
uint32_t CompactBlockUsageThresholdPercent,
bool Verbose,
bool SingleThreaded,
+ const IoHash& AttachmentRangeMin,
+ const IoHash& AttachmentRangeMax,
bool SilenceErrors);
void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice);
LoggerRef Log() { return m_Log; }