aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/gc.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-09-23 19:19:40 +0200
committerGitHub Enterprise <[email protected]>2024-09-23 19:19:40 +0200
commitbc9e590727211d803cce7be84c1cbc026179b841 (patch)
tree96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src/zenstore/gc.cpp
parentmade fmt formatter format function const (#162) (diff)
downloadzen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz
zen-bc9e590727211d803cce7be84c1cbc026179b841.zip
gc unused refactor (#165)
* optimize IoHash and OId comparisons * refactor filtering of unused references * add attachment filtering to gc
Diffstat (limited to 'src/zenstore/gc.cpp')
-rw-r--r--src/zenstore/gc.cpp258
1 files changed, 242 insertions, 16 deletions
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 28bdd2f42..904619222 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -571,6 +571,95 @@ Sum(GcResult& Stat, bool Cancelled = false)
return Stat;
}
+// Narrows InOutReferences to the attachment range configured in Ctx.Settings and then
+// normalises it into a sorted, duplicate-free list.
+//
+// Returns false when there is nothing left to process (the input was empty, or the range
+// filter removed every entry); returns true otherwise, with InOutReferences sorted ascending
+// and de-duplicated.
+bool
+FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences)
+{
+    if (InOutReferences.empty())
+    {
+        return false;
+    }
+
+    const IoHash& RangeMin = Ctx.Settings.AttachmentRangeMin;
+    const IoHash& RangeMax = Ctx.Settings.AttachmentRangeMax;
+
+    // The range filter is only applied (and logged) when it differs from the full [Zero, Max] span.
+    const bool IsRangeRestricted = (RangeMin != IoHash::Zero) || (RangeMax != IoHash::Max);
+    if (IsRangeRestricted)
+    {
+        const size_t CountBefore = InOutReferences.size();
+        std::erase_if(InOutReferences, [&](const IoHash& Candidate) {
+            const bool BelowRange = Candidate < RangeMin;
+            const bool AboveRange = RangeMax < Candidate;
+            return AboveRange || BelowRange;
+        });
+        const size_t CountSkipped = CountBefore - InOutReferences.size();
+        ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}",
+                 (100 * CountSkipped) / CountBefore,
+                 CountSkipped,
+                 CountBefore,
+                 RangeMin,
+                 RangeMax);
+
+        if (InOutReferences.empty())
+        {
+            return false;
+        }
+    }
+
+    // Sort first so that std::unique sees equal hashes as neighbours.
+    std::sort(InOutReferences.begin(), InOutReferences.end());
+    InOutReferences.erase(std::unique(InOutReferences.begin(), InOutReferences.end()), InOutReferences.end());
+    return true;
+}
+
+// Removes from SortedReferences every entry that also appears in SortedUsedReferences and
+// returns a span over the surviving (unused) entries.
+//
+// The survivors are compacted to the front of SortedReferences in place, so the returned span
+// aliases the caller's storage; elements beyond the returned span are not cleared.
+//
+// Both inputs must be sorted ascending: the function performs a single merge-style walk that
+// relies on operator== and operator< of IoHash.
+//
+// NOTE(review): a used reference is consumed by its first match, so duplicate entries in
+// SortedReferences would survive the walk - callers are presumably expected to de-duplicate
+// first (FilterReferences does); confirm against call sites.
+std::span<IoHash>
+KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences)
+{
+    if (SortedUsedReferences.empty())
+    {
+        // Nothing is in use, every reference is unused
+        return SortedReferences;
+    }
+    if (SortedReferences.empty())
+    {
+        return {};
+    }
+
+    const IoHash* ReferencesRead = &SortedReferences[0];
+    IoHash* ReferencesWrite = &SortedReferences[0];
+    const IoHash* ReferencesEnd = ReferencesRead + SortedReferences.size();
+
+    const IoHash* UsedReferencesRead = &SortedUsedReferences[0];
+    const IoHash* UsedReferencesReadEnd = UsedReferencesRead + SortedUsedReferences.size();
+
+    while (ReferencesRead != ReferencesEnd && UsedReferencesRead != UsedReferencesReadEnd)
+    {
+        const IoHash& Reference = *ReferencesRead;
+        const IoHash& UsedReference = *UsedReferencesRead;
+        if (Reference == UsedReference)
+        {
+            // Reference is in use - drop it and move past the matching used entry
+            ReferencesRead++;
+            UsedReferencesRead++;
+        }
+        else if (Reference < UsedReference)
+        {
+            // Reference sorts before the next used entry so it cannot be in use - keep it.
+            // The copy is only needed once at least one earlier entry has been dropped.
+            if (ReferencesRead > ReferencesWrite)
+            {
+                *ReferencesWrite = Reference;
+            }
+            ReferencesWrite++;
+            ReferencesRead++;
+        }
+        else
+        {
+            // Used entry sorts before the current reference - it matches nothing, skip it
+            UsedReferencesRead++;
+        }
+    }
+
+    // Used references are exhausted (or there were no references left): everything that
+    // remains unread is unused and is shifted down behind the entries already kept.
+    size_t Remaining = std::distance(ReferencesRead, ReferencesEnd);
+    if (Remaining > 0)
+    {
+        if (ReferencesRead != ReferencesWrite)
+        {
+            // Source and destination are in the same buffer and overlap whenever more entries
+            // remain than have been dropped, so this must be memmove - memcpy is undefined
+            // behaviour for overlapping ranges.
+            memmove(ReferencesWrite, ReferencesRead, sizeof(IoHash::Hash) * Remaining);
+        }
+        ReferencesWrite += Remaining;
+    }
+    return SortedReferences.subspan(0, (size_t)std::distance(&SortedReferences[0], ReferencesWrite));
+}
+
void
GcManager::AddGcReferencer(GcReferencer& Referencer)
{
@@ -987,18 +1076,19 @@ GcManager::CollectGarbage(const GcSettings& Settings)
return Sum(Result, true);
}
{
- const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
- HashSet UnusedCids(References.begin(), References.end());
+ const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> {
+ std::span<IoHash> UnusedCids(References);
+ ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero);
for (const auto& It : ReferenceCheckers)
{
GcReferenceChecker* ReferenceChecker = It.first.get();
- ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
+ UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids);
if (UnusedCids.empty())
{
return {};
}
}
- return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
+ return UnusedCids;
};
// checking all Cids agains references in cache
@@ -1768,6 +1858,8 @@ GcScheduler::SchedulerThread()
uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent;
bool Verbose = m_Config.Verbose;
bool SingleThreaded = m_Config.SingleThreaded;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
bool DiskSpaceGCTriggered = false;
bool TimeBasedGCTriggered = false;
@@ -1804,9 +1896,11 @@ GcScheduler::SchedulerThread()
UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion);
CompactBlockUsageThresholdPercent =
TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent);
- Verbose = TriggerParams.Verbose.value_or(Verbose);
- SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
- DoGc = true;
+ Verbose = TriggerParams.Verbose.value_or(Verbose);
+ SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
+ AttachmentRangeMin = TriggerParams.AttachmentRangeMin;
+ AttachmentRangeMax = TriggerParams.AttachmentRangeMax;
+ DoGc = true;
}
if (m_TriggerScrubParams)
@@ -2025,6 +2119,8 @@ GcScheduler::SchedulerThread()
CompactBlockUsageThresholdPercent,
Verbose,
SingleThreaded,
+ AttachmentRangeMin,
+ AttachmentRangeMax,
SilenceErrors);
if (!GcSuccess)
{
@@ -2124,6 +2220,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
uint32_t CompactBlockUsageThresholdPercent,
bool Verbose,
bool SingleThreaded,
+ const IoHash& AttachmentRangeMin,
+ const IoHash& AttachmentRangeMax,
bool SilenceErrors)
{
ZEN_TRACE_CPU("GcScheduler::CollectGarbage");
@@ -2193,15 +2291,18 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
break;
case GcVersion::kV2:
{
- const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
- .ProjectStoreExpireTime = ProjectStoreExpireTime,
- .CollectSmallObjects = CollectSmallObjects,
- .IsDeleteMode = Delete,
- .SkipCidDelete = SkipCid,
- .Verbose = Verbose,
- .SingleThread = SingleThreaded,
- .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
- .DiskReservePath = m_Config.RootDirectory / "reserve.gc"};
+ const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
+ .ProjectStoreExpireTime = ProjectStoreExpireTime,
+ .CollectSmallObjects = CollectSmallObjects,
+ .IsDeleteMode = Delete,
+ .SkipCidDelete = SkipCid,
+ .Verbose = Verbose,
+ .SingleThread = SingleThreaded,
+ .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
+ .DiskReservePath = m_Config.RootDirectory / "reserve.gc",
+ .AttachmentRangeMin = AttachmentRangeMin,
+ .AttachmentRangeMax = AttachmentRangeMax};
+
GcClock::TimePoint GcStartTime = GcClock::Now();
GcResult Result = m_GcManager.CollectGarbage(Settings);
@@ -2815,6 +2916,131 @@ TEST_CASE("scrub.basic")
CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash()));
}
+// Exercises KeepUnusedReferences: each scope below builds a list of used references and a
+// list of candidate references from the shared Hashes table and checks which candidates
+// are reported as unused.
+TEST_CASE("gc.keepunusedreferences")
+{
+ // Five distinct hashes, listed in ascending hex order so that any subset taken in index
+ // order is already sorted as KeepUnusedReferences requires.
+ const IoHash Hashes[] = {IoHash::FromHexString("177030568fdd461bf4fe5ddbf4d463e514e8178e"),
+ IoHash::FromHexString("372d795bb907a15cab15ab3917854bfef7e7af2c"),
+ IoHash::FromHexString("75ab3917854bfef7e72d795bb907a15cab1af2c3"),
+ IoHash::FromHexString("ab3917854bfef7e7af2c372d795bb907a15cab15"),
+ IoHash::FromHexString("d1df59fcab06793a5f2c372d795bb907a15cab15")};
+ // Both lists empty -> nothing unused
+ {
+ std::vector<IoHash> UsedReferences;
+ std::vector<IoHash> References;
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.empty());
+ }
+
+ // No candidate references -> nothing unused, regardless of what is in use
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::vector<IoHash> References;
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.empty());
+ }
+
+ // Nothing in use -> every candidate is unused
+ {
+ std::vector<IoHash> UsedReferences{};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 5);
+ }
+
+ // Every candidate is in use -> nothing unused
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.empty());
+ }
+
+ // Alternating used entries (first, middle, last) -> the two in between remain
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[2], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 2);
+ CHECK(UnusedReferences[0] == Hashes[1]);
+ CHECK(UnusedReferences[1] == Hashes[3]);
+ }
+
+ // Used entries form the tail -> the head remains
+ {
+ std::vector<IoHash> UsedReferences{Hashes[2], Hashes[3], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 2);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[1]);
+ }
+
+ // Used entries form the head -> the tail remains and has to be shifted to the front
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 2);
+ CHECK(UnusedReferences[0] == Hashes[3]);
+ CHECK(UnusedReferences[1] == Hashes[4]);
+ }
+
+ // All but one interior entry in use -> only that entry remains
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 1);
+ CHECK(UnusedReferences[0] == Hashes[3]);
+ }
+
+ // Two interior entries in use -> first, middle and last remain
+ {
+ std::vector<IoHash> UsedReferences{Hashes[1], Hashes[3]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 3);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[2]);
+ CHECK(UnusedReferences[2] == Hashes[4]);
+ }
+
+ // Single used entry at the very front -> the remaining four are shifted down by one
+ {
+ std::vector<IoHash> UsedReferences{Hashes[0]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[1]);
+ CHECK(UnusedReferences[1] == Hashes[2]);
+ CHECK(UnusedReferences[2] == Hashes[3]);
+ CHECK(UnusedReferences[3] == Hashes[4]);
+ }
+
+ // Single used entry at index 1 -> everything else remains in order
+ {
+ std::vector<IoHash> UsedReferences{Hashes[1]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[2]);
+ CHECK(UnusedReferences[2] == Hashes[3]);
+ CHECK(UnusedReferences[3] == Hashes[4]);
+ }
+
+ // Single used entry at index 3 -> everything else remains in order
+ {
+ std::vector<IoHash> UsedReferences{Hashes[3]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[1]);
+ CHECK(UnusedReferences[2] == Hashes[2]);
+ CHECK(UnusedReferences[3] == Hashes[4]);
+ }
+
+ // Single used entry at the very end -> the first four remain untouched
+ {
+ std::vector<IoHash> UsedReferences{Hashes[4]};
+ std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+ CHECK(UnusedReferences.size() == 4);
+ CHECK(UnusedReferences[0] == Hashes[0]);
+ CHECK(UnusedReferences[1] == Hashes[1]);
+ CHECK(UnusedReferences[2] == Hashes[2]);
+ CHECK(UnusedReferences[3] == Hashes[3]);
+ }
+}
+
#endif
void