aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-09-23 19:19:40 +0200
committerGitHub Enterprise <[email protected]>2024-09-23 19:19:40 +0200
commitbc9e590727211d803cce7be84c1cbc026179b841 (patch)
tree96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src
parentmade fmt formatter format function const (#162) (diff)
downloadzen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz
zen-bc9e590727211d803cce7be84c1cbc026179b841.zip
gc unused refactor (#165)
* optimize IoHash and Oid comparisons * refactor filtering of unused references * add attachment filtering to gc
Diffstat (limited to 'src')
-rw-r--r--src/zen/cmds/admin_cmd.cpp44
-rw-r--r--src/zen/cmds/admin_cmd.h2
-rw-r--r--src/zencore/include/zencore/iohash.h8
-rw-r--r--src/zencore/include/zencore/memory.h35
-rw-r--r--src/zencore/include/zencore/uid.h4
-rw-r--r--src/zencore/iohash.cpp6
-rw-r--r--src/zenserver/admin/admin.cpp10
-rw-r--r--src/zenserver/projectstore/projectstore.cpp49
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp54
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp24
-rw-r--r--src/zenstore/compactcas.cpp14
-rw-r--r--src/zenstore/filecas.cpp14
-rw-r--r--src/zenstore/gc.cpp258
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h24
-rw-r--r--src/zenstore/include/zenstore/gc.h21
15 files changed, 448 insertions, 119 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp
index f5bd15ea2..dd0bf83de 100644
--- a/src/zen/cmds/admin_cmd.cpp
+++ b/src/zen/cmds/admin_cmd.cpp
@@ -119,6 +119,18 @@ GcCommand::GcCommand()
"Force GC to run single threaded",
cxxopts::value(m_SingleThreaded)->default_value("false"),
"<single-threaded>");
+ m_Options.add_option("",
+ "",
+ "reference-low",
+ "Reference filter lower limit - defaults to no limit",
+ cxxopts::value(m_ReferenceHashLow),
+ "<reflowlimit>");
+ m_Options.add_option("",
+ "",
+ "reference-high",
+ "Reference filter higher limit - defaults to no limit",
+ cxxopts::value(m_ReferenceHashHigh),
+ "<refhighlimit>");
}
GcCommand::~GcCommand()
@@ -170,6 +182,38 @@ GcCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
{
Params.Add({"compactblockthreshold", fmt::format("{}", m_CompactBlockThreshold)});
}
+ IoHash LowRef = IoHash::Zero;
+ if (!m_ReferenceHashLow.empty())
+ {
+ if (m_ReferenceHashLow.length() != IoHash::StringLength)
+ {
+ throw OptionParseException(fmt::format("reference-low must be a {} character hex string", IoHash::StringLength));
+ }
+ LowRef = IoHash::FromHexString(m_ReferenceHashLow);
+ }
+ IoHash HighRef = IoHash::Max;
+ if (!m_ReferenceHashHigh.empty())
+ {
+ if (m_ReferenceHashHigh.length() != IoHash::StringLength)
+ {
+ throw OptionParseException(fmt::format("reference-high must be a {} character hex string", IoHash::StringLength));
+ }
+ HighRef = IoHash::FromHexString(m_ReferenceHashHigh);
+ }
+
+ if (HighRef < LowRef)
+ {
+ throw OptionParseException(fmt::format("invalid reference range, reference-high must be higher value than reference-low"));
+ }
+ if (LowRef != IoHash::Zero)
+ {
+ Params.Add({"referencehashlow", LowRef.ToHexString()});
+ }
+ if (HighRef != IoHash::Max)
+ {
+ Params.Add({"referencehashhigh", HighRef.ToHexString()});
+ }
+
Params.Add({"verbose", m_Verbose ? "true" : "false"});
Params.Add({"singlethreaded", m_SingleThreaded ? "true" : "false"});
diff --git a/src/zen/cmds/admin_cmd.h b/src/zen/cmds/admin_cmd.h
index e26e7f4e7..f5dd33d32 100644
--- a/src/zen/cmds/admin_cmd.h
+++ b/src/zen/cmds/admin_cmd.h
@@ -51,6 +51,8 @@ private:
uint32_t m_CompactBlockThreshold = 90;
bool m_Verbose{false};
bool m_SingleThreaded{false};
+ std::string m_ReferenceHashLow;
+ std::string m_ReferenceHashHigh;
};
class GcStatusCommand : public StorageCommand
diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h
index ff902399e..a8fc9e6c1 100644
--- a/src/zencore/include/zencore/iohash.h
+++ b/src/zencore/include/zencore/iohash.h
@@ -1,5 +1,4 @@
// Copyright Epic Games, Inc. All Rights Reserved.
-// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
@@ -55,10 +54,11 @@ struct IoHash
StringBuilderBase& ToHexString(StringBuilderBase& outBuilder) const;
std::string ToHexString() const;
- static const int StringLength = 40;
- typedef char String_t[StringLength + 1];
+ static constexpr int StringLength = 40;
+ typedef char String_t[StringLength + 1];
static const IoHash Zero; // Initialized to all zeros
+ static const IoHash Max; // Initialized to all ones
inline auto operator<=>(const IoHash& rhs) const = default;
inline bool operator==(const IoHash& rhs) const
@@ -75,7 +75,7 @@ struct IoHash
return LhsHash[0] != RhsHash[0] || LhsHash[1] != RhsHash[1] || LhsHash[2] != RhsHash[2] || LhsHash[3] != RhsHash[3] ||
LhsHash[4] != RhsHash[4];
}
- inline bool operator<(const IoHash& rhs) const { return memcmp(Hash, rhs.Hash, sizeof Hash) < 0; }
+ inline bool operator<(const IoHash& rhs) const { return MemCmpFixed<sizeof Hash, std::uint32_t>(Hash, rhs.Hash) < 0; }
struct Hasher
{
diff --git a/src/zencore/include/zencore/memory.h b/src/zencore/include/zencore/memory.h
index 6419252ff..7a893d3ab 100644
--- a/src/zencore/include/zencore/memory.h
+++ b/src/zencore/include/zencore/memory.h
@@ -402,6 +402,41 @@ MakeMutableMemoryView(void* Data, void* DataEnd)
return MutableMemoryView(Data, DataEnd);
}
+/**
+ * Compares two SIZE-byte memory regions one byte at a time.
+ *
+ * Returns zero when the regions are identical, otherwise the difference between the
+ * first pair of differing bytes (read as unsigned char), so the sign of the result
+ * matches what memcmp would report. SIZE is a compile-time constant and the
+ * recursion is terminated at compile time when no bytes remain.
+ */
+template<std::size_t SIZE>
+inline int
+MemCmpFixed(const void* a1, const void* a2)
+{
+    if constexpr (SIZE == 0)
+    {
+        // No bytes left to compare - the regions are equal
+        return 0;
+    }
+    else
+    {
+        auto const s1   = reinterpret_cast<const unsigned char*>(a1);
+        auto const s2   = reinterpret_cast<const unsigned char*>(a2);
+        auto const diff = *s1 - *s2;
+        return diff ? diff : MemCmpFixed<SIZE - 1>(s1 + 1, s2 + 1);
+    }
+}
+
+/**
+ * Compares two SIZE-byte memory regions one EQTYPE word at a time.
+ *
+ * Words are only tested for equality; the first word that differs is handed to the
+ * byte-wise MemCmpFixed<sizeof(EQTYPE)> so the result follows byte order in memory.
+ *
+ * SIZE must be a whole number of EQTYPE words (enforced at compile time) and both
+ * pointers are asserted (ZEN_ASSERT_SLOW) to be aligned to sizeof(EQTYPE).
+ */
+template<std::size_t SIZE, typename EQTYPE>
+inline int
+MemCmpFixed(const void* a1, const void* a2)
+{
+    // Without this, SIZE - sizeof(EQTYPE) would wrap around and the recursion would never terminate
+    static_assert(SIZE % sizeof(EQTYPE) == 0, "SIZE must be a multiple of sizeof(EQTYPE)");
+
+    if constexpr (SIZE == 0)
+    {
+        // All words compared equal
+        return 0;
+    }
+    else
+    {
+        ZEN_ASSERT_SLOW((uintptr_t(a1) & (sizeof(EQTYPE) - 1)) == 0);
+        ZEN_ASSERT_SLOW((uintptr_t(a2) & (sizeof(EQTYPE) - 1)) == 0);
+        auto const s1 = reinterpret_cast<const EQTYPE*>(a1);
+        auto const s2 = reinterpret_cast<const EQTYPE*>(a2);
+        return (*s1 != *s2) ? MemCmpFixed<sizeof(EQTYPE)>(s1, s2) : MemCmpFixed<SIZE - sizeof(EQTYPE), EQTYPE>(s1 + 1, s2 + 1);
+    }
+}
+
+
void memory_forcelink(); // internal
} // namespace zen
diff --git a/src/zencore/include/zencore/uid.h b/src/zencore/include/zencore/uid.h
index f8b1ccf98..08a335392 100644
--- a/src/zencore/include/zencore/uid.h
+++ b/src/zencore/include/zencore/uid.h
@@ -2,6 +2,7 @@
#pragma once
+#include <zencore/memory.h>
#include <zencore/zencore.h>
#include <compare>
@@ -67,7 +68,8 @@ struct Oid
std::string ToString() const;
[[nodiscard]] static Oid FromMemory(const void* Ptr);
- auto operator<=>(const Oid& rhs) const = default;
+ auto operator<=>(const Oid& rhs) const = default;
+ inline bool operator<(const Oid& rhs) const { return MemCmpFixed<sizeof OidBits, std::uint32_t>(OidBits, rhs.OidBits) < 0; }
[[nodiscard]] inline explicit operator bool() const { return *this != Zero; }
static const Oid Zero; // Min (can be used to signify a "null" value, or for open range queries)
diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp
index 1bf2c033d..8f3f8da26 100644
--- a/src/zencore/iohash.cpp
+++ b/src/zencore/iohash.cpp
@@ -12,7 +12,11 @@
namespace zen {
-const IoHash IoHash::Zero{}; // Initialized to all zeros
+static const uint8_t MaxData[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+const IoHash IoHash::Max = IoHash::MakeFrom(MaxData); // Initialized to all 0xff
+const IoHash IoHash::Zero{}; // Initialized to all zeros
IoHash
IoHash::HashBuffer(const void* data, size_t byteCount)
diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp
index 1eeb5637a..cd336c715 100644
--- a/src/zenserver/admin/admin.cpp
+++ b/src/zenserver/admin/admin.cpp
@@ -416,6 +416,16 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler,
GcParams.SingleThreaded = Param == "true"sv;
}
+ if (auto Param = Params.GetValue("referencehashlow"); Param.empty() == false)
+ {
+ GcParams.AttachmentRangeMin = IoHash::FromHexString(Param);
+ }
+
+ if (auto Param = Params.GetValue("referencehashhigh"); Param.empty() == false)
+ {
+ GcParams.AttachmentRangeMax = IoHash::FromHexString(Param);
+ }
+
const bool Started = m_GcScheduler.TriggerGc(GcParams);
CbObjectWriter Response;
diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp
index fb1385266..be50a03e2 100644
--- a/src/zenserver/projectstore/projectstore.cpp
+++ b/src/zenserver/projectstore/projectstore.cpp
@@ -5410,15 +5410,18 @@ public:
Oplog->GetAttachmentsLocked(m_References, m_ProjectStore.m_Config.StoreAttachmentMetaData);
}
+ FilterReferences(Ctx, m_References);
}
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
{
- ZEN_TRACE_CPU("Store::RemoveUsedReferencesFromSet");
+ ZEN_TRACE_CPU("Store::GetUnusedReferences");
auto Log = [&Ctx]() { return Ctx.Logger; };
- size_t InitialCount = IoCids.size();
+ size_t InitialCount = IoCids.size();
+ size_t UsedCount = InitialCount;
+
Stopwatch Timer;
const auto _ = MakeGuard([&] {
if (!Ctx.Settings.Verbose)
@@ -5427,21 +5430,14 @@ public:
}
ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}",
"projectstore",
- InitialCount - IoCids.size(),
+ UsedCount,
InitialCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- for (const IoHash& ReferenceHash : m_References)
- {
- if (IoCids.erase(ReferenceHash) == 1)
- {
- if (IoCids.empty())
- {
- return;
- }
- }
- }
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids);
+ UsedCount = IoCids.size() - UnusedReferences.size();
+ return UnusedReferences;
}
private:
@@ -5548,6 +5544,7 @@ public:
Oplog->GetAttachmentsLocked(m_References, m_ProjectStore.m_Config.StoreAttachmentMetaData);
m_OplogAccessTime = m_Project->LastOplogAccessTime(m_OplogId);
+ FilterReferences(Ctx, m_References);
}
virtual void UpdateLockedState(GcCtx& Ctx) override
@@ -5595,13 +5592,15 @@ public:
}
}
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
{
- ZEN_TRACE_CPU("Store::Oplog::RemoveUsedReferencesFromSet");
+ ZEN_TRACE_CPU("Store::Oplog::GetUnusedReferences");
auto Log = [&Ctx]() { return Ctx.Logger; };
- size_t InitialCount = IoCids.size();
+ const size_t InitialCount = IoCids.size();
+ size_t UsedCount = InitialCount;
+
Stopwatch Timer;
const auto _ = MakeGuard([&] {
if (!Ctx.Settings.Verbose)
@@ -5610,24 +5609,18 @@ public:
}
ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {} from {}/{}",
m_OplogBasePath,
- InitialCount - IoCids.size(),
+ UsedCount,
InitialCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
m_Project->Identifier,
m_OplogId);
});
- for (const IoHash& ReferenceHash : m_References)
- {
- if (IoCids.erase(ReferenceHash) == 1)
- {
- if (IoCids.empty())
- {
- return;
- }
- }
- }
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids);
+ UsedCount = IoCids.size() - UnusedReferences.size();
+ return UnusedReferences;
}
+
ProjectStore& m_ProjectStore;
Ref<ProjectStore::Project> m_Project;
std::string m_OplogId;
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index 63f6d708a..417b63fb4 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -1262,7 +1262,9 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept
}
if (m_TrackedReferences && HashKeyAndReferences.size() > 1)
{
- m_TrackedReferences->insert(HashKeyAndReferences.begin() + 1, HashKeyAndReferences.end());
+ m_TrackedReferences->insert(m_TrackedReferences->end(),
+ HashKeyAndReferences.begin() + 1,
+ HashKeyAndReferences.end());
}
if (auto It = m_Index.find(HashKey); It != m_Index.end())
{
@@ -2963,7 +2965,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c
}
if (m_TrackedReferences)
{
- m_TrackedReferences->insert(References.begin(), References.end());
+ m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end());
}
PayloadIndex EntryIndex = {};
@@ -3130,7 +3132,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey,
}
if (m_TrackedReferences)
{
- m_TrackedReferences->insert(References.begin(), References.end());
+ m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end());
}
if (auto It = m_Index.find(HashKey); It != m_Index.end())
{
@@ -3782,17 +3784,19 @@ public:
}
ZEN_INFO("GCV2: cachebucket [PRECACHE] '{}': found {} references in {}",
m_CacheBucket.m_BucketDir,
- m_References.size(),
+ m_PrecachedReferences.size(),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<HashSet>(); });
+ m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<std::vector<IoHash>>(); });
- bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_References);
+ bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_PrecachedReferences);
if (!Continue)
{
m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences.reset(); });
+ return;
}
+ FilterReferences(Ctx, m_PrecachedReferences);
}
virtual void UpdateLockedState(GcCtx& Ctx) override
@@ -3809,32 +3813,32 @@ public:
}
ZEN_INFO("GCV2: cachebucket [LOCKSTATE] '{}': found {} references in {}",
m_CacheBucket.m_BucketDir,
- m_References.size(),
+ m_PrecachedReferences.size() + m_AddedReferences.size(),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
if (Ctx.IsCancelledFlag.load())
{
- m_References = {};
+ m_PrecachedReferences = {};
m_CacheBucket.m_TrackedReferences.reset();
return;
}
ZEN_ASSERT(m_CacheBucket.m_TrackedReferences);
- HashSet& AddedReferences(*m_CacheBucket.m_TrackedReferences);
- m_References.reserve(m_References.size() + AddedReferences.size());
- m_References.insert(m_References.end(), AddedReferences.begin(), AddedReferences.end());
- AddedReferences = {};
+ m_AddedReferences = std::move(*m_CacheBucket.m_TrackedReferences);
+ FilterReferences(Ctx, m_AddedReferences);
}
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
{
- ZEN_TRACE_CPU("Z$::Bucket::RemoveUsedReferencesFromSet");
+ ZEN_TRACE_CPU("Z$::Bucket::GetUnusedReferences");
auto Log = [&Ctx]() { return Ctx.Logger; };
- size_t InitialCount = IoCids.size();
+ const size_t InitialCount = IoCids.size();
+ size_t UsedCount = InitialCount;
+
Stopwatch Timer;
const auto _ = MakeGuard([&] {
if (!Ctx.Settings.Verbose)
@@ -3843,24 +3847,20 @@ public:
}
ZEN_INFO("GCV2: cachebucket [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}",
m_CacheBucket.m_BucketDir,
- InitialCount - IoCids.size(),
+ UsedCount,
InitialCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- for (const IoHash& ReferenceHash : m_References)
- {
- if (IoCids.erase(ReferenceHash) == 1)
- {
- if (IoCids.empty())
- {
- return;
- }
- }
- }
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_PrecachedReferences, IoCids);
+ UnusedReferences = KeepUnusedReferences(m_AddedReferences, UnusedReferences);
+ UsedCount = IoCids.size() - UnusedReferences.size();
+ return UnusedReferences;
}
+
CacheBucket& m_CacheBucket;
- std::vector<IoHash> m_References;
+ std::vector<IoHash> m_PrecachedReferences;
+ std::vector<IoHash> m_AddedReferences;
};
std::vector<GcReferenceChecker*>
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index 7794d025f..9f1bcb41a 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -1189,15 +1189,18 @@ public:
break;
}
}
+ FilterReferences(Ctx, m_References);
}
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
{
- ZEN_TRACE_CPU("Z$::RemoveUsedReferencesFromSet");
+ ZEN_TRACE_CPU("Z$::GetUnusedReferences");
auto Log = [&Ctx]() { return Ctx.Logger; };
- size_t InitialCount = IoCids.size();
+ const size_t InitialCount = IoCids.size();
+ size_t UsedCount = InitialCount;
+
Stopwatch Timer;
const auto _ = MakeGuard([&] {
if (!Ctx.Settings.Verbose)
@@ -1206,21 +1209,14 @@ public:
}
ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}",
"projectstore",
- InitialCount - IoCids.size(),
+ UsedCount,
InitialCount,
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- for (const IoHash& ReferenceHash : m_References)
- {
- if (IoCids.erase(ReferenceHash) == 1)
- {
- if (IoCids.empty())
- {
- return;
- }
- }
- }
+ std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids);
+ UsedCount = IoCids.size() - UnusedReferences.size();
+ return UnusedReferences;
}
private:
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 15f80d4cf..e0a7900f1 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -867,9 +867,9 @@ public:
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
- Stats.CheckedCount = m_Cids.size();
- Stats.FoundCount = UnusedCids.size();
+ std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+ Stats.CheckedCount = m_Cids.size();
+ Stats.FoundCount = UnusedCids.size();
if (UnusedCids.empty())
{
@@ -967,7 +967,11 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&)
CidsToCheck.push_back(It.first);
}
}
- return new CasContainerReferencePruner(*this, std::move(CidsToCheck));
+ if (FilterReferences(Ctx, CidsToCheck))
+ {
+ return new CasContainerReferencePruner(*this, std::move(CidsToCheck));
+ }
+ return nullptr;
}
void
@@ -2018,7 +2022,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType)
HashKeySet Deleted;
GcStats Stats;
GcStoreCompactor* Compactor =
- Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::vector<IoHash> {
+ Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> {
std::vector<IoHash> Unreferenced;
HashKeySet Retain;
Retain.AddHashesToSet(KeepHashes);
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 733140e50..7bd17ee88 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1653,9 +1653,9 @@ public:
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
- Stats.CheckedCount = m_Cids.size();
- Stats.FoundCount = UnusedCids.size();
+ std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+ Stats.CheckedCount = m_Cids.size();
+ Stats.FoundCount = UnusedCids.size();
if (UnusedCids.empty())
{
// Nothing to collect
@@ -1699,7 +1699,7 @@ public:
}
}
- return new FileCasStoreCompactor(m_FileCasStrategy, std::move(UnusedCids));
+ return new FileCasStoreCompactor(m_FileCasStrategy, std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()));
}
private:
@@ -1745,7 +1745,11 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&)
CidsToCheck.push_back(It.first);
}
}
- return new FileCasReferencePruner(*this, std::move(CidsToCheck));
+ if (FilterReferences(Ctx, CidsToCheck))
+ {
+ return new FileCasReferencePruner(*this, std::move(CidsToCheck));
+ }
+ return nullptr;
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 28bdd2f42..904619222 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -571,6 +571,95 @@ Sum(GcResult& Stat, bool Cancelled = false)
return Stat;
}
+// Restricts InOutReferences to the attachment range configured in Ctx.Settings (when a
+// range other than the full Zero..Max span is set), then sorts the list and removes
+// duplicates. Returns true if any references remain, false if the list is (or becomes) empty.
+bool
+FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences)
+{
+    if (InOutReferences.empty())
+    {
+        return false;
+    }
+
+    const bool HasRangeFilter = Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero;
+    if (HasRangeFilter)
+    {
+        const size_t CountBefore = InOutReferences.size();
+        std::erase_if(InOutReferences, [&Ctx](const IoHash& Candidate) {
+            const bool AboveMax = Ctx.Settings.AttachmentRangeMax < Candidate;
+            const bool BelowMin = Candidate < Ctx.Settings.AttachmentRangeMin;
+            return AboveMax || BelowMin;
+        });
+        const size_t Dropped = CountBefore - InOutReferences.size();
+        ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}",
+                 (100 * Dropped) / CountBefore,
+                 Dropped,
+                 CountBefore,
+                 Ctx.Settings.AttachmentRangeMin,
+                 Ctx.Settings.AttachmentRangeMax);
+    }
+
+    if (InOutReferences.empty())
+    {
+        return false;
+    }
+
+    // Sort first so that duplicates become adjacent and can be dropped in one pass
+    std::sort(InOutReferences.begin(), InOutReferences.end());
+    InOutReferences.erase(std::unique(InOutReferences.begin(), InOutReferences.end()), InOutReferences.end());
+    return true;
+}
+
+
+// Compacts SortedReferences in place so that its leading part holds only the entries
+// that do not occur in SortedUsedReferences, and returns a span over that leading part.
+//
+// Both inputs are walked in lock-step like a merge, so both must be sorted ascending
+// (FilterReferences() produces such lists). Entries beyond the returned span are left
+// in an unspecified state.
+std::span<IoHash>
+KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences)
+{
+    if (SortedUsedReferences.empty())
+    {
+        // Nothing is in use - every reference is unused
+        return SortedReferences;
+    }
+    if (SortedReferences.empty())
+    {
+        return {};
+    }
+
+    const IoHash* ReferencesRead  = &SortedReferences[0];
+    IoHash*       ReferencesWrite = &SortedReferences[0];
+    const IoHash* ReferencesEnd   = ReferencesRead + SortedReferences.size();
+
+    const IoHash* UsedReferencesRead    = &SortedUsedReferences[0];
+    const IoHash* UsedReferencesReadEnd = UsedReferencesRead + SortedUsedReferences.size();
+
+    while (ReferencesRead != ReferencesEnd && UsedReferencesRead != UsedReferencesReadEnd)
+    {
+        const IoHash& Reference     = *ReferencesRead;
+        const IoHash& UsedReference = *UsedReferencesRead;
+        if (Reference == UsedReference)
+        {
+            // Reference is in use - drop it and move on in both lists
+            ReferencesRead++;
+            UsedReferencesRead++;
+        }
+        else if (Reference < UsedReference)
+        {
+            // Reference sorts before the next used entry so it can not be in use - keep it
+            if (ReferencesRead > ReferencesWrite)
+            {
+                *ReferencesWrite = Reference;
+            }
+            ReferencesWrite++;
+            ReferencesRead++;
+        }
+        else
+        {
+            // Used entry sorts before the current reference - it is not a candidate, skip it
+            UsedReferencesRead++;
+        }
+    }
+
+    // The used list ran out first: everything that is left is unused and is shifted down
+    size_t Remaining = std::distance(ReferencesRead, ReferencesEnd);
+    if (Remaining > 0)
+    {
+        if (ReferencesRead != ReferencesWrite)
+        {
+            // Source and destination live in the same array and overlap whenever fewer entries
+            // were dropped than remain, so memmove is required - memcpy would be undefined behaviour
+            memmove(ReferencesWrite, ReferencesRead, sizeof(IoHash) * Remaining);
+        }
+        ReferencesWrite += Remaining;
+    }
+    return SortedReferences.subspan(0, (size_t)std::distance(&SortedReferences[0], ReferencesWrite));
+}
+
+
void
GcManager::AddGcReferencer(GcReferencer& Referencer)
{
@@ -987,18 +1076,19 @@ GcManager::CollectGarbage(const GcSettings& Settings)
return Sum(Result, true);
}
{
- const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> {
- HashSet UnusedCids(References.begin(), References.end());
+ const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> {
+ std::span<IoHash> UnusedCids(References);
+ ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero);
for (const auto& It : ReferenceCheckers)
{
GcReferenceChecker* ReferenceChecker = It.first.get();
- ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids);
+ UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids);
if (UnusedCids.empty())
{
return {};
}
}
- return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end());
+ return UnusedCids;
};
// checking all Cids against references in cache
@@ -1768,6 +1858,8 @@ GcScheduler::SchedulerThread()
uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent;
bool Verbose = m_Config.Verbose;
bool SingleThreaded = m_Config.SingleThreaded;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
bool DiskSpaceGCTriggered = false;
bool TimeBasedGCTriggered = false;
@@ -1804,9 +1896,11 @@ GcScheduler::SchedulerThread()
UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion);
CompactBlockUsageThresholdPercent =
TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent);
- Verbose = TriggerParams.Verbose.value_or(Verbose);
- SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
- DoGc = true;
+ Verbose = TriggerParams.Verbose.value_or(Verbose);
+ SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded);
+ AttachmentRangeMin = TriggerParams.AttachmentRangeMin;
+ AttachmentRangeMax = TriggerParams.AttachmentRangeMax;
+ DoGc = true;
}
if (m_TriggerScrubParams)
@@ -2025,6 +2119,8 @@ GcScheduler::SchedulerThread()
CompactBlockUsageThresholdPercent,
Verbose,
SingleThreaded,
+ AttachmentRangeMin,
+ AttachmentRangeMax,
SilenceErrors);
if (!GcSuccess)
{
@@ -2124,6 +2220,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
uint32_t CompactBlockUsageThresholdPercent,
bool Verbose,
bool SingleThreaded,
+ const IoHash& AttachmentRangeMin,
+ const IoHash& AttachmentRangeMax,
bool SilenceErrors)
{
ZEN_TRACE_CPU("GcScheduler::CollectGarbage");
@@ -2193,15 +2291,18 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
break;
case GcVersion::kV2:
{
- const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
- .ProjectStoreExpireTime = ProjectStoreExpireTime,
- .CollectSmallObjects = CollectSmallObjects,
- .IsDeleteMode = Delete,
- .SkipCidDelete = SkipCid,
- .Verbose = Verbose,
- .SingleThread = SingleThreaded,
- .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
- .DiskReservePath = m_Config.RootDirectory / "reserve.gc"};
+ const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
+ .ProjectStoreExpireTime = ProjectStoreExpireTime,
+ .CollectSmallObjects = CollectSmallObjects,
+ .IsDeleteMode = Delete,
+ .SkipCidDelete = SkipCid,
+ .Verbose = Verbose,
+ .SingleThread = SingleThreaded,
+ .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
+ .DiskReservePath = m_Config.RootDirectory / "reserve.gc",
+ .AttachmentRangeMin = AttachmentRangeMin,
+ .AttachmentRangeMax = AttachmentRangeMax};
+
GcClock::TimePoint GcStartTime = GcClock::Now();
GcResult Result = m_GcManager.CollectGarbage(Settings);
@@ -2815,6 +2916,131 @@ TEST_CASE("scrub.basic")
CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash()));
}
+// Exercises KeepUnusedReferences with empty inputs, full overlap and used entries
+// located at the front, middle and back of the candidate list.
+TEST_CASE("gc.keepunusedreferences")
+{
+    // Listed in ascending order, so any in-order subset is already sorted
+    const IoHash Hashes[] = {IoHash::FromHexString("177030568fdd461bf4fe5ddbf4d463e514e8178e"),
+                             IoHash::FromHexString("372d795bb907a15cab15ab3917854bfef7e7af2c"),
+                             IoHash::FromHexString("75ab3917854bfef7e72d795bb907a15cab1af2c3"),
+                             IoHash::FromHexString("ab3917854bfef7e7af2c372d795bb907a15cab15"),
+                             IoHash::FromHexString("d1df59fcab06793a5f2c372d795bb907a15cab15")};
+
+    const std::vector<IoHash> None;
+    const std::vector<IoHash> All{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]};
+
+    // Runs KeepUnusedReferences on private copies of the inputs and checks that the
+    // surviving entries are exactly Expected, in order
+    const auto Verify = [](std::vector<IoHash> UsedReferences, std::vector<IoHash> References, const std::vector<IoHash>& Expected) {
+        std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References);
+        CHECK(UnusedReferences.size() == Expected.size());
+        for (size_t Index = 0; Index < Expected.size(); ++Index)
+        {
+            CHECK(UnusedReferences[Index] == Expected[Index]);
+        }
+    };
+
+    // Empty inputs
+    Verify(None, None, None);
+    Verify(All, None, None);
+    Verify(None, All, All);
+
+    // Every candidate is in use
+    Verify(All, All, None);
+
+    // Several used entries: interleaved, at the back, at the front, with a single gap
+    Verify({Hashes[0], Hashes[2], Hashes[4]}, All, {Hashes[1], Hashes[3]});
+    Verify({Hashes[2], Hashes[3], Hashes[4]}, All, {Hashes[0], Hashes[1]});
+    Verify({Hashes[0], Hashes[1], Hashes[2]}, All, {Hashes[3], Hashes[4]});
+    Verify({Hashes[0], Hashes[1], Hashes[2], Hashes[4]}, All, {Hashes[3]});
+    Verify({Hashes[1], Hashes[3]}, All, {Hashes[0], Hashes[2], Hashes[4]});
+
+    // A single used entry - the candidates after it are shifted down as one block
+    Verify({Hashes[0]}, All, {Hashes[1], Hashes[2], Hashes[3], Hashes[4]});
+    Verify({Hashes[1]}, All, {Hashes[0], Hashes[2], Hashes[3], Hashes[4]});
+    Verify({Hashes[3]}, All, {Hashes[0], Hashes[1], Hashes[2], Hashes[4]});
+    Verify({Hashes[4]}, All, {Hashes[0], Hashes[1], Hashes[2], Hashes[3]});
+}
+
+
#endif
void
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index a735893a1..8e31d3222 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -343,18 +343,18 @@ public:
metrics::RequestStats m_PutOps;
metrics::RequestStats m_GetOps;
- mutable RwLock m_IndexLock;
- IndexMap m_Index;
- std::vector<AccessTime> m_AccessTimes;
- std::vector<BucketPayload> m_Payloads;
- std::vector<BucketMetaData> m_MetaDatas;
- std::vector<MetaDataIndex> m_FreeMetaDatas;
- std::vector<MemCacheData> m_MemCachedPayloads;
- std::vector<MemCachedIndex> m_FreeMemCachedPayloads;
- std::unique_ptr<HashSet> m_TrackedCacheKeys;
- std::unique_ptr<HashSet> m_TrackedReferences;
- std::atomic_uint64_t m_StandaloneSize{};
- std::atomic_uint64_t m_MemCachedSize{};
+ mutable RwLock m_IndexLock;
+ IndexMap m_Index;
+ std::vector<AccessTime> m_AccessTimes;
+ std::vector<BucketPayload> m_Payloads;
+ std::vector<BucketMetaData> m_MetaDatas;
+ std::vector<MetaDataIndex> m_FreeMetaDatas;
+ std::vector<MemCacheData> m_MemCachedPayloads;
+ std::vector<MemCachedIndex> m_FreeMemCachedPayloads;
+ std::unique_ptr<HashSet> m_TrackedCacheKeys;
+ std::unique_ptr<std::vector<IoHash>> m_TrackedReferences;
+ std::atomic_uint64_t m_StandaloneSize{};
+ std::atomic_uint64_t m_MemCachedSize{};
virtual std::string GetGcName(GcCtx& Ctx) override;
virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index a8c5c0219..3f2f5448d 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -64,6 +64,8 @@ struct GcSettings
90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less
// usage than CompactBlockUsageThresholdPercent
std::filesystem::path DiskReservePath;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
};
struct GcCompactStoreStats
@@ -166,10 +168,10 @@ public:
/**
* @brief An interface to check if a set of Cids are referenced
*
- * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times.
+ * Instance will be deleted after GetUnusedReferences has been called 0-n times.
*
* During construction of the GcReferenceChecker the world is not stopped and this is a good
- * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly.
+ * place to do caching to be able to execute LockState and GetUnusedReferences quickly.
*/
class GcReferenceChecker
{
@@ -188,16 +190,19 @@ public:
// *IMPORTANT* Do *not* take any locks (shared or exclusive) in this code.
// This is because we need to acquire the locks in an ordered manner and not end up in a deadlock due to other code
// trying to get exclusive locks halfway through our execution.
- // Called once before any calls to RemoveUsedReferencesFromSet.
+ // Called once before any calls to GetUnusedReferences.
// The implementation should be as fast as possible as UpdateLockedState is part of a stop the world (from changes)
// until all instances of GcReferenceChecker UpdateLockedState are completed
virtual void UpdateLockedState(GcCtx& Ctx) = 0;
// Go through IoCids and see which ones are referenced. If it is the reference must be removed from IoCids
// This function should use pre-cached information on what is referenced as we are in stop the world mode
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0;
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) = 0;
};
+std::span<IoHash> KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences);
+bool FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences);
+
/**
* @brief An interface to implement a lock for Stop The World (from writing new data)
*
@@ -209,7 +214,7 @@ public:
virtual ~GcReferenceLocker() = default;
// Take all the locks needed to execute UpdateLockedState for the all the GcReferenceChecker in your domain
- // Once all the GcReferenceChecker has executed UpdateLockedState and RemoveUsedReferencesFromSet for all
+ // Once execution of UpdateLockedState and GetUnusedReferences by every GcReferenceChecker in all
// domains has completed, the locks will be disposed and writes are allowed once again
virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) = 0;
};
@@ -245,7 +250,7 @@ public:
virtual std::string GetGcName(GcCtx& Ctx) = 0;
- typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
+ typedef std::function<std::span<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
// Check a set of references to see if they are in use.
// Use the GetUnusedReferences input function to check if references are used and update any pointers
@@ -520,6 +525,8 @@ public:
std::optional<uint32_t> CompactBlockUsageThresholdPercent;
std::optional<bool> Verbose;
std::optional<bool> SingleThreaded;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
};
bool TriggerGc(const TriggerGcParams& Params);
@@ -547,6 +554,8 @@ private:
uint32_t CompactBlockUsageThresholdPercent,
bool Verbose,
bool SingleThreaded,
+ const IoHash& AttachmentRangeMin,
+ const IoHash& AttachmentRangeMax,
bool SilenceErrors);
void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice);
LoggerRef Log() { return m_Log; }