diff options
| author | Dan Engelbrecht <[email protected]> | 2024-09-23 19:19:40 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-09-23 19:19:40 +0200 |
| commit | bc9e590727211d803cce7be84c1cbc026179b841 (patch) | |
| tree | 96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src | |
| parent | made fmt formatter format function const (#162) (diff) | |
| download | zen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz zen-bc9e590727211d803cce7be84c1cbc026179b841.zip | |
gc unused refactor (#165)
* optimize IoHash and Oid comparisons
* refactor filtering of unused references
* add attachment filtering to gc
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/admin_cmd.cpp | 44 | ||||
| -rw-r--r-- | src/zen/cmds/admin_cmd.h | 2 | ||||
| -rw-r--r-- | src/zencore/include/zencore/iohash.h | 8 | ||||
| -rw-r--r-- | src/zencore/include/zencore/memory.h | 35 | ||||
| -rw-r--r-- | src/zencore/include/zencore/uid.h | 4 | ||||
| -rw-r--r-- | src/zencore/iohash.cpp | 6 | ||||
| -rw-r--r-- | src/zenserver/admin/admin.cpp | 10 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.cpp | 49 | ||||
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 54 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 24 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 14 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 14 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 258 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 24 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 21 |
15 files changed, 448 insertions, 119 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index f5bd15ea2..dd0bf83de 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -119,6 +119,18 @@ GcCommand::GcCommand() "Force GC to run single threaded", cxxopts::value(m_SingleThreaded)->default_value("false"), "<single-threaded>"); + m_Options.add_option("", + "", + "reference-low", + "Reference filter lower limit - defaults to no limit", + cxxopts::value(m_ReferenceHashLow), + "<reflowlimit>"); + m_Options.add_option("", + "", + "reference-high", + "Reference filter higher limit - defaults to no limit", + cxxopts::value(m_ReferenceHashHigh), + "<refhighlimit>"); } GcCommand::~GcCommand() @@ -170,6 +182,38 @@ GcCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { Params.Add({"compactblockthreshold", fmt::format("{}", m_CompactBlockThreshold)}); } + IoHash LowRef = IoHash::Zero; + if (!m_ReferenceHashLow.empty()) + { + if (m_ReferenceHashLow.length() != IoHash::StringLength) + { + throw OptionParseException(fmt::format("reference-low must be a {} character hex string", IoHash::StringLength)); + } + LowRef = IoHash::FromHexString(m_ReferenceHashLow); + } + IoHash HighRef = IoHash::Max; + if (!m_ReferenceHashHigh.empty()) + { + if (m_ReferenceHashHigh.length() != IoHash::StringLength) + { + throw OptionParseException(fmt::format("reference-high must be a {} character hex string", IoHash::StringLength)); + } + HighRef = IoHash::FromHexString(m_ReferenceHashHigh); + } + + if (HighRef < LowRef) + { + throw OptionParseException(fmt::format("invalid reference range, reference-high must be higher value than reference-low")); + } + if (LowRef != IoHash::Zero) + { + Params.Add({"referencehashlow", LowRef.ToHexString()}); + } + if (HighRef != IoHash::Max) + { + Params.Add({"referencehashhigh", HighRef.ToHexString()}); + } + Params.Add({"verbose", m_Verbose ? "true" : "false"}); Params.Add({"singlethreaded", m_SingleThreaded ? 
"true" : "false"}); diff --git a/src/zen/cmds/admin_cmd.h b/src/zen/cmds/admin_cmd.h index e26e7f4e7..f5dd33d32 100644 --- a/src/zen/cmds/admin_cmd.h +++ b/src/zen/cmds/admin_cmd.h @@ -51,6 +51,8 @@ private: uint32_t m_CompactBlockThreshold = 90; bool m_Verbose{false}; bool m_SingleThreaded{false}; + std::string m_ReferenceHashLow; + std::string m_ReferenceHashHigh; }; class GcStatusCommand : public StorageCommand diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h index ff902399e..a8fc9e6c1 100644 --- a/src/zencore/include/zencore/iohash.h +++ b/src/zencore/include/zencore/iohash.h @@ -1,5 +1,4 @@ // Copyright Epic Games, Inc. All Rights Reserved. -// Copyright Epic Games, Inc. All Rights Reserved. #pragma once @@ -55,10 +54,11 @@ struct IoHash StringBuilderBase& ToHexString(StringBuilderBase& outBuilder) const; std::string ToHexString() const; - static const int StringLength = 40; - typedef char String_t[StringLength + 1]; + static constexpr int StringLength = 40; + typedef char String_t[StringLength + 1]; static const IoHash Zero; // Initialized to all zeros + static const IoHash Max; // Initialized to all ones inline auto operator<=>(const IoHash& rhs) const = default; inline bool operator==(const IoHash& rhs) const @@ -75,7 +75,7 @@ struct IoHash return LhsHash[0] != RhsHash[0] || LhsHash[1] != RhsHash[1] || LhsHash[2] != RhsHash[2] || LhsHash[3] != RhsHash[3] || LhsHash[4] != RhsHash[4]; } - inline bool operator<(const IoHash& rhs) const { return memcmp(Hash, rhs.Hash, sizeof Hash) < 0; } + inline bool operator<(const IoHash& rhs) const { return MemCmpFixed<sizeof Hash, std::uint32_t>(Hash, rhs.Hash) < 0; } struct Hasher { diff --git a/src/zencore/include/zencore/memory.h b/src/zencore/include/zencore/memory.h index 6419252ff..7a893d3ab 100644 --- a/src/zencore/include/zencore/memory.h +++ b/src/zencore/include/zencore/memory.h @@ -402,6 +402,41 @@ MakeMutableMemoryView(void* Data, void* DataEnd) return 
MutableMemoryView(Data, DataEnd); } +template<std::size_t SIZE> +inline int +MemCmpFixed(const void* a1, const void* a2) +{ + auto const s1 = reinterpret_cast<const unsigned char*>(a1); + auto const s2 = reinterpret_cast<const unsigned char*>(a2); + auto const diff = *s1 - *s2; + return diff ? diff : MemCmpFixed<SIZE - 1>(s1 + 1, s2 + 1); +} + +template<> +inline int +MemCmpFixed<0>(const void*, const void*) +{ + return 0; +} + +template<std::size_t SIZE, typename EQTYPE> +inline int +MemCmpFixed(const void* a1, const void* a2) +{ + ZEN_ASSERT_SLOW((uintptr_t(a1) & (sizeof(EQTYPE) - 1)) == 0); + ZEN_ASSERT_SLOW((uintptr_t(a2) & (sizeof(EQTYPE) - 1)) == 0); + auto const s1 = reinterpret_cast<const EQTYPE*>(a1); + auto const s2 = reinterpret_cast<const EQTYPE*>(a2); + return (*s1 != *s2) ? MemCmpFixed<sizeof(EQTYPE)>(s1, s2) : MemCmpFixed<SIZE - sizeof(EQTYPE), EQTYPE>(s1 + 1, s2 + 1); +} + +template<> +inline int +MemCmpFixed<0, uint32_t>(const void*, const void*) +{ + return 0; +} + void memory_forcelink(); // internal } // namespace zen diff --git a/src/zencore/include/zencore/uid.h b/src/zencore/include/zencore/uid.h index f8b1ccf98..08a335392 100644 --- a/src/zencore/include/zencore/uid.h +++ b/src/zencore/include/zencore/uid.h @@ -2,6 +2,7 @@ #pragma once +#include <zencore/memory.h> #include <zencore/zencore.h> #include <compare> @@ -67,7 +68,8 @@ struct Oid std::string ToString() const; [[nodiscard]] static Oid FromMemory(const void* Ptr); - auto operator<=>(const Oid& rhs) const = default; + auto operator<=>(const Oid& rhs) const = default; + inline bool operator<(const Oid& rhs) const { return MemCmpFixed<sizeof OidBits, std::uint32_t>(OidBits, rhs.OidBits) < 0; } [[nodiscard]] inline explicit operator bool() const { return *this != Zero; } static const Oid Zero; // Min (can be used to signify a "null" value, or for open range queries) diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp index 1bf2c033d..8f3f8da26 100644 --- a/src/zencore/iohash.cpp 
+++ b/src/zencore/iohash.cpp @@ -12,7 +12,11 @@ namespace zen { -const IoHash IoHash::Zero{}; // Initialized to all zeros +static const uint8_t MaxData[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +const IoHash IoHash::Max = IoHash::MakeFrom(MaxData); // Initialized to all 0xff +const IoHash IoHash::Zero{}; // Initialized to all zeros IoHash IoHash::HashBuffer(const void* data, size_t byteCount) diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp index 1eeb5637a..cd336c715 100644 --- a/src/zenserver/admin/admin.cpp +++ b/src/zenserver/admin/admin.cpp @@ -416,6 +416,16 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, GcParams.SingleThreaded = Param == "true"sv; } + if (auto Param = Params.GetValue("referencehashlow"); Param.empty() == false) + { + GcParams.AttachmentRangeMin = IoHash::FromHexString(Param); + } + + if (auto Param = Params.GetValue("referencehashhigh"); Param.empty() == false) + { + GcParams.AttachmentRangeMax = IoHash::FromHexString(Param); + } + const bool Started = m_GcScheduler.TriggerGc(GcParams); CbObjectWriter Response; diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index fb1385266..be50a03e2 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -5410,15 +5410,18 @@ public: Oplog->GetAttachmentsLocked(m_References, m_ProjectStore.m_Config.StoreAttachmentMetaData); } + FilterReferences(Ctx, m_References); } - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override { - ZEN_TRACE_CPU("Store::RemoveUsedReferencesFromSet"); + ZEN_TRACE_CPU("Store::GetUnusedReferences"); auto Log = [&Ctx]() { return Ctx.Logger; }; - size_t InitialCount = IoCids.size(); + size_t InitialCount = IoCids.size(); + size_t 
UsedCount = InitialCount; + Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) @@ -5427,21 +5430,14 @@ public: } ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}", "projectstore", - InitialCount - IoCids.size(), + UsedCount, InitialCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - for (const IoHash& ReferenceHash : m_References) - { - if (IoCids.erase(ReferenceHash) == 1) - { - if (IoCids.empty()) - { - return; - } - } - } + std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; } private: @@ -5548,6 +5544,7 @@ public: Oplog->GetAttachmentsLocked(m_References, m_ProjectStore.m_Config.StoreAttachmentMetaData); m_OplogAccessTime = m_Project->LastOplogAccessTime(m_OplogId); + FilterReferences(Ctx, m_References); } virtual void UpdateLockedState(GcCtx& Ctx) override @@ -5595,13 +5592,15 @@ public: } } - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override { - ZEN_TRACE_CPU("Store::Oplog::RemoveUsedReferencesFromSet"); + ZEN_TRACE_CPU("Store::Oplog::GetUnusedReferences"); auto Log = [&Ctx]() { return Ctx.Logger; }; - size_t InitialCount = IoCids.size(); + const size_t InitialCount = IoCids.size(); + size_t UsedCount = InitialCount; + Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) @@ -5610,24 +5609,18 @@ public: } ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {} from {}/{}", m_OplogBasePath, - InitialCount - IoCids.size(), + UsedCount, InitialCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs()), m_Project->Identifier, m_OplogId); }); - for (const IoHash& ReferenceHash : m_References) - { - if (IoCids.erase(ReferenceHash) == 1) - { - if (IoCids.empty()) - { - return; - } - } - } + 
std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; } + ProjectStore& m_ProjectStore; Ref<ProjectStore::Project> m_Project; std::string m_OplogId; diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 63f6d708a..417b63fb4 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -1262,7 +1262,9 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept } if (m_TrackedReferences && HashKeyAndReferences.size() > 1) { - m_TrackedReferences->insert(HashKeyAndReferences.begin() + 1, HashKeyAndReferences.end()); + m_TrackedReferences->insert(m_TrackedReferences->end(), + HashKeyAndReferences.begin() + 1, + HashKeyAndReferences.end()); } if (auto It = m_Index.find(HashKey); It != m_Index.end()) { @@ -2963,7 +2965,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c } if (m_TrackedReferences) { - m_TrackedReferences->insert(References.begin(), References.end()); + m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end()); } PayloadIndex EntryIndex = {}; @@ -3130,7 +3132,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, } if (m_TrackedReferences) { - m_TrackedReferences->insert(References.begin(), References.end()); + m_TrackedReferences->insert(m_TrackedReferences->end(), References.begin(), References.end()); } if (auto It = m_Index.find(HashKey); It != m_Index.end()) { @@ -3782,17 +3784,19 @@ public: } ZEN_INFO("GCV2: cachebucket [PRECACHE] '{}': found {} references in {}", m_CacheBucket.m_BucketDir, - m_References.size(), + m_PrecachedReferences.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<HashSet>(); }); + 
m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = std::make_unique<std::vector<IoHash>>(); }); - bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_References); + bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_PrecachedReferences); if (!Continue) { m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences.reset(); }); + return; } + FilterReferences(Ctx, m_PrecachedReferences); } virtual void UpdateLockedState(GcCtx& Ctx) override @@ -3809,32 +3813,32 @@ public: } ZEN_INFO("GCV2: cachebucket [LOCKSTATE] '{}': found {} references in {}", m_CacheBucket.m_BucketDir, - m_References.size(), + m_PrecachedReferences.size() + m_AddedReferences.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); if (Ctx.IsCancelledFlag.load()) { - m_References = {}; + m_PrecachedReferences = {}; m_CacheBucket.m_TrackedReferences.reset(); return; } ZEN_ASSERT(m_CacheBucket.m_TrackedReferences); - HashSet& AddedReferences(*m_CacheBucket.m_TrackedReferences); - m_References.reserve(m_References.size() + AddedReferences.size()); - m_References.insert(m_References.end(), AddedReferences.begin(), AddedReferences.end()); - AddedReferences = {}; + m_AddedReferences = std::move(*m_CacheBucket.m_TrackedReferences); + FilterReferences(Ctx, m_AddedReferences); } - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override { - ZEN_TRACE_CPU("Z$::Bucket::RemoveUsedReferencesFromSet"); + ZEN_TRACE_CPU("Z$::Bucket::GetUnusedReferences"); auto Log = [&Ctx]() { return Ctx.Logger; }; - size_t InitialCount = IoCids.size(); + const size_t InitialCount = IoCids.size(); + size_t UsedCount = InitialCount; + Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) @@ -3843,24 +3847,20 @@ public: } ZEN_INFO("GCV2: cachebucket [FILTER REFERENCES] '{}': 
filtered out {} used references out of {} in {}", m_CacheBucket.m_BucketDir, - InitialCount - IoCids.size(), + UsedCount, InitialCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - for (const IoHash& ReferenceHash : m_References) - { - if (IoCids.erase(ReferenceHash) == 1) - { - if (IoCids.empty()) - { - return; - } - } - } + std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_PrecachedReferences, IoCids); + UnusedReferences = KeepUnusedReferences(m_AddedReferences, UnusedReferences); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; } + CacheBucket& m_CacheBucket; - std::vector<IoHash> m_References; + std::vector<IoHash> m_PrecachedReferences; + std::vector<IoHash> m_AddedReferences; }; std::vector<GcReferenceChecker*> diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index 7794d025f..9f1bcb41a 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -1189,15 +1189,18 @@ public: break; } } + FilterReferences(Ctx, m_References); } - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) override + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override { - ZEN_TRACE_CPU("Z$::RemoveUsedReferencesFromSet"); + ZEN_TRACE_CPU("Z$::GetUnusedReferences"); auto Log = [&Ctx]() { return Ctx.Logger; }; - size_t InitialCount = IoCids.size(); + const size_t InitialCount = IoCids.size(); + size_t UsedCount = InitialCount; + Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) @@ -1206,21 +1209,14 @@ public: } ZEN_INFO("GCV2: projectstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}", "projectstore", - InitialCount - IoCids.size(), + UsedCount, InitialCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - for (const IoHash& ReferenceHash : m_References) - { - if (IoCids.erase(ReferenceHash) == 1) - { - if (IoCids.empty()) - { - 
return; - } - } - } + std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; } private: diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 15f80d4cf..e0a7900f1 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -867,9 +867,9 @@ public: NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); - Stats.CheckedCount = m_Cids.size(); - Stats.FoundCount = UnusedCids.size(); + std::span<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { @@ -967,7 +967,11 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) CidsToCheck.push_back(It.first); } } - return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); + if (FilterReferences(Ctx, CidsToCheck)) + { + return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); + } + return nullptr; } void @@ -2018,7 +2022,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType) HashKeySet Deleted; GcStats Stats; GcStoreCompactor* Compactor = - Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::vector<IoHash> { + Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> { std::vector<IoHash> Unreferenced; HashKeySet Retain; Retain.AddHashesToSet(KeepHashes); diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 733140e50..7bd17ee88 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1653,9 +1653,9 @@ public: NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); - Stats.CheckedCount = m_Cids.size(); - Stats.FoundCount = UnusedCids.size(); + std::span<IoHash> UnusedCids = 
GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { // Nothing to collect @@ -1699,7 +1699,7 @@ public: } } - return new FileCasStoreCompactor(m_FileCasStrategy, std::move(UnusedCids)); + return new FileCasStoreCompactor(m_FileCasStrategy, std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end())); } private: @@ -1745,7 +1745,11 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) CidsToCheck.push_back(It.first); } } - return new FileCasReferencePruner(*this, std::move(CidsToCheck)); + if (FilterReferences(Ctx, CidsToCheck)) + { + return new FileCasReferencePruner(*this, std::move(CidsToCheck)); + } + return nullptr; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 28bdd2f42..904619222 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -571,6 +571,95 @@ Sum(GcResult& Stat, bool Cancelled = false) return Stat; } +bool +FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences) +{ + if (InOutReferences.empty()) + { + return false; + } + if (Ctx.Settings.AttachmentRangeMax != IoHash::Max || Ctx.Settings.AttachmentRangeMin != IoHash::Zero) + { + size_t TotalCount = InOutReferences.size(); + std::erase_if(InOutReferences, [&Ctx](const IoHash& Key) { + return ((Ctx.Settings.AttachmentRangeMax < Key) || (Key < Ctx.Settings.AttachmentRangeMin)); + }); + size_t RemovedCount = TotalCount - InOutReferences.size(); + ZEN_INFO("Skipped GC for {}% of references ({} out of {}) due to attachment filtering with range {} to {}", + (100 * RemovedCount) / TotalCount, + RemovedCount, + TotalCount, + Ctx.Settings.AttachmentRangeMin, + Ctx.Settings.AttachmentRangeMax); + } + if (InOutReferences.empty()) + { + return false; + } + std::sort(InOutReferences.begin(), InOutReferences.end()); + auto NewEnd = std::unique(InOutReferences.begin(), InOutReferences.end()); + 
InOutReferences.erase(NewEnd, InOutReferences.end()); + return true; +} + +std::span<IoHash> +KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences) +{ + if (SortedUsedReferences.empty()) + { + return SortedReferences; + } + if (SortedReferences.empty()) + { + return {}; + } + + const IoHash* ReferencesRead = &SortedReferences[0]; + IoHash* ReferencesWrite = &SortedReferences[0]; + const IoHash* ReferencesEnd = ReferencesRead + SortedReferences.size(); + + const IoHash* UsedReferencesRead = &SortedUsedReferences[0]; + const IoHash* UsedReferencesReadEnd = UsedReferencesRead + SortedUsedReferences.size(); + + while (ReferencesRead != ReferencesEnd && UsedReferencesRead != UsedReferencesReadEnd) + { + const IoHash& Reference = *ReferencesRead; + const IoHash& UsedReference = *UsedReferencesRead; + if (Reference == UsedReference) + { + // Skip it + ReferencesRead++; + UsedReferencesRead++; + } + else if (Reference < UsedReference) + { + // Keep it + if (ReferencesRead > ReferencesWrite) + { + *ReferencesWrite = Reference; + } + ReferencesWrite++; + ReferencesRead++; + } + else + { + // Skip it + UsedReferencesRead++; + } + } + + size_t Remaining = std::distance(ReferencesRead, ReferencesEnd); + if (Remaining > 0) + { + if (ReferencesRead != ReferencesWrite) + { + memcpy(ReferencesWrite, ReferencesRead, sizeof(IoHash::Hash) * Remaining); + } + ReferencesWrite += Remaining; + } + return SortedReferences.subspan(0, (size_t)std::distance(&SortedReferences[0], ReferencesWrite)); +} + void GcManager::AddGcReferencer(GcReferencer& Referencer) { @@ -987,18 +1076,19 @@ GcManager::CollectGarbage(const GcSettings& Settings) return Sum(Result, true); } { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> 
std::span<IoHash> { + std::span<IoHash> UnusedCids(References); + ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero); for (const auto& It : ReferenceCheckers) { GcReferenceChecker* ReferenceChecker = It.first.get(); - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); + UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids); if (UnusedCids.empty()) { return {}; } } - return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + return UnusedCids; }; // checking all Cids agains references in cache @@ -1768,6 +1858,8 @@ GcScheduler::SchedulerThread() uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent; bool Verbose = m_Config.Verbose; bool SingleThreaded = m_Config.SingleThreaded; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; bool DiskSpaceGCTriggered = false; bool TimeBasedGCTriggered = false; @@ -1804,9 +1896,11 @@ GcScheduler::SchedulerThread() UseGCVersion = TriggerParams.ForceGCVersion.value_or(UseGCVersion); CompactBlockUsageThresholdPercent = TriggerParams.CompactBlockUsageThresholdPercent.value_or(CompactBlockUsageThresholdPercent); - Verbose = TriggerParams.Verbose.value_or(Verbose); - SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); - DoGc = true; + Verbose = TriggerParams.Verbose.value_or(Verbose); + SingleThreaded = TriggerParams.SingleThreaded.value_or(SingleThreaded); + AttachmentRangeMin = TriggerParams.AttachmentRangeMin; + AttachmentRangeMax = TriggerParams.AttachmentRangeMax; + DoGc = true; } if (m_TriggerScrubParams) @@ -2025,6 +2119,8 @@ GcScheduler::SchedulerThread() CompactBlockUsageThresholdPercent, Verbose, SingleThreaded, + AttachmentRangeMin, + AttachmentRangeMax, SilenceErrors); if (!GcSuccess) { @@ -2124,6 +2220,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, uint32_t CompactBlockUsageThresholdPercent, bool Verbose, bool SingleThreaded, + const IoHash& 
AttachmentRangeMin, + const IoHash& AttachmentRangeMax, bool SilenceErrors) { ZEN_TRACE_CPU("GcScheduler::CollectGarbage"); @@ -2193,15 +2291,18 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, break; case GcVersion::kV2: { - const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, - .ProjectStoreExpireTime = ProjectStoreExpireTime, - .CollectSmallObjects = CollectSmallObjects, - .IsDeleteMode = Delete, - .SkipCidDelete = SkipCid, - .Verbose = Verbose, - .SingleThread = SingleThreaded, - .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, - .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; + const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, + .ProjectStoreExpireTime = ProjectStoreExpireTime, + .CollectSmallObjects = CollectSmallObjects, + .IsDeleteMode = Delete, + .SkipCidDelete = SkipCid, + .Verbose = Verbose, + .SingleThread = SingleThreaded, + .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, + .DiskReservePath = m_Config.RootDirectory / "reserve.gc", + .AttachmentRangeMin = AttachmentRangeMin, + .AttachmentRangeMax = AttachmentRangeMax}; + GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); @@ -2815,6 +2916,131 @@ TEST_CASE("scrub.basic") CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); } +TEST_CASE("gc.keepunusedreferences") +{ + const IoHash Hashes[] = {IoHash::FromHexString("177030568fdd461bf4fe5ddbf4d463e514e8178e"), + IoHash::FromHexString("372d795bb907a15cab15ab3917854bfef7e7af2c"), + IoHash::FromHexString("75ab3917854bfef7e72d795bb907a15cab1af2c3"), + IoHash::FromHexString("ab3917854bfef7e7af2c372d795bb907a15cab15"), + IoHash::FromHexString("d1df59fcab06793a5f2c372d795bb907a15cab15")}; + { + std::vector<IoHash> UsedReferences; + std::vector<IoHash> References; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + 
{ + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> UsedReferences{}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 5); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.empty()); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[2], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[1]); + CHECK(UnusedReferences[1] == Hashes[3]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[2], Hashes[3], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 2); + CHECK(UnusedReferences[0] == Hashes[3]); + CHECK(UnusedReferences[1] == Hashes[4]); + } + + { + 
std::vector<IoHash> UsedReferences{Hashes[0], Hashes[1], Hashes[2], Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 1); + CHECK(UnusedReferences[0] == Hashes[3]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[1], Hashes[3]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 3); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[0]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[1]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[3]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[1]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[2]); + CHECK(UnusedReferences[2] == Hashes[3]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[3]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] 
== Hashes[1]); + CHECK(UnusedReferences[2] == Hashes[2]); + CHECK(UnusedReferences[3] == Hashes[4]); + } + + { + std::vector<IoHash> UsedReferences{Hashes[4]}; + std::vector<IoHash> References{Hashes[0], Hashes[1], Hashes[2], Hashes[3], Hashes[4]}; + std::span<IoHash> UnusedReferences = KeepUnusedReferences(UsedReferences, References); + CHECK(UnusedReferences.size() == 4); + CHECK(UnusedReferences[0] == Hashes[0]); + CHECK(UnusedReferences[1] == Hashes[1]); + CHECK(UnusedReferences[2] == Hashes[2]); + CHECK(UnusedReferences[3] == Hashes[3]); + } +} + #endif void diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index a735893a1..8e31d3222 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -343,18 +343,18 @@ public: metrics::RequestStats m_PutOps; metrics::RequestStats m_GetOps; - mutable RwLock m_IndexLock; - IndexMap m_Index; - std::vector<AccessTime> m_AccessTimes; - std::vector<BucketPayload> m_Payloads; - std::vector<BucketMetaData> m_MetaDatas; - std::vector<MetaDataIndex> m_FreeMetaDatas; - std::vector<MemCacheData> m_MemCachedPayloads; - std::vector<MemCachedIndex> m_FreeMemCachedPayloads; - std::unique_ptr<HashSet> m_TrackedCacheKeys; - std::unique_ptr<HashSet> m_TrackedReferences; - std::atomic_uint64_t m_StandaloneSize{}; - std::atomic_uint64_t m_MemCachedSize{}; + mutable RwLock m_IndexLock; + IndexMap m_Index; + std::vector<AccessTime> m_AccessTimes; + std::vector<BucketPayload> m_Payloads; + std::vector<BucketMetaData> m_MetaDatas; + std::vector<MetaDataIndex> m_FreeMetaDatas; + std::vector<MemCacheData> m_MemCachedPayloads; + std::vector<MemCachedIndex> m_FreeMemCachedPayloads; + std::unique_ptr<HashSet> m_TrackedCacheKeys; + std::unique_ptr<std::vector<IoHash>> m_TrackedReferences; + std::atomic_uint64_t m_StandaloneSize{}; + std::atomic_uint64_t m_MemCachedSize{}; virtual std::string GetGcName(GcCtx& 
Ctx) override; virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index a8c5c0219..3f2f5448d 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -64,6 +64,8 @@ struct GcSettings 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less // usage than CompactBlockUsageThresholdPercent std::filesystem::path DiskReservePath; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; }; struct GcCompactStoreStats @@ -166,10 +168,10 @@ public: /** * @brief An interface to check if a set of Cids are referenced * - * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times. + * Instance will be deleted after GetUnusedReferences has been called 0-n times. * * During construction of the GcReferenceChecker the world is not stopped and this is a good - * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly. + * place to do caching to be able to execute LockState and GetUnusedReferences quickly. */ class GcReferenceChecker { @@ -188,16 +190,19 @@ public: // *IMPORTANT* Do *not* take any locks (shared or exclusive) in this code. // This is because we need to acquire the locks in an ordered manner and not end up in a deadlock due to other code // trying to get exclusive locks halfway through our execution. - // Called once before any calls to RemoveUsedReferencesFromSet. + // Called once before any calls to GetUnusedReferences. // The implementation should be as fast as possible as UpdateLockedState is part of a stop the world (from changes) // until all instances of GcReferenceChecker UpdateLockedState are completed virtual void UpdateLockedState(GcCtx& Ctx) = 0; // Go through IoCids and see which ones are referenced. 
If it is the reference must be removed from IoCids // This function should use pre-cached information on what is referenced as we are in stop the world mode - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0; + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) = 0; }; +std::span<IoHash> KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences); +bool FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences); + /** * @brief An interface to implement a lock for Stop The World (from writing new data) * @@ -209,7 +214,7 @@ public: virtual ~GcReferenceLocker() = default; // Take all the locks needed to execute UpdateLockedState for the all the GcReferenceChecker in your domain - // Once all the GcReferenceChecker has executed UpdateLockedState and RemoveUsedReferencesFromSet for all + // Once all the GcReferenceChecker has executed UpdateLockedState and GetUnusedReferences for all // domains has completed, the locks will be disposed and writes are allowed once again virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) = 0; }; @@ -245,7 +250,7 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; - typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; + typedef std::function<std::span<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; // Check a set of references to see if they are in use. 
// Use the GetUnusedReferences input function to check if references are used and update any pointers @@ -520,6 +525,8 @@ public: std::optional<uint32_t> CompactBlockUsageThresholdPercent; std::optional<bool> Verbose; std::optional<bool> SingleThreaded; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; }; bool TriggerGc(const TriggerGcParams& Params); @@ -547,6 +554,8 @@ private: uint32_t CompactBlockUsageThresholdPercent, bool Verbose, bool SingleThreaded, + const IoHash& AttachmentRangeMin, + const IoHash& AttachmentRangeMax, bool SilenceErrors); void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice); LoggerRef Log() { return m_Log; } |