aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/compactcas.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-30 09:32:54 +0100
committerGitHub <[email protected]>2023-10-30 09:32:54 +0100
commit3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch)
tree593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenstore/compactcas.cpp
parentset up arch properly when running tests (mac) (#505) (diff)
downloadzen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz
zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenstore/compactcas.cpp')
-rw-r--r--src/zenstore/compactcas.cpp217
1 files changed, 217 insertions, 0 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 115bdcf03..f93dafa21 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -117,10 +117,12 @@ namespace {
// Constructs the strategy and registers it with the supplied GC manager, both as a GC storage and as a GC reference store.
CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : m_Log(logging::Get("containercas")), m_Gc(Gc)
{
m_Gc.AddGcStorage(this);
+ m_Gc.AddGcReferenceStore(*this); // added by this change: also register as a reference store with the GC manager
}
// Unregisters the strategy from the GC manager, in the reverse order of the registrations made in the constructor.
CasContainerStrategy::~CasContainerStrategy()
{
+ m_Gc.RemoveGcReferenceStore(*this); // registered last in the constructor, so removed first
m_Gc.RemoveGcStorage(this);
}
@@ -551,6 +553,221 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
GcCtx.AddDeletedCids(DeletedChunks);
}
+// Compaction step for the compact CAS container. Instances are created by
+// CasContainerReferencePruner::RemoveUnreferencedData and carry the state collected there:
+//  - CompactState:     the blocks to compact together with the chunk locations to keep in them
+//  - CompactStateKeys: the cid for each kept location, indexed by the chunk index that
+//                      BlockStore::CompactBlocks reports back for moved chunks
+//  - PrunedKeys:       the cids that were pruned, used for statistics only
+class CasContainerStoreCompactor : public GcReferenceStoreCompactor
+{
+public:
+    CasContainerStoreCompactor(CasContainerStrategy&    Owner,
+                               BlockStoreCompactState&& CompactState,
+                               std::vector<IoHash>&&    CompactStateKeys,
+                               std::vector<IoHash>&&    PrunedKeys)
+    : m_CasContainerStrategy(Owner)
+    , m_CompactState(std::move(CompactState))
+    , m_CompactStateKeys(std::move(CompactStateKeys))
+    , m_PrunedKeys(std::move(PrunedKeys))
+    {
+    }
+
+    // Compacts the owner's block store according to the collected compact state and records the
+    // new location of every chunk that was moved, both in the in-memory index and in the CAS log.
+    // Does nothing (besides emitting the debug log line) unless the GC runs in delete mode with
+    // small object collection enabled.
+    virtual void CompactReferenceStore(GcCtx& Ctx) override
+    {
+        size_t     CompactedCount = 0;
+        Stopwatch  Timer;
+        const auto _ = MakeGuard([&] {
+            ZEN_DEBUG("gc block store '{}': compacted {} cids in {}",
+                      m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName,
+                      CompactedCount,
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+        });
+
+        if (!Ctx.Settings.IsDeleteMode || !Ctx.Settings.CollectSmallObjects)
+        {
+            return;
+        }
+
+        // Compact block store
+        m_CasContainerStrategy.m_BlockStore.CompactBlocks(
+            m_CompactState,
+            m_CasContainerStrategy.m_PayloadAlignment,
+            [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) {
+                std::vector<CasDiskIndexEntry> MovedEntries;
+                MovedEntries.reserve(MovedArray.size());
+
+                // The index is updated and the log is appended under the same exclusive lock
+                RwLock::ExclusiveLockScope IndexLock(m_CasContainerStrategy.m_LocationMapLock);
+                for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray)
+                {
+                    const size_t  ChunkIndex = Moved.first;
+                    const IoHash& Key        = m_CompactStateKeys[ChunkIndex];
+
+                    auto It = m_CasContainerStrategy.m_LocationMap.find(Key);
+                    if (It == m_CasContainerStrategy.m_LocationMap.end())
+                    {
+                        // The key is no longer present in the index, nothing to update
+                        continue;
+                    }
+
+                    BlockStoreDiskLocation&   Location    = m_CasContainerStrategy.m_Locations[It->second];
+                    const BlockStoreLocation& OldLocation = m_CompactState.GetLocation(ChunkIndex);
+                    if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation)
+                    {
+                        // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a
+                        // later time
+                        continue;
+                    }
+
+                    const BlockStoreLocation& NewLocation = Moved.second;
+                    Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment);
+                    MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location});
+                }
+                m_CasContainerStrategy.m_CasLog.Append(MovedEntries);
+                Ctx.RemovedDiskSpace.fetch_add(FreedDiskSpace);
+            },
+            // NOTE(review): this second callback always returns 0 - its meaning is defined by
+            // BlockStore::CompactBlocks, confirm against that declaration
+            [&]() { return 0; });
+
+        CompactedCount = m_PrunedKeys.size();
+        Ctx.CompactedReferences.fetch_add(
+            CompactedCount);  // Slightly misleading, it might not be compacted if the block is the currently writing block
+    }
+
+private:
+    CasContainerStrategy&  m_CasContainerStrategy;
+    BlockStoreCompactState m_CompactState;
+    std::vector<IoHash>    m_CompactStateKeys;
+    std::vector<IoHash>    m_PrunedKeys;
+};
+
+// Pruning step for the compact CAS container. Holds the cids that were present in the
+// container index when CasContainerStrategy::CreateReferencePruner was called and removes the
+// ones that are reported as unused.
+class CasContainerReferencePruner : public GcReferencePruner
+{
+public:
+    CasContainerReferencePruner(CasContainerStrategy& Owner, std::vector<IoHash>&& Cids)
+    : m_CasContainerStrategy(Owner)
+    , m_Cids(std::move(Cids))
+    {
+    }
+
+    // Asks GetUnusedReferences which of the held cids are unused and prunes those from the index.
+    //
+    // In delete mode the unused cids are erased from the location map and a tombstone entry is
+    // appended to the CAS log for each of them; otherwise the index is left untouched and only
+    // the statistics and the compact state are produced.
+    //
+    // Returns nullptr when there is nothing to collect (no unused cids, or small object
+    // collection is disabled), otherwise a heap allocated compactor that the caller takes
+    // ownership of.
+    virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, const GetUnusedReferencesFunc& GetUnusedReferences) override
+    {
+        size_t     TotalCount = m_Cids.size();
+        size_t     PruneCount = 0;
+        Stopwatch  Timer;
+        const auto _ = MakeGuard([&] {
+            ZEN_DEBUG("gc block store '{}': removed {} unused cid out of {} in {}",
+                      m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName,
+                      PruneCount,
+                      TotalCount,
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+        });
+
+        std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+        m_Cids.clear();
+
+        if (UnusedCids.empty())
+        {
+            // Nothing to collect
+            return nullptr;
+        }
+
+        if (!Ctx.Settings.CollectSmallObjects)
+        {
+            // Nothing is pruned from this store unless small objects are collected, so avoid
+            // taking the exclusive index lock and handing out a compactor with no work to do
+            return nullptr;
+        }
+
+        BlockStoreCompactState                 CompactState;
+        std::vector<IoHash>                    CompactStateKeys;
+        std::vector<CasDiskIndexEntry>         ExpiredEntries;
+        tsl::robin_set<IoHash, IoHash::Hasher> UnusedKeys;
+        ExpiredEntries.reserve(UnusedCids.size());
+
+        {
+            RwLock::ExclusiveLockScope __(m_CasContainerStrategy.m_LocationMapLock);
+
+            BlockStore::ReclaimSnapshotState BlockSnapshotState = m_CasContainerStrategy.m_BlockStore.GetReclaimSnapshotState();
+
+            for (const IoHash& Cid : UnusedCids)
+            {
+                auto It = m_CasContainerStrategy.m_LocationMap.find(Cid);
+                if (It == m_CasContainerStrategy.m_LocationMap.end())
+                {
+                    // Not present in the index (any longer), nothing to prune
+                    continue;
+                }
+
+                const BlockStoreDiskLocation& Location      = m_CasContainerStrategy.m_Locations[It->second];
+                const BlockStoreLocation      BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment);
+
+                UnusedKeys.insert(Cid);
+
+                // Blocks listed as active write blocks in the snapshot are not scheduled for compaction
+                const uint32_t BlockIndex = BlockLocation.BlockIndex;
+                if (!BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex))
+                {
+                    CompactState.AddBlock(BlockIndex);
+                }
+
+                ExpiredEntries.push_back(
+                    CasDiskIndexEntry{.Key = Cid, .Location = Location, .Flags = CasDiskIndexEntry::kTombstone});
+            }
+
+            // Get all locations we need to keep for affected blocks
+            if (!UnusedKeys.empty())
+            {
+                for (const auto& Entry : m_CasContainerStrategy.m_LocationMap)
+                {
+                    const IoHash& Key = Entry.first;
+                    if (UnusedKeys.contains(Key))
+                    {
+                        continue;
+                    }
+                    const BlockStoreDiskLocation& Location      = m_CasContainerStrategy.m_Locations[Entry.second];
+                    const BlockStoreLocation      BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment);
+                    if (CompactState.AddKeepLocation(BlockLocation))
+                    {
+                        // Pushed in the order the locations are accepted by AddKeepLocation; the
+                        // compactor later indexes this array with the chunk index of moved chunks
+                        CompactStateKeys.push_back(Key);
+                    }
+                }
+            }
+
+            if (Ctx.Settings.IsDeleteMode)
+            {
+                for (const CasDiskIndexEntry& Entry : ExpiredEntries)
+                {
+                    m_CasContainerStrategy.m_LocationMap.erase(Entry.Key);
+                }
+                m_CasContainerStrategy.m_CasLog.Append(ExpiredEntries);
+                m_CasContainerStrategy.m_CasLog.Flush();
+            }
+        }
+
+        PruneCount = UnusedKeys.size();
+        Ctx.PrunedReferences.fetch_add(PruneCount);
+        return new CasContainerStoreCompactor(m_CasContainerStrategy,
+                                              std::move(CompactState),
+                                              std::move(CompactStateKeys),
+                                              std::vector<IoHash>(UnusedKeys.begin(), UnusedKeys.end()));
+    }
+
+private:
+    CasContainerStrategy& m_CasContainerStrategy;
+    std::vector<IoHash>   m_Cids;
+};
+
+// Takes a snapshot of every cid key currently in the location map (under a shared lock) and
+// wraps it in a pruner that the caller takes ownership of. Returns nullptr when the index is
+// empty. The number of keys found is added to Ctx.References.
+GcReferencePruner*
+CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx)
+{
+    size_t     KeyCount = 0;
+    Stopwatch  Timer;
+    const auto _ = MakeGuard([&] {
+        ZEN_DEBUG("gc block store '{}': found {} cid keys to check in {}",
+                  m_RootDirectory / m_ContainerBaseName,
+                  KeyCount,
+                  NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+    });
+
+    std::vector<IoHash> Keys;
+    {
+        // Only the keys are copied while the lock is held
+        RwLock::SharedLockScope IndexLock(m_LocationMapLock);
+        Keys.reserve(m_LocationMap.size());
+        for (auto EntryIt = m_LocationMap.begin(); EntryIt != m_LocationMap.end(); ++EntryIt)
+        {
+            Keys.push_back(EntryIt->first);
+        }
+    }
+
+    KeyCount = Keys.size();
+    if (KeyCount > 0)
+    {
+        Ctx.References.fetch_add(KeyCount);
+        return new CasContainerReferencePruner(*this, std::move(Keys));
+    }
+
+    // Empty index, nothing to check
+    return nullptr;
+}
+
void
CasContainerStrategy::CompactIndex(RwLock::ExclusiveLockScope&)
{