diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/compactcas.cpp | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, separating the actual disk work from the deletion of items in the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make compacting of stores non-parallel so it does not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/compactcas.cpp')
| -rw-r--r-- | src/zenstore/compactcas.cpp | 219 |
1 files changed, 106 insertions, 113 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 00a018948..7b8e930b3 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -553,21 +553,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) GcCtx.AddDeletedCids(DeletedChunks); } -class CasContainerStoreCompactor : public GcReferenceStoreCompactor +class CasContainerStoreCompactor : public GcStoreCompactor { public: - CasContainerStoreCompactor(CasContainerStrategy& Owner, - BlockStoreCompactState&& CompactState, - std::vector<IoHash>&& CompactStateKeys, - std::vector<IoHash>&& PrunedKeys) - : m_CasContainerStrategy(Owner) - , m_CompactState(std::move(CompactState)) - , m_CompactStateKeys(std::move(CompactStateKeys)) - , m_PrunedKeys(std::move(PrunedKeys)) - { - } + CasContainerStoreCompactor(CasContainerStrategy& Owner) : m_CasContainerStrategy(Owner) {} - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -575,60 +566,109 @@ public: { return; } - ZEN_INFO("GCV2: compactcas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [COMPACT] '{}': RemovedDisk: {} in {}", m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - if (Ctx.Settings.IsDeleteMode && Ctx.Settings.CollectSmallObjects) + if (Ctx.Settings.CollectSmallObjects) { - // Compact block store - m_CasContainerStrategy.m_BlockStore.CompactBlocks( - m_CompactState, - m_CasContainerStrategy.m_PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { - std::vector<CasDiskIndexEntry> 
MovedEntries; - RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); - for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + std::unordered_map<uint32_t, uint64_t> BlockUsage; + { + RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); + for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) + { + size_t Index = Entry.second; + const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index]; + + uint32_t BlockIndex = Loc.GetBlockIndex(); + uint64_t ChunkSize = RoundUp(Loc.GetSize(), m_CasContainerStrategy.m_PayloadAlignment); + auto It = BlockUsage.find(BlockIndex); + if (It == BlockUsage.end()) + { + BlockUsage.insert_or_assign(BlockIndex, ChunkSize); + } + else { - size_t ChunkIndex = Moved.first; - const IoHash& Key = m_CompactStateKeys[ChunkIndex]; + It->second += ChunkSize; + } + } + } - if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); It != m_CasContainerStrategy.m_LocationMap.end()) - { - BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; - const BlockStoreLocation& OldLocation = m_CompactState.GetLocation(ChunkIndex); - if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) - { - // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a - // later time - continue; - } + { + BlockStoreCompactState BlockCompactState; + std::vector<IoHash> BlockCompactStateKeys; + + std::vector<uint32_t> BlocksToCompact = + m_CasContainerStrategy.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent); + BlockCompactState.IncludeBlocks(BlocksToCompact); - const BlockStoreLocation& NewLocation = Moved.second; - Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment); - MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); + { + RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); + for 
(const auto& Entry : m_CasContainerStrategy.m_LocationMap) + { + size_t Index = Entry.second; + const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index]; + + if (!BlockCompactState.AddKeepLocation(Loc.Get(m_CasContainerStrategy.m_PayloadAlignment))) + { + continue; } + BlockCompactStateKeys.push_back(Entry.first); } - m_CasContainerStrategy.m_CasLog.Append(MovedEntries); - Stats.RemovedDisk += FreedDiskSpace; - }, - [&]() { return 0; }); + } - Stats.Compacted += - m_PrunedKeys.size(); // Slightly missleading, it might not be compacted if the block is the currently writing block + if (Ctx.Settings.IsDeleteMode) + { + ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': compacting {} blocks", + m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, + BlocksToCompact.size()); + + m_CasContainerStrategy.m_BlockStore.CompactBlocks( + BlockCompactState, + m_CasContainerStrategy.m_PayloadAlignment, + [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + std::vector<CasDiskIndexEntry> MovedEntries; + RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); + for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + { + size_t ChunkIndex = Moved.first; + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + + if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); + It != m_CasContainerStrategy.m_LocationMap.end()) + { + BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; + const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); + if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) + { + // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d + // at a later time + continue; + } + const BlockStoreLocation& NewLocation = Moved.second; + + Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment); + 
MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); + } + } + m_CasContainerStrategy.m_CasLog.Append(MovedEntries); + Stats.RemovedDisk += FreedDiskSpace; + }, + ClaimDiskReserveCallback); + } + else + { + ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': skipped compacting of {} eligible blocks", + m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, + BlocksToCompact.size()); + } + } } } - CasContainerStrategy& m_CasContainerStrategy; - BlockStoreCompactState m_CompactState; - std::vector<IoHash> m_CompactStateKeys; - std::vector<IoHash> m_PrunedKeys; + CasContainerStrategy& m_CasContainerStrategy; }; class CasContainerReferencePruner : public GcReferencePruner @@ -640,9 +680,7 @@ public: { } - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -650,17 +688,17 @@ public: { return; } - ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Checked: {}, Deleted: {}, FreedMemory: {} in {}", m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.DeletedCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { @@ -668,19 +706,11 @@ public: return nullptr; } - BlockStoreCompactState CompactState; - BlockStore::ReclaimSnapshotState BlockSnapshotState; - 
std::vector<IoHash> CompactStateKeys; - std::vector<CasDiskIndexEntry> ExpiredEntries; + std::vector<CasDiskIndexEntry> ExpiredEntries; ExpiredEntries.reserve(UnusedCids.size()); - tsl::robin_set<IoHash, IoHash::Hasher> UnusedKeys; { RwLock::ExclusiveLockScope __(m_CasContainerStrategy.m_LocationMapLock); - if (Ctx.Settings.CollectSmallObjects) - { - BlockSnapshotState = m_CasContainerStrategy.m_BlockStore.GetReclaimSnapshotState(); - } for (const IoHash& Cid : UnusedCids) { @@ -689,59 +719,28 @@ public: { continue; } - CasDiskIndexEntry ExpiredEntry = {.Key = Cid, - .Location = m_CasContainerStrategy.m_Locations[It->second], - .Flags = CasDiskIndexEntry::kTombstone}; - const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; - BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment); if (Ctx.Settings.CollectSmallObjects) { - UnusedKeys.insert(Cid); - uint32_t BlockIndex = BlockLocation.BlockIndex; - bool IsActiveWriteBlock = BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex); - if (!IsActiveWriteBlock) - { - CompactState.IncludeBlock(BlockIndex); - } + CasDiskIndexEntry ExpiredEntry = {.Key = Cid, + .Location = m_CasContainerStrategy.m_Locations[It->second], + .Flags = CasDiskIndexEntry::kTombstone}; ExpiredEntries.push_back(ExpiredEntry); } } - // Get all locations we need to keep for affected blocks - if (Ctx.Settings.CollectSmallObjects && !UnusedKeys.empty()) - { - for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) - { - const IoHash& Key = Entry.first; - if (UnusedKeys.contains(Key)) - { - continue; - } - const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[Entry.second]; - BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment); - if (CompactState.AddKeepLocation(BlockLocation)) - { - CompactStateKeys.push_back(Key); - } - } - } - if (Ctx.Settings.IsDeleteMode) { for (const CasDiskIndexEntry& Entry : ExpiredEntries) 
{ m_CasContainerStrategy.m_LocationMap.erase(Entry.Key); + Stats.DeletedCount++; } m_CasContainerStrategy.m_CasLog.Append(ExpiredEntries); m_CasContainerStrategy.m_CasLog.Flush(); } } - Stats.Pruned += UnusedKeys.size(); - return new CasContainerStoreCompactor(m_CasContainerStrategy, - std::move(CompactState), - std::move(CompactStateKeys), - std::vector<IoHash>(UnusedKeys.begin(), UnusedKeys.end())); + return new CasContainerStoreCompactor(m_CasContainerStrategy); } private: @@ -756,7 +755,7 @@ CasContainerStrategy::GetGcName(GcCtx&) } GcReferencePruner* -CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) +CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -764,13 +763,8 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S { return; } - ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}' in {}", m_RootDirectory / m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -787,7 +781,6 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S CidsToCheck.push_back(It.first); } } - Stats.Count += CidsToCheck.size(); return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); } |