diff options
Diffstat (limited to 'src/zenstore')
| -rw-r--r-- | src/zenstore/blockstore.cpp | 107 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 219 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 113 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 821 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 22 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 84 |
6 files changed, 735 insertions, 631 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 063d38707..ec299092d 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -15,6 +15,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> #include <tsl/robin_set.h> +#include <gsl/gsl-lite.hpp> ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS @@ -267,6 +268,59 @@ BlockStore::SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& Know } } +std::vector<uint32_t> +BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent) +{ + std::unordered_set<uint32_t> Result; + { + RwLock::SharedLockScope InsertLock(m_InsertLock); + for (const auto& It : m_ChunkBlocks) + { + uint32_t BlockIndex = It.first; + if (std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), BlockIndex) != m_ActiveWriteBlocks.end()) + { + continue; + } + uint64_t BlockSize = It.second ? It.second->FileSize() : 0u; + if (BlockSize == 0) + { + Result.insert(BlockIndex); + continue; + } + + uint64_t UsedSize = 0; + if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end()) + { + UsedSize = UsageIt->second; + } + + if (BlockUsageThresholdPercent == 100) + { + if (UsedSize < BlockSize) + { + Result.insert(BlockIndex); + } + } + else if (BlockUsageThresholdPercent == 0) + { + if (UsedSize == 0) + { + Result.insert(BlockIndex); + } + } + else + { + const uint32_t UsedPercent = UsedSize < BlockSize ? gsl::narrow<uint32_t>((100 * UsedSize) / BlockSize) : 100u; + if (UsedPercent < BlockUsageThresholdPercent) + { + Result.insert(BlockIndex); + } + } + } + } + return std::vector<uint32_t>(Result.begin(), Result.end()); +} + void BlockStore::Close() { @@ -971,7 +1025,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, Stopwatch TotalTimer; const auto _ = MakeGuard([&] { - ZEN_DEBUG("compact blocks for '{}' DONE after {}, deleted {} and moved {} chunks ({}) ", + ZEN_DEBUG("Compact blocks for '{}' DONE after {}, deleted {} and moved {} chunks ({}) ", m_BlocksBasePath, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), NiceBytes(DeletedSize), @@ -983,13 +1037,14 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint32_t NewBlockIndex = 0; MovedChunksArray MovedChunks; + uint64_t AddedSize = 0; uint64_t RemovedSize = 0; Ref<BlockStoreFile> NewBlockFile; auto NewBlockFileGuard = MakeGuard([&]() { if (NewBlockFile) { - ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); + ZEN_DEBUG("Dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); { RwLock::ExclusiveLockScope _l(m_InsertLock); if (m_ChunkBlocks[NewBlockIndex] == NewBlockFile) @@ -1001,6 +1056,18 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, } }); + auto ReportChanges = [&]() { + if (!MovedChunks.empty() || RemovedSize > 0) + { + ChangeCallback(MovedChunks, RemovedSize > AddedSize ? RemovedSize - AddedSize : 0); + DeletedSize += RemovedSize; + RemovedSize = 0; + AddedSize = 0; + MovedCount += MovedChunks.size(); + MovedChunks.clear(); + } + }; + std::vector<uint32_t> RemovedBlocks; CompactState.IterateBlocks( @@ -1030,12 +1097,23 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint64_t OldBlockSize = OldBlockFile->FileSize(); - // TODO: Add heuristics for determining if it is worth to compact a block (if only a very small part is removed) - std::vector<uint8_t> Chunk; for (const size_t& ChunkIndex : KeepChunkIndexes) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; + if (ChunkLocation.Offset + ChunkLocation.Size > OldBlockSize) + { + ZEN_WARN( + "Compact Block skipping chunk outside of block range in '{}', Chunk start {}, Chunk size {} in Block {}, Block " + "size {}", + m_BlocksBasePath, + ChunkLocation.Offset, + ChunkLocation.Size, + OldBlockFile->GetPath(), + OldBlockSize); + continue; + } + Chunk.resize(ChunkLocation.Size); OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); @@ -1113,18 +1191,11 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); MovedChunks.push_back({ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}}); WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); + AddedSize += Chunk.size(); } Chunk.clear(); - // Report what we have moved so we can purge the old block - if (!MovedChunks.empty() || RemovedSize > 0) - { - ChangeCallback(MovedChunks, RemovedSize); - DeletedSize += RemovedSize; - RemovedSize = 0; - MovedCount += MovedChunks.size(); - MovedChunks.clear(); - } + ReportChanges(); { RwLock::ExclusiveLockScope InsertLock(m_InsertLock); @@ -1135,6 +1206,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, RemovedSize += OldBlockSize; } }); + if (NewBlockFile) { NewBlockFile->Flush(); @@ -1142,14 +1214,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NewBlockFile = nullptr; } - if (!MovedChunks.empty() || RemovedSize > 0) - { - ChangeCallback(MovedChunks, RemovedSize); - DeletedSize += RemovedSize; - RemovedSize = 0; - MovedCount += MovedChunks.size(); - MovedChunks.clear(); - } + ReportChanges(); } const char* diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 00a018948..7b8e930b3 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -553,21 +553,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) GcCtx.AddDeletedCids(DeletedChunks); } -class CasContainerStoreCompactor : public GcReferenceStoreCompactor +class CasContainerStoreCompactor : public GcStoreCompactor { public: - CasContainerStoreCompactor(CasContainerStrategy& Owner, - BlockStoreCompactState&& CompactState, - std::vector<IoHash>&& CompactStateKeys, - std::vector<IoHash>&& PrunedKeys) - : m_CasContainerStrategy(Owner) - , m_CompactState(std::move(CompactState)) - , m_CompactStateKeys(std::move(CompactStateKeys)) - , m_PrunedKeys(std::move(PrunedKeys)) - { - } + CasContainerStoreCompactor(CasContainerStrategy& Owner) : m_CasContainerStrategy(Owner) {} - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -575,60 +566,109 @@ public: { return; } - ZEN_INFO("GCV2: compactcas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [COMPACT] '{}': RemovedDisk: {} in {}", m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - if (Ctx.Settings.IsDeleteMode && Ctx.Settings.CollectSmallObjects) + if (Ctx.Settings.CollectSmallObjects) { - // Compact block store - m_CasContainerStrategy.m_BlockStore.CompactBlocks( - m_CompactState, - m_CasContainerStrategy.m_PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { - std::vector<CasDiskIndexEntry> MovedEntries; - RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); - for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + std::unordered_map<uint32_t, uint64_t> BlockUsage; + { + RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); + for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) + { + size_t Index = Entry.second; + const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index]; + + uint32_t BlockIndex = Loc.GetBlockIndex(); + uint64_t ChunkSize = RoundUp(Loc.GetSize(), m_CasContainerStrategy.m_PayloadAlignment); + auto It = BlockUsage.find(BlockIndex); + if (It == BlockUsage.end()) + { + BlockUsage.insert_or_assign(BlockIndex, ChunkSize); + } + else { - size_t ChunkIndex = Moved.first; - const IoHash& Key = m_CompactStateKeys[ChunkIndex]; + It->second += ChunkSize; + } + } + } - if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); It != m_CasContainerStrategy.m_LocationMap.end()) - { - BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; - const BlockStoreLocation& OldLocation = m_CompactState.GetLocation(ChunkIndex); - if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) - { - // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a - // later time - continue; - } + { + BlockStoreCompactState BlockCompactState; + std::vector<IoHash> BlockCompactStateKeys; + + std::vector<uint32_t> BlocksToCompact = + m_CasContainerStrategy.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent); + BlockCompactState.IncludeBlocks(BlocksToCompact); - const BlockStoreLocation& NewLocation = Moved.second; - Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment); - MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); + { + RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); + for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) + { + size_t Index = Entry.second; + const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index]; + + if (!BlockCompactState.AddKeepLocation(Loc.Get(m_CasContainerStrategy.m_PayloadAlignment))) + { + continue; } + BlockCompactStateKeys.push_back(Entry.first); } - m_CasContainerStrategy.m_CasLog.Append(MovedEntries); - Stats.RemovedDisk += FreedDiskSpace; - }, - [&]() { return 0; }); + } - Stats.Compacted += - m_PrunedKeys.size(); // Slightly missleading, it might not be compacted if the block is the currently writing block + if (Ctx.Settings.IsDeleteMode) + { + ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': compacting {} blocks", + m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, + BlocksToCompact.size()); + + m_CasContainerStrategy.m_BlockStore.CompactBlocks( + BlockCompactState, + m_CasContainerStrategy.m_PayloadAlignment, + [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + std::vector<CasDiskIndexEntry> MovedEntries; + RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); + for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + { + size_t ChunkIndex = Moved.first; + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + + if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); + It != m_CasContainerStrategy.m_LocationMap.end()) + { + BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; + const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); + if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) + { + // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d + // at a later time + continue; + } + const BlockStoreLocation& NewLocation = Moved.second; + + Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment); + MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); + } + } + m_CasContainerStrategy.m_CasLog.Append(MovedEntries); + Stats.RemovedDisk += FreedDiskSpace; + }, + ClaimDiskReserveCallback); + } + else + { + ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': skipped compacting of {} eligible blocks", + m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, + BlocksToCompact.size()); + } + } } } - CasContainerStrategy& m_CasContainerStrategy; - BlockStoreCompactState m_CompactState; - std::vector<IoHash> m_CompactStateKeys; - std::vector<IoHash> m_PrunedKeys; + CasContainerStrategy& m_CasContainerStrategy; }; class CasContainerReferencePruner : public GcReferencePruner @@ -640,9 +680,7 @@ public: { } - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -650,17 +688,17 @@ public: { return; } - ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Checked: {}, Deleted: {}, FreedMemory: {} in {}", m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.DeletedCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { @@ -668,19 +706,11 @@ public: return nullptr; } - BlockStoreCompactState CompactState; - BlockStore::ReclaimSnapshotState BlockSnapshotState; - std::vector<IoHash> CompactStateKeys; - std::vector<CasDiskIndexEntry> ExpiredEntries; + std::vector<CasDiskIndexEntry> ExpiredEntries; ExpiredEntries.reserve(UnusedCids.size()); - tsl::robin_set<IoHash, IoHash::Hasher> UnusedKeys; { RwLock::ExclusiveLockScope __(m_CasContainerStrategy.m_LocationMapLock); - if (Ctx.Settings.CollectSmallObjects) - { - BlockSnapshotState = m_CasContainerStrategy.m_BlockStore.GetReclaimSnapshotState(); - } for (const IoHash& Cid : UnusedCids) { @@ -689,59 +719,28 @@ public: { continue; } - CasDiskIndexEntry ExpiredEntry = {.Key = Cid, - .Location = m_CasContainerStrategy.m_Locations[It->second], - .Flags = CasDiskIndexEntry::kTombstone}; - const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; - BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment); if (Ctx.Settings.CollectSmallObjects) { - UnusedKeys.insert(Cid); - uint32_t BlockIndex = BlockLocation.BlockIndex; - bool IsActiveWriteBlock = BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex); - if (!IsActiveWriteBlock) - { - CompactState.IncludeBlock(BlockIndex); - } + CasDiskIndexEntry ExpiredEntry = {.Key = Cid, + .Location = m_CasContainerStrategy.m_Locations[It->second], + .Flags = CasDiskIndexEntry::kTombstone}; ExpiredEntries.push_back(ExpiredEntry); } } - // Get all locations we need to keep for affected blocks - if (Ctx.Settings.CollectSmallObjects && !UnusedKeys.empty()) - { - for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) - { - const IoHash& Key = Entry.first; - if (UnusedKeys.contains(Key)) - { - continue; - } - const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[Entry.second]; - BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment); - if (CompactState.AddKeepLocation(BlockLocation)) - { - CompactStateKeys.push_back(Key); - } - } - } - if (Ctx.Settings.IsDeleteMode) { for (const CasDiskIndexEntry& Entry : ExpiredEntries) { m_CasContainerStrategy.m_LocationMap.erase(Entry.Key); + Stats.DeletedCount++; } m_CasContainerStrategy.m_CasLog.Append(ExpiredEntries); m_CasContainerStrategy.m_CasLog.Flush(); } } - Stats.Pruned += UnusedKeys.size(); - return new CasContainerStoreCompactor(m_CasContainerStrategy, - std::move(CompactState), - std::move(CompactStateKeys), - std::vector<IoHash>(UnusedKeys.begin(), UnusedKeys.end())); + return new CasContainerStoreCompactor(m_CasContainerStrategy); } private: @@ -756,7 +755,7 @@ CasContainerStrategy::GetGcName(GcCtx&) } GcReferencePruner* -CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) +CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -764,13 +763,8 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S { return; } - ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}' in {}", m_RootDirectory / m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -787,7 +781,6 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S CidsToCheck.push_back(It.first); } } - Stats.Count += CidsToCheck.size(); return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); } diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index a72619e4b..6e432bc9d 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1331,35 +1331,31 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) return Entries; }; -class FileCasStoreCompactor : public GcReferenceStoreCompactor +class FileCasStoreCompactor : public GcStoreCompactor { public: FileCasStoreCompactor(FileCasStrategy& Owner, std::vector<IoHash>&& ReferencesToClean) : m_FileCasStrategy(Owner) , m_ReferencesToClean(std::move(ReferencesToClean)) { + m_ReferencesToClean.shrink_to_fit(); } - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>&) { - Stopwatch Timer; - const auto _ = MakeGuard([&] { - if (!Ctx.Settings.Verbose) - { - return; - } - ZEN_INFO("GCV2: filecas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", - m_FileCasStrategy.m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - }); - std::vector<IoHash> ReferencedCleaned; - ReferencedCleaned.reserve(m_ReferencesToClean.size()); + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: filecas [COMPACT] '{}': RemovedDisk: {} in {}", + m_FileCasStrategy.m_RootDirectory, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + size_t Skipped = 0; for (const IoHash& ChunkHash : m_ReferencesToClean) { FileCasStrategy::ShardingHelper Name(m_FileCasStrategy.m_RootDirectory.c_str(), ChunkHash); @@ -1372,7 +1368,9 @@ public: } if (Ctx.Settings.IsDeleteMode) { - ZEN_DEBUG("deleting CAS payload file '{}'", Name.ShardedPath.ToUtf8()); + ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Deleting CAS payload file '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8()); std::error_code Ec; uint64_t SizeOnDisk = std::filesystem::file_size(Name.ShardedPath.c_str(), Ec); if (Ec) @@ -1382,7 +1380,10 @@ public: bool Existed = std::filesystem::remove(Name.ShardedPath.c_str(), Ec); if (Ec) { - ZEN_WARN("failed deleting CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message()); + ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed deleting CAS payload file '{}'. Reason '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8(), + Ec.message()); continue; } if (!Existed) @@ -1397,18 +1398,28 @@ public: bool Existed = std::filesystem::is_regular_file(Name.ShardedPath.c_str(), Ec); if (Ec) { - ZEN_WARN("failed checking CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message()); + ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed checking CAS payload file '{}'. Reason '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8(), + Ec.message()); continue; } if (!Existed) { continue; } + Skipped++; } - ReferencedCleaned.push_back(ChunkHash); } } - Stats.Compacted += ReferencedCleaned.size(); + + if (Skipped > 0) + { + ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Skipped deleting of {} eligible files", m_FileCasStrategy.m_RootDirectory, Skipped); + } + + m_ReferencesToClean.clear(); + m_ReferencesToClean.shrink_to_fit(); } private: @@ -1421,9 +1432,7 @@ class FileCasReferencePruner : public GcReferencePruner public: FileCasReferencePruner(FileCasStrategy& Owner, std::vector<IoHash>&& Cids) : m_FileCasStrategy(Owner), m_Cids(std::move(Cids)) {} - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1431,23 +1440,29 @@ public: { return; } - ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Unreferenced: {}, FreedMemory: {} in {}", m_FileCasStrategy.m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.FoundCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); if (UnusedCids.empty()) { // Nothing to collect return nullptr; } + Stats.FoundCount += UnusedCids.size(); + + if (!Ctx.Settings.IsDeleteMode) + { + return nullptr; + } + std::vector<IoHash> PrunedReferences; PrunedReferences.reserve(UnusedCids.size()); { @@ -1459,19 +1474,21 @@ public: { continue; } - if (Ctx.Settings.IsDeleteMode) - { - uint64_t FileSize = It->second.Size; - m_FileCasStrategy.m_Index.erase(It); - m_FileCasStrategy.m_CasLog.Append( - {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize}); - m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed); - } + uint64_t FileSize = It->second.Size; + m_FileCasStrategy.m_Index.erase(It); + m_FileCasStrategy.m_CasLog.Append( + {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize}); + m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed); PrunedReferences.push_back(ChunkHash); + Stats.DeletedCount++; } } - Stats.Pruned += PrunedReferences.size(); + if (PrunedReferences.empty()) + { + return nullptr; + } + return new FileCasStoreCompactor(m_FileCasStrategy, std::move(PrunedReferences)); } @@ -1487,7 +1504,7 @@ FileCasStrategy::GetGcName(GcCtx&) } GcReferencePruner* -FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) +FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1495,14 +1512,7 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) { return; } - ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", - m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}' in {}", m_RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> CidsToCheck; { @@ -1517,7 +1527,6 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) CidsToCheck.push_back(It.first); } } - Stats.Count += CidsToCheck.size(); return new FileCasReferencePruner(*this, std::move(CidsToCheck)); } diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 7f9ca5236..b78b23350 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -173,166 +173,6 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) ////////////////////////////////////////////////////////////////////////// -void -WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) -{ - if (Stats.Count == 0) - { - return; - } - Writer << "Count" << Stats.Count; - Writer << "Expired" << Stats.Expired; - Writer << "Deleted" << Stats.Deleted; - - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory); - } - else - { - Writer << "RemovedDiskBytes" << Stats.RemovedDisk; - Writer << "RemovedMemoryBytes" << Stats.RemovedMemory; - } - - if (HumanReadable) - { - Writer << "RemoveExpiredData" << NiceTimeSpanMs(Stats.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Stats.CreateReferenceCheckersMS.count()); - Writer << "LockState" << NiceTimeSpanMs(Stats.LockStateMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count()); - } - else - { - Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Stats.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Stats.CreateReferenceCheckersMS.count()); - Writer << "LockStateMS" << gsl::narrow<int64_t>(Stats.LockStateMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count()); - } -}; - -void -WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) -{ - if (Stats.Count == 0) - { - return; - } - Writer << "Count" << Stats.Count; - Writer << "Pruned" << Stats.Pruned; - Writer << "Compacted" << Stats.Compacted; - - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory); - } - else - { - Writer << "RemovedDiskBytes" << Stats.RemovedDisk; - Writer << "RemovedMemoryBytes" << Stats.RemovedMemory; - } - - if (HumanReadable) - { - Writer << "CreateReferencePruner" << NiceTimeSpanMs(Stats.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Stats.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStore" << NiceTimeSpanMs(Stats.CompactReferenceStoreMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count()); - } - else - { - Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Stats.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Stats.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Stats.CompactReferenceStoreMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count()); - } -}; - -void -WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) -{ - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Result.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Result.RemovedMemory); - Writer << "WriteBlock" << NiceTimeSpanMs(Result.WriteBlockMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Result.ElapsedMS.count()); - } - else - { - Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.RemovedDisk); - Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.RemovedMemory); - Writer << "WriteBlockMS" << gsl::narrow<int64_t>(Result.WriteBlockMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Result.ElapsedMS.count()); - } - - if (!IncludeDetails) - { - return; - } - - if (HumanReadable) - { - Writer << "RemoveExpiredData" << NiceTimeSpanMs(Result.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count()); - Writer << "LockState" << NiceTimeSpanMs(Result.LockStateMS.count()); - - Writer << "CreateReferencePruner" << NiceTimeSpanMs(Result.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStore" << NiceTimeSpanMs(Result.CompactReferenceStoreMS.count()); - } - else - { - Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Result.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Result.CreateReferenceCheckersMS.count()); - Writer << "LockStateMS" << gsl::narrow<int64_t>(Result.LockStateMS.count()); - - Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Result.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Result.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Result.CompactReferenceStoreMS.count()); - } - - Writer.BeginObject("ReferencerStats"); - { - WriteReferencerStats(Writer, Result.ReferencerStat, HumanReadable); - } - Writer.EndObject(); - - Writer.BeginObject("ReferenceStoreStats"); - { - WriteReferenceStoreStats(Writer, Result.ReferenceStoreStat, HumanReadable); - } - Writer.EndObject(); - - if (!Result.ReferencerStats.empty()) - { - Writer.BeginArray("Referencers"); - { - for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats) - { - Writer.BeginObject(); - Writer << "Name" << It.first; - WriteReferencerStats(Writer, It.second, HumanReadable); - Writer.EndObject(); - } - } - Writer.EndArray(); - } - if (!Result.ReferenceStoreStats.empty()) - { - Writer.BeginArray("ReferenceStores"); - for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats) - { - Writer.BeginObject(); - Writer << "Name" << It.first; - WriteReferenceStoreStats(Writer, It.second, HumanReadable); - Writer.EndObject(); - } - Writer.EndArray(); - } -}; - struct GcContext::GcState { using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>; @@ -490,44 +330,235 @@ GcManager::~GcManager() //////// Begin GC V2 void -GcResult::Sum() +WriteGcStats(CbObjectWriter& Writer, const GcStats& Stats, bool HumanReadable) { - for (std::pair<std::string, GcReferencerStats>& Referencer : ReferencerStats) + Writer << "Checked" << Stats.CheckedCount; + Writer << "Found" << Stats.FoundCount; + Writer << "Deleted" << Stats.DeletedCount; + if (HumanReadable) { - GcReferencerStats& SubStat = Referencer.second; - ReferencerStat.Count += SubStat.Count; - ReferencerStat.Expired += SubStat.Expired; - ReferencerStat.Deleted += SubStat.Deleted; - ReferencerStat.RemovedDisk += SubStat.RemovedDisk; - ReferencerStat.RemovedMemory += SubStat.RemovedMemory; - SubStat.ElapsedMS = SubStat.RemoveExpiredDataMS + SubStat.CreateReferenceCheckersMS + SubStat.LockStateMS; + Writer << "FreedMemory" << NiceBytes(Stats.FreedMemory); + } + else + { + Writer << "FreedMemoryBytes" << Stats.FreedMemory; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +} - ReferencerStat.RemoveExpiredDataMS += SubStat.RemoveExpiredDataMS; - ReferencerStat.CreateReferenceCheckersMS += SubStat.CreateReferenceCheckersMS; - ReferencerStat.LockStateMS += SubStat.LockStateMS; - ReferencerStat.ElapsedMS += SubStat.ElapsedMS; +void +WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats, bool HumanReadable) +{ + if (HumanReadable) + { + Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); + } + else + { + Writer << "RemovedDiskBytes" << Stats.RemovedDisk; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +} - RemovedDisk += SubStat.RemovedDisk; - RemovedMemory += SubStat.RemovedMemory; +void +WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) +{ + if (Stats.RemoveExpiredDataStats.CheckedCount == 0) + { + return; } - for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : ReferenceStoreStats) + Writer.BeginObject("RemoveExpired"); { - GcReferenceStoreStats& SubStat = ReferenceStore.second; - ReferenceStoreStat.Count += SubStat.Count; - ReferenceStoreStat.Pruned += SubStat.Pruned; - ReferenceStoreStat.Compacted += SubStat.Compacted; - ReferenceStoreStat.RemovedDisk += SubStat.RemovedDisk; - ReferenceStoreStat.RemovedMemory += SubStat.RemovedMemory; - SubStat.ElapsedMS = SubStat.CreateReferencePrunerMS + SubStat.RemoveUnreferencedDataMS + SubStat.CompactReferenceStoreMS; + WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); + } + Writer.EndObject(); + + Writer << "CreateReferenceCheckers" << ToTimeSpan(Stats.CreateReferenceCheckersMS); + Writer << "LockState" << ToTimeSpan(Stats.LockStateMS); + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void +WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) +{ + if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0) + { + return; + } + Writer.BeginObject("RemoveUnreferenced"); + { + WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); + } + Writer.EndObject(); + + Writer << "CreateReferencePruners" << ToTimeSpan(Stats.CreateReferencePrunersMS); + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void +WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) +{ + if (!IncludeDetails) + { + if (HumanReadable) + { + Writer << "RemovedDisk" << NiceBytes(Result.CompactStoresStatSum.RemovedDisk); + Writer << "FreedMemory" << NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); + } + else + { + Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.CompactStoresStatSum.RemovedDisk); + Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); + } + Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); + Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); + return; + } + + Writer.BeginObject("Referencer"); + { + WriteReferencerStats(Writer, Result.ReferencerStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("ReferenceStore"); + { + WriteReferenceStoreStats(Writer, Result.ReferenceStoreStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Result.CompactStoresStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS); + Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS); + Writer << "LockState" << ToTimeSpan(Result.LockStateMS); + + Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS); + Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS); + Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS); + Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); + Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); + + if (!Result.ReferencerStats.empty()) + { + Writer.BeginArray("Referencers"); + { + for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferencerStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + } + Writer.EndArray(); + } + if (!Result.ReferenceStoreStats.empty()) + { + Writer.BeginArray("ReferenceStores"); + for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferenceStoreStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + Writer.EndArray(); + } +}; + +void +Add(GcCompactStoreStats& Sum, const GcCompactStoreStats& Sub) +{ + Sum.RemovedDisk += Sub.RemovedDisk; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Add(GcStats& Sum, const GcStats& Sub) +{ + Sum.CheckedCount += Sub.CheckedCount; + Sum.FoundCount += Sub.FoundCount; + Sum.DeletedCount += Sub.DeletedCount; + Sum.FreedMemory += Sub.FreedMemory; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcReferencerStats& Stat) +{ + Stat.ElapsedMS = + Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS + Stat.LockStateMS; +} + +void +Add(GcReferencerStats& Sum, const GcReferencerStats& Sub) +{ + Add(Sum.RemoveExpiredDataStats, Sub.RemoveExpiredDataStats); + Add(Sum.CompactStoreStats, Sub.CompactStoreStats); + + Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS; + Sum.LockStateMS += Sub.LockStateMS; + + Sum.ElapsedMS += Sub.ElapsedMS; +} - ReferenceStoreStat.CreateReferencePrunerMS += SubStat.CreateReferencePrunerMS; - ReferenceStoreStat.RemoveUnreferencedDataMS += SubStat.RemoveUnreferencedDataMS; - ReferenceStoreStat.CompactReferenceStoreMS += SubStat.CompactReferenceStoreMS; - ReferenceStoreStat.ElapsedMS += SubStat.ElapsedMS; +void +Sum(GcReferenceStoreStats& Stat) +{ + Stat.ElapsedMS = Stat.RemoveUnreferencedDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferencePrunersMS; +} - RemovedDisk += SubStat.RemovedDisk; - RemovedMemory += SubStat.RemovedMemory; +void +Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub) +{ + Add(Sum.RemoveUnreferencedDataStats, Sub.RemoveUnreferencedDataStats); + Add(Sum.CompactStoreStats, Sub.CompactStoreStats); + + Sum.CreateReferencePrunersMS += Sub.CreateReferencePrunersMS; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcResult& Stat) +{ + for (std::pair<std::string, GcReferencerStats>& Referencer : Stat.ReferencerStats) + { + GcReferencerStats& SubStat = Referencer.second; + Sum(SubStat); + Add(Stat.ReferencerStatSum, SubStat); + } + for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : Stat.ReferenceStoreStats) + { + GcReferenceStoreStats& SubStat = ReferenceStore.second; + Sum(SubStat); + Add(Stat.ReferenceStoreStatSum, SubStat); } + + Sum(Stat.ReferencerStatSum); + Sum(Stat.ReferenceStoreStatSum); + + Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats); + Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats); } void @@ -581,7 +612,9 @@ GcManager::CollectGarbage(const GcSettings& Settings) Result.ReferencerStats.resize(m_GcReferencers.size()); - WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2"); + std::unordered_map<std::unique_ptr<GcStoreCompactor>, GcCompactStoreStats*> StoreCompactors; + RwLock StoreCompactorsLock; + WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2"); ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); if (!m_GcReferencers.empty()) @@ -594,52 +627,15 @@ GcManager::CollectGarbage(const GcSettings& Settings) GcReferencer* Owner = m_GcReferencers[Index]; std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Owner, &Stats, &WorkLeft]() { + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Owner, &Stats, &StoreCompactorsLock, &StoreCompactors]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); Stats.first = Owner->GetGcName(Ctx); - SCOPED_TIMER(Stats.second.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Owner->RemoveExpiredData(Ctx, Stats.second); - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - if (Ctx.Settings.SkipCidDelete) - { - Result.Sum(); - return Result; - } - - Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); - - ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); - std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; - if (!m_GcReferenceStores.empty()) - { - ReferencePruners.reserve(m_GcReferenceStores.size()); - Latch WorkLeft(1); - RwLock ReferencePrunersLock; - // CreateReferencePruner is usually not very heavy but big data sets change that - SCOPED_TIMER(Result.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) - { - GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; - std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - Stats.first = ReferenceStore->GetGcName(Ctx); - std::unique_ptr<GcReferencePruner> ReferencePruner; - { - SCOPED_TIMER(Stats.second.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - // The ReferenceStore will pick a list of CId entries to check, returning a collector - ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); - } - if (ReferencePruner) + SCOPED_TIMER(Stats.second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + std::unique_ptr<GcStoreCompactor> StoreCompactor(Owner->RemoveExpiredData(Ctx, Stats.second.RemoveExpiredDataStats)); + if (StoreCompactor) { - RwLock::ExclusiveLockScope __(ReferencePrunersLock); - ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.second.CompactStoreStats); } }); } @@ -647,180 +643,217 @@ GcManager::CollectGarbage(const GcSettings& Settings) WorkLeft.Wait(); } - ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); - std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; - if (!m_GcReferencers.empty()) + if (!Ctx.Settings.SkipCidDelete) { - ReferenceCheckers.reserve(m_GcReferencers.size()); - Latch WorkLeft(1); - RwLock ReferenceCheckersLock; - SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - // Lock all reference owners from changing the reference data and get access to check for referenced data - for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) + Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); + + ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); + std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; + if (!m_GcReferenceStores.empty()) { - GcReferencer* Referencer = m_GcReferencers[Index]; - std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives - std::vector<GcReferenceChecker*> Checkers; - { - SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checkers = Referencer->CreateReferenceCheckers(Ctx); - } - try - { - if (!Checkers.empty()) + ReferencePruners.reserve(m_GcReferenceStores.size()); + Latch WorkLeft(1); + RwLock ReferencePrunersLock; + // CreateReferencePruner is usually not very heavy but big data sets change that + SCOPED_TIMER(Result.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) + { + GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; + std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + Stats.first = ReferenceStore->GetGcName(Ctx); + std::unique_ptr<GcReferencePruner> ReferencePruner; { - RwLock::ExclusiveLockScope __(ReferenceCheckersLock); - for (auto& Checker : Checkers) - { - ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); - Checker = nullptr; - } + SCOPED_TIMER(Stats.second.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // The ReferenceStore will pick a list of CId entries to check, returning a collector + ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); } - } - catch (std::exception&) - { - while (!Checkers.empty()) + if (ReferencePruner) { - delete Checkers.back(); - Checkers.pop_back(); + RwLock::ExclusiveLockScope __(ReferencePrunersLock); + ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); } - throw; - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - std::unordered_map<std::unique_ptr<GcReferenceStoreCompactor>, size_t> ReferenceStoreCompactors; - ReferenceStoreCompactors.reserve(ReferencePruners.size()); - - ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); - { - SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); - ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); - if (!ReferenceCheckers.empty()) - { - // Locking all references checkers so we have a steady state of which references are used - // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until - // we delete the ReferenceCheckers - Latch WorkLeft(1); - - SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferenceCheckers) - { - GcReferenceChecker* Checker = It.first.get(); - size_t Index = It.second; - std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checker->LockState(Ctx); }); } WorkLeft.CountDown(); WorkLeft.Wait(); } - ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); if (!ReferencePruners.empty()) { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); - for (const auto& It : ReferenceCheckers) + ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); + std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; + if (!m_GcReferencers.empty()) + { + ReferenceCheckers.reserve(m_GcReferencers.size()); + Latch WorkLeft(1); + RwLock ReferenceCheckersLock; + SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // Lock all reference owners from changing the reference data and get access to check for referenced data + for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) { - GcReferenceChecker* ReferenceChecker = It.first.get(); - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); - if (UnusedCids.empty()) + GcReferencer* Referencer = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // The Referencer will create a reference checker that guarrantees that the references do not change as long as + // it lives + std::vector<GcReferenceChecker*> Checkers; + { + SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checkers = Referencer->CreateReferenceCheckers(Ctx); + } + try + { + if (!Checkers.empty()) + { + RwLock::ExclusiveLockScope __(ReferenceCheckersLock); + for (auto& Checker : Checkers) + { + ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); + Checker = nullptr; + } + } + } + catch (std::exception&) + { + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + throw; + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); + { + SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); + ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); + if (!ReferenceCheckers.empty()) + { + // Locking all references checkers so we have a steady state of which references are used + // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until + // we delete the ReferenceCheckers + Latch WorkLeft(1); + + SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferenceCheckers) { - return {}; + GcReferenceChecker* Checker = It.first.get(); + size_t Index = It.second; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checker->LockState(Ctx); + }); } + WorkLeft.CountDown(); + WorkLeft.Wait(); } - return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); - }; - // checking all Cids agains references in cache - // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight operation - // that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors + ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); + { + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { + HashSet UnusedCids(References.begin(), References.end()); + for (const auto& It : ReferenceCheckers) + { + GcReferenceChecker* ReferenceChecker = It.first.get(); + ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); + if (UnusedCids.empty()) + { + return {}; + } + } + return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + }; - Latch WorkLeft(1); - RwLock ReferenceStoreCompactorsLock; + // checking all Cids agains references in cache + // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight + // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors - SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferencePruners) - { - GcReferencePruner* Pruner = It.second.get(); - size_t Index = It.first; - GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, - Pruner, - &Stats, - &WorkLeft, - Index, - &GetUnusedReferences, - &ReferenceStoreCompactorsLock, - &ReferenceStoreCompactors]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. - std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor; - { - SCOPED_TIMER(Stats.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - ReferenceCompactor = - std::unique_ptr<GcReferenceStoreCompactor>(Pruner->RemoveUnreferencedData(Ctx, Stats, GetUnusedReferences)); - } - if (ReferenceCompactor) + Latch WorkLeft(1); + + SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferencePruners) { - RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock); - ReferenceStoreCompactors.insert_or_assign(std::move(ReferenceCompactor), Index); + GcReferencePruner* Pruner = It.second.get(); + size_t Index = It.first; + GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork( + [&Ctx, Pruner, &Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced + // or not. + std::unique_ptr<GcStoreCompactor> StoreCompactor; + { + SCOPED_TIMER(Stats.RemoveUnreferencedDataStats.ElapsedMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + StoreCompactor = std::unique_ptr<GcStoreCompactor>( + Pruner->RemoveUnreferencedData(Ctx, Stats.RemoveUnreferencedDataStats, GetUnusedReferences)); + } + if (StoreCompactor) + { + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.CompactStoreStats); + } + }); } - }); + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed + ReferenceCheckers.clear(); + ReferencePruners.clear(); } - WorkLeft.CountDown(); - WorkLeft.Wait(); } - // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed - ReferenceCheckers.clear(); } - // Let go of the pruners - ReferencePruners.clear(); - - ZEN_INFO("GCV2: Compacting reference stores for {} reference store compactors", ReferenceStoreCompactors.size()); - if (!ReferenceStoreCompactors.empty()) + ZEN_INFO("GCV2: Compacting reference stores for {} store compactors", StoreCompactors.size()); + if (!StoreCompactors.empty()) { - Latch WorkLeft(1); - + auto ClaimDiskReserve = [&]() -> uint64_t { + if (!std::filesystem::is_regular_file(Settings.DiskReservePath)) + { + return 0; + } + uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath); + if (std::filesystem::remove(Settings.DiskReservePath)) + { + return ReclaimedSize; + } + return 0; + }; // Remove the stuff we deemed unreferenced from disk - may be heavy operation - SCOPED_TIMER(Result.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferenceStoreCompactors) + // Don't do in parallel, we don't want to steal CPU/Disk from regular operation + SCOPED_TIMER(Result.CompactStoresMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : StoreCompactors) { - GcReferenceStoreCompactor* Compactor = It.first.get(); - size_t Index = It.second; - GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Compactor, &Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + GcStoreCompactor* Compactor = It.first.get(); + GcCompactStoreStats& Stats = *It.second; + { // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. - SCOPED_TIMER(Stats.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Compactor->CompactReferenceStore(Ctx, Stats); - }); + SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve); + } } - WorkLeft.CountDown(); - WorkLeft.Wait(); + StoreCompactors.clear(); } - ReferenceStoreCompactors.clear(); - ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); } - Result.Sum(); + Sum(Result); return Result; } @@ -1227,22 +1260,17 @@ GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Setting std::string Id = fmt::format("{}", gsl::narrow<int64_t>(StartTime.time_since_epoch().count())); Writer.BeginObject(Id); { - Writer << "StartTimeSec"sv - << gsl::narrow<int64_t>(std::chrono::duration_cast<std::chrono::seconds>(StartTime.time_since_epoch()).count()); + Writer << "StartTime"sv << ToDateTime(StartTime); Writer.BeginObject("Settings"sv); { - Writer << "CacheExpireTimeSec"sv - << gsl::narrow<int64_t>( - std::chrono::duration_cast<std::chrono::seconds>(Settings.CacheExpireTime.time_since_epoch()).count()); - Writer << "ProjectStoreExpireTimeSec"sv - << gsl::narrow<int64_t>( - std::chrono::duration_cast<std::chrono::seconds>(Settings.ProjectStoreExpireTime.time_since_epoch()) - .count()); + Writer << "CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime); + Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime); Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects; Writer << "IsDeleteMode"sv << Settings.IsDeleteMode; Writer << "SkipCidDelete"sv << Settings.SkipCidDelete; Writer << "Verbose"sv << Settings.Verbose; Writer << "SingleThread"sv << Settings.SingleThread; + Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent; } Writer.EndObject(); @@ -1817,26 +1845,27 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .ProjectStoreExpireTime = ProjectStoreExpireTime, .CollectSmallObjects = CollectSmallObjects, .IsDeleteMode = Delete, - .SkipCidDelete = SkipCid}; + .SkipCidDelete = SkipCid, + .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); ZEN_INFO( - "GCV2: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries " - "out of {}, " - "freed " - "{} on disk and {} of memory in {}. CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " + "GCV2: Found {} expired items out of {}, deleted {}. " + "Found {} unreferenced Cid entries out of {}, deleted {}. " + "Freed {} on disk and {} of memory in {}. " + "CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " "IsDeleteMode: {}, SkipCidDelete: {}", - Result.ReferencerStat.Expired, - Result.ReferencerStat.Count, - Result.ReferencerStat.Deleted, - Result.ReferencerStat.Expired, - Result.ReferenceStoreStat.Pruned, - Result.ReferenceStoreStat.Count, - Result.ReferenceStoreStat.Compacted, - Result.ReferenceStoreStat.Pruned, - NiceBytes(Result.RemovedDisk), - NiceBytes(Result.RemovedMemory), + Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount, + Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount, + Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount, + + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount, + + NiceBytes(Result.CompactStoresStatSum.RemovedDisk), + NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory), NiceTimeSpanMs(Result.ElapsedMS.count()), Settings.CacheExpireTime, Settings.ProjectStoreExpireTime, @@ -1854,8 +1883,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, { m_LastFullGCV2Result = Result; } - Diff.DiskSize = Result.RemovedDisk; - Diff.MemorySize = Result.RemovedMemory; + Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk; + Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory; } break; } diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 75accd9b8..1429a6b02 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -136,7 +136,8 @@ public: // Ask the store to create empty blocks for all locations that does not have a block // Remove any block that is not referenced - void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations); + void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations); + std::vector<uint32_t> GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent); void Close(); @@ -193,16 +194,25 @@ class BlockStoreCompactState public: BlockStoreCompactState() = default; - void IncludeBlock(uint32_t BlockIndex) + void IncludeBlocks(const std::span<const uint32_t> BlockIndexes) { - auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); - if (It == m_BlockIndexToChunkMapIndex.end()) + for (uint32_t BlockIndex : BlockIndexes) { - m_KeepChunks.emplace_back(std::vector<size_t>()); - m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); + if (It == m_BlockIndexToChunkMapIndex.end()) + { + m_KeepChunks.emplace_back(std::vector<size_t>()); + m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + } } } + void IncludeBlock(uint32_t BlockIndex) + { + const uint32_t Blocks[1] = {BlockIndex}; + IncludeBlocks(Blocks); + } + bool AddKeepLocation(const BlockStoreLocation& Location) { auto It = m_BlockIndexToChunkMapIndex.find(Location.BlockIndex); diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index d4c7bba25..4cd01bc2c 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -60,17 +60,32 @@ struct GcSettings bool SkipCidDelete = false; bool Verbose = false; bool SingleThread = false; + uint32_t CompactBlockUsageThresholdPercent = + 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less + // usage than CompactBlockUsageThresholdPercent + std::filesystem::path DiskReservePath; +}; + +struct GcCompactStoreStats +{ + std::uint64_t RemovedDisk = 0; + std::chrono::milliseconds ElapsedMS = {}; +}; + +struct GcStats +{ + std::uint64_t CheckedCount = 0; + std::uint64_t FoundCount = 0; + std::uint64_t DeletedCount = 0; + std::uint64_t FreedMemory = 0; + std::chrono::milliseconds ElapsedMS = {}; }; struct GcReferencerStats { - std::uint64_t Count = 0; - std::uint64_t Expired = 0; - std::uint64_t Deleted = 0; - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; + GcStats RemoveExpiredDataStats; + GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds RemoveExpiredDataMS = {}; std::chrono::milliseconds CreateReferenceCheckersMS = {}; std::chrono::milliseconds LockStateMS = {}; std::chrono::milliseconds ElapsedMS = {}; @@ -78,43 +93,34 @@ struct GcReferencerStats struct GcReferenceStoreStats { - std::uint64_t Count = 0; - std::uint64_t Pruned = 0; - std::uint64_t Compacted = 0; - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; + GcStats RemoveUnreferencedDataStats; + GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds CreateReferencePrunerMS = {}; - std::chrono::milliseconds RemoveUnreferencedDataMS = {}; - std::chrono::milliseconds CompactReferenceStoreMS = {}; + std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds ElapsedMS = {}; }; struct GcResult { - GcReferencerStats ReferencerStat; - GcReferenceStoreStats ReferenceStoreStat; - - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; - std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats; std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats; + GcReferencerStats ReferencerStatSum; + GcReferenceStoreStats ReferenceStoreStatSum; + GcCompactStoreStats CompactStoresStatSum; + // Wall times, not sum of each std::chrono::milliseconds RemoveExpiredDataMS = {}; std::chrono::milliseconds CreateReferenceCheckersMS = {}; std::chrono::milliseconds LockStateMS = {}; - std::chrono::milliseconds CreateReferencePrunerMS = {}; + std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds RemoveUnreferencedDataMS = {}; - std::chrono::milliseconds CompactReferenceStoreMS = {}; + std::chrono::milliseconds CompactStoresMS = {}; std::chrono::milliseconds WriteBlockMS = {}; std::chrono::milliseconds ElapsedMS = {}; - - void Sum(); }; class CbObjectWriter; @@ -129,22 +135,23 @@ struct GcCtx typedef tsl::robin_set<IoHash> HashSet; /** - * @brief An interface to remove the stored data on disk after a GcReferencePruner::RemoveUnreferencedData + * @brief An interface to remove the stored data on disk after a GcReferencer::RemoveExpiredData and + * GcReferencePruner::RemoveUnreferencedData * - * CompactReferenceStore is called after pruning (GcReferencePruner::RemoveUnreferencedData) and state locking is - * complete so implementor must take care to only remove data that has not been altered since the prune operation. + * CompactStore is called after state locking is complete so implementor must take care to only remove + * data that has not been altered since the prune operation. * - * Instance will be deleted after CompactReferenceStore has completed execution. + * Instance will be deleted after CompactStore has completed execution. * * The subclass constructor should be provided with information on what is intended to be removed. */ -class GcReferenceStoreCompactor +class GcStoreCompactor { public: - virtual ~GcReferenceStoreCompactor() = default; + virtual ~GcStoreCompactor() = default; // Remove data on disk based on results from GcReferencePruner::RemoveUnreferencedData - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) = 0; + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) = 0; }; /** @@ -175,10 +182,6 @@ public: /** * @brief Interface to handle GC of data that references Cid data * - * TODO: Maybe we should split up being a referencer and something that holds cache values? - * - * GcCacheStore and GcReferencer? - * * This interface is registered/unregistered to GcManager vua AddGcReferencer() and RemoveGcReferencer() */ class GcReferencer @@ -190,10 +193,7 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; // Remove expired data based on either GcCtx::Settings CacheExpireTime/ProjectExpireTime - // TODO: For disk layer we need to first update it with access times from the memory layer - // The implementer of GcReferencer (in our case a disk bucket) does not know about any - // potential memory cache layer :( - virtual void RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) = 0; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) = 0; // Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of // returned instances @@ -213,14 +213,12 @@ public: // Check a set of references to see if they are in use. // Use the GetUnusedReferences input function to check if references are used and update any pointers // so any query for references determined to be unreferences will not be found. - // If any references a found to be unused, return a GcReferenceStoreCompactor instance which will + // If any references a found to be unused, return a GcStoreCompactor instance which will // clean up any stored bulk data mapping to the pruned references. // Caller will manage lifetime of returned instance // This function should execute as fast as possible, so try to prepare a list of references to check ahead of // call to this function and make sure the removal of unreferences items is as lightweight as possible. - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) = 0; + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) = 0; }; /** |