diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/filecas.cpp | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, separating the actual disk work from the deletion of items in the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make compacting of stores non-parallel so it does not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/filecas.cpp')
| -rw-r--r-- | src/zenstore/filecas.cpp | 113 |
1 files changed, 61 insertions, 52 deletions
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index a72619e4b..6e432bc9d 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1331,35 +1331,31 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) return Entries; }; -class FileCasStoreCompactor : public GcReferenceStoreCompactor +class FileCasStoreCompactor : public GcStoreCompactor { public: FileCasStoreCompactor(FileCasStrategy& Owner, std::vector<IoHash>&& ReferencesToClean) : m_FileCasStrategy(Owner) , m_ReferencesToClean(std::move(ReferencesToClean)) { + m_ReferencesToClean.shrink_to_fit(); } - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>&) { - Stopwatch Timer; - const auto _ = MakeGuard([&] { - if (!Ctx.Settings.Verbose) - { - return; - } - ZEN_INFO("GCV2: filecas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", - m_FileCasStrategy.m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - }); - std::vector<IoHash> ReferencedCleaned; - ReferencedCleaned.reserve(m_ReferencesToClean.size()); + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: filecas [COMPACT] '{}': RemovedDisk: {} in {}", + m_FileCasStrategy.m_RootDirectory, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + size_t Skipped = 0; for (const IoHash& ChunkHash : m_ReferencesToClean) { FileCasStrategy::ShardingHelper Name(m_FileCasStrategy.m_RootDirectory.c_str(), ChunkHash); @@ -1372,7 +1368,9 @@ public: } if (Ctx.Settings.IsDeleteMode) { - ZEN_DEBUG("deleting CAS payload file '{}'", Name.ShardedPath.ToUtf8()); + ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Deleting CAS payload file '{}'", + 
m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8()); std::error_code Ec; uint64_t SizeOnDisk = std::filesystem::file_size(Name.ShardedPath.c_str(), Ec); if (Ec) @@ -1382,7 +1380,10 @@ public: bool Existed = std::filesystem::remove(Name.ShardedPath.c_str(), Ec); if (Ec) { - ZEN_WARN("failed deleting CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message()); + ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed deleting CAS payload file '{}'. Reason '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8(), + Ec.message()); continue; } if (!Existed) @@ -1397,18 +1398,28 @@ public: bool Existed = std::filesystem::is_regular_file(Name.ShardedPath.c_str(), Ec); if (Ec) { - ZEN_WARN("failed checking CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message()); + ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed checking CAS payload file '{}'. Reason '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8(), + Ec.message()); continue; } if (!Existed) { continue; } + Skipped++; } - ReferencedCleaned.push_back(ChunkHash); } } - Stats.Compacted += ReferencedCleaned.size(); + + if (Skipped > 0) + { + ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Skipped deleting of {} eligible files", m_FileCasStrategy.m_RootDirectory, Skipped); + } + + m_ReferencesToClean.clear(); + m_ReferencesToClean.shrink_to_fit(); } private: @@ -1421,9 +1432,7 @@ class FileCasReferencePruner : public GcReferencePruner public: FileCasReferencePruner(FileCasStrategy& Owner, std::vector<IoHash>&& Cids) : m_FileCasStrategy(Owner), m_Cids(std::move(Cids)) {} - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1431,23 +1440,29 @@ public: { return; } - 
ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Unreferenced: {}, FreedMemory: {} in {}", m_FileCasStrategy.m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.FoundCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); if (UnusedCids.empty()) { // Nothing to collect return nullptr; } + Stats.FoundCount += UnusedCids.size(); + + if (!Ctx.Settings.IsDeleteMode) + { + return nullptr; + } + std::vector<IoHash> PrunedReferences; PrunedReferences.reserve(UnusedCids.size()); { @@ -1459,19 +1474,21 @@ public: { continue; } - if (Ctx.Settings.IsDeleteMode) - { - uint64_t FileSize = It->second.Size; - m_FileCasStrategy.m_Index.erase(It); - m_FileCasStrategy.m_CasLog.Append( - {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize}); - m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed); - } + uint64_t FileSize = It->second.Size; + m_FileCasStrategy.m_Index.erase(It); + m_FileCasStrategy.m_CasLog.Append( + {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize}); + m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed); PrunedReferences.push_back(ChunkHash); + Stats.DeletedCount++; } } - Stats.Pruned += PrunedReferences.size(); + if (PrunedReferences.empty()) + { + return nullptr; + } + return new FileCasStoreCompactor(m_FileCasStrategy, std::move(PrunedReferences)); } @@ -1487,7 +1504,7 @@ FileCasStrategy::GetGcName(GcCtx&) } GcReferencePruner* -FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) +FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, 
GcReferenceStoreStats&) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1495,14 +1512,7 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) { return; } - ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", - m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}' in {}", m_RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> CidsToCheck; { @@ -1517,7 +1527,6 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) CidsToCheck.push_back(It.first); } } - Stats.Count += CidsToCheck.size(); return new FileCasReferencePruner(*this, std::move(CidsToCheck)); } |