diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-30 09:32:54 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-30 09:32:54 +0100 |
| commit | 3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch) | |
| tree | 593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenstore/filecas.cpp | |
| parent | set up arch properly when running tests (mac) (#505) (diff) | |
| download | zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip | |
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenstore/filecas.cpp')
| -rw-r--r-- | src/zenstore/filecas.cpp | 167 |
1 files changed, 166 insertions, 1 deletions
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 24d0a39bb..e28e0dea4 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -122,10 +122,12 @@ FileCasStrategy::ShardingHelper::ShardingHelper(const std::filesystem::path& Roo
 FileCasStrategy::FileCasStrategy(GcManager& Gc) : m_Log(logging::Get("filecas")), m_Gc(Gc)
 {
     m_Gc.AddGcStorage(this);
+    m_Gc.AddGcReferenceStore(*this);
 }
 
 FileCasStrategy::~FileCasStrategy()
 {
+    m_Gc.RemoveGcReferenceStore(*this);
     m_Gc.RemoveGcStorage(this);
 }
 
@@ -1329,7 +1331,170 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir)
     return Entries;
 };
 
-    //////////////////////////////////////////////////////////////////////////
+class FileCasStoreCompactor : public GcReferenceStoreCompactor  // second GC stage: handles payload files of pruned cids
+{
+public:
+    FileCasStoreCompactor(FileCasStrategy& Owner, std::vector<IoHash>&& ReferencesToClean)
+    : m_FileCasStrategy(Owner)
+    , m_ReferencesToClean(std::move(ReferencesToClean))
+    {
+    }
+    // For each cid no longer in the index: delete its payload file (delete mode) or just verify it exists, then count it.
+    virtual void CompactReferenceStore(GcCtx& Ctx)
+    {
+        size_t    CompactedCount = 0;
+        Stopwatch Timer;
+        const auto _ = MakeGuard([&] {
+            ZEN_DEBUG("gc file store '{}': removed data for {} unused cids in {}",
+                      m_FileCasStrategy.m_RootDirectory,
+                      CompactedCount,
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+        });
+        std::vector<IoHash> ReferencedCleaned;
+        ReferencedCleaned.reserve(m_ReferencesToClean.size());
+
+        for (const IoHash& ChunkHash : m_ReferencesToClean)
+        {
+            FileCasStrategy::ShardingHelper Name(m_FileCasStrategy.m_RootDirectory.c_str(), ChunkHash);
+            {
+                RwLock::SharedLockScope __(m_FileCasStrategy.m_Lock);
+                if (auto It = m_FileCasStrategy.m_Index.find(ChunkHash); It != m_FileCasStrategy.m_Index.end())
+                {
+                    // Not regarded as pruned, leave it be
+                    continue;
+                }
+                if (Ctx.Settings.IsDeleteMode)
+                {
+                    ZEN_DEBUG("deleting CAS payload file '{}'", Name.ShardedPath.ToUtf8());
+                    std::error_code Ec;
+                    uint64_t        SizeOnDisk = std::filesystem::file_size(Name.ShardedPath.c_str(), Ec);
+                    if (Ec)
+                    {
+                        SizeOnDisk = 0;  // size unknown; still attempt the removal below
+                    }
+                    bool Existed = std::filesystem::remove(Name.ShardedPath.c_str(), Ec);
+                    if (Ec)
+                    {
+                        ZEN_WARN("failed deleting CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message());
+                        continue;
+                    }
+                    if (!Existed)
+                    {
+                        continue;
+                    }
+                    Ctx.RemovedDiskSpace.fetch_add(SizeOnDisk);
+                }
+                else
+                {
+                    std::error_code Ec;
+                    bool            Existed = std::filesystem::is_regular_file(Name.ShardedPath.c_str(), Ec);
+                    if (Ec)
+                    {
+                        ZEN_WARN("failed checking CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message());
+                        continue;
+                    }
+                    if (!Existed)
+                    {
+                        continue;
+                    }
+                }
+                ReferencedCleaned.push_back(ChunkHash);
+            }
+        }
+        CompactedCount = ReferencedCleaned.size();
+        Ctx.CompactedReferences.fetch_add(ReferencedCleaned.size());
+    }
+
+private:
+    FileCasStrategy&    m_FileCasStrategy;
+    std::vector<IoHash> m_ReferencesToClean;
+};
+// First GC stage: holds the candidate cids and removes the unused ones from the owning strategy's index.
+class FileCasReferencePruner : public GcReferencePruner
+{
+public:
+    FileCasReferencePruner(FileCasStrategy& Owner, std::vector<IoHash>&& Cids) : m_FileCasStrategy(Owner), m_Cids(std::move(Cids)) {}
+    // Returns a heap-allocated compactor for the cids pruned here; index/log/size are only modified in delete mode.
+    virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, const GetUnusedReferencesFunc& GetUnusedReferences)
+    {
+        size_t    TotalCount = m_Cids.size();
+        size_t    PruneCount = 0;
+        Stopwatch Timer;
+        const auto _ = MakeGuard([&] {
+            ZEN_DEBUG("gc file store '{}': removed {} unused cid out of {} in {}",
+                      m_FileCasStrategy.m_RootDirectory,
+                      PruneCount,
+                      TotalCount,
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+        });
+
+        std::vector<IoHash> UnusedReferences = GetUnusedReferences(m_Cids);
+        m_Cids.clear();
+
+        std::vector<IoHash> PrunedReferences;
+        PrunedReferences.reserve(UnusedReferences.size());
+        {
+            RwLock::ExclusiveLockScope __(m_FileCasStrategy.m_Lock);
+            for (const IoHash& ChunkHash : UnusedReferences)
+            {
+                auto It = m_FileCasStrategy.m_Index.find(ChunkHash);
+                if (It == m_FileCasStrategy.m_Index.end())
+                {
+                    continue;
+                }
+                if (Ctx.Settings.IsDeleteMode)
+                {
+                    uint64_t FileSize = It->second.Size;  // read before erase(); It must not be dereferenced afterwards
+                    m_FileCasStrategy.m_Index.erase(It);
+                    m_FileCasStrategy.m_CasLog.Append(
+                        {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize});
+                    m_FileCasStrategy.m_TotalSize.fetch_sub(FileSize, std::memory_order_relaxed);
+                }
+                PrunedReferences.push_back(ChunkHash);
+            }
+        }
+
+        PruneCount = PrunedReferences.size();
+        Ctx.PrunedReferences.fetch_add(PruneCount);
+        return new FileCasStoreCompactor(m_FileCasStrategy, std::move(PrunedReferences));
+    }
+
+private:
+    FileCasStrategy&    m_FileCasStrategy;
+    std::vector<IoHash> m_Cids;
+};
+// Copies every key of m_Index (under a shared lock) into a new pruner; returns null when the index is empty.
+GcReferencePruner*
+FileCasStrategy::CreateReferencePruner(GcCtx& Ctx)
+{
+    // TODO (left unspecified by the original change)
+    std::size_t TotalCount = 0;
+    Stopwatch   Timer;
+    const auto  _ = MakeGuard([&] {
+        ZEN_DEBUG("gc file store '{}': found {} cid keys to check in {}",
+                  m_RootDirectory,
+                  TotalCount,
+                  NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+    });
+    std::vector<IoHash> CidsToCheck;
+    {
+        RwLock::SharedLockScope __(m_Lock);
+        CidsToCheck.reserve(m_Index.size());
+        for (const auto& It : m_Index)
+        {
+            CidsToCheck.push_back(It.first);
+        }
+    }
+    TotalCount = CidsToCheck.size();
+    if (TotalCount == 0)
+    {
+        return {};
+    }
+    Ctx.References.fetch_add(TotalCount);
+    return new FileCasReferencePruner(*this, std::move(CidsToCheck));
+}
+
+//////////////////////////////////////////////////////////////////////////
 
 #if ZEN_WITH_TESTS
 