aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/filecas.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-30 09:32:54 +0100
committerGitHub <[email protected]>2023-10-30 09:32:54 +0100
commit3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch)
tree593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenstore/filecas.cpp
parentset up arch properly when running tests (mac) (#505) (diff)
downloadzen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz
zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenstore/filecas.cpp')
-rw-r--r--src/zenstore/filecas.cpp167
1 files changed, 166 insertions, 1 deletions
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 24d0a39bb..e28e0dea4 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -122,10 +122,12 @@ FileCasStrategy::ShardingHelper::ShardingHelper(const std::filesystem::path& Roo
// Constructs the strategy with the "filecas" logger and registers it with the given GcManager.
FileCasStrategy::FileCasStrategy(GcManager& Gc) : m_Log(logging::Get("filecas")), m_Gc(Gc)
{
m_Gc.AddGcStorage(this);  // register as a GC storage
+ m_Gc.AddGcReferenceStore(*this);  // additionally register as a GC reference store
}
// Unregisters the strategy from the GcManager, in the reverse order of the registrations made in the constructor.
FileCasStrategy::~FileCasStrategy()
{
+ m_Gc.RemoveGcReferenceStore(*this);  // undo AddGcReferenceStore first
m_Gc.RemoveGcStorage(this);  // then undo AddGcStorage
}
@@ -1329,7 +1331,170 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir)
return Entries;
};
- //////////////////////////////////////////////////////////////////////////
+// Compaction stage for the file CAS store.
+//
+// Receives a list of cids and, for each one that is no longer present in the
+// owning FileCasStrategy's index, deals with its sharded payload file on disk.
+class FileCasStoreCompactor : public GcReferenceStoreCompactor
+{
+public:
+    FileCasStoreCompactor(FileCasStrategy& Owner, std::vector<IoHash>&& ReferencesToClean)
+    : m_FileCasStrategy(Owner)
+    , m_ReferencesToClean(std::move(ReferencesToClean))
+    {
+    }
+
+    // Walks m_ReferencesToClean. Cids that are still in the index are skipped.
+    // In delete mode the payload file is removed and its size (0 if it could not
+    // be queried) is added to Ctx.RemovedDiskSpace; otherwise the file is only
+    // checked to be a regular file. Every cid that made it through is counted
+    // in Ctx.CompactedReferences.
+    virtual void CompactReferenceStore(GcCtx& Ctx)
+    {
+        size_t     CompactedCount = 0;
+        Stopwatch  Timer;
+        const auto LogOnExit = MakeGuard([&] {
+            ZEN_DEBUG("gc file store '{}': removed data for {} unused cids in {}",
+                      m_FileCasStrategy.m_RootDirectory,
+                      CompactedCount,
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+        });
+
+        std::vector<IoHash> Handled;
+        Handled.reserve(m_ReferencesToClean.size());
+
+        for (const IoHash& Cid : m_ReferencesToClean)
+        {
+            FileCasStrategy::ShardingHelper PayloadName(m_FileCasStrategy.m_RootDirectory.c_str(), Cid);
+
+            // The shared lock is held for the whole per-cid step, including the file system calls.
+            RwLock::SharedLockScope IndexLock(m_FileCasStrategy.m_Lock);
+
+            if (m_FileCasStrategy.m_Index.find(Cid) != m_FileCasStrategy.m_Index.end())
+            {
+                // Not regarded as pruned, leave it be
+                continue;
+            }
+
+            std::error_code Ec;
+            if (!Ctx.Settings.IsDeleteMode)
+            {
+                // Not deleting: only verify that a regular payload file is there.
+                const bool IsPayloadFile = std::filesystem::is_regular_file(PayloadName.ShardedPath.c_str(), Ec);
+                if (Ec)
+                {
+                    ZEN_WARN("failed checking CAS payload file '{}'. Reason '{}'", PayloadName.ShardedPath.ToUtf8(), Ec.message());
+                    continue;
+                }
+                if (!IsPayloadFile)
+                {
+                    continue;
+                }
+            }
+            else
+            {
+                ZEN_DEBUG("deleting CAS payload file '{}'", PayloadName.ShardedPath.ToUtf8());
+
+                // A failed size query is not fatal, the file is simply accounted as 0 bytes.
+                const uint64_t QueriedSize = std::filesystem::file_size(PayloadName.ShardedPath.c_str(), Ec);
+                const uint64_t SizeOnDisk  = Ec ? 0 : QueriedSize;
+
+                const bool WasRemoved = std::filesystem::remove(PayloadName.ShardedPath.c_str(), Ec);
+                if (Ec)
+                {
+                    ZEN_WARN("failed deleting CAS payload file '{}'. Reason '{}'", PayloadName.ShardedPath.ToUtf8(), Ec.message());
+                    continue;
+                }
+                if (!WasRemoved)
+                {
+                    continue;
+                }
+                Ctx.RemovedDiskSpace.fetch_add(SizeOnDisk);
+            }
+
+            Handled.push_back(Cid);
+        }
+
+        CompactedCount = Handled.size();
+        Ctx.CompactedReferences.fetch_add(Handled.size());
+    }
+
+private:
+    FileCasStrategy&    m_FileCasStrategy;
+    std::vector<IoHash> m_ReferencesToClean;
+};
+
+// Pruning stage for the file CAS store.
+//
+// Holds the cid keys handed to it at construction and, when asked, removes the
+// ones reported as unused from the owning FileCasStrategy's index.
+class FileCasReferencePruner : public GcReferencePruner
+{
+public:
+    FileCasReferencePruner(FileCasStrategy& Owner, std::vector<IoHash>&& Cids) : m_FileCasStrategy(Owner), m_Cids(std::move(Cids)) {}
+
+    // Filters m_Cids through GetUnusedReferences and, under the exclusive index
+    // lock, processes every unused cid that is still present in the index.
+    //
+    // In delete mode each such entry is erased from the index, a tombstone is
+    // appended to the CAS log and the entry size is subtracted from m_TotalSize.
+    // In either mode the cid is recorded as pruned and counted in Ctx.PrunedReferences.
+    //
+    // Returns a newly allocated FileCasStoreCompactor for the pruned cids.
+    // NOTE(review): the returned raw pointer is presumably owned by the caller - confirm
+    // against the GcReferencePruner contract.
+    virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, const GetUnusedReferencesFunc& GetUnusedReferences)
+    {
+        size_t     TotalCount = m_Cids.size();
+        size_t     PruneCount = 0;
+        Stopwatch  Timer;
+        const auto _ = MakeGuard([&] {
+            ZEN_DEBUG("gc file store '{}': removed {} unused cid out of {} in {}",
+                      m_FileCasStrategy.m_RootDirectory,
+                      PruneCount,
+                      TotalCount,
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+        });
+
+        std::vector<IoHash> UnusedReferences = GetUnusedReferences(m_Cids);
+        m_Cids.clear();
+
+        std::vector<IoHash> PrunedReferences;
+        PrunedReferences.reserve(UnusedReferences.size());
+        {
+            RwLock::ExclusiveLockScope __(m_FileCasStrategy.m_Lock);
+            for (const IoHash& ChunkHash : UnusedReferences)
+            {
+                auto It = m_FileCasStrategy.m_Index.find(ChunkHash);
+                if (It == m_FileCasStrategy.m_Index.end())
+                {
+                    continue;
+                }
+                if (Ctx.Settings.IsDeleteMode)
+                {
+                    // Capture the size before erasing: `It` is invalid once the entry is erased
+                    // and must not be dereferenced afterwards.
+                    const uint64_t FileSize = It->second.Size;
+                    m_FileCasStrategy.m_Index.erase(It);
+                    m_FileCasStrategy.m_CasLog.Append(
+                        {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize});
+                    m_FileCasStrategy.m_TotalSize.fetch_sub(FileSize, std::memory_order_relaxed);
+                }
+                PrunedReferences.push_back(ChunkHash);
+            }
+        }
+
+        PruneCount = PrunedReferences.size();
+        Ctx.PrunedReferences.fetch_add(PruneCount);
+        return new FileCasStoreCompactor(m_FileCasStrategy, std::move(PrunedReferences));
+    }
+
+private:
+    FileCasStrategy&    m_FileCasStrategy;
+    std::vector<IoHash> m_Cids;
+};
+
+// Takes a snapshot of every cid key currently in the index (under the shared
+// lock) and wraps it in a FileCasReferencePruner. Returns null when the index
+// is empty; otherwise the number of collected keys is added to Ctx.References.
+GcReferencePruner*
+FileCasStrategy::CreateReferencePruner(GcCtx& Ctx)
+{
+    // TODO
+    std::size_t TotalCount = 0;
+    Stopwatch   Timer;
+    const auto  LogOnExit = MakeGuard([&] {
+        ZEN_DEBUG("gc file store '{}': found {} cid keys to check in {}",
+                  m_RootDirectory,
+                  TotalCount,
+                  NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+    });
+
+    std::vector<IoHash> Snapshot;
+    {
+        RwLock::SharedLockScope IndexLock(m_Lock);
+        Snapshot.reserve(m_Index.size());
+        for (const auto& Entry : m_Index)
+        {
+            Snapshot.push_back(Entry.first);
+        }
+    }
+
+    TotalCount = Snapshot.size();
+    if (TotalCount != 0)
+    {
+        Ctx.References.fetch_add(TotalCount);
+        return new FileCasReferencePruner(*this, std::move(Snapshot));
+    }
+
+    // Nothing in the index, so there is nothing to prune.
+    return nullptr;
+}
+
+//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS