diff options
Diffstat (limited to 'zenstore/gc.cpp')
| -rw-r--r-- | zenstore/gc.cpp | 174 |
1 files changed, 59 insertions, 115 deletions
diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp index bb03b9751..0902abf4a 100644 --- a/zenstore/gc.cpp +++ b/zenstore/gc.cpp @@ -14,9 +14,10 @@ #include <zencore/testing.h> #include <zencore/testutils.h> #include <zencore/timer.h> -#include <zenstore/cas.h> #include <zenstore/cidstore.h> +#include "cas.h" + #include <fmt/format.h> #include <filesystem> @@ -173,9 +174,8 @@ struct GcContext::GcState using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>; CacheKeyContexts m_ExpiredCacheKeys; - CasChunkSet m_CasChunks; - CasChunkSet m_DeletedCasChunks; - CasChunkSet m_CidChunks; + HashKeySet m_RetainedCids; + HashKeySet m_DeletedCids; GcClock::TimePoint m_GcTime; GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24); bool m_DeletionMode = true; @@ -194,19 +194,13 @@ GcContext::~GcContext() } void -GcContext::ContributeCids(std::span<const IoHash> Cids) -{ - m_State->m_CidChunks.AddChunksToSet(Cids); -} - -void -GcContext::ContributeCas(std::span<const IoHash> Cas) +GcContext::AddRetainedCids(std::span<const IoHash> Cids) { - m_State->m_CasChunks.AddChunksToSet(Cas); + m_State->m_RetainedCids.AddHashesToSet(Cids); } void -GcContext::ContributeCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys) +GcContext::SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys) { m_State->m_ExpiredCacheKeys[CacheKeyContext] = std::move(ExpiredKeys); } @@ -214,37 +208,31 @@ GcContext::ContributeCacheKeys(const std::string& CacheKeyContext, std::vector<I void GcContext::IterateCids(std::function<void(const IoHash&)> Callback) { - m_State->m_CidChunks.IterateChunks([&](const IoHash& Hash) { Callback(Hash); }); + m_State->m_RetainedCids.IterateHashes([&](const IoHash& Hash) { Callback(Hash); }); } void GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc) { - m_State->m_CidChunks.FilterChunks(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); }); + m_State->m_RetainedCids.FilterHashes(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); }); } void -GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&)> KeepFunc) +GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc) { - m_State->m_CasChunks.FilterChunks(Cas, [&](const IoHash& Hash) { KeepFunc(Hash); }); + m_State->m_RetainedCids.FilterHashes(Cid, std::move(FilterFunc)); } void -GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&, bool)>&& FilterFunc) +GcContext::AddDeletedCids(std::span<const IoHash> Cas) { - m_State->m_CasChunks.FilterChunks(Cas, std::move(FilterFunc)); + m_State->m_DeletedCids.AddHashesToSet(Cas); } -void -GcContext::DeletedCas(std::span<const IoHash> Cas) +const HashKeySet& +GcContext::DeletedCids() { - m_State->m_DeletedCasChunks.AddChunksToSet(Cas); -} - -CasChunkSet& -GcContext::DeletedCas() -{ - return m_State->m_DeletedCasChunks; + return m_State->m_DeletedCids; } std::span<const IoHash> @@ -318,7 +306,7 @@ GcContext::ClaimGCReserve() ////////////////////////////////////////////////////////////////////////// -GcContributor::GcContributor(CasGc& Gc) : m_Gc(Gc) +GcContributor::GcContributor(GcManager& Gc) : m_Gc(Gc) { m_Gc.AddGcContributor(this); } @@ -330,7 +318,7 @@ GcContributor::~GcContributor() ////////////////////////////////////////////////////////////////////////// -GcStorage::GcStorage(CasGc& Gc) : m_Gc(Gc) +GcStorage::GcStorage(GcManager& Gc) : m_Gc(Gc) { m_Gc.AddGcStorage(this); } @@ -342,30 +330,30 @@ GcStorage::~GcStorage() ////////////////////////////////////////////////////////////////////////// -CasGc::CasGc() +GcManager::GcManager() { } -CasGc::~CasGc() +GcManager::~GcManager() { } void -CasGc::AddGcContributor(GcContributor* Contributor) +GcManager::AddGcContributor(GcContributor* Contributor) { RwLock::ExclusiveLockScope _(m_Lock); m_GcContribs.push_back(Contributor); } void -CasGc::RemoveGcContributor(GcContributor* Contributor) +GcManager::RemoveGcContributor(GcContributor* Contributor) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcContribs, [&](GcContributor* $) { return $ == Contributor; }); } void -CasGc::AddGcStorage(GcStorage* Storage) +GcManager::AddGcStorage(GcStorage* Storage) { ZEN_ASSERT(Storage != nullptr); RwLock::ExclusiveLockScope _(m_Lock); @@ -373,14 +361,14 @@ CasGc::AddGcStorage(GcStorage* Storage) } void -CasGc::RemoveGcStorage(GcStorage* Storage) +GcManager::RemoveGcStorage(GcStorage* Storage) { RwLock::ExclusiveLockScope _(m_Lock); std::erase_if(m_GcStorage, [&](GcStorage* $) { return $ == Storage; }); } void -CasGc::CollectGarbage(GcContext& GcCtx) +GcManager::CollectGarbage(GcContext& GcCtx) { RwLock::SharedLockScope _(m_Lock); @@ -394,36 +382,6 @@ CasGc::CollectGarbage(GcContext& GcCtx) } } - // Cache records reference CAS chunks with the uncompressed - // raw hash (Cid). Map the content ID to CAS hash to enable - // the CAS storage backends to filter valid chunks. - - if (CidStore* CidStore = m_CidStore) - { - std::vector<IoHash> CasHashes; - uint64_t UnknownChunks = 0; - - GcCtx.IterateCids([&](const IoHash& Cid) { - IoHash Cas = CidStore->RemapCid(Cid); - - if (Cas == IoHash::Zero) - { - ++UnknownChunks; - } - else - { - CasHashes.push_back(Cas); - } - }); - - if (UnknownChunks) - { - ZEN_WARN("found {} unknown CIDs", UnknownChunks); - } - - GcCtx.ContributeCas(CasHashes); - } - // Then trim storage { @@ -434,61 +392,48 @@ CasGc::CollectGarbage(GcContext& GcCtx) Storage->CollectGarbage(GcCtx); } } +} + +GcStorageSize +GcManager::TotalStorageSize() const +{ + RwLock::SharedLockScope _(m_Lock); - // Remove Cid to CAS hash mappings. Scrub? + GcStorageSize TotalSize; - if (CidStore* CidStore = m_CidStore) + for (GcStorage* Storage : m_GcStorage) { - Stopwatch Timer; - const auto Guard = MakeGuard([&] { ZEN_INFO("clean up deleted content ids in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - CidStore->RemoveCids(GcCtx.DeletedCas()); + const auto Size = Storage->StorageSize(); + TotalSize.DiskSize += Size.DiskSize; + TotalSize.MemorySize += Size.MemorySize; } -} -void -CasGc::SetCidStore(CidStore* Cids) -{ - m_CidStore = Cids; + return TotalSize; } +#if ZEN_USE_REF_TRACKING void -CasGc::OnNewCidReferences(std::span<IoHash> Hashes) +GcManager::OnNewCidReferences(std::span<IoHash> Hashes) { ZEN_UNUSED(Hashes); } void -CasGc::OnCommittedCidReferences(std::span<IoHash> Hashes) +GcManager::OnCommittedCidReferences(std::span<IoHash> Hashes) { ZEN_UNUSED(Hashes); } void -CasGc::OnDroppedCidReferences(std::span<IoHash> Hashes) +GcManager::OnDroppedCidReferences(std::span<IoHash> Hashes) { ZEN_UNUSED(Hashes); } - -GcStorageSize -CasGc::TotalStorageSize() const -{ - RwLock::SharedLockScope _(m_Lock); - - GcStorageSize TotalSize; - - for (GcStorage* Storage : m_GcStorage) - { - const auto Size = Storage->StorageSize(); - TotalSize.DiskSize += Size.DiskSize; - TotalSize.MemorySize += Size.MemorySize; - } - - return TotalSize; -} +#endif ////////////////////////////////////////////////////////////////////////// -GcScheduler::GcScheduler(CasGc& CasGc) : m_Log(logging::Get("gc")), m_CasGc(CasGc) +GcScheduler::GcScheduler(GcManager& GcManager) : m_Log(logging::Get("gc")), m_GcManager(GcManager) { } @@ -606,7 +551,7 @@ GcScheduler::SchedulerThread() { std::error_code Ec; DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); - GcStorageSize TotalSize = m_CasGc.TotalStorageSize(); + GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now()); if (RemaingTime < std::chrono::seconds::zero()) @@ -668,7 +613,7 @@ GcScheduler::SchedulerThread() Stopwatch Timer; const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - m_CasGc.CollectGarbage(GcCtx); + m_GcManager.CollectGarbage(GcCtx); m_LastGcTime = GcClock::Now(); m_NextGcTime = NextGcTime(m_LastGcTime); @@ -745,38 +690,37 @@ TEST_CASE("gc.basic") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; + CidStoreConfiguration CasConfig; CasConfig.RootDirectory = TempDir.Path() / "cas"; - CasGc Gc; - std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc); - CidStore CidStore{*CasStore, TempDir.Path() / "cid"}; + GcManager Gc; + CidStore CidStore(Gc); - CasStore->Initialize(CasConfig); - Gc.SetCidStore(&CidStore); + CidStore.Initialize(CasConfig); IoBuffer Chunk = CreateChunk(128); auto CompressedChunk = Compress(Chunk); const auto InsertResult = CidStore.AddChunk(CompressedChunk); + CHECK(InsertResult.New); GcContext GcCtx; GcCtx.CollectSmallObjects(true); - CasStore->Flush(); + CidStore.Flush(); Gc.CollectGarbage(GcCtx); - CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId)); + CHECK(!CidStore.ContainsChunk(IoHash::FromBLAKE3(CompressedChunk.GetRawHash()))); } TEST_CASE("gc.full") { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; + CidStoreConfiguration CasConfig; CasConfig.RootDirectory = TempDir.Path() / "cas"; - CasGc Gc; + GcManager Gc; std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc); CasStore->Initialize(CasConfig); @@ -813,7 +757,7 @@ TEST_CASE("gc.full") CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); CasStore->InsertChunk(Chunks[8], ChunkHashes[8]); - CasStoreSize InitialSize = CasStore->TotalSize(); + CidStoreSize InitialSize = CasStore->TotalSize(); // Keep first and last { @@ -823,7 +767,7 @@ TEST_CASE("gc.full") std::vector<IoHash> KeepChunks; KeepChunks.push_back(ChunkHashes[0]); KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); @@ -856,7 +800,7 @@ TEST_CASE("gc.full") GcCtx.CollectSmallObjects(true); std::vector<IoHash> KeepChunks; KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); @@ -890,7 +834,7 @@ TEST_CASE("gc.full") KeepChunks.push_back(ChunkHashes[1]); KeepChunks.push_back(ChunkHashes[4]); KeepChunks.push_back(ChunkHashes[7]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); @@ -925,7 +869,7 @@ TEST_CASE("gc.full") KeepChunks.push_back(ChunkHashes[6]); KeepChunks.push_back(ChunkHashes[7]); KeepChunks.push_back(ChunkHashes[8]); - GcCtx.ContributeCas(KeepChunks); + GcCtx.AddRetainedCids(KeepChunks); CasStore->Flush(); Gc.CollectGarbage(GcCtx); |