aboutsummaryrefslogtreecommitdiff
path: root/zenstore/gc.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2022-06-17 07:06:21 -0700
committer: GitHub <[email protected]> 2022-06-17 07:06:21 -0700
commit: c7e22a4ef1cce7103b9afbeec487461cb32f8dbe (patch)
tree: 8b99d51bf496c96f82161c18fbdcfd5c6f8f31fd /zenstore/gc.cpp
parent: fixed merge mistake which caused a build error (diff)
download: zen-0.1.4-pre6.tar.xz
zen-0.1.4-pre6.zip
Make cas storage a hidden implementation detail of CidStore (#130) [tags: v0.1.4-pre6, v0.1.4-pre5]
- Bumped ZEN_SCHEMA_VERSION
- CasStore is no longer a public API; it is hidden behind CidStore
- Moved cas.h from public header folder
- CidStore no longer maps from Cid -> Cas; we store entries in Cas under RawHash
- CasStore now decompresses data to validate content (matching against RawHash)
- CasChunkSet renamed to HashKeySet and moved to a separate header/cpp file
- Disabled "Chunk" command for now as it relied on CAS being exposed as a service
- Changed CAS http service to Cid http server
- Moved "Run" command completely inside ZEN_WITH_EXEC_SERVICES define
- Removed "cas.basic" test
- Uncommented ".exec.basic" test and added return-skip at start of test
- Moved ScrubContext to separate header file
- Renamed CasGC to GcManager
- Cleaned up configuration passing in cas store classes
- Removed CAS stuff from GcContext and clarified naming in class
- Removed migration code
Diffstat (limited to 'zenstore/gc.cpp')
-rw-r--r--zenstore/gc.cpp174
1 files changed, 59 insertions, 115 deletions
diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp
index bb03b9751..0902abf4a 100644
--- a/zenstore/gc.cpp
+++ b/zenstore/gc.cpp
@@ -14,9 +14,10 @@
#include <zencore/testing.h>
#include <zencore/testutils.h>
#include <zencore/timer.h>
-#include <zenstore/cas.h>
#include <zenstore/cidstore.h>
+#include "cas.h"
+
#include <fmt/format.h>
#include <filesystem>
@@ -173,9 +174,8 @@ struct GcContext::GcState
using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>;
CacheKeyContexts m_ExpiredCacheKeys;
- CasChunkSet m_CasChunks;
- CasChunkSet m_DeletedCasChunks;
- CasChunkSet m_CidChunks;
+ HashKeySet m_RetainedCids;
+ HashKeySet m_DeletedCids;
GcClock::TimePoint m_GcTime;
GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24);
bool m_DeletionMode = true;
@@ -194,19 +194,13 @@ GcContext::~GcContext()
}
void
-GcContext::ContributeCids(std::span<const IoHash> Cids)
-{
- m_State->m_CidChunks.AddChunksToSet(Cids);
-}
-
-void
-GcContext::ContributeCas(std::span<const IoHash> Cas)
+GcContext::AddRetainedCids(std::span<const IoHash> Cids)
{
- m_State->m_CasChunks.AddChunksToSet(Cas);
+ m_State->m_RetainedCids.AddHashesToSet(Cids);
}
void
-GcContext::ContributeCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys)
+GcContext::SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys)
{
m_State->m_ExpiredCacheKeys[CacheKeyContext] = std::move(ExpiredKeys);
}
@@ -214,37 +208,31 @@ GcContext::ContributeCacheKeys(const std::string& CacheKeyContext, std::vector<I
void
GcContext::IterateCids(std::function<void(const IoHash&)> Callback)
{
- m_State->m_CidChunks.IterateChunks([&](const IoHash& Hash) { Callback(Hash); });
+ m_State->m_RetainedCids.IterateHashes([&](const IoHash& Hash) { Callback(Hash); });
}
void
GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc)
{
- m_State->m_CidChunks.FilterChunks(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); });
+ m_State->m_RetainedCids.FilterHashes(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); });
}
void
-GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&)> KeepFunc)
+GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc)
{
- m_State->m_CasChunks.FilterChunks(Cas, [&](const IoHash& Hash) { KeepFunc(Hash); });
+ m_State->m_RetainedCids.FilterHashes(Cid, std::move(FilterFunc));
}
void
-GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&, bool)>&& FilterFunc)
+GcContext::AddDeletedCids(std::span<const IoHash> Cas)
{
- m_State->m_CasChunks.FilterChunks(Cas, std::move(FilterFunc));
+ m_State->m_DeletedCids.AddHashesToSet(Cas);
}
-void
-GcContext::DeletedCas(std::span<const IoHash> Cas)
+const HashKeySet&
+GcContext::DeletedCids()
{
- m_State->m_DeletedCasChunks.AddChunksToSet(Cas);
-}
-
-CasChunkSet&
-GcContext::DeletedCas()
-{
- return m_State->m_DeletedCasChunks;
+ return m_State->m_DeletedCids;
}
std::span<const IoHash>
@@ -318,7 +306,7 @@ GcContext::ClaimGCReserve()
//////////////////////////////////////////////////////////////////////////
-GcContributor::GcContributor(CasGc& Gc) : m_Gc(Gc)
+GcContributor::GcContributor(GcManager& Gc) : m_Gc(Gc)
{
m_Gc.AddGcContributor(this);
}
@@ -330,7 +318,7 @@ GcContributor::~GcContributor()
//////////////////////////////////////////////////////////////////////////
-GcStorage::GcStorage(CasGc& Gc) : m_Gc(Gc)
+GcStorage::GcStorage(GcManager& Gc) : m_Gc(Gc)
{
m_Gc.AddGcStorage(this);
}
@@ -342,30 +330,30 @@ GcStorage::~GcStorage()
//////////////////////////////////////////////////////////////////////////
-CasGc::CasGc()
+GcManager::GcManager()
{
}
-CasGc::~CasGc()
+GcManager::~GcManager()
{
}
void
-CasGc::AddGcContributor(GcContributor* Contributor)
+GcManager::AddGcContributor(GcContributor* Contributor)
{
RwLock::ExclusiveLockScope _(m_Lock);
m_GcContribs.push_back(Contributor);
}
void
-CasGc::RemoveGcContributor(GcContributor* Contributor)
+GcManager::RemoveGcContributor(GcContributor* Contributor)
{
RwLock::ExclusiveLockScope _(m_Lock);
std::erase_if(m_GcContribs, [&](GcContributor* $) { return $ == Contributor; });
}
void
-CasGc::AddGcStorage(GcStorage* Storage)
+GcManager::AddGcStorage(GcStorage* Storage)
{
ZEN_ASSERT(Storage != nullptr);
RwLock::ExclusiveLockScope _(m_Lock);
@@ -373,14 +361,14 @@ CasGc::AddGcStorage(GcStorage* Storage)
}
void
-CasGc::RemoveGcStorage(GcStorage* Storage)
+GcManager::RemoveGcStorage(GcStorage* Storage)
{
RwLock::ExclusiveLockScope _(m_Lock);
std::erase_if(m_GcStorage, [&](GcStorage* $) { return $ == Storage; });
}
void
-CasGc::CollectGarbage(GcContext& GcCtx)
+GcManager::CollectGarbage(GcContext& GcCtx)
{
RwLock::SharedLockScope _(m_Lock);
@@ -394,36 +382,6 @@ CasGc::CollectGarbage(GcContext& GcCtx)
}
}
- // Cache records reference CAS chunks with the uncompressed
- // raw hash (Cid). Map the content ID to CAS hash to enable
- // the CAS storage backends to filter valid chunks.
-
- if (CidStore* CidStore = m_CidStore)
- {
- std::vector<IoHash> CasHashes;
- uint64_t UnknownChunks = 0;
-
- GcCtx.IterateCids([&](const IoHash& Cid) {
- IoHash Cas = CidStore->RemapCid(Cid);
-
- if (Cas == IoHash::Zero)
- {
- ++UnknownChunks;
- }
- else
- {
- CasHashes.push_back(Cas);
- }
- });
-
- if (UnknownChunks)
- {
- ZEN_WARN("found {} unknown CIDs", UnknownChunks);
- }
-
- GcCtx.ContributeCas(CasHashes);
- }
-
// Then trim storage
{
@@ -434,61 +392,48 @@ CasGc::CollectGarbage(GcContext& GcCtx)
Storage->CollectGarbage(GcCtx);
}
}
+}
+
+GcStorageSize
+GcManager::TotalStorageSize() const
+{
+ RwLock::SharedLockScope _(m_Lock);
- // Remove Cid to CAS hash mappings. Scrub?
+ GcStorageSize TotalSize;
- if (CidStore* CidStore = m_CidStore)
+ for (GcStorage* Storage : m_GcStorage)
{
- Stopwatch Timer;
- const auto Guard = MakeGuard([&] { ZEN_INFO("clean up deleted content ids in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
- CidStore->RemoveCids(GcCtx.DeletedCas());
+ const auto Size = Storage->StorageSize();
+ TotalSize.DiskSize += Size.DiskSize;
+ TotalSize.MemorySize += Size.MemorySize;
}
-}
-void
-CasGc::SetCidStore(CidStore* Cids)
-{
- m_CidStore = Cids;
+ return TotalSize;
}
+#if ZEN_USE_REF_TRACKING
void
-CasGc::OnNewCidReferences(std::span<IoHash> Hashes)
+GcManager::OnNewCidReferences(std::span<IoHash> Hashes)
{
ZEN_UNUSED(Hashes);
}
void
-CasGc::OnCommittedCidReferences(std::span<IoHash> Hashes)
+GcManager::OnCommittedCidReferences(std::span<IoHash> Hashes)
{
ZEN_UNUSED(Hashes);
}
void
-CasGc::OnDroppedCidReferences(std::span<IoHash> Hashes)
+GcManager::OnDroppedCidReferences(std::span<IoHash> Hashes)
{
ZEN_UNUSED(Hashes);
}
-
-GcStorageSize
-CasGc::TotalStorageSize() const
-{
- RwLock::SharedLockScope _(m_Lock);
-
- GcStorageSize TotalSize;
-
- for (GcStorage* Storage : m_GcStorage)
- {
- const auto Size = Storage->StorageSize();
- TotalSize.DiskSize += Size.DiskSize;
- TotalSize.MemorySize += Size.MemorySize;
- }
-
- return TotalSize;
-}
+#endif
//////////////////////////////////////////////////////////////////////////
-GcScheduler::GcScheduler(CasGc& CasGc) : m_Log(logging::Get("gc")), m_CasGc(CasGc)
+GcScheduler::GcScheduler(GcManager& GcManager) : m_Log(logging::Get("gc")), m_GcManager(GcManager)
{
}
@@ -606,7 +551,7 @@ GcScheduler::SchedulerThread()
{
std::error_code Ec;
DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec);
- GcStorageSize TotalSize = m_CasGc.TotalStorageSize();
+ GcStorageSize TotalSize = m_GcManager.TotalStorageSize();
std::chrono::seconds RemaingTime = std::chrono::duration_cast<std::chrono::seconds>(m_NextGcTime - GcClock::Now());
if (RemaingTime < std::chrono::seconds::zero())
@@ -668,7 +613,7 @@ GcScheduler::SchedulerThread()
Stopwatch Timer;
const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
- m_CasGc.CollectGarbage(GcCtx);
+ m_GcManager.CollectGarbage(GcCtx);
m_LastGcTime = GcClock::Now();
m_NextGcTime = NextGcTime(m_LastGcTime);
@@ -745,38 +690,37 @@ TEST_CASE("gc.basic")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
+ CidStoreConfiguration CasConfig;
CasConfig.RootDirectory = TempDir.Path() / "cas";
- CasGc Gc;
- std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc);
- CidStore CidStore{*CasStore, TempDir.Path() / "cid"};
+ GcManager Gc;
+ CidStore CidStore(Gc);
- CasStore->Initialize(CasConfig);
- Gc.SetCidStore(&CidStore);
+ CidStore.Initialize(CasConfig);
IoBuffer Chunk = CreateChunk(128);
auto CompressedChunk = Compress(Chunk);
const auto InsertResult = CidStore.AddChunk(CompressedChunk);
+ CHECK(InsertResult.New);
GcContext GcCtx;
GcCtx.CollectSmallObjects(true);
- CasStore->Flush();
+ CidStore.Flush();
Gc.CollectGarbage(GcCtx);
- CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId));
+ CHECK(!CidStore.ContainsChunk(IoHash::FromBLAKE3(CompressedChunk.GetRawHash())));
}
TEST_CASE("gc.full")
{
ScopedTemporaryDirectory TempDir;
- CasStoreConfiguration CasConfig;
+ CidStoreConfiguration CasConfig;
CasConfig.RootDirectory = TempDir.Path() / "cas";
- CasGc Gc;
+ GcManager Gc;
std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc);
CasStore->Initialize(CasConfig);
@@ -813,7 +757,7 @@ TEST_CASE("gc.full")
CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
- CasStoreSize InitialSize = CasStore->TotalSize();
+ CidStoreSize InitialSize = CasStore->TotalSize();
// Keep first and last
{
@@ -823,7 +767,7 @@ TEST_CASE("gc.full")
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[0]);
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
CasStore->Flush();
Gc.CollectGarbage(GcCtx);
@@ -856,7 +800,7 @@ TEST_CASE("gc.full")
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
CasStore->Flush();
Gc.CollectGarbage(GcCtx);
@@ -890,7 +834,7 @@ TEST_CASE("gc.full")
KeepChunks.push_back(ChunkHashes[1]);
KeepChunks.push_back(ChunkHashes[4]);
KeepChunks.push_back(ChunkHashes[7]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
CasStore->Flush();
Gc.CollectGarbage(GcCtx);
@@ -925,7 +869,7 @@ TEST_CASE("gc.full")
KeepChunks.push_back(ChunkHashes[6]);
KeepChunks.push_back(ChunkHashes[7]);
KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.ContributeCas(KeepChunks);
+ GcCtx.AddRetainedCids(KeepChunks);
CasStore->Flush();
Gc.CollectGarbage(GcCtx);