about summary refs log tree commit diff
diff options
context:
space:
mode:
author Per Larsson <[email protected]> 2021-12-13 19:46:36 +0100
committer Per Larsson <[email protected]> 2021-12-13 19:46:36 +0100
commit 3e666bec6605931114c1d78d48bffeeb75e3e61b (patch)
tree c8db57d3f2f7f9d6947ad6a6f8ac0b38289e4f77
parent Fixed bug in z$ GC. (diff)
download zen-3e666bec6605931114c1d78d48bffeeb75e3e61b.tar.xz
zen-3e666bec6605931114c1d78d48bffeeb75e3e61b.zip
Remove Cid to CAS chunk mapping after GC.
-rw-r--r-- zenstore/cidstore.cpp 27
-rw-r--r-- zenstore/compactcas.cpp 29
-rw-r--r-- zenstore/filecas.cpp 2
-rw-r--r-- zenstore/gc.cpp 101
-rw-r--r-- zenstore/include/zenstore/CAS.h 8
-rw-r--r-- zenstore/include/zenstore/cidstore.h 1
-rw-r--r-- zenstore/include/zenstore/gc.h 7
-rw-r--r-- zenstore/zenstore.cpp 2
8 files changed, 166 insertions, 11 deletions
diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp
index 4ddf34c79..33dc216b5 100644
--- a/zenstore/cidstore.cpp
+++ b/zenstore/cidstore.cpp
@@ -233,6 +233,27 @@ struct CidStore::Impl
Ctx.ReportBadCasChunks(BadChunks);
}
+    // Drops every Cid -> CAS mapping whose CAS hash is a member of CasChunks.
+    // A tombstone record (the Cid mapped to IoHash::Zero) is logged for each
+    // dropped mapping before the entry is erased from m_CidMap. The whole pass
+    // runs while holding m_Lock exclusively.
+    void RemoveCids(CasChunkSet& CasChunks)
+    {
+        RwLock::ExclusiveLockScope _(m_Lock);
+
+        auto Entry = m_CidMap.begin();
+        while (Entry != m_CidMap.end())
+        {
+            if (!CasChunks.ContainsChunk(Entry->second))
+            {
+                // Mapping targets a chunk that is not in the set, keep it
+                ++Entry;
+                continue;
+            }
+
+            // Log the tombstone first, while the key is still valid
+            LogMapping(Entry->first, IoHash::Zero);
+            Entry = m_CidMap.erase(Entry);
+        }
+    }
+
uint64_t m_LastScrubTime = 0;
};
@@ -289,6 +310,12 @@ CidStore::Scrub(ScrubContext& Ctx)
m_Impl->Scrub(Ctx);
}
+// Public entry point for dropping Cid mappings; the work is delegated
+// unchanged to the implementation object with the same chunk set.
+void
+CidStore::RemoveCids(CasChunkSet& CasChunks)
+{
+    auto& Store = *m_Impl;
+    Store.RemoveCids(CasChunks);
+}
+
CasStoreSize
CidStore::CasSize() const
{
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 6149873ad..d4d29c179 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -268,7 +268,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
Flush();
std::vector<IoHash> Candidates;
- std::vector<IoHash> Keep;
+ std::vector<IoHash> ChunksToKeep;
+ std::vector<IoHash> ChunksToDelete;
const uint64_t ChunkCount = m_LocationMap.size();
uint64_t TotalSize{};
@@ -280,10 +281,19 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
TotalSize += Entry.second.GetSize();
}
- Keep.reserve(Candidates.size());
- GcCtx.FilterCas(Candidates, [&](const IoHash& Hash) { Keep.push_back(Hash); });
+ ChunksToKeep.reserve(Candidates.size());
+ GcCtx.FilterCas(Candidates, [&ChunksToKeep, &ChunksToDelete](const IoHash& Hash, bool Keep) {
+ if (Keep)
+ {
+ ChunksToKeep.push_back(Hash);
+ }
+ else
+ {
+ ChunksToDelete.push_back(Hash);
+ }
+ });
- if (m_LocationMap.empty() || Keep.size() == m_LocationMap.size())
+ if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size())
{
ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
ChunkCount,
@@ -292,10 +302,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
return;
}
- const uint64_t NewChunkCount = Keep.size();
+ const uint64_t NewChunkCount = ChunksToKeep.size();
uint64_t NewTotalSize = 0;
- for (const IoHash& Key : Keep)
+ for (const IoHash& Key : ChunksToKeep)
{
const CasDiskLocation& Loc = m_LocationMap[Key];
NewTotalSize += Loc.GetSize();
@@ -347,7 +357,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<uint8_t> Chunk;
uint64_t NextInsertOffset{};
- for (const IoHash& Key : Keep)
+ for (const IoHash& Key : ChunksToKeep)
{
const auto Entry = m_LocationMap.find(Key);
const auto& Loc = Entry->second;
@@ -386,6 +396,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
OpenContainer(false /* IsNewStore */);
+ GcCtx.DeletedCas(ChunksToDelete);
+
ZEN_INFO("garbage collect from '{}' DONE, collected #{} {} chunks of total #{} {}",
m_Config.RootDirectory / m_ContainerBaseName,
ChunkCount - NewChunkCount,
@@ -399,6 +411,9 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
// Something went wrong, try create a new container
OpenContainer(true /* IsNewStore */);
+
+ GcCtx.DeletedCas(ChunksToDelete);
+ GcCtx.DeletedCas(ChunksToKeep);
}
}
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp
index bfad34c86..2fc968a91 100644
--- a/zenstore/filecas.cpp
+++ b/zenstore/filecas.cpp
@@ -616,6 +616,8 @@ FileCasStrategy::CollectGarbage(GcContext& GcCtx)
ZEN_WARN("failed to delete file for chunk {}: '{}'", Hash, Ec.message());
}
}
+
+ GcCtx.DeletedCas(ChunksToDelete);
}
//////////////////////////////////////////////////////////////////////////
diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp
index bb26af87b..d5cb4901b 100644
--- a/zenstore/gc.cpp
+++ b/zenstore/gc.cpp
@@ -9,6 +9,8 @@
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
#include <zencore/string.h>
+#include <zencore/testing.h>
+#include <zencore/testutils.h>
#include <zencore/timer.h>
#include <zenstore/CAS.h>
#include <zenstore/cidstore.h>
@@ -16,6 +18,12 @@
#include <fmt/format.h>
#include <filesystem>
+#if ZEN_WITH_TESTS
+# include <zencore/compress.h>
+# include <algorithm>
+# include <random>
+#endif
+
namespace zen {
using namespace std::literals;
@@ -60,6 +68,7 @@ struct GcContext::GcState
CacheBuckets m_CacheBuckets;
CasChunkSet m_CasChunks;
+ CasChunkSet m_DeletedCasChunks;
CasChunkSet m_CidChunks;
GcClock::TimePoint m_GcTime;
GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24);
@@ -113,6 +122,24 @@ GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHas
m_State->m_CasChunks.FilterChunks(Cas, [&](const IoHash& Hash) { KeepFunc(Hash); });
}
+// Runs the candidates in Cas through the CAS chunk set held in the GC state.
+// FilterFunc is handed over to the set's FilterChunks, which receives each
+// candidate hash together with a bool.
+void
+GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&, bool)>&& FilterFunc)
+{
+    CasChunkSet& KnownChunks = m_State->m_CasChunks;
+    KnownChunks.FilterChunks(Cas, std::move(FilterFunc));
+}
+
+// Adds the given hashes to the deleted-CAS-chunk set kept in the GC state.
+void
+GcContext::DeletedCas(std::span<const IoHash> Cas)
+{
+    CasChunkSet& Deleted = m_State->m_DeletedCasChunks;
+    Deleted.AddChunksToSet(Cas);
+}
+
+// Returns a mutable reference to the deleted-CAS-chunk set kept in the GC state.
+CasChunkSet&
+GcContext::DeletedCas()
+{
+    CasChunkSet& Deleted = m_State->m_DeletedCasChunks;
+    return Deleted;
+}
+
std::span<const IoHash>
GcContext::ValidCacheKeys(const std::string& Bucket) const
{
@@ -244,11 +271,10 @@ CasGc::CollectGarbage(GcContext& GcCtx)
if (CidStore* CidStore = m_CidStore)
{
std::vector<IoHash> CasHashes;
+ uint64_t UnknownChunks = 0;
- int UnknownChunks = 0;
-
- GcCtx.IterateCids([&](const IoHash& Hash) {
- IoHash Cas = CidStore->RemapCid(Hash);
+ GcCtx.IterateCids([&](const IoHash& Cid) {
+ IoHash Cas = CidStore->RemapCid(Cid);
if (Cas == IoHash::Zero)
{
@@ -274,6 +300,11 @@ CasGc::CollectGarbage(GcContext& GcCtx)
{
Storage->CollectGarbage(GcCtx);
}
+
+ if (CidStore* CidStore = m_CidStore)
+ {
+ CidStore->RemoveCids(GcCtx.DeletedCas());
+ }
}
void
@@ -510,4 +541,66 @@ GcScheduler::NextGcTime(GcClock::TimePoint CurrentTime)
//////////////////////////////////////////////////////////////////////////
+#if ZEN_WITH_TESTS
+
+namespace {
+    // Derives a key by hashing the raw bytes of KeyValue.
+    IoHash CreateKey(size_t KeyValue)
+    {
+        return IoHash::HashBuffer(&KeyValue, sizeof(size_t));
+    }
+
+    // Builds a buffer holding the values 0..N-1 (N = Size / sizeof(uint32_t))
+    // as 32-bit integers in shuffled order. The generator is seeded from
+    // std::random_device, so the ordering differs from run to run.
+    static IoBuffer CreateChunk(uint64_t Size)
+    {
+        static std::random_device SeedSource;
+        static std::mt19937 Rng(SeedSource());
+
+        const size_t ValueCount = static_cast<size_t>(Size / sizeof(uint32_t));
+
+        std::vector<uint32_t> Shuffled(ValueCount);
+        uint32_t Next = 0;
+        for (uint32_t& Slot : Shuffled)
+        {
+            Slot = Next++;
+        }
+        std::shuffle(Shuffled.begin(), Shuffled.end(), Rng);
+
+        return IoBufferBuilder::MakeCloneFromMemory(Shuffled.data(), ValueCount * sizeof(uint32_t));
+    }
+
+    // Compresses the bytes of Buffer, exposed to the compressor through
+    // SharedBuffer::MakeView.
+    static CompressedBuffer Compress(IoBuffer Buffer)
+    {
+        const void* const Data = Buffer.GetData();
+        const auto ByteCount = Buffer.GetSize();
+        return CompressedBuffer::Compress(SharedBuffer::MakeView(Data, ByteCount));
+    }
+} // namespace
+
+// Checks that a collection pass which reclaims a chunk also removes the Cid
+// mapping that referred to it: a chunk added through the CidStore must no
+// longer be reported by ContainsChunk once CollectGarbage has run.
+TEST_CASE("gc.basic")
+{
+    ScopedTemporaryDirectory TempDir;
+
+    CasStoreConfiguration CasConfig;
+    CasConfig.RootDirectory = TempDir.Path() / "cas";
+
+    CasGc Gc;
+    std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc);
+    CidStore CidStore{*CasStore, TempDir.Path() / "cid"};
+
+    CasStore->Initialize(CasConfig);
+    Gc.SetCidStore(&CidStore);
+
+    IoBuffer Chunk = CreateChunk(128);
+    auto CompressedChunk = Compress(Chunk);
+
+    const auto InsertResult = CidStore.AddChunk(CompressedChunk);
+
+    // Guard against a vacuous pass: the chunk has to be visible before the
+    // collection runs, otherwise the final check proves nothing.
+    CHECK(CidStore.ContainsChunk(InsertResult.DecompressedId));
+
+    // This test contributes no references to the context before collecting
+    GcContext GcCtx;
+    GcCtx.CollectSmallObjects(true);
+
+    Gc.CollectGarbage(GcCtx);
+
+    CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId));
+}
+
+#endif
+
+// Intentionally empty. It is called from zenstore_forcelinktests(),
+// presumably so that this translation unit and the tests registered in it
+// are kept by the linker.
+void
+gc_forcelink()
+{
+}
+
} // namespace zen
diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h
index 72b750d6c..5f1565f81 100644
--- a/zenstore/include/zenstore/CAS.h
+++ b/zenstore/include/zenstore/CAS.h
@@ -60,6 +60,14 @@ public:
}
}
+    // Visits every hash in Candidates and reports through MatchFunc whether
+    // ContainsChunk holds for it. MatchFunc is called once per candidate, in
+    // span order, for members and non-members alike.
+    inline void FilterChunks(std::span<const IoHash> Candidates, std::invocable<const IoHash&, bool> auto MatchFunc)
+    {
+        for (const IoHash& Hash : Candidates)
+        {
+            const bool IsMember = ContainsChunk(Hash);
+            MatchFunc(Hash, IsMember);
+        }
+    }
+
private:
// Q: should we protect this with a lock, or is that a higher level concern?
std::unordered_set<IoHash> m_ChunkSet;
diff --git a/zenstore/include/zenstore/cidstore.h b/zenstore/include/zenstore/cidstore.h
index 4dd83f24e..a8cb87f40 100644
--- a/zenstore/include/zenstore/cidstore.h
+++ b/zenstore/include/zenstore/cidstore.h
@@ -54,6 +54,7 @@ public:
bool ContainsChunk(const IoHash& DecompressedId);
void Flush();
void Scrub(ScrubContext& Ctx);
+ void RemoveCids(CasChunkSet& CasChunks);
CasStoreSize CasSize() const;
// TODO: add batch filter support
diff --git a/zenstore/include/zenstore/gc.h b/zenstore/include/zenstore/gc.h
index fe93456c6..9b0025403 100644
--- a/zenstore/include/zenstore/gc.h
+++ b/zenstore/include/zenstore/gc.h
@@ -22,6 +22,7 @@ class logger;
namespace zen {
class CasStore;
+class CasChunkSet;
class CasGc;
class CidStore;
struct IoHash;
@@ -57,6 +58,10 @@ public:
void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc);
void FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&)> KeepFunc);
+ void FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&, bool)>&& FilterFunc);
+
+ void DeletedCas(std::span<const IoHash> Cas);
+ CasChunkSet& DeletedCas();
std::span<const IoHash> ValidCacheKeys(const std::string& Bucket) const;
std::span<const IoHash> ExpiredCacheKeys(const std::string& Bucket) const;
@@ -204,4 +209,6 @@ private:
std::optional<TriggerParams> m_TriggerParams;
};
+void gc_forcelink();
+
} // namespace zen
diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp
index 9fdf2dccf..337a1c75b 100644
--- a/zenstore/zenstore.cpp
+++ b/zenstore/zenstore.cpp
@@ -4,6 +4,7 @@
#include <zenstore/CAS.h>
#include <zenstore/basicfile.h>
+#include <zenstore/gc.h>
#include "compactcas.h"
#include "filecas.h"
@@ -16,6 +17,7 @@ zenstore_forcelinktests()
CAS_forcelink();
filecas_forcelink();
compactcas_forcelink();
+ gc_forcelink();
}
} // namespace zen