diff options
| author | Per Larsson <[email protected]> | 2021-12-13 19:46:36 +0100 |
|---|---|---|
| committer | Per Larsson <[email protected]> | 2021-12-13 19:46:36 +0100 |
| commit | 3e666bec6605931114c1d78d48bffeeb75e3e61b (patch) | |
| tree | c8db57d3f2f7f9d6947ad6a6f8ac0b38289e4f77 | |
| parent | Fixed bug in z$ GC. (diff) | |
| download | zen-3e666bec6605931114c1d78d48bffeeb75e3e61b.tar.xz zen-3e666bec6605931114c1d78d48bffeeb75e3e61b.zip | |
Remove Cid to CAS chunk mapping after GC.
| -rw-r--r-- | zenstore/cidstore.cpp | 27 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 29 | ||||
| -rw-r--r-- | zenstore/filecas.cpp | 2 | ||||
| -rw-r--r-- | zenstore/gc.cpp | 101 | ||||
| -rw-r--r-- | zenstore/include/zenstore/CAS.h | 8 | ||||
| -rw-r--r-- | zenstore/include/zenstore/cidstore.h | 1 | ||||
| -rw-r--r-- | zenstore/include/zenstore/gc.h | 7 | ||||
| -rw-r--r-- | zenstore/zenstore.cpp | 2 |
8 files changed, 166 insertions, 11 deletions
diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp index 4ddf34c79..33dc216b5 100644 --- a/zenstore/cidstore.cpp +++ b/zenstore/cidstore.cpp @@ -233,6 +233,27 @@ struct CidStore::Impl Ctx.ReportBadCasChunks(BadChunks); } + void RemoveCids(CasChunkSet& CasChunks) + { + RwLock::ExclusiveLockScope _(m_Lock); + + for (auto It = m_CidMap.begin(), End = m_CidMap.end(); It != End;) + { + if (CasChunks.ContainsChunk(It->second)) + { + const IoHash& BadHash = It->first; + + // Log a tombstone record + LogMapping(BadHash, IoHash::Zero); + It = m_CidMap.erase(It); + } + else + { + ++It; + } + } + } + uint64_t m_LastScrubTime = 0; }; @@ -289,6 +310,12 @@ CidStore::Scrub(ScrubContext& Ctx) m_Impl->Scrub(Ctx); } +void +CidStore::RemoveCids(CasChunkSet& CasChunks) +{ + m_Impl->RemoveCids(CasChunks); +} + CasStoreSize CidStore::CasSize() const { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 6149873ad..d4d29c179 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -268,7 +268,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) Flush(); std::vector<IoHash> Candidates; - std::vector<IoHash> Keep; + std::vector<IoHash> ChunksToKeep; + std::vector<IoHash> ChunksToDelete; const uint64_t ChunkCount = m_LocationMap.size(); uint64_t TotalSize{}; @@ -280,10 +281,19 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) TotalSize += Entry.second.GetSize(); } - Keep.reserve(Candidates.size()); - GcCtx.FilterCas(Candidates, [&](const IoHash& Hash) { Keep.push_back(Hash); }); + ChunksToKeep.reserve(Candidates.size()); + GcCtx.FilterCas(Candidates, [&ChunksToKeep, &ChunksToDelete](const IoHash& Hash, bool Keep) { + if (Keep) + { + ChunksToKeep.push_back(Hash); + } + else + { + ChunksToDelete.push_back(Hash); + } + }); - if (m_LocationMap.empty() || Keep.size() == m_LocationMap.size()) + if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size()) { ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete", ChunkCount, @@ -292,10 +302,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) return; } - const uint64_t NewChunkCount = Keep.size(); + const uint64_t NewChunkCount = ChunksToKeep.size(); uint64_t NewTotalSize = 0; - for (const IoHash& Key : Keep) + for (const IoHash& Key : ChunksToKeep) { const CasDiskLocation& Loc = m_LocationMap[Key]; NewTotalSize += Loc.GetSize(); @@ -347,7 +357,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::vector<uint8_t> Chunk; uint64_t NextInsertOffset{}; - for (const IoHash& Key : Keep) + for (const IoHash& Key : ChunksToKeep) { const auto Entry = m_LocationMap.find(Key); const auto& Loc = Entry->second; @@ -386,6 +396,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) OpenContainer(false /* IsNewStore */); + GcCtx.DeletedCas(ChunksToDelete); + ZEN_INFO("garbage collect from '{}' DONE, collected #{} {} chunks of total #{} {}", m_Config.RootDirectory / m_ContainerBaseName, ChunkCount - NewChunkCount, @@ -399,6 +411,9 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) // Something went wrong, try create a new container OpenContainer(true /* IsNewStore */); + + GcCtx.DeletedCas(ChunksToDelete); + GcCtx.DeletedCas(ChunksToKeep); } } diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index bfad34c86..2fc968a91 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -616,6 +616,8 @@ FileCasStrategy::CollectGarbage(GcContext& GcCtx) ZEN_WARN("failed to delete file for chunk {}: '{}'", Hash, Ec.message()); } } + + GcCtx.DeletedCas(ChunksToDelete); } ////////////////////////////////////////////////////////////////////////// diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp index bb26af87b..d5cb4901b 100644 --- a/zenstore/gc.cpp +++ b/zenstore/gc.cpp @@ -9,6 +9,8 @@ #include <zencore/fmtutils.h> #include <zencore/logging.h> #include <zencore/string.h> +#include <zencore/testing.h> +#include <zencore/testutils.h> #include <zencore/timer.h> #include <zenstore/CAS.h> #include <zenstore/cidstore.h> @@ -16,6 +18,12 @@ #include <fmt/format.h> #include <filesystem> +#if ZEN_WITH_TESTS +# include <zencore/compress.h> +# include <algorithm> +# include <random> +#endif + namespace zen { using namespace std::literals; @@ -60,6 +68,7 @@ struct GcContext::GcState CacheBuckets m_CacheBuckets; CasChunkSet m_CasChunks; + CasChunkSet m_DeletedCasChunks; CasChunkSet m_CidChunks; GcClock::TimePoint m_GcTime; GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24); @@ -113,6 +122,24 @@ GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHas m_State->m_CasChunks.FilterChunks(Cas, [&](const IoHash& Hash) { KeepFunc(Hash); }); } +void +GcContext::FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&, bool)>&& FilterFunc) +{ + m_State->m_CasChunks.FilterChunks(Cas, std::move(FilterFunc)); +} + +void +GcContext::DeletedCas(std::span<const IoHash> Cas) +{ + m_State->m_DeletedCasChunks.AddChunksToSet(Cas); +} + +CasChunkSet& +GcContext::DeletedCas() +{ + return m_State->m_DeletedCasChunks; +} + std::span<const IoHash> GcContext::ValidCacheKeys(const std::string& Bucket) const { @@ -244,11 +271,10 @@ CasGc::CollectGarbage(GcContext& GcCtx) if (CidStore* CidStore = m_CidStore) { std::vector<IoHash> CasHashes; + uint64_t UnknownChunks = 0; - int UnknownChunks = 0; - - GcCtx.IterateCids([&](const IoHash& Hash) { - IoHash Cas = CidStore->RemapCid(Hash); + GcCtx.IterateCids([&](const IoHash& Cid) { + IoHash Cas = CidStore->RemapCid(Cid); if (Cas == IoHash::Zero) { @@ -274,6 +300,11 @@ CasGc::CollectGarbage(GcContext& GcCtx) { Storage->CollectGarbage(GcCtx); } + + if (CidStore* CidStore = m_CidStore) + { + CidStore->RemoveCids(GcCtx.DeletedCas()); + } } void @@ -510,4 +541,66 @@ GcScheduler::NextGcTime(GcClock::TimePoint CurrentTime) ////////////////////////////////////////////////////////////////////////// +#if ZEN_WITH_TESTS + +namespace { + IoHash CreateKey(size_t KeyValue) { return IoHash::HashBuffer(&KeyValue, sizeof(size_t)); } + + static IoBuffer CreateChunk(uint64_t Size) + { + static std::random_device rd; + static std::mt19937 g(rd()); + + const size_t Count = static_cast<size_t>(Size / sizeof(uint32_t)); + std::vector<uint32_t> Values; + Values.resize(Count); + for (size_t Idx = 0; Idx < Count; ++Idx) + { + Values[Idx] = static_cast<uint32_t>(Idx); + } + std::shuffle(Values.begin(), Values.end(), g); + + return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size() * sizeof(uint32_t)); + } + + static CompressedBuffer Compress(IoBuffer Buffer) + { + return CompressedBuffer::Compress(SharedBuffer::MakeView(Buffer.GetData(), Buffer.GetSize())); + } +} // namespace + +TEST_CASE("gc.basic") +{ + ScopedTemporaryDirectory TempDir; + + CasStoreConfiguration CasConfig; + CasConfig.RootDirectory = TempDir.Path() / "cas"; + + CasGc Gc; + std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc); + CidStore CidStore{*CasStore, TempDir.Path() / "cid"}; + + CasStore->Initialize(CasConfig); + Gc.SetCidStore(&CidStore); + + IoBuffer Chunk = CreateChunk(128); + auto CompressedChunk = Compress(Chunk); + + const auto InsertResult = CidStore.AddChunk(CompressedChunk); + + GcContext GcCtx; + GcCtx.CollectSmallObjects(true); + + Gc.CollectGarbage(GcCtx); + + CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId)); +} + +#endif + +void +gc_forcelink() +{ +} + } // namespace zen diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h index 72b750d6c..5f1565f81 100644 --- a/zenstore/include/zenstore/CAS.h +++ b/zenstore/include/zenstore/CAS.h @@ -60,6 +60,14 @@ public: } } + inline void FilterChunks(std::span<const IoHash> Candidates, std::invocable<const IoHash&, bool> auto MatchFunc) + { + for (const IoHash& Candidate : Candidates) + { + MatchFunc(Candidate, ContainsChunk(Candidate)); + } + } + private: // Q: should we protect this with a lock, or is that a higher level concern? std::unordered_set<IoHash> m_ChunkSet; diff --git a/zenstore/include/zenstore/cidstore.h b/zenstore/include/zenstore/cidstore.h index 4dd83f24e..a8cb87f40 100644 --- a/zenstore/include/zenstore/cidstore.h +++ b/zenstore/include/zenstore/cidstore.h @@ -54,6 +54,7 @@ public: bool ContainsChunk(const IoHash& DecompressedId); void Flush(); void Scrub(ScrubContext& Ctx); + void RemoveCids(CasChunkSet& CasChunks); CasStoreSize CasSize() const; // TODO: add batch filter support diff --git a/zenstore/include/zenstore/gc.h b/zenstore/include/zenstore/gc.h index fe93456c6..9b0025403 100644 --- a/zenstore/include/zenstore/gc.h +++ b/zenstore/include/zenstore/gc.h @@ -22,6 +22,7 @@ class logger; namespace zen { class CasStore; +class CasChunkSet; class CasGc; class CidStore; struct IoHash; @@ -57,6 +58,10 @@ public: void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc); void FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&)> KeepFunc); + void FilterCas(std::span<const IoHash> Cas, std::function<void(const IoHash&, bool)>&& FilterFunc); + + void DeletedCas(std::span<const IoHash> Cas); + CasChunkSet& DeletedCas(); std::span<const IoHash> ValidCacheKeys(const std::string& Bucket) const; std::span<const IoHash> ExpiredCacheKeys(const std::string& Bucket) const; @@ -204,4 +209,6 @@ private: std::optional<TriggerParams> m_TriggerParams; }; +void gc_forcelink(); + } // namespace zen diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp index 9fdf2dccf..337a1c75b 100644 --- a/zenstore/zenstore.cpp +++ b/zenstore/zenstore.cpp @@ -4,6 +4,7 @@ #include <zenstore/CAS.h> #include <zenstore/basicfile.h> +#include <zenstore/gc.h> #include "compactcas.h" #include "filecas.h" @@ -16,6 +17,7 @@ zenstore_forcelinktests() CAS_forcelink(); filecas_forcelink(); compactcas_forcelink(); + gc_forcelink(); } } // namespace zen |