diff options
| author | Dan Engelbrecht <[email protected]> | 2024-10-03 16:42:57 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-10-03 16:42:57 +0200 |
| commit | b13b5f48bb497aaf9f9f3d74aceb6e474cf12898 (patch) | |
| tree | 24b1ed63ece11fb773a0ecf41ce6308969468198 /src | |
| parent | 5.5.9-pre0 (diff) | |
| download | zen-b13b5f48bb497aaf9f9f3d74aceb6e474cf12898.tar.xz zen-b13b5f48bb497aaf9f9f3d74aceb6e474cf12898.zip | |
remove gc v1 (#121)
* kill gc v1
* block use of gc v1 from zen command line
* warn and flip to gcv2 if --gc-v2=false is specified for zenserver
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/admin_cmd.cpp | 14 | ||||
| -rw-r--r-- | src/zenserver/admin/admin.cpp | 4 | ||||
| -rw-r--r-- | src/zenserver/config.cpp | 5 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.cpp | 286 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.h | 6 | ||||
| -rw-r--r-- | src/zenserver/zenserver.cpp | 2 | ||||
| -rw-r--r-- | src/zenstore/blockstore.cpp | 648 | ||||
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 504 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 300 | ||||
| -rw-r--r-- | src/zenstore/cas.cpp | 11 | ||||
| -rw-r--r-- | src/zenstore/cas.h | 2 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 639 | ||||
| -rw-r--r-- | src/zenstore/compactcas.h | 1 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 170 | ||||
| -rw-r--r-- | src/zenstore/filecas.h | 1 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 525 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 23 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 4 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/structuredcachestore.h | 6 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 84 |
20 files changed, 115 insertions, 3120 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index d7fc1710d..3b24b9078 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -100,8 +100,12 @@ GcCommand::GcCommand() "Max disk usage size (in bytes)", cxxopts::value(m_DiskSizeSoftLimit)->default_value("0"), "<disksizesoftlimit>"); - m_Options - .add_option("", "", "usegcv1", "Force use of GC version 1", cxxopts::value(m_ForceUseGCV1)->default_value("false"), "<usegcv2>"); + m_Options.add_option("", + "", + "usegcv1", + "Force use of GC version 1. Deprecated, will do nothing.", + cxxopts::value(m_ForceUseGCV1)->default_value("false"), + "<usegcv1>"); m_Options .add_option("", "", "usegcv2", "Force use of GC version 2", cxxopts::value(m_ForceUseGCV2)->default_value("false"), "<usegcv2>"); m_Options.add_option("", @@ -182,11 +186,7 @@ GcCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Params.Add({"skipdelete", m_SkipDelete ? "true" : "false"}); if (m_ForceUseGCV1) { - if (m_ForceUseGCV2) - { - throw OptionParseException("only usegcv1 or usegcv2 can be selected, not both"); - } - Params.Add({"forceusegcv1", "true"}); + throw OptionParseException("usegcv1 is deprecated and can no longer be used"); } if (m_ForceUseGCV2) { diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp index eb49bac51..f6311cf3e 100644 --- a/src/zenserver/admin/admin.cpp +++ b/src/zenserver/admin/admin.cpp @@ -287,7 +287,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, Response << "DiskSizeSoftLimit" << NiceBytes(State.Config.DiskSizeSoftLimit); Response << "MinimumFreeDiskSpaceToAllowWrites" << NiceBytes(State.Config.MinimumFreeDiskSpaceToAllowWrites); Response << "LightweightInterval" << ToTimeSpan(State.Config.LightweightInterval); - Response << "UseGCVersion" << ((State.Config.UseGCVersion == GcVersion::kV1) ? "1" : "2"); + Response << "UseGCVersion" << ((State.Config.UseGCVersion == GcVersion::kV1_Deprecated) ? "1" : "2"); Response << "CompactBlockUsageThresholdPercent" << State.Config.CompactBlockUsageThresholdPercent; Response << "Verbose" << State.Config.Verbose; Response << "SingleThreaded" << State.Config.SingleThreaded; @@ -396,7 +396,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, if (auto Param = Params.GetValue("forceusegcv1"); Param.empty() == false) { - GcParams.ForceGCVersion = GcVersion::kV1; + GcParams.ForceGCVersion = GcVersion::kV1_Deprecated; } if (auto Param = Params.GetValue("forceusegcv2"); Param.empty() == false) diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp index 7466255a9..cda5aca16 100644 --- a/src/zenserver/config.cpp +++ b/src/zenserver/config.cpp @@ -171,6 +171,11 @@ ValidateOptions(ZenServerOptions& ServerOptions) throw zen::OptionParseException( fmt::format("GC attachment pass count can not be larger than {}", ZenGcConfig::GcMaxAttachmentPassCount)); } + if (ServerOptions.GcConfig.UseGCV2 == false) + { + ZEN_WARN("--gc-v2=false is deprecated, reverting to --gc-v2=true"); + ServerOptions.GcConfig.UseGCV2 = true; + } } UpstreamCachePolicy diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 4a943a565..6dbdb7029 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -1082,30 +1082,6 @@ ProjectStore::Oplog::ScrubStorage(ScrubContext& Ctx) } } -void -ProjectStore::Oplog::GatherReferences(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Store::Oplog::GatherReferences"); - if (GcCtx.SkipCid()) - { - return; - } - - std::vector<IoHash> Cids; - Cids.reserve(1024); - IterateOplog( - [&](CbObjectView Op) { - Op.IterateAttachments([&](CbFieldView Visitor) { Cids.emplace_back(Visitor.AsAttachment()); }); - if (Cids.size() >= 1024) - { - GcCtx.AddRetainedCids(Cids); - Cids.clear(); - } - }, - Paging{}); - GcCtx.AddRetainedCids(Cids); -} - uint64_t ProjectStore::Oplog::TotalSize(const std::filesystem::path& BasePath) { @@ -3132,43 +3108,6 @@ ProjectStore::Project::ScrubStorage(ScrubContext& Ctx) }); } -void -ProjectStore::Project::GatherReferences(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Project::GatherReferences"); - - Stopwatch Timer; - const auto Guard = MakeGuard([&] { - ZEN_DEBUG("gathered references from project store project {} in {}", Identifier, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - }); - - // GatherReferences needs to check all existing oplogs - std::vector<std::string> OpLogs = ScanForOplogs(); - for (const std::string& OpLogId : OpLogs) - { - OpenOplog(OpLogId, /*AllowCompact*/ false); - } - - { - // Make sure any oplog at least have a last access time so they eventually will be GC:d if not touched - RwLock::ExclusiveLockScope _(m_LastAccessTimesLock); - for (const std::string& OpId : OpLogs) - { - if (auto It = m_LastAccessTimes.find(OpId); It == m_LastAccessTimes.end()) - { - m_LastAccessTimes[OpId] = GcClock::TickCount(); - } - } - } - - IterateOplogs([&](const RwLock::SharedLockScope&, Oplog& Ops) { - if (!IsExpired(GcCtx.ProjectStoreExpireTime(), Ops)) - { - Ops.GatherReferences(GcCtx); - } - }); -} - uint64_t ProjectStore::Project::TotalSize(const std::filesystem::path& BasePath) { @@ -3407,7 +3346,6 @@ ProjectStore::ProjectStore(CidStore& Store, std::filesystem::path BasePath, GcMa { ZEN_INFO("initializing project store at '{}'", m_ProjectBasePath); // m_Log.set_level(spdlog::level::debug); - m_Gc.AddGcContributor(this); m_Gc.AddGcStorage(this); m_Gc.AddGcReferencer(*this); m_Gc.AddGcReferenceLocker(*this); @@ -3419,7 +3357,6 @@ ProjectStore::~ProjectStore() m_Gc.RemoveGcReferenceLocker(*this); m_Gc.RemoveGcReferencer(*this); m_Gc.RemoveGcStorage(this); - m_Gc.RemoveGcContributor(this); } std::filesystem::path @@ -3521,145 +3458,6 @@ ProjectStore::ScrubStorage(ScrubContext& Ctx) } } -void -ProjectStore::GatherReferences(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Store::GatherReferences"); - - size_t ProjectCount = 0; - size_t ExpiredProjectCount = 0; - Stopwatch Timer; - const auto Guard = MakeGuard([&] { - ZEN_DEBUG("gathered references from '{}' in {}, found {} active projects and {} expired projects", - m_ProjectBasePath.string(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs()), - ProjectCount, - ExpiredProjectCount); - }); - - DiscoverProjects(); - - std::vector<Ref<Project>> Projects; - { - RwLock::SharedLockScope Lock(m_ProjectsLock); - Projects.reserve(m_Projects.size()); - - for (auto& Kv : m_Projects) - { - if (Kv.second->IsExpired(GcCtx.ProjectStoreExpireTime())) - { - ExpiredProjectCount++; - continue; - } - Projects.push_back(Kv.second); - } - } - ProjectCount = Projects.size(); - for (const Ref<Project>& Project : Projects) - { - Project->GatherReferences(GcCtx); - } -} - -void -ProjectStore::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Store::CollectGarbage"); - - size_t ProjectCount = 0; - size_t ExpiredProjectCount = 0; - - Stopwatch Timer; - const auto Guard = MakeGuard([&] { - ZEN_DEBUG("garbage collect from '{}' DONE after {}, found {} active projects and {} expired projects", - m_ProjectBasePath.string(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs()), - ProjectCount, - ExpiredProjectCount); - }); - std::vector<Ref<Project>> ExpiredProjects; - std::vector<Ref<Project>> Projects; - - { - RwLock::SharedLockScope Lock(m_ProjectsLock); - for (auto& Kv : m_Projects) - { - if (Kv.second->IsExpired(GcCtx.ProjectStoreExpireTime())) - { - ExpiredProjects.push_back(Kv.second); - ExpiredProjectCount++; - continue; - } - Projects.push_back(Kv.second); - ProjectCount++; - } - } - - if (!GcCtx.IsDeletionMode()) - { - ZEN_DEBUG("garbage collect DISABLED, for '{}' ", m_ProjectBasePath.string()); - return; - } - - for (const Ref<Project>& Project : Projects) - { - std::vector<std::string> ExpiredOplogs; - { - Project->IterateOplogs([&GcCtx, &Project, &ExpiredOplogs](const RwLock::SharedLockScope&, ProjectStore::Oplog& Oplog) { - if (Project->IsExpired(GcCtx.ProjectStoreExpireTime(), Oplog)) - { - ExpiredOplogs.push_back(Oplog.OplogId()); - } - }); - } - for (const std::string& OplogId : ExpiredOplogs) - { - ZEN_DEBUG("ProjectStore::CollectGarbage garbage collected oplog '{}' in project '{}'. Removing storage on disk", - OplogId, - Project->Identifier); - (void)Project->DeleteOplog(OplogId); - } - Project->Flush(); - } - - if (ExpiredProjects.empty()) - { - ZEN_DEBUG("garbage collect for '{}', no expired projects found", m_ProjectBasePath.string()); - return; - } - - for (const Ref<Project>& Project : ExpiredProjects) - { - std::filesystem::path PathToRemove; - std::string ProjectId; - { - if (!Project->IsExpired(GcCtx.ProjectStoreExpireTime())) - { - ZEN_DEBUG("ProjectStore::CollectGarbage skipped garbage collect of project '{}'. Project no longer expired.", ProjectId); - continue; - } - - RwLock::ExclusiveLockScope _(m_ProjectsLock); - bool Success = Project->PrepareForDelete(PathToRemove); - if (!Success) - { - ZEN_DEBUG("ProjectStore::CollectGarbage skipped garbage collect of project '{}'. Project folder is locked.", ProjectId); - continue; - } - m_Projects.erase(Project->Identifier); - ProjectId = Project->Identifier; - } - - ZEN_DEBUG("ProjectStore::CollectGarbage garbage collected project '{}'. Removing storage on disk", ProjectId); - if (PathToRemove.empty()) - { - continue; - } - - DeleteDirectories(PathToRemove); - } -} - GcStorageSize ProjectStore::StorageSize() const { @@ -6079,90 +5877,6 @@ TEST_CASE("project.store.gc") } } - SUBCASE("v1") - { - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 21); - ProjectStore.CollectGarbage(GcCtx); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } - - { - GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 21); - ProjectStore.CollectGarbage(GcCtx); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } - - std::filesystem::remove(Project1FilePath); - - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 21); - ProjectStore.CollectGarbage(GcCtx); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } - - { - GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 14); - ProjectStore.CollectGarbage(GcCtx); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } - - std::filesystem::remove(Project2Oplog1Path); - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 14); - ProjectStore.CollectGarbage(GcCtx); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } - - { - GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 7); - ProjectStore.CollectGarbage(GcCtx); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } - - std::filesystem::remove(Project2FilePath); - { - GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24)); - ProjectStore.GatherReferences(GcCtx); - size_t RefCount = 0; - GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; }); - CHECK(RefCount == 0); - ProjectStore.CollectGarbage(GcCtx); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(!ProjectStore.OpenProject("proj2"sv)); - } - } - SUBCASE("v2") { { diff --git a/src/zenserver/projectstore/projectstore.h b/src/zenserver/projectstore/projectstore.h index 2552f657f..0a5e71da4 100644 --- a/src/zenserver/projectstore/projectstore.h +++ b/src/zenserver/projectstore/projectstore.h @@ -58,7 +58,7 @@ static_assert(IsPow2(sizeof(OplogEntry))); package data split into separate chunks for bulk data, exports and header information. */ -class ProjectStore : public RefCounted, public GcStorage, public GcContributor, public GcReferencer, public GcReferenceLocker +class ProjectStore : public RefCounted, public GcStorage, public GcReferencer, public GcReferenceLocker { struct OplogStorage; @@ -145,7 +145,6 @@ public: LoggerRef Log() { return m_OuterProject->Log(); } void Flush(); void ScrubStorage(ScrubContext& Ctx); - void GatherReferences(GcContext& GcCtx); static uint64_t TotalSize(const std::filesystem::path& BasePath); uint64_t TotalSize() const; @@ -298,7 +297,6 @@ public: void Flush(); void ScrubStorage(ScrubContext& Ctx); LoggerRef Log() const; - void GatherReferences(GcContext& GcCtx); static uint64_t TotalSize(const std::filesystem::path& BasePath); uint64_t TotalSize() const; bool PrepareForDelete(std::filesystem::path& OutDeletePath); @@ -374,9 +372,7 @@ public: LoggerRef Log() { return m_Log; } const std::filesystem::path& BasePath() const { return m_ProjectBasePath; } - virtual void GatherReferences(GcContext& GcCtx) override; virtual void ScrubStorage(ScrubContext& Ctx) override; - virtual void CollectGarbage(GcContext& GcCtx) override; virtual GcStorageSize StorageSize() const override; virtual std::string GetGcName(GcCtx& Ctx) override; diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp index 8d8da29b5..124e9ff5f 100644 --- a/src/zenserver/zenserver.cpp +++ b/src/zenserver/zenserver.cpp @@ -313,7 +313,7 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen .DiskSizeSoftLimit = ServerOptions.GcConfig.DiskSizeSoftLimit, .MinimumFreeDiskSpaceToAllowWrites = ServerOptions.GcConfig.MinimumFreeDiskSpaceToAllowWrites, .LightweightInterval = std::chrono::seconds(ServerOptions.GcConfig.LightweightIntervalSeconds), - .UseGCVersion = ServerOptions.GcConfig.UseGCV2 ? GcVersion::kV2 : GcVersion::kV1, + .UseGCVersion = ServerOptions.GcConfig.UseGCV2 ? GcVersion::kV2 : GcVersion::kV1_Deprecated, .CompactBlockUsageThresholdPercent = ServerOptions.GcConfig.CompactBlockUsageThresholdPercent, .Verbose = ServerOptions.GcConfig.Verbose, .SingleThreaded = ServerOptions.GcConfig.SingleThreaded, diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 2ae51d627..239b9e56b 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -659,26 +659,6 @@ BlockStore::WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const Wri } } -BlockStore::ReclaimSnapshotState -BlockStore::GetReclaimSnapshotState() -{ - ReclaimSnapshotState State; - RwLock::SharedLockScope _(m_InsertLock); - for (uint32_t BlockIndex : m_ActiveWriteBlocks) - { - State.m_ActiveWriteBlocks.insert(BlockIndex); - } - if (m_WriteBlock) - { - State.m_ActiveWriteBlocks.insert(m_WriteBlockIndex); - } - for (auto It : m_ChunkBlocks) - { - State.m_BlockIndexes.insert(It.first); - } - return State; -} - IoBuffer BlockStore::TryGetChunk(const BlockStoreLocation& Location) const { @@ -724,429 +704,6 @@ BlockStore::Flush(bool ForceNewBlock) } } -void -BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, - const std::vector<BlockStoreLocation>& ChunkLocations, - const ChunkIndexArray& KeepChunkIndexes, - uint32_t PayloadAlignment, - bool DryRun, - const ReclaimCallback& ChangeCallback, - const ClaimDiskReserveCallback& DiskReserveCallback) -{ - ZEN_TRACE_CPU("BlockStore::ReclaimSpace"); - - uint64_t WriteBlockTimeUs = 0; - uint64_t WriteBlockLongestTimeUs = 0; - uint64_t ReadBlockTimeUs = 0; - uint64_t ReadBlockLongestTimeUs = 0; - uint64_t TotalChunkCount = ChunkLocations.size(); - uint64_t DeletedSize = 0; - uint64_t OldTotalSize = 0; - uint64_t NewTotalSize = 0; - - uint64_t MovedCount = 0; - uint64_t DeletedCount = 0; - - Stopwatch TotalTimer; - const auto _ = MakeGuard([&] { - ZEN_DEBUG( - "reclaim space for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted {} and moved " - "{} " - "of {} " - "chunks ({}).", - m_BlocksBasePath, - NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), - NiceLatencyNs(WriteBlockTimeUs), - NiceLatencyNs(WriteBlockLongestTimeUs), - NiceLatencyNs(ReadBlockTimeUs), - NiceLatencyNs(ReadBlockLongestTimeUs), - NiceBytes(DeletedSize), - DeletedCount, - MovedCount, - TotalChunkCount, - NiceBytes(OldTotalSize)); - }); - - size_t BlockCount = Snapshot.m_BlockIndexes.size(); - if (BlockCount == 0) - { - ZEN_DEBUG("garbage collect for '{}' SKIPPED, no blocks to process", m_BlocksBasePath); - return; - } - - tsl::robin_set<size_t> KeepChunkMap; - KeepChunkMap.reserve(KeepChunkIndexes.size()); - for (size_t KeepChunkIndex : KeepChunkIndexes) - { - KeepChunkMap.insert(KeepChunkIndex); - } - - tsl::robin_map<uint32_t, size_t> BlockIndexToChunkMapIndex; - std::vector<ChunkIndexArray> BlockKeepChunks; - std::vector<ChunkIndexArray> BlockDeleteChunks; - - BlockIndexToChunkMapIndex.reserve(BlockCount); - BlockKeepChunks.reserve(BlockCount); - BlockDeleteChunks.reserve(BlockCount); - size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2; - - size_t DeleteCount = 0; - for (size_t Index = 0; Index < TotalChunkCount; ++Index) - { - const BlockStoreLocation& Location = ChunkLocations[Index]; - if (!Snapshot.m_BlockIndexes.contains(Location.BlockIndex)) - { - // We did not know about the block when we took the snapshot, don't touch it - continue; - } - OldTotalSize += Location.Size; - auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(Location.BlockIndex); - size_t ChunkMapIndex = 0; - if (BlockIndexPtr == BlockIndexToChunkMapIndex.end()) - { - ChunkMapIndex = BlockKeepChunks.size(); - BlockIndexToChunkMapIndex[Location.BlockIndex] = ChunkMapIndex; - BlockKeepChunks.resize(ChunkMapIndex + 1); - BlockKeepChunks.back().reserve(GuesstimateCountPerBlock); - BlockDeleteChunks.resize(ChunkMapIndex + 1); - BlockDeleteChunks.back().reserve(GuesstimateCountPerBlock); - } - else - { - ChunkMapIndex = BlockIndexPtr->second; - } - - if (KeepChunkMap.contains(Index)) - { - ChunkIndexArray& IndexMap = BlockKeepChunks[ChunkMapIndex]; - IndexMap.push_back(Index); - NewTotalSize += Location.Size; - continue; - } - ChunkIndexArray& IndexMap = BlockDeleteChunks[ChunkMapIndex]; - IndexMap.push_back(Index); - DeleteCount++; - } - - std::vector<uint32_t> BlocksToReWrite; - BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); - for (const auto& Entry : BlockIndexToChunkMapIndex) - { - uint32_t BlockIndex = Entry.first; - size_t ChunkMapIndex = Entry.second; - const ChunkIndexArray& ChunkMap = BlockDeleteChunks[ChunkMapIndex]; - if (ChunkMap.empty()) - { - continue; - } - BlocksToReWrite.push_back(BlockIndex); - } - - { - // Any known block not referenced should be added as well - RwLock::SharedLockScope __(m_InsertLock); - for (std::uint32_t BlockIndex : Snapshot.m_BlockIndexes) - { - if (!m_ChunkBlocks.contains(BlockIndex)) - { - continue; - } - bool WasActiveWriteBlock = Snapshot.m_ActiveWriteBlocks.contains(BlockIndex); - if (WasActiveWriteBlock) - { - continue; - } - if (BlockIndexToChunkMapIndex.contains(BlockIndex)) - { - continue; - } - size_t ChunkMapIndex = ChunkMapIndex = BlockKeepChunks.size(); - BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex; - BlockKeepChunks.resize(ChunkMapIndex + 1); - BlockDeleteChunks.resize(ChunkMapIndex + 1); - BlocksToReWrite.push_back(BlockIndex); - } - } - - if (DryRun) - { - ZEN_DEBUG("garbage collect for '{}' DISABLED, found {} {} chunks of total {} {}", - m_BlocksBasePath, - DeleteCount, - NiceBytes(OldTotalSize - NewTotalSize), - TotalChunkCount, - OldTotalSize); - return; - } - - try - { - ZEN_TRACE_CPU("BlockStore::ReclaimSpace::Compact"); - Ref<BlockStoreFile> NewBlockFile; - auto NewBlockFileGuard = MakeGuard([&]() { - if (NewBlockFile && NewBlockFile->IsOpen()) - { - ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); - m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed); - ZEN_ASSERT_SLOW(NewBlockFile->MetaSize() == 0); - NewBlockFile->MarkAsDeleteOnClose(); - } - }); - - uint64_t WriteOffset = 0; - uint32_t NewBlockIndex = 0; - for (uint32_t BlockIndex : BlocksToReWrite) - { - bool IsActiveWriteBlock = Snapshot.m_ActiveWriteBlocks.contains(BlockIndex); - - const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; - - Ref<BlockStoreFile> OldBlockFile; - if (!IsActiveWriteBlock) - { - RwLock::SharedLockScope _i(m_InsertLock); - Stopwatch Timer; - const auto __ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - if (auto It = m_ChunkBlocks.find(BlockIndex); It != m_ChunkBlocks.end()) - { - OldBlockFile = It->second; - } - } - - ChunkIndexArray& KeepMap = BlockKeepChunks[ChunkMapIndex]; - if (KeepMap.empty()) - { - ZEN_TRACE_CPU("BlockStore::ReclaimSpace::DeleteBlock"); - - const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; - for (size_t DeleteIndex : DeleteMap) - { - DeletedSize += ChunkLocations[DeleteIndex].Size; - } - ChangeCallback({}, DeleteMap); - DeletedCount += DeleteMap.size(); - if (OldBlockFile) - { - RwLock::ExclusiveLockScope _i(m_InsertLock); - Stopwatch Timer; - const auto __ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); - ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile); - m_ChunkBlocks.erase(BlockIndex); - m_TotalSize.fetch_sub(OldBlockFile->TotalSize(), std::memory_order::relaxed); - OldBlockFile->MarkAsDeleteOnClose(); - } - continue; - } - else if (!OldBlockFile && !IsActiveWriteBlock) - { - // If the block file pointed to does not exist, move any keep chunk them to deleted list - ZEN_ERROR("Expected to find block {} in {} - this should never happen, marking {} entries as deleted.", - BlockIndex, - m_BlocksBasePath, - KeepMap.size()); - - BlockDeleteChunks[ChunkMapIndex].insert(BlockDeleteChunks[ChunkMapIndex].end(), KeepMap.begin(), KeepMap.end()); - KeepMap.clear(); - } - else if (OldBlockFile && (OldBlockFile->FileSize() == 0)) - { - // Block created to accommodate missing blocks - ZEN_WARN("Missing block {} in {} - backing data for locations is missing, marking {} entries as deleted.", - BlockIndex, - m_BlocksBasePath, - KeepMap.size()); - - BlockDeleteChunks[ChunkMapIndex].insert(BlockDeleteChunks[ChunkMapIndex].end(), KeepMap.begin(), KeepMap.end()); - KeepMap.clear(); - } - - MovedChunksArray MovedChunks; - if (OldBlockFile) - { - ZEN_TRACE_CPU("BlockStore::ReclaimSpace::MoveBlock"); - - ZEN_INFO("Moving {} chunks from '{}' to new block", KeepMap.size(), GetBlockPath(m_BlocksBasePath, BlockIndex)); - - uint64_t OldBlockSize = OldBlockFile->FileSize(); - std::vector<uint8_t> Chunk; - for (const size_t& ChunkIndex : KeepMap) - { - const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; - if (ChunkLocation.Offset + ChunkLocation.Size > OldBlockSize) - { - ZEN_WARN( - "ReclaimSpace skipping chunk outside of block range in '{}', Chunk start {}, Chunk size {} in Block {}, Block " - "size {}", - m_BlocksBasePath, - ChunkLocation.Offset, - ChunkLocation.Size, - OldBlockFile->GetPath(), - OldBlockSize); - continue; - } - Chunk.resize(ChunkLocation.Size); - OldBlockFile->Read(Chunk.data(), ChunkLocation.Size, ChunkLocation.Offset); - - if (!NewBlockFile || (WriteOffset + ChunkLocation.Size > m_MaxBlockSize)) - { - uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); - - if (NewBlockFile) - { - ZEN_ASSERT_SLOW(NewBlockFile->IsOpen()); - NewBlockFile->Flush(); - NewBlockFile = nullptr; - } - { - ChangeCallback(MovedChunks, {}); - MovedCount += KeepMap.size(); - MovedChunks.clear(); - RwLock::ExclusiveLockScope InsertLock(m_InsertLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - - std::filesystem::path NewBlockPath; - NextBlockIndex = GetFreeBlockIndex(NextBlockIndex, InsertLock, NewBlockPath); - if (NextBlockIndex == (uint32_t)m_MaxBlockCount) - { - ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", - m_BlocksBasePath, - static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1); - return; - } - - NewBlockFile = new BlockStoreFile(NewBlockPath); - m_ChunkBlocks[NextBlockIndex] = NewBlockFile; - } - - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_BlocksBasePath, Error); - if (Error) - { - ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BlocksBasePath, Error.message()); - return; - } - if (Space.Free < m_MaxBlockSize) - { - uint64_t ReclaimedSpace = DiskReserveCallback(); - if (Space.Free + ReclaimedSpace < m_MaxBlockSize) - { - ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", - m_BlocksBasePath, - m_MaxBlockSize, - NiceBytes(Space.Free + ReclaimedSpace)); - RwLock::ExclusiveLockScope _l(m_InsertLock); - Stopwatch Timer; - const auto __ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - ZEN_ASSERT(m_ChunkBlocks[NextBlockIndex] == NewBlockFile); - ZEN_ASSERT_SLOW(!NewBlockFile->IsOpen()); - m_ChunkBlocks.erase(NextBlockIndex); - return; - } - - ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", - m_BlocksBasePath, - ReclaimedSpace, - NiceBytes(Space.Free + ReclaimedSpace)); - } - NewBlockFile->Create(m_MaxBlockSize); - NewBlockIndex = NextBlockIndex; - WriteOffset = 0; - } - - NewBlockFile->Write(Chunk.data(), ChunkLocation.Size, WriteOffset); - MovedChunks.push_back( - {ChunkIndex, - {.BlockIndex = NewBlockIndex, .Offset = gsl::narrow<uint32_t>(WriteOffset), .Size = ChunkLocation.Size}}); - uint64_t OldOffset = WriteOffset; - WriteOffset = RoundUp(WriteOffset + ChunkLocation.Size, PayloadAlignment); - m_TotalSize.fetch_add(WriteOffset - OldOffset, std::memory_order::relaxed); - } - Chunk.clear(); - if (NewBlockFile) - { - ZEN_ASSERT_SLOW(NewBlockFile->IsOpen()); - NewBlockFile->Flush(); - } - } - - const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; - for (size_t DeleteIndex : DeleteMap) - { - DeletedSize += ChunkLocations[DeleteIndex].Size; - } - - ChangeCallback(MovedChunks, DeleteMap); - MovedCount += MovedChunks.size(); - DeletedCount += DeleteMap.size(); - MovedChunks.clear(); - - if (OldBlockFile) - { - RwLock::ExclusiveLockScope __(m_InsertLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - - ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); - ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile); - m_ChunkBlocks.erase(BlockIndex); - m_TotalSize.fetch_sub(OldBlockFile->TotalSize(), std::memory_order::relaxed); - OldBlockFile->MarkAsDeleteOnClose(); - } - } - if (NewBlockFile) - { - ZEN_ASSERT_SLOW(NewBlockFile->IsOpen()); - NewBlockFile->Flush(); - NewBlockFile = nullptr; - } - } - catch (const std::system_error& SystemError) - { - if (IsOOM(SystemError.code())) - { - ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, SystemError.what()); - } - else if (IsOOD(SystemError.code())) - { - ZEN_WARN("reclaiming space for '{}' ran out of disk space: '{}'", m_BlocksBasePath, SystemError.what()); - } - else - { - ZEN_ERROR("reclaiming space for '{}' failed with system error exception: '{}'", m_BlocksBasePath, SystemError.what()); - } - } - catch (const std::bad_alloc& BadAlloc) - { - ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, BadAlloc.what()); - } - catch (const std::exception& ex) - { - ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what()); - } -} - bool BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, std::span<const size_t> InChunkIndexes, @@ -1945,53 +1502,6 @@ TEST_CASE("blockstore.chunks") CHECK(ReadChunkAsString(Store, ThirdChunkLocation) == ThirdChunkData); } -TEST_CASE("blockstore.clean.stray.blocks") -{ - using namespace blockstore::impl; - - ScopedTemporaryDirectory TempDir; - auto RootDirectory = TempDir.Path(); - - BlockStore Store; - Store.Initialize(RootDirectory / "store", 128, 1024); - - std::string FirstChunkData = "This is the data of the first chunk that we will write"; - BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); - std::string SecondChunkData = "This is the data for the second chunk that we will write"; - BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); - std::string ThirdChunkData = - "This is a much longer string that will not fit in the first block so it should be placed in the second block"; - BlockStoreLocation ThirdChunkLocation = WriteStringAsChunk(Store, ThirdChunkData, 4); - - Store.Close(); - - Store.Initialize(RootDirectory / "store", 128, 1024); - CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2); - IoBuffer ThirdChunk = Store.TryGetChunk(ThirdChunkLocation); - CHECK(ThirdChunk); - - // Reclaim space should delete unreferenced block - Store.ReclaimSpace(Store.GetReclaimSnapshotState(), {FirstChunkLocation, SecondChunkLocation}, {0, 1}, 4, false); - // Block lives on as long as we reference it via ThirdChunk - CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2); - ThirdChunk = {}; - CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 1); - ThirdChunk = Store.TryGetChunk(ThirdChunkLocation); - CHECK(!ThirdChunk); - - // Recreate a fake block for a missing chunk location - BlockStore::BlockIndexSet KnownBlocks; - KnownBlocks.insert(FirstChunkLocation.BlockIndex); - KnownBlocks.insert(SecondChunkLocation.BlockIndex); - KnownBlocks.insert(ThirdChunkLocation.BlockIndex); - Store.SyncExistingBlocksOnDisk(KnownBlocks); - - // We create a fake block for the location - we should still not be able to get the chunk - CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2); - ThirdChunk = Store.TryGetChunk(ThirdChunkLocation); - CHECK(!ThirdChunk); -} - TEST_CASE("blockstore.flush.force.new.block") { using namespace blockstore::impl; @@ -2133,125 +1643,6 @@ TEST_CASE("blockstore.iterate.chunks") WorkLatch.Wait(); } -TEST_CASE("blockstore.reclaim.space") -{ - using namespace blockstore::impl; - - ScopedTemporaryDirectory TempDir; - auto RootDirectory = TempDir.Path(); - - BlockStore Store; - Store.Initialize(RootDirectory / "store", 512, 1024); - - constexpr size_t ChunkCount = 200; - constexpr size_t Alignment = 8; - std::vector<BlockStoreLocation> ChunkLocations; - std::vector<IoHash> ChunkHashes; - ChunkLocations.reserve(ChunkCount); - ChunkHashes.reserve(ChunkCount); - for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) - { - IoBuffer Chunk = CreateRandomBlob(57 + ChunkIndex); - - Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment, [&](const BlockStoreLocation& L) { ChunkLocations.push_back(L); }); - ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); - } - - std::vector<size_t> ChunksToKeep; - ChunksToKeep.reserve(ChunkLocations.size()); - for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) - { - ChunksToKeep.push_back(ChunkIndex); - } - - Store.Flush(/*ForceNewBlock*/ false); - BlockStore::ReclaimSnapshotState State1 = Store.GetReclaimSnapshotState(); - Store.ReclaimSpace(State1, ChunkLocations, ChunksToKeep, Alignment, true); - - // If we keep all the chunks we should not get any callbacks on moved/deleted stuff - Store.ReclaimSpace( - State1, - ChunkLocations, - ChunksToKeep, - Alignment, - false, - [](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&) { CHECK(false); }, - []() { - CHECK(false); - return 0; - }); - - size_t DeleteChunkCount = 38; - ChunksToKeep.clear(); - for (size_t ChunkIndex = DeleteChunkCount; ChunkIndex < ChunkCount; ++ChunkIndex) - { - ChunksToKeep.push_back(ChunkIndex); - } - - std::vector<BlockStoreLocation> NewChunkLocations = ChunkLocations; - size_t MovedChunkCount = 0; - size_t DeletedChunkCount = 0; - Store.ReclaimSpace( - State1, - ChunkLocations, - ChunksToKeep, - Alignment, - false, - [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) { - for (const auto& MovedChunk : MovedChunks) - { - CHECK(MovedChunk.first >= DeleteChunkCount); - NewChunkLocations[MovedChunk.first] = MovedChunk.second; - } - MovedChunkCount += MovedChunks.size(); - for (size_t DeletedIndex : DeletedChunks) - { - CHECK(DeletedIndex < DeleteChunkCount); - } - DeletedChunkCount += DeletedChunks.size(); - }, - []() { - CHECK(false); - return 0; - }); - CHECK(MovedChunkCount <= DeleteChunkCount); - CHECK(DeletedChunkCount == DeleteChunkCount); - ChunkLocations = std::vector<BlockStoreLocation>(NewChunkLocations.begin() + DeleteChunkCount, NewChunkLocations.end()); - - for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) - { - IoBuffer ChunkBlock = Store.TryGetChunk(NewChunkLocations[ChunkIndex]); - if (ChunkIndex >= DeleteChunkCount) - { - IoBuffer VerifyChunk = Store.TryGetChunk(NewChunkLocations[ChunkIndex]); - CHECK(VerifyChunk); - IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); - CHECK(VerifyHash == ChunkHashes[ChunkIndex]); - } - } - - // We need to take a new state since reclaim space add new block when compacting - BlockStore::ReclaimSnapshotState State2 = Store.GetReclaimSnapshotState(); - NewChunkLocations = ChunkLocations; - MovedChunkCount = 0; - DeletedChunkCount = 0; - Store.ReclaimSpace( - State2, - ChunkLocations, - {}, - Alignment, - false, - [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) { - CHECK(MovedChunks.empty()); - DeletedChunkCount += DeletedChunks.size(); - }, - []() { - CHECK(false); - return 0; - }); - CHECK(DeletedChunkCount == ChunkCount - DeleteChunkCount); -} - TEST_CASE("blockstore.thread.read.write") { using namespace blockstore::impl; @@ -2411,12 +1802,11 @@ TEST_CASE("blockstore.compact.blocks") } SUBCASE("keep current write block") { - uint64_t PreSize = Store.TotalSize(); - BlockStoreCompactState State; - BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState(); + uint64_t PreSize = Store.TotalSize(); + BlockStoreCompactState State; for (const BlockStoreLocation& Location : ChunkLocations) { - if (SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex)) + if (Store.IsWriting(Location.BlockIndex)) { continue; } @@ -2440,9 +1830,8 @@ TEST_CASE("blockstore.compact.blocks") { Store.Flush(true); - uint64_t PreSize = Store.TotalSize(); - BlockStoreCompactState State; - BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState(); + uint64_t PreSize = Store.TotalSize(); + BlockStoreCompactState State; for (const BlockStoreLocation& Location : ChunkLocations) { State.AddKeepLocation(Location); @@ -2462,11 +1851,10 @@ TEST_CASE("blockstore.compact.blocks") } SUBCASE("drop first block") { - uint64_t PreSize = Store.TotalSize(); - BlockStoreCompactState State; - BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState(); + uint64_t PreSize = Store.TotalSize(); + BlockStoreCompactState State; - CHECK(!SnapshotState.m_ActiveWriteBlocks.contains(0)); + CHECK(!Store.IsWriting(0)); State.IncludeBlock(0); uint64_t FirstBlockSize = 0; @@ -2496,11 +1884,10 @@ TEST_CASE("blockstore.compact.blocks") } SUBCASE("compact first block") { - uint64_t PreSize = Store.TotalSize(); - BlockStoreCompactState State; - BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState(); + uint64_t PreSize = Store.TotalSize(); + BlockStoreCompactState State; - CHECK(!SnapshotState.m_ActiveWriteBlocks.contains(0)); + CHECK(!Store.IsWriting(0)); State.IncludeBlock(0); uint64_t SkipChunkCount = 2; @@ -2555,14 +1942,13 @@ TEST_CASE("blockstore.compact.blocks") } SUBCASE("compact every other item") { - uint64_t PreSize = Store.TotalSize(); - BlockStoreCompactState State; - BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState(); - bool SkipFlag = false; + uint64_t PreSize = Store.TotalSize(); + BlockStoreCompactState State; + bool SkipFlag = false; for (const BlockStoreLocation& Location : ChunkLocations) { - if (SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex)) + if (Store.IsWriting(Location.BlockIndex)) { continue; } @@ -2579,7 +1965,7 @@ TEST_CASE("blockstore.compact.blocks") std::vector<BlockStoreLocation> DroppedLocations; for (const BlockStoreLocation& Location : ChunkLocations) { - if (SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex)) + if (Store.IsWriting(Location.BlockIndex)) { continue; } @@ -2616,7 +2002,7 @@ TEST_CASE("blockstore.compact.blocks") for (size_t Index = 0; Index < ChunkLocations.size(); Index++) { const BlockStoreLocation& Location = ChunkLocations[Index]; - if (SkipFlag && !SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex)) + if (SkipFlag && !Store.IsWriting(Location.BlockIndex)) { CHECK(std::find(DroppedLocations.begin(), DroppedLocations.end(), Location) != DroppedLocations.end()); CHECK(!Store.TryGetChunk(Location)); diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index fed37824a..8a2de34e2 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -2293,466 +2293,6 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) } } -void -ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Z$::Bucket::GatherReferences"); - -#define CALCULATE_BLOCKING_TIME 0 - -#if CALCULATE_BLOCKING_TIME - uint64_t WriteBlockTimeUs = 0; - uint64_t WriteBlockLongestTimeUs = 0; - uint64_t ReadBlockTimeUs = 0; - uint64_t ReadBlockLongestTimeUs = 0; -#endif // CALCULATE_BLOCKING_TIME - - Stopwatch TotalTimer; - const auto _ = MakeGuard([&] { -#if CALCULATE_BLOCKING_TIME - ZEN_DEBUG("gathered references from '{}' in {} write lock: {} ({}), read lock: {} ({})", - m_BucketDir, - NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), - NiceLatencyNs(WriteBlockTimeUs), - NiceLatencyNs(WriteBlockLongestTimeUs), - NiceLatencyNs(ReadBlockTimeUs), - NiceLatencyNs(ReadBlockLongestTimeUs)); -#else - ZEN_DEBUG("gathered references from '{}' in {}", m_BucketDir, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); -#endif // CALCULATE_BLOCKING_TIME - }); - - const GcClock::TimePoint ExpireTime = GcCtx.CacheExpireTime(); - - const GcClock::Tick ExpireTicks = ExpireTime.time_since_epoch().count(); - - IndexMap Index; - std::vector<AccessTime> AccessTimes; - std::vector<BucketPayload> Payloads; - { - RwLock::SharedLockScope __(m_IndexLock); -#if CALCULATE_BLOCKING_TIME - Stopwatch Timer; - const auto ___ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); -#endif // CALCULATE_BLOCKING_TIME - if (m_Index.empty()) - { - return; - } - Index = m_Index; - AccessTimes = m_AccessTimes; - Payloads = m_Payloads; - } - - std::vector<IoHash> ExpiredKeys; - ExpiredKeys.reserve(1024); - - std::vector<IoHash> Cids; - if (!GcCtx.SkipCid()) - { - Cids.reserve(1024); - } - - std::vector<std::pair<IoHash, size_t>> StructuredItemsWithUnknownAttachments; - - for (const auto& Entry : Index) - { - const IoHash& Key = Entry.first; - size_t PayloadIndex = Entry.second; - GcClock::Tick AccessTime = AccessTimes[PayloadIndex]; - if (AccessTime < ExpireTicks) - { - ExpiredKeys.push_back(Key); - continue; - } - - if (GcCtx.SkipCid()) - { - continue; - } - - BucketPayload& Payload = Payloads[PayloadIndex]; - const DiskLocation& Loc = Payload.Location; - - if (!Loc.IsFlagSet(DiskLocation::kStructured)) - { - continue; - } - StructuredItemsWithUnknownAttachments.push_back(Entry); - } - - for (const auto& Entry : StructuredItemsWithUnknownAttachments) - { - const IoHash& Key = Entry.first; - BucketPayload& Payload = Payloads[Entry.second]; - const DiskLocation& Loc = Payload.Location; - { - IoBuffer Buffer; - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) - { - if (Buffer = GetStandaloneCacheValue(Loc, Key); !Buffer) - { - continue; - } - } - else - { - RwLock::SharedLockScope IndexLock(m_IndexLock); -#if CALCULATE_BLOCKING_TIME - Stopwatch Timer; - const auto ___ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); -#endif // CALCULATE_BLOCKING_TIME - if (auto It = m_Index.find(Key); It != m_Index.end()) - { - const BucketPayload& CachedPayload = m_Payloads[It->second]; - if (CachedPayload.MemCached) - { - Buffer = m_MemCachedPayloads[CachedPayload.MemCached].Payload; - ZEN_ASSERT_SLOW(Buffer); - } - else - { - DiskLocation Location = m_Payloads[It->second].Location; - IndexLock.ReleaseNow(); - Buffer = GetInlineCacheValue(Location); - // Don't memcache items when doing GC - } - } - if (!Buffer) - { - continue; - } - } - - ZEN_ASSERT(Buffer); - ZEN_ASSERT(Buffer.GetContentType() == ZenContentType::kCbObject); - CbObjectView Obj(Buffer.GetData()); - Obj.IterateAttachments([&Cids](CbFieldView Field) { Cids.push_back(Field.AsAttachment()); }); - if (Cids.size() >= 1024) - { - GcCtx.AddRetainedCids(Cids); - Cids.clear(); - } - } - } - - GcCtx.AddRetainedCids(Cids); - GcCtx.SetExpiredCacheKeys(m_BucketDir.string(), std::move(ExpiredKeys)); -} - -void -ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Z$::Bucket::CollectGarbage"); - - ZEN_DEBUG("collecting garbage from '{}'", m_BucketDir); - - Stopwatch TotalTimer; - uint64_t WriteBlockTimeUs = 0; - uint64_t WriteBlockLongestTimeUs = 0; - uint64_t ReadBlockTimeUs = 0; - uint64_t ReadBlockLongestTimeUs = 0; - uint64_t TotalChunkCount = 0; - uint64_t DeletedSize = 0; - GcStorageSize OldTotalSize = StorageSize(); - - std::unordered_set<IoHash> DeletedChunks; - uint64_t MovedCount = 0; - - const auto _ = MakeGuard([&] { - ZEN_DEBUG( - "garbage collect from '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted {} and moved " - "{} " - "of {} " - "entries ({}/{}).", - m_BucketDir, - NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), - NiceLatencyNs(WriteBlockTimeUs), - NiceLatencyNs(WriteBlockLongestTimeUs), - NiceLatencyNs(ReadBlockTimeUs), - NiceLatencyNs(ReadBlockLongestTimeUs), - NiceBytes(DeletedSize), - DeletedChunks.size(), - MovedCount, - TotalChunkCount, - NiceBytes(OldTotalSize.DiskSize), - NiceBytes(OldTotalSize.MemorySize)); - - bool Expected = false; - if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true)) - { - return; - } - auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); }); - - try - { - SaveSnapshot([&]() { return GcCtx.ClaimGCReserve(); }); - } - catch (const std::exception& Ex) - { - ZEN_WARN("Failed to write index and manifest after GC in '{}'. Reason: '{}'", m_BucketDir, Ex.what()); - } - }); - - auto __ = MakeGuard([&]() { - if (!DeletedChunks.empty()) - { - // Clean up m_AccessTimes and m_Payloads vectors - std::vector<BucketPayload> Payloads; - std::vector<AccessTime> AccessTimes; - std::vector<BucketMetaData> MetaDatas; - std::vector<MemCacheData> MemCachedPayloads; - IndexMap Index; - { - RwLock::ExclusiveLockScope IndexLock(m_IndexLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - CompactState(IndexLock, Payloads, AccessTimes, MetaDatas, MemCachedPayloads, Index); - } - GcCtx.AddDeletedCids(std::vector<IoHash>(DeletedChunks.begin(), DeletedChunks.end())); - } - }); - - std::span<const IoHash> ExpiredCacheKeySpan = GcCtx.ExpiredCacheKeys(m_BucketDir.string()); - if (ExpiredCacheKeySpan.empty()) - { - return; - } - - m_SlogFile.Flush(); - - std::unordered_set<IoHash, IoHash::Hasher> ExpiredCacheKeys(ExpiredCacheKeySpan.begin(), ExpiredCacheKeySpan.end()); - - std::vector<DiskIndexEntry> ExpiredStandaloneEntries; - IndexMap IndexSnapshot; - std::vector<BucketPayload> PayloadsSnapshot; - BlockStore::ReclaimSnapshotState BlockStoreState; - { - bool Expected = false; - if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true)) - { - ZEN_DEBUG("garbage collect SKIPPED, for '{}', container is currently flushing", m_BucketDir); - return; - } - auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); }); - - { - ZEN_TRACE_CPU("Z$::Bucket::CollectGarbage::State"); - RwLock::SharedLockScope IndexLock(m_IndexLock); - - Stopwatch Timer; - const auto ____ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - - BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); - - for (const IoHash& Key : ExpiredCacheKeys) - { - if (auto It = m_Index.find(Key); It != m_Index.end()) - { - const BucketPayload& Payload = m_Payloads[It->second]; - if (Payload.Location.Flags & DiskLocation::kStandaloneFile) - { - DiskIndexEntry Entry = {.Key = Key, .Location = Payload.Location}; - Entry.Location.Flags |= DiskLocation::kTombStone; - ExpiredStandaloneEntries.push_back(Entry); - } - } - } - - PayloadsSnapshot = m_Payloads; - IndexSnapshot = m_Index; - - if (GcCtx.IsDeletionMode()) - { - IndexLock.ReleaseNow(); - RwLock::ExclusiveLockScope __(m_IndexLock); - for (const auto& Entry : ExpiredStandaloneEntries) - { - if (m_Index.erase(Entry.Key) == 1) - { - m_StandaloneSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed); - DeletedChunks.insert(Entry.Key); - } - } - m_SlogFile.Append(ExpiredStandaloneEntries); - } - } - } - - if (GcCtx.IsDeletionMode()) - { - ZEN_TRACE_CPU("Z$::Bucket::CollectGarbage::Delete"); - - ExtendablePathBuilder<256> Path; - - for (const auto& Entry : ExpiredStandaloneEntries) - { - const IoHash& Key = Entry.Key; - - Path.Reset(); - BuildPath(Path, Key); - fs::path FilePath = Path.ToPath(); - - { - RwLock::SharedLockScope IndexLock(m_IndexLock); - Stopwatch Timer; - const auto ____ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - if (m_Index.contains(Key)) - { - // Someone added it back, let the file on disk be - ZEN_DEBUG("skipping z$ delete standalone of file '{}' FAILED, it has been added back", Path.ToUtf8()); - continue; - } - IndexLock.ReleaseNow(); - - RwLock::ExclusiveLockScope ValueLock(LockForHash(Key)); - if (fs::is_regular_file(FilePath)) - { - ZEN_DEBUG("deleting standalone cache file '{}'", Path.ToUtf8()); - std::error_code Ec; - fs::remove(FilePath, Ec); - if (Ec) - { - ZEN_WARN("delete expired z$ standalone file '{}' FAILED, reason: '{}'", Path.ToUtf8(), Ec.message()); - continue; - } - } - } - DeletedSize += Entry.Location.Size(); - } - } - - TotalChunkCount = IndexSnapshot.size(); - - std::vector<BlockStoreLocation> ChunkLocations; - BlockStore::ChunkIndexArray KeepChunkIndexes; - std::vector<IoHash> ChunkIndexToChunkHash; - ChunkLocations.reserve(TotalChunkCount); - ChunkLocations.reserve(TotalChunkCount); - ChunkIndexToChunkHash.reserve(TotalChunkCount); - { - TotalChunkCount = 0; - for (const auto& Entry : IndexSnapshot) - { - size_t EntryIndex = Entry.second; - const DiskLocation& DiskLocation = PayloadsSnapshot[EntryIndex].Location; - - if (DiskLocation.Flags & DiskLocation::kStandaloneFile) - { - continue; - } - const IoHash& Key = Entry.first; - BlockStoreLocation Location = DiskLocation.GetBlockLocation(m_Configuration.PayloadAlignment); - size_t ChunkIndex = ChunkLocations.size(); - ChunkLocations.push_back(Location); - ChunkIndexToChunkHash.push_back(Key); - if (ExpiredCacheKeys.contains(Key)) - { - continue; - } - KeepChunkIndexes.push_back(ChunkIndex); - } - } - TotalChunkCount = ChunkLocations.size(); - size_t DeleteCount = TotalChunkCount - KeepChunkIndexes.size(); - - const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); - if (!PerformDelete) - { - m_BlockStore.ReclaimSpace(BlockStoreState, ChunkLocations, KeepChunkIndexes, m_Configuration.PayloadAlignment, true); - GcStorageSize CurrentTotalSize = StorageSize(); - ZEN_DEBUG("garbage collect from '{}' DISABLED, found {} chunks of total {} ({}/{})", - m_BucketDir, - DeleteCount, - TotalChunkCount, - NiceBytes(CurrentTotalSize.DiskSize), - NiceBytes(CurrentTotalSize.MemorySize)); - return; - } - - m_BlockStore.ReclaimSpace( - BlockStoreState, - ChunkLocations, - KeepChunkIndexes, - m_Configuration.PayloadAlignment, - false, - [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& RemovedChunks) { - std::vector<DiskIndexEntry> LogEntries; - LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); - { - RwLock::ExclusiveLockScope IndexLock(m_IndexLock); - Stopwatch Timer; - const auto ____ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - for (const auto& Entry : MovedChunks) - { - size_t ChunkIndex = Entry.first; - const BlockStoreLocation& NewLocation = Entry.second; - const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; - size_t EntryIndex = m_Index[ChunkHash]; - BucketPayload& Payload = m_Payloads[EntryIndex]; - if (PayloadsSnapshot[IndexSnapshot[ChunkHash]].Location != m_Payloads[EntryIndex].Location) - { - // Entry has been updated while GC was running, ignore the move - continue; - } - Payload.Location = DiskLocation(NewLocation, m_Configuration.PayloadAlignment, Payload.Location.GetFlags()); - LogEntries.push_back({.Key = ChunkHash, .Location = Payload.Location}); - } - for (const size_t ChunkIndex : RemovedChunks) - { - const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; - size_t EntryIndex = m_Index[ChunkHash]; - BucketPayload& Payload = m_Payloads[EntryIndex]; - if (PayloadsSnapshot[IndexSnapshot[ChunkHash]].Location != Payload.Location) - { - // Entry has been updated while GC was running, ignore the delete - continue; - } - const DiskLocation& OldDiskLocation = Payload.Location; - LogEntries.push_back({.Key = ChunkHash, - .Location = DiskLocation(OldDiskLocation.GetBlockLocation(m_Configuration.PayloadAlignment), - m_Configuration.PayloadAlignment, - OldDiskLocation.GetFlags() | DiskLocation::kTombStone)}); - - RemoveMemCachedData(IndexLock, Payload); - RemoveMetaData(IndexLock, Payload); - - m_Index.erase(ChunkHash); - DeletedChunks.insert(ChunkHash); - } - } - - m_SlogFile.Append(LogEntries); - m_SlogFile.Flush(); - }, - [&]() { return GcCtx.ClaimGCReserve(); }); -} - ZenCacheDiskLayer::BucketStats ZenCacheDiskLayer::CacheBucket::Stats() { @@ -2835,30 +2375,6 @@ ZenCacheDiskLayer::CacheBucket::EnumerateBucketContents( } void -ZenCacheDiskLayer::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Z$::CollectGarbage"); - - std::vector<CacheBucket*> Buckets; - { - RwLock::SharedLockScope _(m_Lock); - Buckets.reserve(m_Buckets.size()); - for (auto& Kv : m_Buckets) - { - Buckets.push_back(Kv.second.get()); - } - } - for (CacheBucket* Bucket : Buckets) - { - Bucket->CollectGarbage(GcCtx); - } - if (!m_IsMemCacheTrimming) - { - MemCacheTrim(Buckets, GcCtx.CacheExpireTime()); - } -} - -void ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value, std::span<IoHash> References) { ZEN_TRACE_CPU("Z$::Bucket::PutStandaloneCacheValue"); @@ -4519,26 +4035,6 @@ ZenCacheDiskLayer::ScrubStorage(ScrubContext& Ctx) } } -void -ZenCacheDiskLayer::GatherReferences(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Z$::GatherReferences"); - - std::vector<CacheBucket*> Buckets; - { - RwLock::SharedLockScope _(m_Lock); - Buckets.reserve(m_Buckets.size()); - for (auto& Kv : m_Buckets) - { - Buckets.push_back(Kv.second.get()); - } - } - for (CacheBucket* Bucket : Buckets) - { - Bucket->GatherReferences(GcCtx); - } -} - GcStorageSize ZenCacheDiskLayer::StorageSize() const { diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index ac8b70c1c..d30bd93cc 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -127,14 +127,12 @@ ZenCacheNamespace::ZenCacheNamespace(GcManager& Gc, JobQueue& JobQueue, const st m_DiskLayer.DiscoverBuckets(); - m_Gc.AddGcContributor(this); m_Gc.AddGcStorage(this); } ZenCacheNamespace::~ZenCacheNamespace() { m_Gc.RemoveGcStorage(this); - m_Gc.RemoveGcContributor(this); } struct ZenCacheNamespace::PutBatchHandle @@ -307,26 +305,6 @@ ZenCacheNamespace::ScrubStorage(ScrubContext& Ctx) m_DiskLayer.ScrubStorage(Ctx); } -void -ZenCacheNamespace::GatherReferences(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Z$::ZenCacheNamespace::GatherReferences"); - - Stopwatch Timer; - const auto Guard = - MakeGuard([&] { ZEN_DEBUG("cache gathered all references from '{}' in {}", m_RootDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - - m_DiskLayer.GatherReferences(GcCtx); -} - -void -ZenCacheNamespace::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Z$::Namespace::CollectGarbage"); - - m_DiskLayer.CollectGarbage(GcCtx); -} - GcStorageSize ZenCacheNamespace::StorageSize() const { @@ -1452,186 +1430,7 @@ TEST_CASE("cachestore.size") } } -TEST_CASE("cachestore.gc") -{ - using namespace testutils; - - auto JobQueue = MakeJobQueue(1, "testqueue"); - - SUBCASE("gather references does NOT add references for expired cache entries") - { - ScopedTemporaryDirectory TempDir; - std::vector<IoHash> Cids{CreateKey(1), CreateKey(2), CreateKey(3)}; - - const auto CollectAndFilter = [](GcManager& Gc, - GcClock::TimePoint Time, - GcClock::Duration MaxDuration, - std::span<const IoHash> Cids, - std::vector<IoHash>& OutKeep) { - GcContext GcCtx(Time - MaxDuration, Time - MaxDuration); - Gc.CollectGarbage(GcCtx); - OutKeep.clear(); - GcCtx.FilterCids(Cids, [&OutKeep](const IoHash& Hash) { OutKeep.push_back(Hash); }); - }; - - { - GcManager Gc; - ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {}); - const auto Bucket = "teardrinker"sv; - - // Create a cache record - const IoHash Key = CreateKey(42); - CbObjectWriter Record; - Record << "Key"sv - << "SomeRecord"sv; - - for (size_t Idx = 0; auto& Cid : Cids) - { - Record.AddBinaryAttachment(fmt::format("attachment-{}", Idx++), Cid); - } - - IoBuffer Buffer = Record.Save().GetBuffer().AsIoBuffer(); - Buffer.SetContentType(ZenContentType::kCbObject); - - Zcs.Put(Bucket, Key, {.Value = Buffer}, Cids); - - std::vector<IoHash> Keep; - - // Collect garbage with 1 hour max cache duration - { - CollectAndFilter(Gc, GcClock::Now(), std::chrono::hours(1), Cids, Keep); - CHECK_EQ(Cids.size(), Keep.size()); - } - - // Move forward in time - { - CollectAndFilter(Gc, GcClock::Now() + std::chrono::hours(2), std::chrono::hours(1), Cids, Keep); - CHECK_EQ(0, Keep.size()); - } - } - - // Expect timestamps to be serialized - { - GcManager Gc; - ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {}); - std::vector<IoHash> Keep; - - // Collect garbage with 1 hour max cache duration - { - CollectAndFilter(Gc, GcClock::Now(), std::chrono::hours(1), Cids, Keep); - CHECK_EQ(3, Keep.size()); - } - - // Move forward in time - { - CollectAndFilter(Gc, GcClock::Now() + std::chrono::hours(2), std::chrono::hours(1), Cids, Keep); - CHECK_EQ(0, Keep.size()); - } - } - } - - SUBCASE("gc removes standalone values") - { - ScopedTemporaryDirectory TempDir; - GcManager Gc; - ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {}); - const auto Bucket = "fortysixandtwo"sv; - const GcClock::TimePoint CurrentTime = GcClock::Now(); - - std::vector<IoHash> Keys{CreateKey(1), CreateKey(2), CreateKey(3)}; - - for (const auto& Key : Keys) - { - IoBuffer Value = CreateRandomBlob(128 << 10); - Zcs.Put(Bucket, Key, {.Value = Value}, {}); - } - - { - GcContext GcCtx(CurrentTime - std::chrono::hours(46), CurrentTime - std::chrono::hours(46)); - - Gc.CollectGarbage(GcCtx); - - for (const auto& Key : Keys) - { - ZenCacheValue CacheValue; - const bool Exists = Zcs.Get(Bucket, Key, CacheValue); - CHECK(Exists); - } - } - - // Move forward in time and collect again - { - GcContext GcCtx(CurrentTime + std::chrono::minutes(2), CurrentTime + std::chrono::minutes(2)); - Gc.CollectGarbage(GcCtx); - - for (const auto& Key : Keys) - { - ZenCacheValue CacheValue; - const bool Exists = Zcs.Get(Bucket, Key, CacheValue); - CHECK(!Exists); - } - - CHECK_EQ(0, Zcs.StorageSize().DiskSize); - } - } - - SUBCASE("gc removes small objects") - { - ScopedTemporaryDirectory TempDir; - GcManager Gc; - { - ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {}); - const auto Bucket = "rightintwo"sv; - - std::vector<IoHash> Keys{CreateKey(1), CreateKey(2), CreateKey(3)}; - - for (const auto& Key : Keys) - { - IoBuffer Value = CreateRandomBlob(128); - Zcs.Put(Bucket, Key, {.Value = Value}, {}); - } - - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(2), GcClock::Now() - std::chrono::hours(2)); - GcCtx.CollectSmallObjects(true); - - Gc.CollectGarbage(GcCtx); - - for (const auto& Key : Keys) - { - ZenCacheValue CacheValue; - const bool Exists = Zcs.Get(Bucket, Key, CacheValue); - CHECK(Exists); - } - } - - // Move forward in time and collect again - { - GcContext GcCtx(GcClock::Now() + std::chrono::minutes(2), GcClock::Now() + std::chrono::minutes(2)); - GcCtx.CollectSmallObjects(true); - - Zcs.Flush(); - Gc.CollectGarbage(GcCtx); - - for (const auto& Key : Keys) - { - ZenCacheValue CacheValue; - const bool Exists = Zcs.Get(Bucket, Key, CacheValue); - CHECK(!Exists); - } - // GC could not remove the currently written block so size will not be zero - CHECK_NE(0, Zcs.StorageSize().DiskSize); - } - } - { - // Unreferenced blocks will be pruned so size should now be zero - ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {}); - CHECK_EQ(0, Zcs.StorageSize().DiskSize); - } - } -} - -TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * doctest::skip(true)) +TEST_CASE("cachestore.threadedinsert") // * doctest::skip(true)) { // for (uint32_t i = 0; i < 100; ++i) { @@ -1699,39 +1498,24 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * } } - auto DoGC = [](GcManager& Gc, - ZenCacheNamespace& Zcs, - std::unordered_map<IoHash, std::string, IoHash::Hasher>& GcChunkHashes, - const std::vector<IoHash>& KeepHashes) { - if (GCV2::Enabled) + auto DoGC = [](GcManager& Gc, ZenCacheNamespace& Zcs, std::unordered_map<IoHash, std::string, IoHash::Hasher>& GcChunkHashes) { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true, + .CompactBlockUsageThresholdPercent = 100}; + Gc.CollectGarbage(Settings); + // Cheating as we don't get the list of deleted hashes back from this call + std::unordered_map<IoHash, std::string, IoHash::Hasher> RemainingChunkHashes; + for (const auto& It : GcChunkHashes) { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true, - .CompactBlockUsageThresholdPercent = 100}; - Gc.CollectGarbage(Settings); - // Cheating as we don't get the list of deleted hashes back from this call - std::unordered_map<IoHash, std::string, IoHash::Hasher> RemainingChunkHashes; - for (const auto& It : GcChunkHashes) + ZenCacheValue Tmp; + if (Zcs.Get(It.second, It.first, Tmp)) { - ZenCacheValue Tmp; - if (Zcs.Get(It.second, It.first, Tmp)) - { - RemainingChunkHashes.insert(It); - } + RemainingChunkHashes.insert(It); } - GcChunkHashes.swap(RemainingChunkHashes); - } - else - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - GcCtx.AddRetainedCids(KeepHashes); - Zcs.CollectGarbage(GcCtx); - const HashKeySet& Deleted = GcCtx.DeletedCids(); - Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); } + GcChunkHashes.swap(RemainingChunkHashes); }; const uint64_t TotalSize = Zcs.StorageSize().DiskSize; @@ -1813,32 +1597,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * GcChunkHashes[Chunk.first] = Chunk.second.Bucket; } } - std::vector<IoHash> KeepHashes; - KeepHashes.reserve(GcChunkHashes.size()); - for (const auto& Entry : GcChunkHashes) - { - KeepHashes.push_back(Entry.first); - } - size_t C = 0; - while (C < KeepHashes.size()) - { - if (C % 155 == 0) - { - if (C < KeepHashes.size() - 1) - { - KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - if (C + 3 < KeepHashes.size() - 1) - { - KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - } - C++; - } - - DoGC(Gc, Zcs, GcChunkHashes, KeepHashes); + DoGC(Gc, Zcs, GcChunkHashes); } while (WorkCompleted < NewChunks.size() + Chunks.size()) @@ -1856,32 +1615,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * GcChunkHashes[Chunk.first] = Chunk.second.Bucket; } } - std::vector<IoHash> KeepHashes; - KeepHashes.reserve(GcChunkHashes.size()); - for (const auto& Entry : GcChunkHashes) - { - KeepHashes.push_back(Entry.first); - } - size_t C = 0; - while (C < KeepHashes.size()) - { - if (C % 155 == 0) - { - if (C < KeepHashes.size() - 1) - { - KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - if (C + 3 < KeepHashes.size() - 1) - { - KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - } - C++; - } - - DoGC(Gc, Zcs, GcChunkHashes, KeepHashes); + DoGC(Gc, Zcs, GcChunkHashes); } } { diff --git a/src/zenstore/cas.cpp b/src/zenstore/cas.cpp index 871558a52..bff221fc7 100644 --- a/src/zenstore/cas.cpp +++ b/src/zenstore/cas.cpp @@ -62,7 +62,6 @@ public: WorkerThreadPool* OptionalWorkerPool) override; virtual void Flush() override; virtual void ScrubStorage(ScrubContext& Ctx) override; - virtual void GarbageCollect(GcContext& GcCtx) override; virtual CidStoreSize TotalSize() const override; private: @@ -459,16 +458,6 @@ CasImpl::ScrubStorage(ScrubContext& Ctx) m_LargeStrategy.ScrubStorage(Ctx); } -void -CasImpl::GarbageCollect(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Cas::GarbageCollect"); - - m_SmallStrategy.CollectGarbage(GcCtx); - m_TinyStrategy.CollectGarbage(GcCtx); - m_LargeStrategy.CollectGarbage(GcCtx); -} - CidStoreSize CasImpl::TotalSize() const { diff --git a/src/zenstore/cas.h b/src/zenstore/cas.h index 169f4d58c..bedbc6a9a 100644 --- a/src/zenstore/cas.h +++ b/src/zenstore/cas.h @@ -11,7 +11,6 @@ namespace zen { -class GcContext; class GcManager; class ScrubContext; @@ -51,7 +50,6 @@ public: WorkerThreadPool* OptionalWorkerPool) = 0; virtual void Flush() = 0; virtual void ScrubStorage(ScrubContext& Ctx) = 0; - virtual void GarbageCollect(GcContext& GcCtx) = 0; virtual CidStoreSize TotalSize() const = 0; protected: diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index a5d70a991..7f1300177 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -527,167 +527,6 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx) ZEN_INFO("scrubbed {} chunks ({}) in '{}'", ChunkCount.load(), NiceBytes(ChunkBytes.load()), m_RootDirectory / m_ContainerBaseName); } -void -CasContainerStrategy::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("CasContainer::CollectGarbage"); - - if (GcCtx.SkipCid()) - { - return; - } - - // It collects all the blocks that we want to delete chunks from. For each such - // block we keep a list of chunks to retain and a list of chunks to delete. - // - // If there is a block that we are currently writing to, that block is omitted - // from the garbage collection. - // - // Next it will iterate over all blocks that we want to remove chunks from. - // If the block is empty after removal of chunks we mark the block as pending - // delete - we want to delete it as soon as there are no IoBuffers using the - // block file. - // Once complete we update the m_LocationMap by removing the chunks. - // - // If the block is non-empty we write out the chunks we want to keep to a new - // block file (creating new block files as needed). - // - // We update the index as we complete each new block file. This makes it possible - // to break the GC if we want to limit time for execution. - // - // GC can very parallell to regular operation - it will block while taking - // a snapshot of the current m_LocationMap state and while moving blocks it will - // do a blocking operation and update the m_LocationMap after each new block is - // written and figuring out the path to the next new block. - - ZEN_DEBUG("collecting garbage from '{}'", m_RootDirectory / m_ContainerBaseName); - - uint64_t WriteBlockTimeUs = 0; - uint64_t WriteBlockLongestTimeUs = 0; - uint64_t ReadBlockTimeUs = 0; - uint64_t ReadBlockLongestTimeUs = 0; - - LocationMap_t LocationMap; - std::vector<BlockStoreDiskLocation> Locations; - BlockStore::ReclaimSnapshotState BlockStoreState; - { - ZEN_TRACE_CPU("CasContainer::CollectGarbage::State"); - - RwLock::SharedLockScope ___(m_LocationMapLock); - Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - LocationMap = m_LocationMap; - Locations = m_Locations; - BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); - } - - uint64_t TotalChunkCount = LocationMap.size(); - - std::vector<IoHash> TotalChunkHashes; - TotalChunkHashes.reserve(TotalChunkCount); - for (const auto& Entry : LocationMap) - { - TotalChunkHashes.push_back(Entry.first); - } - - std::vector<BlockStoreLocation> ChunkLocations; - BlockStore::ChunkIndexArray KeepChunkIndexes; - std::vector<IoHash> ChunkIndexToChunkHash; - ChunkLocations.reserve(TotalChunkCount); - KeepChunkIndexes.reserve(TotalChunkCount); - ChunkIndexToChunkHash.reserve(TotalChunkCount); - - { - ZEN_TRACE_CPU("CasContainer::CollectGarbage::Filter"); - GcCtx.FilterCids(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { - auto KeyIt = LocationMap.find(ChunkHash); - const BlockStoreDiskLocation& DiskLocation = Locations[KeyIt->second]; - BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); - size_t ChunkIndex = ChunkLocations.size(); - - ChunkLocations.push_back(Location); - ChunkIndexToChunkHash.push_back(ChunkHash); - if (Keep) - { - KeepChunkIndexes.push_back(ChunkIndex); - } - }); - } - - const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); - if (!PerformDelete) - { - m_BlockStore.ReclaimSpace(BlockStoreState, ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, true); - return; - } - - std::vector<IoHash> DeletedChunks; - m_BlockStore.ReclaimSpace( - BlockStoreState, - ChunkLocations, - KeepChunkIndexes, - m_PayloadAlignment, - false, - [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& RemovedChunks) { - std::vector<CasDiskIndexEntry> LogEntries; - LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); - { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - Stopwatch Timer; - const auto ____ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - for (const auto& Entry : MovedChunks) - { - size_t ChunkIndex = Entry.first; - const BlockStoreLocation& NewLocation = Entry.second; - const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; - size_t LocationIndex = m_LocationMap[ChunkHash]; - BlockStoreDiskLocation& Location = m_Locations[LocationIndex]; - if (Locations[LocationMap[ChunkHash]] != Location) - { - // Entry has been updated while GC was running, ignore the move - continue; - } - Location = {NewLocation, m_PayloadAlignment}; - LogEntries.push_back({.Key = ChunkHash, .Location = Location}); - } - for (const size_t ChunkIndex : RemovedChunks) - { - const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; - size_t LocationIndex = m_LocationMap[ChunkHash]; - const BlockStoreDiskLocation& Location = Locations[LocationIndex]; - if (Locations[LocationMap[ChunkHash]] != Location) - { - // Entry has been updated while GC was running, ignore the delete - continue; - } - LogEntries.push_back({.Key = ChunkHash, .Location = Location, .Flags = CasDiskIndexEntry::kTombstone}); - m_LocationMap.erase(ChunkHash); - DeletedChunks.push_back(ChunkHash); - } - } - - m_CasLog.Append(LogEntries); - m_CasLog.Flush(); - }, - [&GcCtx]() { return GcCtx.ClaimGCReserve(); }); - - if (!DeletedChunks.empty()) - { - // Clean up m_Locations vectors - RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock); - CompactIndex(IndexLock); - } - GcCtx.AddDeletedCids(DeletedChunks); -} - class CasContainerStoreCompactor : public GcStoreCompactor { public: @@ -1456,412 +1295,7 @@ TEST_CASE("compactcas.compact.totalsize") } } -TEST_CASE("compactcas.gc.basic") -{ - ScopedTemporaryDirectory TempDir; - - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true); - - IoBuffer Chunk = CreateRandomBlob(128); - IoHash ChunkHash = IoHash::HashBuffer(Chunk); - - const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash); - CHECK(InsertResult.New); - Cas.Flush(); - - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - - Cas.CollectGarbage(GcCtx); - - CHECK(!Cas.HaveChunk(ChunkHash)); -} - -TEST_CASE("compactcas.gc.removefile") -{ - ScopedTemporaryDirectory TempDir; - - IoBuffer Chunk = CreateRandomBlob(128); - IoHash ChunkHash = IoHash::HashBuffer(Chunk); - { - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true); - - const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash); - CHECK(InsertResult.New); - const CasStore::InsertResult InsertResultDup = Cas.InsertChunk(Chunk, ChunkHash); - CHECK(!InsertResultDup.New); - Cas.Flush(); - } - - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, false); - - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - - Cas.CollectGarbage(GcCtx); - - CHECK(!Cas.HaveChunk(ChunkHash)); -} - -TEST_CASE("compactcas.gc.compact") -{ - { - ScopedTemporaryDirectory TempDir; - - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "cb", 2048, 1 << 4, true); - - uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5}; - std::vector<IoBuffer> Chunks; - Chunks.reserve(9); - for (uint64_t Size : ChunkSizes) - { - Chunks.push_back(CreateRandomBlob(Size)); - } - - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(9); - for (const IoBuffer& Chunk : Chunks) - { - ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); - } - - CHECK(Cas.InsertChunk(Chunks[0], ChunkHashes[0]).New); - CHECK(Cas.InsertChunk(Chunks[1], ChunkHashes[1]).New); - CHECK(Cas.InsertChunk(Chunks[2], ChunkHashes[2]).New); - CHECK(Cas.InsertChunk(Chunks[3], ChunkHashes[3]).New); - CHECK(Cas.InsertChunk(Chunks[4], ChunkHashes[4]).New); - CHECK(Cas.InsertChunk(Chunks[5], ChunkHashes[5]).New); - CHECK(Cas.InsertChunk(Chunks[6], ChunkHashes[6]).New); - CHECK(Cas.InsertChunk(Chunks[7], ChunkHashes[7]).New); - CHECK(Cas.InsertChunk(Chunks[8], ChunkHashes[8]).New); - - CHECK(Cas.HaveChunk(ChunkHashes[0])); - CHECK(Cas.HaveChunk(ChunkHashes[1])); - CHECK(Cas.HaveChunk(ChunkHashes[2])); - CHECK(Cas.HaveChunk(ChunkHashes[3])); - CHECK(Cas.HaveChunk(ChunkHashes[4])); - CHECK(Cas.HaveChunk(ChunkHashes[5])); - CHECK(Cas.HaveChunk(ChunkHashes[6])); - CHECK(Cas.HaveChunk(ChunkHashes[7])); - CHECK(Cas.HaveChunk(ChunkHashes[8])); - - auto ValidateChunkExists = [&](size_t Index) { - IoBuffer Chunk = Cas.FindChunk(ChunkHashes[Index]); - bool Exists = !!Chunk; - CHECK(Exists); - IoHash Hash = IoHash::HashBuffer(Chunk); - if (ChunkHashes[Index] != Hash) - { - CHECK(fmt::format("{}", ChunkHashes[Index]) == fmt::format("{}", Hash)); - } - }; - - // Keep first and last - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[0]); - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - Cas.Flush(); - Cas.CollectGarbage(GcCtx); - - CHECK(Cas.HaveChunk(ChunkHashes[0])); - CHECK(!Cas.HaveChunk(ChunkHashes[1])); - CHECK(!Cas.HaveChunk(ChunkHashes[2])); - CHECK(!Cas.HaveChunk(ChunkHashes[3])); - CHECK(!Cas.HaveChunk(ChunkHashes[4])); - CHECK(!Cas.HaveChunk(ChunkHashes[5])); - CHECK(!Cas.HaveChunk(ChunkHashes[6])); - CHECK(!Cas.HaveChunk(ChunkHashes[7])); - CHECK(Cas.HaveChunk(ChunkHashes[8])); - - ValidateChunkExists(0); - ValidateChunkExists(8); - - Cas.InsertChunk(Chunks[1], ChunkHashes[1]); - Cas.InsertChunk(Chunks[2], ChunkHashes[2]); - Cas.InsertChunk(Chunks[3], ChunkHashes[3]); - Cas.InsertChunk(Chunks[4], ChunkHashes[4]); - Cas.InsertChunk(Chunks[5], ChunkHashes[5]); - Cas.InsertChunk(Chunks[6], ChunkHashes[6]); - Cas.InsertChunk(Chunks[7], ChunkHashes[7]); - } - - // Keep last - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - Cas.Flush(); - Cas.CollectGarbage(GcCtx); - - CHECK(!Cas.HaveChunk(ChunkHashes[0])); - CHECK(!Cas.HaveChunk(ChunkHashes[1])); - CHECK(!Cas.HaveChunk(ChunkHashes[2])); - CHECK(!Cas.HaveChunk(ChunkHashes[3])); - CHECK(!Cas.HaveChunk(ChunkHashes[4])); - CHECK(!Cas.HaveChunk(ChunkHashes[5])); - CHECK(!Cas.HaveChunk(ChunkHashes[6])); - CHECK(!Cas.HaveChunk(ChunkHashes[7])); - CHECK(Cas.HaveChunk(ChunkHashes[8])); - - ValidateChunkExists(8); - - Cas.InsertChunk(Chunks[1], ChunkHashes[1]); - Cas.InsertChunk(Chunks[2], ChunkHashes[2]); - Cas.InsertChunk(Chunks[3], ChunkHashes[3]); - Cas.InsertChunk(Chunks[4], ChunkHashes[4]); - Cas.InsertChunk(Chunks[5], ChunkHashes[5]); - Cas.InsertChunk(Chunks[6], ChunkHashes[6]); - Cas.InsertChunk(Chunks[7], ChunkHashes[7]); - } - - // Keep mixed - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[1]); - KeepChunks.push_back(ChunkHashes[4]); - KeepChunks.push_back(ChunkHashes[7]); - GcCtx.AddRetainedCids(KeepChunks); - - Cas.Flush(); - Cas.CollectGarbage(GcCtx); - - CHECK(!Cas.HaveChunk(ChunkHashes[0])); - CHECK(Cas.HaveChunk(ChunkHashes[1])); - CHECK(!Cas.HaveChunk(ChunkHashes[2])); - CHECK(!Cas.HaveChunk(ChunkHashes[3])); - CHECK(Cas.HaveChunk(ChunkHashes[4])); - CHECK(!Cas.HaveChunk(ChunkHashes[5])); - CHECK(!Cas.HaveChunk(ChunkHashes[6])); - CHECK(Cas.HaveChunk(ChunkHashes[7])); - CHECK(!Cas.HaveChunk(ChunkHashes[8])); - - ValidateChunkExists(1); - ValidateChunkExists(4); - ValidateChunkExists(7); - - Cas.InsertChunk(Chunks[0], ChunkHashes[0]); - Cas.InsertChunk(Chunks[2], ChunkHashes[2]); - Cas.InsertChunk(Chunks[3], ChunkHashes[3]); - Cas.InsertChunk(Chunks[5], ChunkHashes[5]); - Cas.InsertChunk(Chunks[6], ChunkHashes[6]); - Cas.InsertChunk(Chunks[8], ChunkHashes[8]); - } - - // Keep multiple at end - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[6]); - KeepChunks.push_back(ChunkHashes[7]); - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - Cas.Flush(); - Cas.CollectGarbage(GcCtx); - - CHECK(!Cas.HaveChunk(ChunkHashes[0])); - CHECK(!Cas.HaveChunk(ChunkHashes[1])); - CHECK(!Cas.HaveChunk(ChunkHashes[2])); - CHECK(!Cas.HaveChunk(ChunkHashes[3])); - CHECK(!Cas.HaveChunk(ChunkHashes[4])); - CHECK(!Cas.HaveChunk(ChunkHashes[5])); - CHECK(Cas.HaveChunk(ChunkHashes[6])); - CHECK(Cas.HaveChunk(ChunkHashes[7])); - CHECK(Cas.HaveChunk(ChunkHashes[8])); - - ValidateChunkExists(6); - ValidateChunkExists(7); - ValidateChunkExists(8); - - Cas.InsertChunk(Chunks[0], ChunkHashes[0]); - Cas.InsertChunk(Chunks[1], ChunkHashes[1]); - Cas.InsertChunk(Chunks[2], ChunkHashes[2]); - Cas.InsertChunk(Chunks[3], ChunkHashes[3]); - Cas.InsertChunk(Chunks[4], ChunkHashes[4]); - Cas.InsertChunk(Chunks[5], ChunkHashes[5]); - } - - // Keep every other - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[0]); - KeepChunks.push_back(ChunkHashes[2]); - KeepChunks.push_back(ChunkHashes[4]); - KeepChunks.push_back(ChunkHashes[6]); - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - Cas.Flush(); - Cas.CollectGarbage(GcCtx); - - CHECK(Cas.HaveChunk(ChunkHashes[0])); - CHECK(!Cas.HaveChunk(ChunkHashes[1])); - CHECK(Cas.HaveChunk(ChunkHashes[2])); - CHECK(!Cas.HaveChunk(ChunkHashes[3])); - CHECK(Cas.HaveChunk(ChunkHashes[4])); - CHECK(!Cas.HaveChunk(ChunkHashes[5])); - CHECK(Cas.HaveChunk(ChunkHashes[6])); - CHECK(!Cas.HaveChunk(ChunkHashes[7])); - CHECK(Cas.HaveChunk(ChunkHashes[8])); - - ValidateChunkExists(0); - ValidateChunkExists(2); - ValidateChunkExists(4); - ValidateChunkExists(6); - ValidateChunkExists(8); - - Cas.InsertChunk(Chunks[1], ChunkHashes[1]); - Cas.InsertChunk(Chunks[3], ChunkHashes[3]); - Cas.InsertChunk(Chunks[5], ChunkHashes[5]); - Cas.InsertChunk(Chunks[7], ChunkHashes[7]); - } - - // Verify that we nicely appended blocks even after all GC operations - ValidateChunkExists(0); - ValidateChunkExists(1); - ValidateChunkExists(2); - ValidateChunkExists(3); - ValidateChunkExists(4); - ValidateChunkExists(5); - ValidateChunkExists(6); - ValidateChunkExists(7); - ValidateChunkExists(8); - } -} - -TEST_CASE("compactcas.gc.deleteblockonopen") -{ - ScopedTemporaryDirectory TempDir; - - uint64_t ChunkSizes[20] = {128, 541, 311, 181, 218, 37, 4, 397, 5, 92, 551, 721, 31, 92, 16, 99, 131, 41, 541, 84}; - std::vector<IoBuffer> Chunks; - Chunks.reserve(20); - for (uint64_t Size : ChunkSizes) - { - Chunks.push_back(CreateRandomBlob(Size)); - } - - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(20); - for (const IoBuffer& Chunk : Chunks) - { - ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); - } - - { - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "test", 1024, 16, true); - - for (size_t i = 0; i < 20; i++) - { - CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New); - } - - // GC every other block - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - for (size_t i = 0; i < 20; i += 2) - { - KeepChunks.push_back(ChunkHashes[i]); - } - GcCtx.AddRetainedCids(KeepChunks); - - Cas.Flush(); - Cas.CollectGarbage(GcCtx); - - for (size_t i = 0; i < 20; i += 2) - { - CHECK(Cas.HaveChunk(ChunkHashes[i])); - CHECK(!Cas.HaveChunk(ChunkHashes[i + 1])); - CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i]))); - } - } - } - { - // Re-open - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "test", 1024, 16, false); - - for (size_t i = 0; i < 20; i += 2) - { - CHECK(Cas.HaveChunk(ChunkHashes[i])); - CHECK(!Cas.HaveChunk(ChunkHashes[i + 1])); - CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i]))); - } - } -} - -TEST_CASE("compactcas.gc.handleopeniobuffer") -{ - ScopedTemporaryDirectory TempDir; - - uint64_t ChunkSizes[20] = {128, 541, 311, 181, 218, 37, 4, 397, 5, 92, 551, 721, 31, 92, 16, 99, 131, 41, 541, 84}; - std::vector<IoBuffer> Chunks; - Chunks.reserve(20); - for (const uint64_t& Size : ChunkSizes) - { - Chunks.push_back(CreateRandomBlob(Size)); - } - - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(20); - for (const IoBuffer& Chunk : Chunks) - { - ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); - } - - GcManager Gc; - CasContainerStrategy Cas(Gc); - Cas.Initialize(TempDir.Path(), "test", 1024, 16, true); - - for (size_t i = 0; i < 20; i++) - { - CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New); - } - - IoBuffer RetainChunk = Cas.FindChunk(ChunkHashes[5]); - Cas.Flush(); - - // GC everything - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - Cas.CollectGarbage(GcCtx); - - for (size_t i = 0; i < 20; i++) - { - CHECK(!Cas.HaveChunk(ChunkHashes[i])); - } - - CHECK(ChunkHashes[5] == IoHash::HashBuffer(RetainChunk)); -} - -TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType) +TEST_CASE("compactcas.threadedinsert") { // for (uint32_t i = 0; i < 100; ++i) { @@ -2004,56 +1438,41 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType) const tsl::robin_set<IoHash, IoHash::Hasher>& ChunksToDelete, const std::vector<IoHash>& KeepHashes, tsl::robin_set<IoHash, IoHash::Hasher>& GcChunkHashes) { - if (GCV2::Enabled) - { - std::atomic_bool IsCancelledFlag = false; - GcCtx Ctx = {.Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true, - .CompactBlockUsageThresholdPercent = 100}, - .IsCancelledFlag = IsCancelledFlag}; - GcReferenceStoreStats PrunerStats; - GcReferencePruner* Pruner = Cas.CreateReferencePruner(Ctx, PrunerStats); - CHECK(Pruner); - HashKeySet Deleted; - GcStats Stats; - GcStoreCompactor* Compactor = - Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> { - std::vector<IoHash> Unreferenced; - HashKeySet Retain; - Retain.AddHashesToSet(KeepHashes); - for (const IoHash& ChunkHash : References) + std::atomic_bool IsCancelledFlag = false; + GcCtx Ctx = {.Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true, + .CompactBlockUsageThresholdPercent = 100}, + .IsCancelledFlag = IsCancelledFlag}; + GcReferenceStoreStats PrunerStats; + GcReferencePruner* Pruner = Cas.CreateReferencePruner(Ctx, PrunerStats); + CHECK(Pruner); + HashKeySet Deleted; + GcStats Stats; + GcStoreCompactor* Compactor = + Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> { + std::vector<IoHash> Unreferenced; + HashKeySet Retain; + Retain.AddHashesToSet(KeepHashes); + for (const IoHash& ChunkHash : References) + { + if (!Retain.ContainsHash(ChunkHash)) { - if (!Retain.ContainsHash(ChunkHash)) - { - Unreferenced.push_back(ChunkHash); - } + Unreferenced.push_back(ChunkHash); } - Deleted.AddHashesToSet(Unreferenced); - return Unreferenced; - }); - if (Compactor) - { - Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) { - CHECK(ChunksToDelete.contains(ChunkHash)); - GcChunkHashes.erase(ChunkHash); - }); - GcCompactStoreStats CompactStats; - Compactor->CompactStore(Ctx, CompactStats, []() { return 0; }); - } - } - else + } + Deleted.AddHashesToSet(Unreferenced); + return Unreferenced; + }); + if (Compactor) { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - GcCtx.AddRetainedCids(KeepHashes); - Cas.CollectGarbage(GcCtx); - const HashKeySet& Deleted = GcCtx.DeletedCids(); Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) { CHECK(ChunksToDelete.contains(ChunkHash)); GcChunkHashes.erase(ChunkHash); }); + GcCompactStoreStats CompactStats; + Compactor->CompactStore(Ctx, CompactStats, []() { return 0; }); } }; diff --git a/src/zenstore/compactcas.h b/src/zenstore/compactcas.h index db6b4c914..44567e7a0 100644 --- a/src/zenstore/compactcas.h +++ b/src/zenstore/compactcas.h @@ -69,7 +69,6 @@ struct CasContainerStrategy final : public GcStorage, public GcReferenceStore // GcStorage virtual void ScrubStorage(ScrubContext& ScrubCtx) override; - virtual void CollectGarbage(GcContext& GcCtx) override; virtual GcStorageSize StorageSize() const override; virtual std::string GetGcName(GcCtx& Ctx) override; diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 71eead9b2..57b42beb2 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1073,101 +1073,6 @@ FileCasStrategy::ScrubStorage(ScrubContext& Ctx) NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } -void -FileCasStrategy::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("FileCas::CollectGarbage"); - - ZEN_ASSERT(m_IsInitialized); - - if (GcCtx.SkipCid()) - { - return; - } - - ZEN_DEBUG("collecting garbage from {}", m_RootDirectory); - - std::vector<IoHash> ChunksToDelete; - std::atomic<uint64_t> ChunksToDeleteBytes{0}; - std::atomic<uint64_t> ChunkCount{0}, ChunkBytes{0}; - - std::vector<IoHash> CandidateCas; - CandidateCas.resize(1); - - uint64_t DeletedCount = 0; - uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); - - Stopwatch TotalTimer; - const auto _ = MakeGuard([&] { - ZEN_DEBUG("garbage collect for '{}' DONE after {}, deleted {} out of {} files, removed {} out of {}", - m_RootDirectory, - NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), - DeletedCount, - ChunkCount.load(), - NiceBytes(OldTotalSize - m_TotalSize.load(std::memory_order::relaxed)), - NiceBytes(OldTotalSize)); - }); - - { - ZEN_TRACE_CPU("FileCas::CollectGarbage::Filter"); - IterateChunks([&](const IoHash& Hash, uint64_t Size) { - bool KeepThis = false; - CandidateCas[0] = Hash; - GcCtx.FilterCids(CandidateCas, [&](const IoHash& Hash) { - ZEN_UNUSED(Hash); - KeepThis = true; - }); - - if (!KeepThis) - { - ChunksToDelete.push_back(Hash); - ChunksToDeleteBytes.fetch_add(Size); - } - - ++ChunkCount; - ChunkBytes.fetch_add(Size); - }); - } - - // TODO, any entires we did not encounter during our IterateChunks should be removed from the index - - if (ChunksToDelete.empty()) - { - ZEN_DEBUG("gc for '{}' SKIPPED, nothing to delete", m_RootDirectory); - return; - } - - ZEN_DEBUG("deleting file CAS garbage for '{}': {} out of {} chunks ({})", - m_RootDirectory, - ChunksToDelete.size(), - ChunkCount.load(), - NiceBytes(ChunksToDeleteBytes)); - - if (GcCtx.IsDeletionMode() == false) - { - ZEN_DEBUG("NOTE: not actually deleting anything since deletion is disabled"); - - return; - } - - for (const IoHash& Hash : ChunksToDelete) - { - ZEN_TRACE("deleting chunk {}", Hash); - - std::error_code Ec; - DeleteChunk(Hash, Ec); - - if (Ec) - { - ZEN_WARN("gc for '{}' failed to delete file for chunk {}: '{}'", m_RootDirectory, Hash, Ec.message()); - continue; - } - DeletedCount++; - } - - GcCtx.AddDeletedCids(ChunksToDelete); -} - GcStorageSize FileCasStrategy::StorageSize() const { @@ -1834,81 +1739,6 @@ TEST_CASE("cas.file.move") # endif } -TEST_CASE("cas.file.gc") -{ - // specifying an absolute path here can be helpful when using procmon to dig into things - ScopedTemporaryDirectory TempDir; // {"d:\\filecas_testdir"}; - - GcManager Gc; - FileCasStrategy FileCas(Gc); - FileCas.Initialize(TempDir.Path() / "cas", /* IsNewStore */ true); - - const int kIterationCount = 100; - std::vector<IoHash> Keys{kIterationCount}; - - auto InsertChunks = [&] { - for (int i = 0; i < kIterationCount; ++i) - { - CbObjectWriter Cbo; - Cbo << "id" << i; - CbObject Obj = Cbo.Save(); - - IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer(); - IoHash Hash = IoHash::HashBuffer(ObjBuffer); - - FileCas.InsertChunk(ObjBuffer, Hash); - - Keys[i] = Hash; - } - }; - - // Drop everything - - { - InsertChunks(); - - GcContext Ctx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - FileCas.CollectGarbage(Ctx); - - for (const IoHash& Key : Keys) - { - IoBuffer Chunk = FileCas.FindChunk(Key); - - CHECK(!Chunk); - } - } - - // Keep roughly half of the chunks - - { - InsertChunks(); - - GcContext Ctx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - - for (const IoHash& Key : Keys) - { - if (Key.Hash[0] & 1) - { - Ctx.AddRetainedCids(std::vector<IoHash>{Key}); - } - } - - FileCas.CollectGarbage(Ctx); - - for (const IoHash& Key : Keys) - { - if (Key.Hash[0] & 1) - { - CHECK(FileCas.FindChunk(Key)); - } - else - { - CHECK(!FileCas.FindChunk(Key)); - } - } - } -} - #endif void diff --git a/src/zenstore/filecas.h b/src/zenstore/filecas.h index 07fc36954..ff7126325 100644 --- a/src/zenstore/filecas.h +++ b/src/zenstore/filecas.h @@ -44,7 +44,6 @@ struct FileCasStrategy final : public GcStorage, public GcReferenceStore WorkerThreadPool* OptionalWorkerPool); void Flush(); virtual void ScrubStorage(ScrubContext& ScrubCtx) override; - virtual void CollectGarbage(GcContext& GcCtx) override; virtual GcStorageSize StorageSize() const override; virtual std::string GetGcName(GcCtx& Ctx) override; diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 1276b9b4c..981ba15cb 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -196,152 +196,6 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) ////////////////////////////////////////////////////////////////////////// -struct GcContext::GcState -{ - using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>; - - CacheKeyContexts m_ExpiredCacheKeys; - HashKeySet m_RetainedCids; - HashKeySet m_DeletedCids; - GcClock::TimePoint m_CacheExpireTime; - GcClock::TimePoint m_ProjectStoreExpireTime; - bool m_DeletionMode = true; - bool m_CollectSmallObjects = false; - bool m_SkipCid = false; - - std::filesystem::path DiskReservePath; -}; - -GcContext::GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime) -: m_State(std::make_unique<GcState>()) -{ - m_State->m_CacheExpireTime = CacheExpireTime; - m_State->m_ProjectStoreExpireTime = ProjectStoreExpireTime; -} - -GcContext::~GcContext() -{ -} - -void -GcContext::AddRetainedCids(std::span<const IoHash> Cids) -{ - m_State->m_RetainedCids.AddHashesToSet(Cids); -} - -void -GcContext::SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys) -{ - m_State->m_ExpiredCacheKeys[CacheKeyContext] = std::move(ExpiredKeys); -} - -void -GcContext::IterateCids(std::function<void(const IoHash&)> Callback) -{ - m_State->m_RetainedCids.IterateHashes([&](const IoHash& Hash) { Callback(Hash); }); -} - -void -GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc) -{ - m_State->m_RetainedCids.FilterHashes(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); }); -} - -void -GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc) -{ - m_State->m_RetainedCids.FilterHashes(Cid, std::move(FilterFunc)); -} - -void -GcContext::AddDeletedCids(std::span<const IoHash> Cas) -{ - m_State->m_DeletedCids.AddHashesToSet(Cas); -} - -const HashKeySet& -GcContext::DeletedCids() -{ - return m_State->m_DeletedCids; -} - -std::span<const IoHash> -GcContext::ExpiredCacheKeys(const std::string& CacheKeyContext) const -{ - return m_State->m_ExpiredCacheKeys[CacheKeyContext]; -} - -bool -GcContext::SkipCid() const -{ - return m_State->m_SkipCid; -} - -void -GcContext::SetSkipCid(bool NewState) -{ - m_State->m_SkipCid = NewState; -} - -bool -GcContext::IsDeletionMode() const -{ - return m_State->m_DeletionMode; -} - -void -GcContext::SetDeletionMode(bool NewState) -{ - m_State->m_DeletionMode = NewState; -} - -bool -GcContext::CollectSmallObjects() const -{ - return m_State->m_CollectSmallObjects; -} - -void -GcContext::CollectSmallObjects(bool NewState) -{ - m_State->m_CollectSmallObjects = NewState; -} - -GcClock::TimePoint -GcContext::CacheExpireTime() const -{ - return m_State->m_CacheExpireTime; -} - -GcClock::TimePoint -GcContext::ProjectStoreExpireTime() const -{ - return m_State->m_ProjectStoreExpireTime; -} - -void -GcContext::DiskReservePath(const std::filesystem::path& Path) -{ - m_State->DiskReservePath = Path; -} - -uint64_t -GcContext::ClaimGCReserve() -{ - if (!std::filesystem::is_regular_file(m_State->DiskReservePath)) - { - return 0; - } - uint64_t ReclaimedSize = std::filesystem::file_size(m_State->DiskReservePath); - if (std::filesystem::remove(m_State->DiskReservePath)) - { - return ReclaimedSize; - } - return 0; -} - -////////////////////////////////////////////////////////////////////////// - GcManager::GcManager() : m_Log(logging::Get("gc")) { } @@ -1287,20 +1141,6 @@ GcManager::SetCancelGC(bool CancelFlag) } void -GcManager::AddGcContributor(GcContributor* Contributor) -{ - RwLock::ExclusiveLockScope _(m_Lock); - m_GcContribs.push_back(Contributor); -} - -void -GcManager::RemoveGcContributor(GcContributor* Contributor) -{ - RwLock::ExclusiveLockScope _(m_Lock); - std::erase_if(m_GcContribs, [&](GcContributor* $) { return $ == Contributor; }); -} - -void GcManager::AddGcStorage(GcStorage* Storage) { ZEN_ASSERT(Storage != nullptr); @@ -1327,58 +1167,6 @@ GcManager::ScrubStorage(ScrubContext& GcCtx) } GcStorageSize -GcManager::CollectGarbage(GcContext& GcCtx) -{ - ZEN_TRACE_CPU("Gc::CollectGarbage"); - - GcStorageSize GCTotalSizeDiff; - - RwLock::SharedLockScope _(m_Lock); - - // First gather reference set - { - ZEN_TRACE_CPU("Gc::CollectGarbage::GatherReferences"); - Stopwatch Timer; - const auto Guard = MakeGuard([&] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - for (GcContributor* Contributor : m_GcContribs) - { - if (CheckGCCancel()) - { - return GCTotalSizeDiff; - } - Contributor->GatherReferences(GcCtx); - } - } - - // Then trim storage - { - ZEN_TRACE_CPU("Gc::CollectGarbage::CollectGarbage"); - - Stopwatch Timer; - const auto Guard = MakeGuard([&] { - ZEN_INFO("collected garbage in {}. Removed {} disk space, {} memory", - NiceTimeSpanMs(Timer.GetElapsedTimeMs()), - NiceBytes(GCTotalSizeDiff.DiskSize), - NiceBytes(GCTotalSizeDiff.MemorySize)); - }); - for (GcStorage* Storage : m_GcStorage) - { - if (CheckGCCancel()) - { - break; - } - - const auto PreSize = Storage->StorageSize(); - Storage->CollectGarbage(GcCtx); - const auto PostSize = Storage->StorageSize(); - GCTotalSizeDiff.DiskSize += PreSize.DiskSize > PostSize.DiskSize ? PreSize.DiskSize - PostSize.DiskSize : 0; - GCTotalSizeDiff.MemorySize += PreSize.MemorySize > PostSize.MemorySize ? PreSize.MemorySize - PostSize.MemorySize : 0; - } - } - return GCTotalSizeDiff; -} - -GcStorageSize GcManager::TotalStorageSize() const { ZEN_TRACE_CPU("Gc::TotalStorageSize"); @@ -2319,14 +2107,10 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, try { - GcContext GcCtx(CacheExpireTime, ProjectStoreExpireTime); - GcCtx.SetDeletionMode(Delete); - GcCtx.SetSkipCid(SkipCid); - GcCtx.CollectSmallObjects(CollectSmallObjects); - GcCtx.DiskReservePath(m_Config.RootDirectory / "reserve.gc"); + const std::filesystem::path DiskReservePath = m_Config.RootDirectory / "reserve.gc"; auto ReclaimDiskReserve = [&]() { - std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); + std::error_code Ec = CreateGCReserve(DiskReservePath, m_Config.DiskReserveSize); if (Ec) { ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'", @@ -2344,7 +2128,20 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, if (m_AreDiskWritesBlocked.load()) { // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC - uint64_t ReleasedSpace = GcCtx.ClaimGCReserve(); + auto ClaimDiskReserve = [&]() -> uint64_t { + if (!std::filesystem::is_regular_file(DiskReservePath)) + { + return 0; + } + uint64_t ReclaimedSize = std::filesystem::file_size(DiskReservePath); + if (std::filesystem::remove(DiskReservePath)) + { + return ReclaimedSize; + } + return 0; + }; + + uint64_t ReleasedSpace = ClaimDiskReserve(); if (ReleasedSpace == 0) { ZEN_WARN( @@ -2364,23 +2161,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, GcStorageSize Diff; switch (UseGCVersion) { - case GcVersion::kV1: - ZEN_INFO( - "GCV1: Garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time " - "{}", - GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv, - SkipCid ? "skip"sv : "include"sv, - CacheExpireTime, - ProjectStoreExpireTime); - Diff = m_GcManager.CollectGarbage(GcCtx); - if (SkipCid) - { - m_LastLightweightGCV2Result.reset(); - } - else - { - m_LastFullGCV2Result.reset(); - } + case GcVersion::kV1_Deprecated: + ZEN_WARN("GCV1: Depreated - no GC will be executed"); break; case GcVersion::kV2: { @@ -2394,7 +2176,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .Verbose = Verbose, .SingleThread = SingleThreaded, .CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent, - .DiskReservePath = m_Config.RootDirectory / "reserve.gc", + .DiskReservePath = DiskReservePath, .AttachmentRangeMin = AttachmentRangeMin, .AttachmentRangeMax = AttachmentRangeMax, .StoreCacheAttachmentMetaData = StoreCacheAttachmentMetaData, @@ -2609,248 +2391,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, #if ZEN_WITH_TESTS -namespace gc::impl { - static CompressedBuffer Compress(IoBuffer Buffer) - { - return CompressedBuffer::Compress(SharedBuffer::MakeView(Buffer.GetData(), Buffer.GetSize())); - } -} // namespace gc::impl - -TEST_CASE("gc.basic") -{ - using namespace gc::impl; - - ScopedTemporaryDirectory TempDir; - - CidStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path() / "cas"; - - GcManager Gc; - CidStore CidStore(Gc); - - CidStore.Initialize(CasConfig); - - IoBuffer Chunk = CreateRandomBlob(128); - auto CompressedChunk = Compress(Chunk); - - const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash()); - CHECK(InsertResult.New); - - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - - CidStore.Flush(); - Gc.CollectGarbage(GcCtx); - - CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); -} - -TEST_CASE("gc.full") -{ - using namespace gc::impl; - - ScopedTemporaryDirectory TempDir; - - CidStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path() / "cas"; - - GcManager Gc; - std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc); - - CasStore->Initialize(CasConfig); - - uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5}; - IoBuffer Chunks[9] = {CreateRandomBlob(ChunkSizes[0]), - CreateRandomBlob(ChunkSizes[1]), - CreateRandomBlob(ChunkSizes[2]), - CreateRandomBlob(ChunkSizes[3]), - CreateRandomBlob(ChunkSizes[4]), - CreateRandomBlob(ChunkSizes[5]), - CreateRandomBlob(ChunkSizes[6]), - CreateRandomBlob(ChunkSizes[7]), - CreateRandomBlob(ChunkSizes[8])}; - IoHash ChunkHashes[9] = { - IoHash::HashBuffer(Chunks[0].Data(), Chunks[0].Size()), - IoHash::HashBuffer(Chunks[1].Data(), Chunks[1].Size()), - IoHash::HashBuffer(Chunks[2].Data(), Chunks[2].Size()), - IoHash::HashBuffer(Chunks[3].Data(), Chunks[3].Size()), - IoHash::HashBuffer(Chunks[4].Data(), Chunks[4].Size()), - IoHash::HashBuffer(Chunks[5].Data(), Chunks[5].Size()), - IoHash::HashBuffer(Chunks[6].Data(), Chunks[6].Size()), - IoHash::HashBuffer(Chunks[7].Data(), Chunks[7].Size()), - IoHash::HashBuffer(Chunks[8].Data(), Chunks[8].Size()), - }; - - CasStore->InsertChunk(Chunks[0], ChunkHashes[0]); - CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); - CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); - CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); - CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); - CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); - CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); - CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); - CasStore->InsertChunk(Chunks[8], ChunkHashes[8]); - - CidStoreSize InitialSize = CasStore->TotalSize(); - - // Keep first and last - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[0]); - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - CasStore->Flush(); - Gc.CollectGarbage(GcCtx); - - CHECK(CasStore->ContainsChunk(ChunkHashes[0])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[1])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[4])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[6])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[7])); - CHECK(CasStore->ContainsChunk(ChunkHashes[8])); - - CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); - } - - CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); - CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); - CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); - CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); - CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); - CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); - CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); - - // Keep last - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - CasStore->Flush(); - Gc.CollectGarbage(GcCtx); - - CHECK(!CasStore->ContainsChunk(ChunkHashes[0])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[1])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[4])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[6])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[7])); - CHECK(CasStore->ContainsChunk(ChunkHashes[8])); - - CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); - - CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); - CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); - CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); - CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); - CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); - CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); - CasStore->InsertChunk(Chunks[7], ChunkHashes[7]); - } - - // Keep mixed - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[1]); - KeepChunks.push_back(ChunkHashes[4]); - KeepChunks.push_back(ChunkHashes[7]); - GcCtx.AddRetainedCids(KeepChunks); - - CasStore->Flush(); - Gc.CollectGarbage(GcCtx); - - CHECK(!CasStore->ContainsChunk(ChunkHashes[0])); - CHECK(CasStore->ContainsChunk(ChunkHashes[1])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); - CHECK(CasStore->ContainsChunk(ChunkHashes[4])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[6])); - CHECK(CasStore->ContainsChunk(ChunkHashes[7])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[8])); - - CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1]))); - CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4]))); - CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7]))); - - CasStore->InsertChunk(Chunks[0], ChunkHashes[0]); - CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); - CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); - CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); - CasStore->InsertChunk(Chunks[6], ChunkHashes[6]); - CasStore->InsertChunk(Chunks[8], ChunkHashes[8]); - } - - // Keep multiple at end - { - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - std::vector<IoHash> KeepChunks; - KeepChunks.push_back(ChunkHashes[6]); - KeepChunks.push_back(ChunkHashes[7]); - KeepChunks.push_back(ChunkHashes[8]); - GcCtx.AddRetainedCids(KeepChunks); - - CasStore->Flush(); - Gc.CollectGarbage(GcCtx); - - CHECK(!CasStore->ContainsChunk(ChunkHashes[0])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[1])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[2])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[3])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[4])); - CHECK(!CasStore->ContainsChunk(ChunkHashes[5])); - CHECK(CasStore->ContainsChunk(ChunkHashes[6])); - CHECK(CasStore->ContainsChunk(ChunkHashes[7])); - CHECK(CasStore->ContainsChunk(ChunkHashes[8])); - - CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6]))); - CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); - - CasStore->InsertChunk(Chunks[0], ChunkHashes[0]); - CasStore->InsertChunk(Chunks[1], ChunkHashes[1]); - CasStore->InsertChunk(Chunks[2], ChunkHashes[2]); - CasStore->InsertChunk(Chunks[3], ChunkHashes[3]); - CasStore->InsertChunk(Chunks[4], ChunkHashes[4]); - CasStore->InsertChunk(Chunks[5], ChunkHashes[5]); - } - - // Verify that we nicely appended blocks even after all GC operations - CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0]))); - CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1]))); - CHECK(ChunkHashes[2] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[2]))); - CHECK(ChunkHashes[3] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[3]))); - CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4]))); - CHECK(ChunkHashes[5] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[5]))); - CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6]))); - CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8]))); - - auto FinalSize = CasStore->TotalSize(); - - CHECK_LE(InitialSize.TinySize, FinalSize.TinySize); - CHECK_GE(InitialSize.TinySize + (1u << 28), FinalSize.TinySize); -} - TEST_CASE("gc.diskusagewindow") { - using namespace gc::impl; - DiskUsageWindow Stats; Stats.Append({.SampleTime = 0, .DiskUsage = 0}); // 0 0 Stats.Append({.SampleTime = 10, .DiskUsage = 10}); // 1 10 @@ -3008,35 +2550,6 @@ TEST_CASE("gc.diskusagewindow") } } -TEST_CASE("scrub.basic") -{ - using namespace gc::impl; - - ScopedTemporaryDirectory TempDir; - - CidStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path() / "cas"; - - GcManager Gc; - CidStore CidStore(Gc); - - CidStore.Initialize(CasConfig); - - IoBuffer Chunk = CreateRandomBlob(128); - auto CompressedChunk = Compress(Chunk); - - const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash()); - CHECK(InsertResult.New); - - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); - GcCtx.CollectSmallObjects(true); - - CidStore.Flush(); - Gc.CollectGarbage(GcCtx); - - CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash())); -} - TEST_CASE("gc.keepunusedreferences") { // Order is important, this is the order the hashes would be sorted by FilterReferences diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index a98ca5375..d4c2be73f 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -127,13 +127,12 @@ public: typedef std::vector<std::pair<size_t, BlockStoreLocation>> MovedChunksArray; typedef std::vector<size_t> ChunkIndexArray; - typedef std::function<void(const MovedChunksArray& MovedChunks, const ChunkIndexArray& RemovedChunks)> ReclaimCallback; - typedef std::function<bool(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback; - typedef std::function<uint64_t()> ClaimDiskReserveCallback; - typedef std::function<bool(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback; - typedef std::function<bool(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; - typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback; - typedef std::function<bool(uint32_t BlockIndex, std::span<const size_t> ChunkIndexes)> IterateChunksCallback; + typedef std::function<bool(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback; + typedef std::function<uint64_t()> ClaimDiskReserveCallback; + typedef std::function<bool(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback; + typedef std::function<bool(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; + typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback; + typedef std::function<bool(uint32_t BlockIndex, std::span<const size_t> ChunkIndexes)> IterateChunksCallback; struct BlockUsageInfo { @@ -162,16 +161,6 @@ public: IoBuffer TryGetChunk(const BlockStoreLocation& Location) const; void Flush(bool ForceNewBlock); - ReclaimSnapshotState GetReclaimSnapshotState(); - void ReclaimSpace( - const ReclaimSnapshotState& Snapshot, - const std::vector<BlockStoreLocation>& ChunkLocations, - const ChunkIndexArray& KeepChunkIndexes, - uint32_t PayloadAlignment, - bool DryRun, - const ReclaimCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&) {}, - const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; }); - bool IterateChunks(const std::span<const BlockStoreLocation>& ChunkLocations, const IterateChunksCallback& Callback); bool IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index f38776b6e..f8ce8641c 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -183,8 +183,6 @@ public: bool DropBucket(std::string_view Bucket); void Flush(); void ScrubStorage(ScrubContext& Ctx); - void GatherReferences(GcContext& GcCtx); - void CollectGarbage(GcContext& GcCtx); void DiscoverBuckets(); GcStorageSize StorageSize() const; @@ -231,8 +229,6 @@ public: bool Drop(); void Flush(); void ScrubStorage(ScrubContext& Ctx); - void GatherReferences(GcContext& GcCtx); - void CollectGarbage(GcContext& GcCtx); RwLock::SharedLockScope GetGcReferencerLock(); bool GetReferences(GcCtx& Ctx, bool StateIsAlreadyLocked, std::vector<IoHash>& OutReferences); diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 9160db667..50e40042a 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -49,7 +49,7 @@ class JobQueue; */ -class ZenCacheNamespace final : public GcStorage, public GcContributor +class ZenCacheNamespace final : public GcStorage { public: struct Configuration @@ -104,12 +104,8 @@ public: bool Drop(); void Flush(); - // GcContributor - virtual void GatherReferences(GcContext& GcCtx) override; - // GcStorage virtual void ScrubStorage(ScrubContext& ScrubCtx) override; - virtual void CollectGarbage(GcContext& GcCtx) override; virtual GcStorageSize StorageSize() const override; Configuration GetConfig() const { return m_Configuration; } diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index b79f1b9df..e191a0930 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -283,64 +283,6 @@ public: //////// End GC V2 -/** Garbage Collection context object - */ -class GcContext -{ -public: - GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime); - ~GcContext(); - - void AddRetainedCids(std::span<const IoHash> Cid); - void SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys); - - void IterateCids(std::function<void(const IoHash&)> Callback); - - void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc); - void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc); - - void AddDeletedCids(std::span<const IoHash> Cas); - const HashKeySet& DeletedCids(); - - std::span<const IoHash> ExpiredCacheKeys(const std::string& CacheKeyContext) const; - - bool SkipCid() const; - void SetSkipCid(bool NewState); - - bool IsDeletionMode() const; - void SetDeletionMode(bool NewState); - - bool CollectSmallObjects() const; - void CollectSmallObjects(bool NewState); - - GcClock::TimePoint CacheExpireTime() const; - GcClock::TimePoint ProjectStoreExpireTime() const; - - void DiskReservePath(const std::filesystem::path& Path); - uint64_t ClaimGCReserve(); - -private: - struct GcState; - - std::unique_ptr<GcState> m_State; -}; - -/** GC root contributor - - Higher level data structures provide roots for the garbage collector, - which ultimately determine what is garbage and what data we need to - retain. - - */ -class GcContributor -{ -public: - virtual void GatherReferences(GcContext& GcCtx) = 0; - -protected: - virtual ~GcContributor() {} -}; - struct GcStorageSize { uint64_t DiskSize{}; @@ -353,7 +295,6 @@ class GcStorage { public: virtual void ScrubStorage(ScrubContext& ScrubCtx) = 0; - virtual void CollectGarbage(GcContext& GcCtx) = 0; virtual GcStorageSize StorageSize() const = 0; protected: @@ -392,14 +333,10 @@ public: //////// End GC V2 - void AddGcContributor(GcContributor* Contributor); - void RemoveGcContributor(GcContributor* Contributor); - void AddGcStorage(GcStorage* Contributor); void RemoveGcStorage(GcStorage* Contributor); - GcStorageSize CollectGarbage(GcContext& GcCtx); - void ScrubStorage(ScrubContext& GcCtx); + void ScrubStorage(ScrubContext& GcCtx); GcStorageSize TotalStorageSize() const; @@ -407,14 +344,13 @@ public: void SetDiskWriteBlocker(const DiskWriteBlocker* Monitor) { m_DiskWriteBlocker = Monitor; } private: - bool CheckGCCancel() { return m_CancelGC.load(); } - LoggerRef Log() { return m_Log; } - LoggerRef m_Log; - mutable RwLock m_Lock; - std::vector<GcContributor*> m_GcContribs; - std::vector<GcStorage*> m_GcStorage; - CidStore* m_CidStore = nullptr; - const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; + bool CheckGCCancel() { return m_CancelGC.load(); } + LoggerRef Log() { return m_Log; } + LoggerRef m_Log; + mutable RwLock m_Lock; + std::vector<GcStorage*> m_GcStorage; + CidStore* m_CidStore = nullptr; + const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; std::vector<GcReferencer*> m_GcReferencers; std::vector<GcReferenceLocker*> m_GcReferencerLockers; @@ -432,7 +368,7 @@ enum class GcSchedulerStatus : uint32_t enum class GcVersion : uint32_t { - kV1, + kV1_Deprecated, kV2 }; @@ -449,7 +385,7 @@ struct GcSchedulerConfig uint64_t DiskSizeSoftLimit = 0; uint64_t MinimumFreeDiskSpaceToAllowWrites = 1ul << 28; std::chrono::seconds LightweightInterval{}; - GcVersion UseGCVersion = GcVersion::kV1; + GcVersion UseGCVersion = GcVersion::kV2; uint32_t CompactBlockUsageThresholdPercent = 90; bool Verbose = false; bool SingleThreaded = false; |