aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-10-03 16:42:57 +0200
committerGitHub Enterprise <[email protected]>2024-10-03 16:42:57 +0200
commitb13b5f48bb497aaf9f9f3d74aceb6e474cf12898 (patch)
tree24b1ed63ece11fb773a0ecf41ce6308969468198 /src
parent5.5.9-pre0 (diff)
downloadzen-b13b5f48bb497aaf9f9f3d74aceb6e474cf12898.tar.xz
zen-b13b5f48bb497aaf9f9f3d74aceb6e474cf12898.zip
remove gc v1 (#121)
* kill gc v1 * block use of gc v1 from zen command line * warn and flip to gcv2 if --gc-v2=false is specified for zenserver
Diffstat (limited to 'src')
-rw-r--r--src/zen/cmds/admin_cmd.cpp14
-rw-r--r--src/zenserver/admin/admin.cpp4
-rw-r--r--src/zenserver/config.cpp5
-rw-r--r--src/zenserver/projectstore/projectstore.cpp286
-rw-r--r--src/zenserver/projectstore/projectstore.h6
-rw-r--r--src/zenserver/zenserver.cpp2
-rw-r--r--src/zenstore/blockstore.cpp648
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp504
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp300
-rw-r--r--src/zenstore/cas.cpp11
-rw-r--r--src/zenstore/cas.h2
-rw-r--r--src/zenstore/compactcas.cpp639
-rw-r--r--src/zenstore/compactcas.h1
-rw-r--r--src/zenstore/filecas.cpp170
-rw-r--r--src/zenstore/filecas.h1
-rw-r--r--src/zenstore/gc.cpp525
-rw-r--r--src/zenstore/include/zenstore/blockstore.h23
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h4
-rw-r--r--src/zenstore/include/zenstore/cache/structuredcachestore.h6
-rw-r--r--src/zenstore/include/zenstore/gc.h84
20 files changed, 115 insertions, 3120 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp
index d7fc1710d..3b24b9078 100644
--- a/src/zen/cmds/admin_cmd.cpp
+++ b/src/zen/cmds/admin_cmd.cpp
@@ -100,8 +100,12 @@ GcCommand::GcCommand()
"Max disk usage size (in bytes)",
cxxopts::value(m_DiskSizeSoftLimit)->default_value("0"),
"<disksizesoftlimit>");
- m_Options
- .add_option("", "", "usegcv1", "Force use of GC version 1", cxxopts::value(m_ForceUseGCV1)->default_value("false"), "<usegcv2>");
+ m_Options.add_option("",
+ "",
+ "usegcv1",
+ "Force use of GC version 1. Deprecated, will do nothing.",
+ cxxopts::value(m_ForceUseGCV1)->default_value("false"),
+ "<usegcv1>");
m_Options
.add_option("", "", "usegcv2", "Force use of GC version 2", cxxopts::value(m_ForceUseGCV2)->default_value("false"), "<usegcv2>");
m_Options.add_option("",
@@ -182,11 +186,7 @@ GcCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
Params.Add({"skipdelete", m_SkipDelete ? "true" : "false"});
if (m_ForceUseGCV1)
{
- if (m_ForceUseGCV2)
- {
- throw OptionParseException("only usegcv1 or usegcv2 can be selected, not both");
- }
- Params.Add({"forceusegcv1", "true"});
+ throw OptionParseException("usegcv1 is deprecated and can no longer be used");
}
if (m_ForceUseGCV2)
{
diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp
index eb49bac51..f6311cf3e 100644
--- a/src/zenserver/admin/admin.cpp
+++ b/src/zenserver/admin/admin.cpp
@@ -287,7 +287,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler,
Response << "DiskSizeSoftLimit" << NiceBytes(State.Config.DiskSizeSoftLimit);
Response << "MinimumFreeDiskSpaceToAllowWrites" << NiceBytes(State.Config.MinimumFreeDiskSpaceToAllowWrites);
Response << "LightweightInterval" << ToTimeSpan(State.Config.LightweightInterval);
- Response << "UseGCVersion" << ((State.Config.UseGCVersion == GcVersion::kV1) ? "1" : "2");
+ Response << "UseGCVersion" << ((State.Config.UseGCVersion == GcVersion::kV1_Deprecated) ? "1" : "2");
Response << "CompactBlockUsageThresholdPercent" << State.Config.CompactBlockUsageThresholdPercent;
Response << "Verbose" << State.Config.Verbose;
Response << "SingleThreaded" << State.Config.SingleThreaded;
@@ -396,7 +396,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler,
if (auto Param = Params.GetValue("forceusegcv1"); Param.empty() == false)
{
- GcParams.ForceGCVersion = GcVersion::kV1;
+ GcParams.ForceGCVersion = GcVersion::kV1_Deprecated;
}
if (auto Param = Params.GetValue("forceusegcv2"); Param.empty() == false)
diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp
index 7466255a9..cda5aca16 100644
--- a/src/zenserver/config.cpp
+++ b/src/zenserver/config.cpp
@@ -171,6 +171,11 @@ ValidateOptions(ZenServerOptions& ServerOptions)
throw zen::OptionParseException(
fmt::format("GC attachment pass count can not be larger than {}", ZenGcConfig::GcMaxAttachmentPassCount));
}
+ if (ServerOptions.GcConfig.UseGCV2 == false)
+ {
+ ZEN_WARN("--gc-v2=false is deprecated, reverting to --gc-v2=true");
+ ServerOptions.GcConfig.UseGCV2 = true;
+ }
}
UpstreamCachePolicy
diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp
index 4a943a565..6dbdb7029 100644
--- a/src/zenserver/projectstore/projectstore.cpp
+++ b/src/zenserver/projectstore/projectstore.cpp
@@ -1082,30 +1082,6 @@ ProjectStore::Oplog::ScrubStorage(ScrubContext& Ctx)
}
}
-void
-ProjectStore::Oplog::GatherReferences(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Store::Oplog::GatherReferences");
- if (GcCtx.SkipCid())
- {
- return;
- }
-
- std::vector<IoHash> Cids;
- Cids.reserve(1024);
- IterateOplog(
- [&](CbObjectView Op) {
- Op.IterateAttachments([&](CbFieldView Visitor) { Cids.emplace_back(Visitor.AsAttachment()); });
- if (Cids.size() >= 1024)
- {
- GcCtx.AddRetainedCids(Cids);
- Cids.clear();
- }
- },
- Paging{});
- GcCtx.AddRetainedCids(Cids);
-}
-
uint64_t
ProjectStore::Oplog::TotalSize(const std::filesystem::path& BasePath)
{
@@ -3132,43 +3108,6 @@ ProjectStore::Project::ScrubStorage(ScrubContext& Ctx)
});
}
-void
-ProjectStore::Project::GatherReferences(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Project::GatherReferences");
-
- Stopwatch Timer;
- const auto Guard = MakeGuard([&] {
- ZEN_DEBUG("gathered references from project store project {} in {}", Identifier, NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
- });
-
- // GatherReferences needs to check all existing oplogs
- std::vector<std::string> OpLogs = ScanForOplogs();
- for (const std::string& OpLogId : OpLogs)
- {
- OpenOplog(OpLogId, /*AllowCompact*/ false);
- }
-
- {
- // Make sure any oplog at least have a last access time so they eventually will be GC:d if not touched
- RwLock::ExclusiveLockScope _(m_LastAccessTimesLock);
- for (const std::string& OpId : OpLogs)
- {
- if (auto It = m_LastAccessTimes.find(OpId); It == m_LastAccessTimes.end())
- {
- m_LastAccessTimes[OpId] = GcClock::TickCount();
- }
- }
- }
-
- IterateOplogs([&](const RwLock::SharedLockScope&, Oplog& Ops) {
- if (!IsExpired(GcCtx.ProjectStoreExpireTime(), Ops))
- {
- Ops.GatherReferences(GcCtx);
- }
- });
-}
-
uint64_t
ProjectStore::Project::TotalSize(const std::filesystem::path& BasePath)
{
@@ -3407,7 +3346,6 @@ ProjectStore::ProjectStore(CidStore& Store, std::filesystem::path BasePath, GcMa
{
ZEN_INFO("initializing project store at '{}'", m_ProjectBasePath);
// m_Log.set_level(spdlog::level::debug);
- m_Gc.AddGcContributor(this);
m_Gc.AddGcStorage(this);
m_Gc.AddGcReferencer(*this);
m_Gc.AddGcReferenceLocker(*this);
@@ -3419,7 +3357,6 @@ ProjectStore::~ProjectStore()
m_Gc.RemoveGcReferenceLocker(*this);
m_Gc.RemoveGcReferencer(*this);
m_Gc.RemoveGcStorage(this);
- m_Gc.RemoveGcContributor(this);
}
std::filesystem::path
@@ -3521,145 +3458,6 @@ ProjectStore::ScrubStorage(ScrubContext& Ctx)
}
}
-void
-ProjectStore::GatherReferences(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Store::GatherReferences");
-
- size_t ProjectCount = 0;
- size_t ExpiredProjectCount = 0;
- Stopwatch Timer;
- const auto Guard = MakeGuard([&] {
- ZEN_DEBUG("gathered references from '{}' in {}, found {} active projects and {} expired projects",
- m_ProjectBasePath.string(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
- ProjectCount,
- ExpiredProjectCount);
- });
-
- DiscoverProjects();
-
- std::vector<Ref<Project>> Projects;
- {
- RwLock::SharedLockScope Lock(m_ProjectsLock);
- Projects.reserve(m_Projects.size());
-
- for (auto& Kv : m_Projects)
- {
- if (Kv.second->IsExpired(GcCtx.ProjectStoreExpireTime()))
- {
- ExpiredProjectCount++;
- continue;
- }
- Projects.push_back(Kv.second);
- }
- }
- ProjectCount = Projects.size();
- for (const Ref<Project>& Project : Projects)
- {
- Project->GatherReferences(GcCtx);
- }
-}
-
-void
-ProjectStore::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Store::CollectGarbage");
-
- size_t ProjectCount = 0;
- size_t ExpiredProjectCount = 0;
-
- Stopwatch Timer;
- const auto Guard = MakeGuard([&] {
- ZEN_DEBUG("garbage collect from '{}' DONE after {}, found {} active projects and {} expired projects",
- m_ProjectBasePath.string(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
- ProjectCount,
- ExpiredProjectCount);
- });
- std::vector<Ref<Project>> ExpiredProjects;
- std::vector<Ref<Project>> Projects;
-
- {
- RwLock::SharedLockScope Lock(m_ProjectsLock);
- for (auto& Kv : m_Projects)
- {
- if (Kv.second->IsExpired(GcCtx.ProjectStoreExpireTime()))
- {
- ExpiredProjects.push_back(Kv.second);
- ExpiredProjectCount++;
- continue;
- }
- Projects.push_back(Kv.second);
- ProjectCount++;
- }
- }
-
- if (!GcCtx.IsDeletionMode())
- {
- ZEN_DEBUG("garbage collect DISABLED, for '{}' ", m_ProjectBasePath.string());
- return;
- }
-
- for (const Ref<Project>& Project : Projects)
- {
- std::vector<std::string> ExpiredOplogs;
- {
- Project->IterateOplogs([&GcCtx, &Project, &ExpiredOplogs](const RwLock::SharedLockScope&, ProjectStore::Oplog& Oplog) {
- if (Project->IsExpired(GcCtx.ProjectStoreExpireTime(), Oplog))
- {
- ExpiredOplogs.push_back(Oplog.OplogId());
- }
- });
- }
- for (const std::string& OplogId : ExpiredOplogs)
- {
- ZEN_DEBUG("ProjectStore::CollectGarbage garbage collected oplog '{}' in project '{}'. Removing storage on disk",
- OplogId,
- Project->Identifier);
- (void)Project->DeleteOplog(OplogId);
- }
- Project->Flush();
- }
-
- if (ExpiredProjects.empty())
- {
- ZEN_DEBUG("garbage collect for '{}', no expired projects found", m_ProjectBasePath.string());
- return;
- }
-
- for (const Ref<Project>& Project : ExpiredProjects)
- {
- std::filesystem::path PathToRemove;
- std::string ProjectId;
- {
- if (!Project->IsExpired(GcCtx.ProjectStoreExpireTime()))
- {
- ZEN_DEBUG("ProjectStore::CollectGarbage skipped garbage collect of project '{}'. Project no longer expired.", ProjectId);
- continue;
- }
-
- RwLock::ExclusiveLockScope _(m_ProjectsLock);
- bool Success = Project->PrepareForDelete(PathToRemove);
- if (!Success)
- {
- ZEN_DEBUG("ProjectStore::CollectGarbage skipped garbage collect of project '{}'. Project folder is locked.", ProjectId);
- continue;
- }
- m_Projects.erase(Project->Identifier);
- ProjectId = Project->Identifier;
- }
-
- ZEN_DEBUG("ProjectStore::CollectGarbage garbage collected project '{}'. Removing storage on disk", ProjectId);
- if (PathToRemove.empty())
- {
- continue;
- }
-
- DeleteDirectories(PathToRemove);
- }
-}
-
GcStorageSize
ProjectStore::StorageSize() const
{
@@ -6079,90 +5877,6 @@ TEST_CASE("project.store.gc")
}
}
- SUBCASE("v1")
- {
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 21);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(ProjectStore.OpenProject("proj1"sv));
- CHECK(ProjectStore.OpenProject("proj2"sv));
- }
-
- {
- GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 21);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(ProjectStore.OpenProject("proj1"sv));
- CHECK(ProjectStore.OpenProject("proj2"sv));
- }
-
- std::filesystem::remove(Project1FilePath);
-
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 21);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(ProjectStore.OpenProject("proj1"sv));
- CHECK(ProjectStore.OpenProject("proj2"sv));
- }
-
- {
- GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 14);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(!ProjectStore.OpenProject("proj1"sv));
- CHECK(ProjectStore.OpenProject("proj2"sv));
- }
-
- std::filesystem::remove(Project2Oplog1Path);
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 14);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(!ProjectStore.OpenProject("proj1"sv));
- CHECK(ProjectStore.OpenProject("proj2"sv));
- }
-
- {
- GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 7);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(!ProjectStore.OpenProject("proj1"sv));
- CHECK(ProjectStore.OpenProject("proj2"sv));
- }
-
- std::filesystem::remove(Project2FilePath);
- {
- GcContext GcCtx(GcClock::Now() + std::chrono::hours(24), GcClock::Now() + std::chrono::hours(24));
- ProjectStore.GatherReferences(GcCtx);
- size_t RefCount = 0;
- GcCtx.IterateCids([&RefCount](const IoHash&) { RefCount++; });
- CHECK(RefCount == 0);
- ProjectStore.CollectGarbage(GcCtx);
- CHECK(!ProjectStore.OpenProject("proj1"sv));
- CHECK(!ProjectStore.OpenProject("proj2"sv));
- }
- }
-
SUBCASE("v2")
{
{
diff --git a/src/zenserver/projectstore/projectstore.h b/src/zenserver/projectstore/projectstore.h
index 2552f657f..0a5e71da4 100644
--- a/src/zenserver/projectstore/projectstore.h
+++ b/src/zenserver/projectstore/projectstore.h
@@ -58,7 +58,7 @@ static_assert(IsPow2(sizeof(OplogEntry)));
package data split into separate chunks for bulk data, exports and header
information.
*/
-class ProjectStore : public RefCounted, public GcStorage, public GcContributor, public GcReferencer, public GcReferenceLocker
+class ProjectStore : public RefCounted, public GcStorage, public GcReferencer, public GcReferenceLocker
{
struct OplogStorage;
@@ -145,7 +145,6 @@ public:
LoggerRef Log() { return m_OuterProject->Log(); }
void Flush();
void ScrubStorage(ScrubContext& Ctx);
- void GatherReferences(GcContext& GcCtx);
static uint64_t TotalSize(const std::filesystem::path& BasePath);
uint64_t TotalSize() const;
@@ -298,7 +297,6 @@ public:
void Flush();
void ScrubStorage(ScrubContext& Ctx);
LoggerRef Log() const;
- void GatherReferences(GcContext& GcCtx);
static uint64_t TotalSize(const std::filesystem::path& BasePath);
uint64_t TotalSize() const;
bool PrepareForDelete(std::filesystem::path& OutDeletePath);
@@ -374,9 +372,7 @@ public:
LoggerRef Log() { return m_Log; }
const std::filesystem::path& BasePath() const { return m_ProjectBasePath; }
- virtual void GatherReferences(GcContext& GcCtx) override;
virtual void ScrubStorage(ScrubContext& Ctx) override;
- virtual void CollectGarbage(GcContext& GcCtx) override;
virtual GcStorageSize StorageSize() const override;
virtual std::string GetGcName(GcCtx& Ctx) override;
diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp
index 8d8da29b5..124e9ff5f 100644
--- a/src/zenserver/zenserver.cpp
+++ b/src/zenserver/zenserver.cpp
@@ -313,7 +313,7 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen
.DiskSizeSoftLimit = ServerOptions.GcConfig.DiskSizeSoftLimit,
.MinimumFreeDiskSpaceToAllowWrites = ServerOptions.GcConfig.MinimumFreeDiskSpaceToAllowWrites,
.LightweightInterval = std::chrono::seconds(ServerOptions.GcConfig.LightweightIntervalSeconds),
- .UseGCVersion = ServerOptions.GcConfig.UseGCV2 ? GcVersion::kV2 : GcVersion::kV1,
+ .UseGCVersion = ServerOptions.GcConfig.UseGCV2 ? GcVersion::kV2 : GcVersion::kV1_Deprecated,
.CompactBlockUsageThresholdPercent = ServerOptions.GcConfig.CompactBlockUsageThresholdPercent,
.Verbose = ServerOptions.GcConfig.Verbose,
.SingleThreaded = ServerOptions.GcConfig.SingleThreaded,
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp
index 2ae51d627..239b9e56b 100644
--- a/src/zenstore/blockstore.cpp
+++ b/src/zenstore/blockstore.cpp
@@ -659,26 +659,6 @@ BlockStore::WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const Wri
}
}
-BlockStore::ReclaimSnapshotState
-BlockStore::GetReclaimSnapshotState()
-{
- ReclaimSnapshotState State;
- RwLock::SharedLockScope _(m_InsertLock);
- for (uint32_t BlockIndex : m_ActiveWriteBlocks)
- {
- State.m_ActiveWriteBlocks.insert(BlockIndex);
- }
- if (m_WriteBlock)
- {
- State.m_ActiveWriteBlocks.insert(m_WriteBlockIndex);
- }
- for (auto It : m_ChunkBlocks)
- {
- State.m_BlockIndexes.insert(It.first);
- }
- return State;
-}
-
IoBuffer
BlockStore::TryGetChunk(const BlockStoreLocation& Location) const
{
@@ -724,429 +704,6 @@ BlockStore::Flush(bool ForceNewBlock)
}
}
-void
-BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
- const std::vector<BlockStoreLocation>& ChunkLocations,
- const ChunkIndexArray& KeepChunkIndexes,
- uint32_t PayloadAlignment,
- bool DryRun,
- const ReclaimCallback& ChangeCallback,
- const ClaimDiskReserveCallback& DiskReserveCallback)
-{
- ZEN_TRACE_CPU("BlockStore::ReclaimSpace");
-
- uint64_t WriteBlockTimeUs = 0;
- uint64_t WriteBlockLongestTimeUs = 0;
- uint64_t ReadBlockTimeUs = 0;
- uint64_t ReadBlockLongestTimeUs = 0;
- uint64_t TotalChunkCount = ChunkLocations.size();
- uint64_t DeletedSize = 0;
- uint64_t OldTotalSize = 0;
- uint64_t NewTotalSize = 0;
-
- uint64_t MovedCount = 0;
- uint64_t DeletedCount = 0;
-
- Stopwatch TotalTimer;
- const auto _ = MakeGuard([&] {
- ZEN_DEBUG(
- "reclaim space for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted {} and moved "
- "{} "
- "of {} "
- "chunks ({}).",
- m_BlocksBasePath,
- NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()),
- NiceLatencyNs(WriteBlockTimeUs),
- NiceLatencyNs(WriteBlockLongestTimeUs),
- NiceLatencyNs(ReadBlockTimeUs),
- NiceLatencyNs(ReadBlockLongestTimeUs),
- NiceBytes(DeletedSize),
- DeletedCount,
- MovedCount,
- TotalChunkCount,
- NiceBytes(OldTotalSize));
- });
-
- size_t BlockCount = Snapshot.m_BlockIndexes.size();
- if (BlockCount == 0)
- {
- ZEN_DEBUG("garbage collect for '{}' SKIPPED, no blocks to process", m_BlocksBasePath);
- return;
- }
-
- tsl::robin_set<size_t> KeepChunkMap;
- KeepChunkMap.reserve(KeepChunkIndexes.size());
- for (size_t KeepChunkIndex : KeepChunkIndexes)
- {
- KeepChunkMap.insert(KeepChunkIndex);
- }
-
- tsl::robin_map<uint32_t, size_t> BlockIndexToChunkMapIndex;
- std::vector<ChunkIndexArray> BlockKeepChunks;
- std::vector<ChunkIndexArray> BlockDeleteChunks;
-
- BlockIndexToChunkMapIndex.reserve(BlockCount);
- BlockKeepChunks.reserve(BlockCount);
- BlockDeleteChunks.reserve(BlockCount);
- size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2;
-
- size_t DeleteCount = 0;
- for (size_t Index = 0; Index < TotalChunkCount; ++Index)
- {
- const BlockStoreLocation& Location = ChunkLocations[Index];
- if (!Snapshot.m_BlockIndexes.contains(Location.BlockIndex))
- {
- // We did not know about the block when we took the snapshot, don't touch it
- continue;
- }
- OldTotalSize += Location.Size;
- auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(Location.BlockIndex);
- size_t ChunkMapIndex = 0;
- if (BlockIndexPtr == BlockIndexToChunkMapIndex.end())
- {
- ChunkMapIndex = BlockKeepChunks.size();
- BlockIndexToChunkMapIndex[Location.BlockIndex] = ChunkMapIndex;
- BlockKeepChunks.resize(ChunkMapIndex + 1);
- BlockKeepChunks.back().reserve(GuesstimateCountPerBlock);
- BlockDeleteChunks.resize(ChunkMapIndex + 1);
- BlockDeleteChunks.back().reserve(GuesstimateCountPerBlock);
- }
- else
- {
- ChunkMapIndex = BlockIndexPtr->second;
- }
-
- if (KeepChunkMap.contains(Index))
- {
- ChunkIndexArray& IndexMap = BlockKeepChunks[ChunkMapIndex];
- IndexMap.push_back(Index);
- NewTotalSize += Location.Size;
- continue;
- }
- ChunkIndexArray& IndexMap = BlockDeleteChunks[ChunkMapIndex];
- IndexMap.push_back(Index);
- DeleteCount++;
- }
-
- std::vector<uint32_t> BlocksToReWrite;
- BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size());
- for (const auto& Entry : BlockIndexToChunkMapIndex)
- {
- uint32_t BlockIndex = Entry.first;
- size_t ChunkMapIndex = Entry.second;
- const ChunkIndexArray& ChunkMap = BlockDeleteChunks[ChunkMapIndex];
- if (ChunkMap.empty())
- {
- continue;
- }
- BlocksToReWrite.push_back(BlockIndex);
- }
-
- {
- // Any known block not referenced should be added as well
- RwLock::SharedLockScope __(m_InsertLock);
- for (std::uint32_t BlockIndex : Snapshot.m_BlockIndexes)
- {
- if (!m_ChunkBlocks.contains(BlockIndex))
- {
- continue;
- }
- bool WasActiveWriteBlock = Snapshot.m_ActiveWriteBlocks.contains(BlockIndex);
- if (WasActiveWriteBlock)
- {
- continue;
- }
- if (BlockIndexToChunkMapIndex.contains(BlockIndex))
- {
- continue;
- }
- size_t ChunkMapIndex = ChunkMapIndex = BlockKeepChunks.size();
- BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex;
- BlockKeepChunks.resize(ChunkMapIndex + 1);
- BlockDeleteChunks.resize(ChunkMapIndex + 1);
- BlocksToReWrite.push_back(BlockIndex);
- }
- }
-
- if (DryRun)
- {
- ZEN_DEBUG("garbage collect for '{}' DISABLED, found {} {} chunks of total {} {}",
- m_BlocksBasePath,
- DeleteCount,
- NiceBytes(OldTotalSize - NewTotalSize),
- TotalChunkCount,
- OldTotalSize);
- return;
- }
-
- try
- {
- ZEN_TRACE_CPU("BlockStore::ReclaimSpace::Compact");
- Ref<BlockStoreFile> NewBlockFile;
- auto NewBlockFileGuard = MakeGuard([&]() {
- if (NewBlockFile && NewBlockFile->IsOpen())
- {
- ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath());
- m_TotalSize.fetch_sub(NewBlockFile->FileSize(), std::memory_order::relaxed);
- ZEN_ASSERT_SLOW(NewBlockFile->MetaSize() == 0);
- NewBlockFile->MarkAsDeleteOnClose();
- }
- });
-
- uint64_t WriteOffset = 0;
- uint32_t NewBlockIndex = 0;
- for (uint32_t BlockIndex : BlocksToReWrite)
- {
- bool IsActiveWriteBlock = Snapshot.m_ActiveWriteBlocks.contains(BlockIndex);
-
- const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex];
-
- Ref<BlockStoreFile> OldBlockFile;
- if (!IsActiveWriteBlock)
- {
- RwLock::SharedLockScope _i(m_InsertLock);
- Stopwatch Timer;
- const auto __ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
- if (auto It = m_ChunkBlocks.find(BlockIndex); It != m_ChunkBlocks.end())
- {
- OldBlockFile = It->second;
- }
- }
-
- ChunkIndexArray& KeepMap = BlockKeepChunks[ChunkMapIndex];
- if (KeepMap.empty())
- {
- ZEN_TRACE_CPU("BlockStore::ReclaimSpace::DeleteBlock");
-
- const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex];
- for (size_t DeleteIndex : DeleteMap)
- {
- DeletedSize += ChunkLocations[DeleteIndex].Size;
- }
- ChangeCallback({}, DeleteMap);
- DeletedCount += DeleteMap.size();
- if (OldBlockFile)
- {
- RwLock::ExclusiveLockScope _i(m_InsertLock);
- Stopwatch Timer;
- const auto __ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- ReadBlockTimeUs += ElapsedUs;
- ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
- });
- ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex);
- ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile);
- m_ChunkBlocks.erase(BlockIndex);
- m_TotalSize.fetch_sub(OldBlockFile->TotalSize(), std::memory_order::relaxed);
- OldBlockFile->MarkAsDeleteOnClose();
- }
- continue;
- }
- else if (!OldBlockFile && !IsActiveWriteBlock)
- {
- // If the block file pointed to does not exist, move any keep chunk them to deleted list
- ZEN_ERROR("Expected to find block {} in {} - this should never happen, marking {} entries as deleted.",
- BlockIndex,
- m_BlocksBasePath,
- KeepMap.size());
-
- BlockDeleteChunks[ChunkMapIndex].insert(BlockDeleteChunks[ChunkMapIndex].end(), KeepMap.begin(), KeepMap.end());
- KeepMap.clear();
- }
- else if (OldBlockFile && (OldBlockFile->FileSize() == 0))
- {
- // Block created to accommodate missing blocks
- ZEN_WARN("Missing block {} in {} - backing data for locations is missing, marking {} entries as deleted.",
- BlockIndex,
- m_BlocksBasePath,
- KeepMap.size());
-
- BlockDeleteChunks[ChunkMapIndex].insert(BlockDeleteChunks[ChunkMapIndex].end(), KeepMap.begin(), KeepMap.end());
- KeepMap.clear();
- }
-
- MovedChunksArray MovedChunks;
- if (OldBlockFile)
- {
- ZEN_TRACE_CPU("BlockStore::ReclaimSpace::MoveBlock");
-
- ZEN_INFO("Moving {} chunks from '{}' to new block", KeepMap.size(), GetBlockPath(m_BlocksBasePath, BlockIndex));
-
- uint64_t OldBlockSize = OldBlockFile->FileSize();
- std::vector<uint8_t> Chunk;
- for (const size_t& ChunkIndex : KeepMap)
- {
- const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex];
- if (ChunkLocation.Offset + ChunkLocation.Size > OldBlockSize)
- {
- ZEN_WARN(
- "ReclaimSpace skipping chunk outside of block range in '{}', Chunk start {}, Chunk size {} in Block {}, Block "
- "size {}",
- m_BlocksBasePath,
- ChunkLocation.Offset,
- ChunkLocation.Size,
- OldBlockFile->GetPath(),
- OldBlockSize);
- continue;
- }
- Chunk.resize(ChunkLocation.Size);
- OldBlockFile->Read(Chunk.data(), ChunkLocation.Size, ChunkLocation.Offset);
-
- if (!NewBlockFile || (WriteOffset + ChunkLocation.Size > m_MaxBlockSize))
- {
- uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed);
-
- if (NewBlockFile)
- {
- ZEN_ASSERT_SLOW(NewBlockFile->IsOpen());
- NewBlockFile->Flush();
- NewBlockFile = nullptr;
- }
- {
- ChangeCallback(MovedChunks, {});
- MovedCount += KeepMap.size();
- MovedChunks.clear();
- RwLock::ExclusiveLockScope InsertLock(m_InsertLock);
- Stopwatch Timer;
- const auto ___ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- ReadBlockTimeUs += ElapsedUs;
- ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
- });
-
- std::filesystem::path NewBlockPath;
- NextBlockIndex = GetFreeBlockIndex(NextBlockIndex, InsertLock, NewBlockPath);
- if (NextBlockIndex == (uint32_t)m_MaxBlockCount)
- {
- ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded",
- m_BlocksBasePath,
- static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1);
- return;
- }
-
- NewBlockFile = new BlockStoreFile(NewBlockPath);
- m_ChunkBlocks[NextBlockIndex] = NewBlockFile;
- }
-
- std::error_code Error;
- DiskSpace Space = DiskSpaceInfo(m_BlocksBasePath, Error);
- if (Error)
- {
- ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BlocksBasePath, Error.message());
- return;
- }
- if (Space.Free < m_MaxBlockSize)
- {
- uint64_t ReclaimedSpace = DiskReserveCallback();
- if (Space.Free + ReclaimedSpace < m_MaxBlockSize)
- {
- ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}",
- m_BlocksBasePath,
- m_MaxBlockSize,
- NiceBytes(Space.Free + ReclaimedSpace));
- RwLock::ExclusiveLockScope _l(m_InsertLock);
- Stopwatch Timer;
- const auto __ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- ReadBlockTimeUs += ElapsedUs;
- ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
- });
- ZEN_ASSERT(m_ChunkBlocks[NextBlockIndex] == NewBlockFile);
- ZEN_ASSERT_SLOW(!NewBlockFile->IsOpen());
- m_ChunkBlocks.erase(NextBlockIndex);
- return;
- }
-
- ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}",
- m_BlocksBasePath,
- ReclaimedSpace,
- NiceBytes(Space.Free + ReclaimedSpace));
- }
- NewBlockFile->Create(m_MaxBlockSize);
- NewBlockIndex = NextBlockIndex;
- WriteOffset = 0;
- }
-
- NewBlockFile->Write(Chunk.data(), ChunkLocation.Size, WriteOffset);
- MovedChunks.push_back(
- {ChunkIndex,
- {.BlockIndex = NewBlockIndex, .Offset = gsl::narrow<uint32_t>(WriteOffset), .Size = ChunkLocation.Size}});
- uint64_t OldOffset = WriteOffset;
- WriteOffset = RoundUp(WriteOffset + ChunkLocation.Size, PayloadAlignment);
- m_TotalSize.fetch_add(WriteOffset - OldOffset, std::memory_order::relaxed);
- }
- Chunk.clear();
- if (NewBlockFile)
- {
- ZEN_ASSERT_SLOW(NewBlockFile->IsOpen());
- NewBlockFile->Flush();
- }
- }
-
- const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex];
- for (size_t DeleteIndex : DeleteMap)
- {
- DeletedSize += ChunkLocations[DeleteIndex].Size;
- }
-
- ChangeCallback(MovedChunks, DeleteMap);
- MovedCount += MovedChunks.size();
- DeletedCount += DeleteMap.size();
- MovedChunks.clear();
-
- if (OldBlockFile)
- {
- RwLock::ExclusiveLockScope __(m_InsertLock);
- Stopwatch Timer;
- const auto ___ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- ReadBlockTimeUs += ElapsedUs;
- ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
- });
-
- ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex);
- ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile);
- m_ChunkBlocks.erase(BlockIndex);
- m_TotalSize.fetch_sub(OldBlockFile->TotalSize(), std::memory_order::relaxed);
- OldBlockFile->MarkAsDeleteOnClose();
- }
- }
- if (NewBlockFile)
- {
- ZEN_ASSERT_SLOW(NewBlockFile->IsOpen());
- NewBlockFile->Flush();
- NewBlockFile = nullptr;
- }
- }
- catch (const std::system_error& SystemError)
- {
- if (IsOOM(SystemError.code()))
- {
- ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, SystemError.what());
- }
- else if (IsOOD(SystemError.code()))
- {
- ZEN_WARN("reclaiming space for '{}' ran out of disk space: '{}'", m_BlocksBasePath, SystemError.what());
- }
- else
- {
- ZEN_ERROR("reclaiming space for '{}' failed with system error exception: '{}'", m_BlocksBasePath, SystemError.what());
- }
- }
- catch (const std::bad_alloc& BadAlloc)
- {
- ZEN_WARN("reclaiming space for '{}' ran out of memory: '{}'", m_BlocksBasePath, BadAlloc.what());
- }
- catch (const std::exception& ex)
- {
- ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what());
- }
-}
-
bool
BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
std::span<const size_t> InChunkIndexes,
@@ -1945,53 +1502,6 @@ TEST_CASE("blockstore.chunks")
CHECK(ReadChunkAsString(Store, ThirdChunkLocation) == ThirdChunkData);
}
-TEST_CASE("blockstore.clean.stray.blocks")
-{
- using namespace blockstore::impl;
-
- ScopedTemporaryDirectory TempDir;
- auto RootDirectory = TempDir.Path();
-
- BlockStore Store;
- Store.Initialize(RootDirectory / "store", 128, 1024);
-
- std::string FirstChunkData = "This is the data of the first chunk that we will write";
- BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4);
- std::string SecondChunkData = "This is the data for the second chunk that we will write";
- BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4);
- std::string ThirdChunkData =
- "This is a much longer string that will not fit in the first block so it should be placed in the second block";
- BlockStoreLocation ThirdChunkLocation = WriteStringAsChunk(Store, ThirdChunkData, 4);
-
- Store.Close();
-
- Store.Initialize(RootDirectory / "store", 128, 1024);
- CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2);
- IoBuffer ThirdChunk = Store.TryGetChunk(ThirdChunkLocation);
- CHECK(ThirdChunk);
-
- // Reclaim space should delete unreferenced block
- Store.ReclaimSpace(Store.GetReclaimSnapshotState(), {FirstChunkLocation, SecondChunkLocation}, {0, 1}, 4, false);
- // Block lives on as long as we reference it via ThirdChunk
- CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2);
- ThirdChunk = {};
- CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 1);
- ThirdChunk = Store.TryGetChunk(ThirdChunkLocation);
- CHECK(!ThirdChunk);
-
- // Recreate a fake block for a missing chunk location
- BlockStore::BlockIndexSet KnownBlocks;
- KnownBlocks.insert(FirstChunkLocation.BlockIndex);
- KnownBlocks.insert(SecondChunkLocation.BlockIndex);
- KnownBlocks.insert(ThirdChunkLocation.BlockIndex);
- Store.SyncExistingBlocksOnDisk(KnownBlocks);
-
- // We create a fake block for the location - we should still not be able to get the chunk
- CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2);
- ThirdChunk = Store.TryGetChunk(ThirdChunkLocation);
- CHECK(!ThirdChunk);
-}
-
TEST_CASE("blockstore.flush.force.new.block")
{
using namespace blockstore::impl;
@@ -2133,125 +1643,6 @@ TEST_CASE("blockstore.iterate.chunks")
WorkLatch.Wait();
}
-TEST_CASE("blockstore.reclaim.space")
-{
- using namespace blockstore::impl;
-
- ScopedTemporaryDirectory TempDir;
- auto RootDirectory = TempDir.Path();
-
- BlockStore Store;
- Store.Initialize(RootDirectory / "store", 512, 1024);
-
- constexpr size_t ChunkCount = 200;
- constexpr size_t Alignment = 8;
- std::vector<BlockStoreLocation> ChunkLocations;
- std::vector<IoHash> ChunkHashes;
- ChunkLocations.reserve(ChunkCount);
- ChunkHashes.reserve(ChunkCount);
- for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)
- {
- IoBuffer Chunk = CreateRandomBlob(57 + ChunkIndex);
-
- Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment, [&](const BlockStoreLocation& L) { ChunkLocations.push_back(L); });
- ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
- }
-
- std::vector<size_t> ChunksToKeep;
- ChunksToKeep.reserve(ChunkLocations.size());
- for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)
- {
- ChunksToKeep.push_back(ChunkIndex);
- }
-
- Store.Flush(/*ForceNewBlock*/ false);
- BlockStore::ReclaimSnapshotState State1 = Store.GetReclaimSnapshotState();
- Store.ReclaimSpace(State1, ChunkLocations, ChunksToKeep, Alignment, true);
-
- // If we keep all the chunks we should not get any callbacks on moved/deleted stuff
- Store.ReclaimSpace(
- State1,
- ChunkLocations,
- ChunksToKeep,
- Alignment,
- false,
- [](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&) { CHECK(false); },
- []() {
- CHECK(false);
- return 0;
- });
-
- size_t DeleteChunkCount = 38;
- ChunksToKeep.clear();
- for (size_t ChunkIndex = DeleteChunkCount; ChunkIndex < ChunkCount; ++ChunkIndex)
- {
- ChunksToKeep.push_back(ChunkIndex);
- }
-
- std::vector<BlockStoreLocation> NewChunkLocations = ChunkLocations;
- size_t MovedChunkCount = 0;
- size_t DeletedChunkCount = 0;
- Store.ReclaimSpace(
- State1,
- ChunkLocations,
- ChunksToKeep,
- Alignment,
- false,
- [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) {
- for (const auto& MovedChunk : MovedChunks)
- {
- CHECK(MovedChunk.first >= DeleteChunkCount);
- NewChunkLocations[MovedChunk.first] = MovedChunk.second;
- }
- MovedChunkCount += MovedChunks.size();
- for (size_t DeletedIndex : DeletedChunks)
- {
- CHECK(DeletedIndex < DeleteChunkCount);
- }
- DeletedChunkCount += DeletedChunks.size();
- },
- []() {
- CHECK(false);
- return 0;
- });
- CHECK(MovedChunkCount <= DeleteChunkCount);
- CHECK(DeletedChunkCount == DeleteChunkCount);
- ChunkLocations = std::vector<BlockStoreLocation>(NewChunkLocations.begin() + DeleteChunkCount, NewChunkLocations.end());
-
- for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)
- {
- IoBuffer ChunkBlock = Store.TryGetChunk(NewChunkLocations[ChunkIndex]);
- if (ChunkIndex >= DeleteChunkCount)
- {
- IoBuffer VerifyChunk = Store.TryGetChunk(NewChunkLocations[ChunkIndex]);
- CHECK(VerifyChunk);
- IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size());
- CHECK(VerifyHash == ChunkHashes[ChunkIndex]);
- }
- }
-
- // We need to take a new state since reclaim space add new block when compacting
- BlockStore::ReclaimSnapshotState State2 = Store.GetReclaimSnapshotState();
- NewChunkLocations = ChunkLocations;
- MovedChunkCount = 0;
- DeletedChunkCount = 0;
- Store.ReclaimSpace(
- State2,
- ChunkLocations,
- {},
- Alignment,
- false,
- [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) {
- CHECK(MovedChunks.empty());
- DeletedChunkCount += DeletedChunks.size();
- },
- []() {
- CHECK(false);
- return 0;
- });
- CHECK(DeletedChunkCount == ChunkCount - DeleteChunkCount);
-}
-
TEST_CASE("blockstore.thread.read.write")
{
using namespace blockstore::impl;
@@ -2411,12 +1802,11 @@ TEST_CASE("blockstore.compact.blocks")
}
SUBCASE("keep current write block")
{
- uint64_t PreSize = Store.TotalSize();
- BlockStoreCompactState State;
- BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState();
+ uint64_t PreSize = Store.TotalSize();
+ BlockStoreCompactState State;
for (const BlockStoreLocation& Location : ChunkLocations)
{
- if (SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex))
+ if (Store.IsWriting(Location.BlockIndex))
{
continue;
}
@@ -2440,9 +1830,8 @@ TEST_CASE("blockstore.compact.blocks")
{
Store.Flush(true);
- uint64_t PreSize = Store.TotalSize();
- BlockStoreCompactState State;
- BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState();
+ uint64_t PreSize = Store.TotalSize();
+ BlockStoreCompactState State;
for (const BlockStoreLocation& Location : ChunkLocations)
{
State.AddKeepLocation(Location);
@@ -2462,11 +1851,10 @@ TEST_CASE("blockstore.compact.blocks")
}
SUBCASE("drop first block")
{
- uint64_t PreSize = Store.TotalSize();
- BlockStoreCompactState State;
- BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState();
+ uint64_t PreSize = Store.TotalSize();
+ BlockStoreCompactState State;
- CHECK(!SnapshotState.m_ActiveWriteBlocks.contains(0));
+ CHECK(!Store.IsWriting(0));
State.IncludeBlock(0);
uint64_t FirstBlockSize = 0;
@@ -2496,11 +1884,10 @@ TEST_CASE("blockstore.compact.blocks")
}
SUBCASE("compact first block")
{
- uint64_t PreSize = Store.TotalSize();
- BlockStoreCompactState State;
- BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState();
+ uint64_t PreSize = Store.TotalSize();
+ BlockStoreCompactState State;
- CHECK(!SnapshotState.m_ActiveWriteBlocks.contains(0));
+ CHECK(!Store.IsWriting(0));
State.IncludeBlock(0);
uint64_t SkipChunkCount = 2;
@@ -2555,14 +1942,13 @@ TEST_CASE("blockstore.compact.blocks")
}
SUBCASE("compact every other item")
{
- uint64_t PreSize = Store.TotalSize();
- BlockStoreCompactState State;
- BlockStore::ReclaimSnapshotState SnapshotState = Store.GetReclaimSnapshotState();
- bool SkipFlag = false;
+ uint64_t PreSize = Store.TotalSize();
+ BlockStoreCompactState State;
+ bool SkipFlag = false;
for (const BlockStoreLocation& Location : ChunkLocations)
{
- if (SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex))
+ if (Store.IsWriting(Location.BlockIndex))
{
continue;
}
@@ -2579,7 +1965,7 @@ TEST_CASE("blockstore.compact.blocks")
std::vector<BlockStoreLocation> DroppedLocations;
for (const BlockStoreLocation& Location : ChunkLocations)
{
- if (SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex))
+ if (Store.IsWriting(Location.BlockIndex))
{
continue;
}
@@ -2616,7 +2002,7 @@ TEST_CASE("blockstore.compact.blocks")
for (size_t Index = 0; Index < ChunkLocations.size(); Index++)
{
const BlockStoreLocation& Location = ChunkLocations[Index];
- if (SkipFlag && !SnapshotState.m_ActiveWriteBlocks.contains(Location.BlockIndex))
+ if (SkipFlag && !Store.IsWriting(Location.BlockIndex))
{
CHECK(std::find(DroppedLocations.begin(), DroppedLocations.end(), Location) != DroppedLocations.end());
CHECK(!Store.TryGetChunk(Location));
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index fed37824a..8a2de34e2 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -2293,466 +2293,6 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
}
}
-void
-ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Z$::Bucket::GatherReferences");
-
-#define CALCULATE_BLOCKING_TIME 0
-
-#if CALCULATE_BLOCKING_TIME
- uint64_t WriteBlockTimeUs = 0;
- uint64_t WriteBlockLongestTimeUs = 0;
- uint64_t ReadBlockTimeUs = 0;
- uint64_t ReadBlockLongestTimeUs = 0;
-#endif // CALCULATE_BLOCKING_TIME
-
- Stopwatch TotalTimer;
- const auto _ = MakeGuard([&] {
-#if CALCULATE_BLOCKING_TIME
- ZEN_DEBUG("gathered references from '{}' in {} write lock: {} ({}), read lock: {} ({})",
- m_BucketDir,
- NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()),
- NiceLatencyNs(WriteBlockTimeUs),
- NiceLatencyNs(WriteBlockLongestTimeUs),
- NiceLatencyNs(ReadBlockTimeUs),
- NiceLatencyNs(ReadBlockLongestTimeUs));
-#else
- ZEN_DEBUG("gathered references from '{}' in {}", m_BucketDir, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()));
-#endif // CALCULATE_BLOCKING_TIME
- });
-
- const GcClock::TimePoint ExpireTime = GcCtx.CacheExpireTime();
-
- const GcClock::Tick ExpireTicks = ExpireTime.time_since_epoch().count();
-
- IndexMap Index;
- std::vector<AccessTime> AccessTimes;
- std::vector<BucketPayload> Payloads;
- {
- RwLock::SharedLockScope __(m_IndexLock);
-#if CALCULATE_BLOCKING_TIME
- Stopwatch Timer;
- const auto ___ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
-#endif // CALCULATE_BLOCKING_TIME
- if (m_Index.empty())
- {
- return;
- }
- Index = m_Index;
- AccessTimes = m_AccessTimes;
- Payloads = m_Payloads;
- }
-
- std::vector<IoHash> ExpiredKeys;
- ExpiredKeys.reserve(1024);
-
- std::vector<IoHash> Cids;
- if (!GcCtx.SkipCid())
- {
- Cids.reserve(1024);
- }
-
- std::vector<std::pair<IoHash, size_t>> StructuredItemsWithUnknownAttachments;
-
- for (const auto& Entry : Index)
- {
- const IoHash& Key = Entry.first;
- size_t PayloadIndex = Entry.second;
- GcClock::Tick AccessTime = AccessTimes[PayloadIndex];
- if (AccessTime < ExpireTicks)
- {
- ExpiredKeys.push_back(Key);
- continue;
- }
-
- if (GcCtx.SkipCid())
- {
- continue;
- }
-
- BucketPayload& Payload = Payloads[PayloadIndex];
- const DiskLocation& Loc = Payload.Location;
-
- if (!Loc.IsFlagSet(DiskLocation::kStructured))
- {
- continue;
- }
- StructuredItemsWithUnknownAttachments.push_back(Entry);
- }
-
- for (const auto& Entry : StructuredItemsWithUnknownAttachments)
- {
- const IoHash& Key = Entry.first;
- BucketPayload& Payload = Payloads[Entry.second];
- const DiskLocation& Loc = Payload.Location;
- {
- IoBuffer Buffer;
- if (Loc.IsFlagSet(DiskLocation::kStandaloneFile))
- {
- if (Buffer = GetStandaloneCacheValue(Loc, Key); !Buffer)
- {
- continue;
- }
- }
- else
- {
- RwLock::SharedLockScope IndexLock(m_IndexLock);
-#if CALCULATE_BLOCKING_TIME
- Stopwatch Timer;
- const auto ___ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
-#endif // CALCULATE_BLOCKING_TIME
- if (auto It = m_Index.find(Key); It != m_Index.end())
- {
- const BucketPayload& CachedPayload = m_Payloads[It->second];
- if (CachedPayload.MemCached)
- {
- Buffer = m_MemCachedPayloads[CachedPayload.MemCached].Payload;
- ZEN_ASSERT_SLOW(Buffer);
- }
- else
- {
- DiskLocation Location = m_Payloads[It->second].Location;
- IndexLock.ReleaseNow();
- Buffer = GetInlineCacheValue(Location);
- // Don't memcache items when doing GC
- }
- }
- if (!Buffer)
- {
- continue;
- }
- }
-
- ZEN_ASSERT(Buffer);
- ZEN_ASSERT(Buffer.GetContentType() == ZenContentType::kCbObject);
- CbObjectView Obj(Buffer.GetData());
- Obj.IterateAttachments([&Cids](CbFieldView Field) { Cids.push_back(Field.AsAttachment()); });
- if (Cids.size() >= 1024)
- {
- GcCtx.AddRetainedCids(Cids);
- Cids.clear();
- }
- }
- }
-
- GcCtx.AddRetainedCids(Cids);
- GcCtx.SetExpiredCacheKeys(m_BucketDir.string(), std::move(ExpiredKeys));
-}
-
-void
-ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Z$::Bucket::CollectGarbage");
-
- ZEN_DEBUG("collecting garbage from '{}'", m_BucketDir);
-
- Stopwatch TotalTimer;
- uint64_t WriteBlockTimeUs = 0;
- uint64_t WriteBlockLongestTimeUs = 0;
- uint64_t ReadBlockTimeUs = 0;
- uint64_t ReadBlockLongestTimeUs = 0;
- uint64_t TotalChunkCount = 0;
- uint64_t DeletedSize = 0;
- GcStorageSize OldTotalSize = StorageSize();
-
- std::unordered_set<IoHash> DeletedChunks;
- uint64_t MovedCount = 0;
-
- const auto _ = MakeGuard([&] {
- ZEN_DEBUG(
- "garbage collect from '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted {} and moved "
- "{} "
- "of {} "
- "entries ({}/{}).",
- m_BucketDir,
- NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()),
- NiceLatencyNs(WriteBlockTimeUs),
- NiceLatencyNs(WriteBlockLongestTimeUs),
- NiceLatencyNs(ReadBlockTimeUs),
- NiceLatencyNs(ReadBlockLongestTimeUs),
- NiceBytes(DeletedSize),
- DeletedChunks.size(),
- MovedCount,
- TotalChunkCount,
- NiceBytes(OldTotalSize.DiskSize),
- NiceBytes(OldTotalSize.MemorySize));
-
- bool Expected = false;
- if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true))
- {
- return;
- }
- auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); });
-
- try
- {
- SaveSnapshot([&]() { return GcCtx.ClaimGCReserve(); });
- }
- catch (const std::exception& Ex)
- {
- ZEN_WARN("Failed to write index and manifest after GC in '{}'. Reason: '{}'", m_BucketDir, Ex.what());
- }
- });
-
- auto __ = MakeGuard([&]() {
- if (!DeletedChunks.empty())
- {
- // Clean up m_AccessTimes and m_Payloads vectors
- std::vector<BucketPayload> Payloads;
- std::vector<AccessTime> AccessTimes;
- std::vector<BucketMetaData> MetaDatas;
- std::vector<MemCacheData> MemCachedPayloads;
- IndexMap Index;
- {
- RwLock::ExclusiveLockScope IndexLock(m_IndexLock);
- Stopwatch Timer;
- const auto ___ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
- CompactState(IndexLock, Payloads, AccessTimes, MetaDatas, MemCachedPayloads, Index);
- }
- GcCtx.AddDeletedCids(std::vector<IoHash>(DeletedChunks.begin(), DeletedChunks.end()));
- }
- });
-
- std::span<const IoHash> ExpiredCacheKeySpan = GcCtx.ExpiredCacheKeys(m_BucketDir.string());
- if (ExpiredCacheKeySpan.empty())
- {
- return;
- }
-
- m_SlogFile.Flush();
-
- std::unordered_set<IoHash, IoHash::Hasher> ExpiredCacheKeys(ExpiredCacheKeySpan.begin(), ExpiredCacheKeySpan.end());
-
- std::vector<DiskIndexEntry> ExpiredStandaloneEntries;
- IndexMap IndexSnapshot;
- std::vector<BucketPayload> PayloadsSnapshot;
- BlockStore::ReclaimSnapshotState BlockStoreState;
- {
- bool Expected = false;
- if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true))
- {
- ZEN_DEBUG("garbage collect SKIPPED, for '{}', container is currently flushing", m_BucketDir);
- return;
- }
- auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); });
-
- {
- ZEN_TRACE_CPU("Z$::Bucket::CollectGarbage::State");
- RwLock::SharedLockScope IndexLock(m_IndexLock);
-
- Stopwatch Timer;
- const auto ____ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
-
- BlockStoreState = m_BlockStore.GetReclaimSnapshotState();
-
- for (const IoHash& Key : ExpiredCacheKeys)
- {
- if (auto It = m_Index.find(Key); It != m_Index.end())
- {
- const BucketPayload& Payload = m_Payloads[It->second];
- if (Payload.Location.Flags & DiskLocation::kStandaloneFile)
- {
- DiskIndexEntry Entry = {.Key = Key, .Location = Payload.Location};
- Entry.Location.Flags |= DiskLocation::kTombStone;
- ExpiredStandaloneEntries.push_back(Entry);
- }
- }
- }
-
- PayloadsSnapshot = m_Payloads;
- IndexSnapshot = m_Index;
-
- if (GcCtx.IsDeletionMode())
- {
- IndexLock.ReleaseNow();
- RwLock::ExclusiveLockScope __(m_IndexLock);
- for (const auto& Entry : ExpiredStandaloneEntries)
- {
- if (m_Index.erase(Entry.Key) == 1)
- {
- m_StandaloneSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed);
- DeletedChunks.insert(Entry.Key);
- }
- }
- m_SlogFile.Append(ExpiredStandaloneEntries);
- }
- }
- }
-
- if (GcCtx.IsDeletionMode())
- {
- ZEN_TRACE_CPU("Z$::Bucket::CollectGarbage::Delete");
-
- ExtendablePathBuilder<256> Path;
-
- for (const auto& Entry : ExpiredStandaloneEntries)
- {
- const IoHash& Key = Entry.Key;
-
- Path.Reset();
- BuildPath(Path, Key);
- fs::path FilePath = Path.ToPath();
-
- {
- RwLock::SharedLockScope IndexLock(m_IndexLock);
- Stopwatch Timer;
- const auto ____ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
- if (m_Index.contains(Key))
- {
- // Someone added it back, let the file on disk be
- ZEN_DEBUG("skipping z$ delete standalone of file '{}' FAILED, it has been added back", Path.ToUtf8());
- continue;
- }
- IndexLock.ReleaseNow();
-
- RwLock::ExclusiveLockScope ValueLock(LockForHash(Key));
- if (fs::is_regular_file(FilePath))
- {
- ZEN_DEBUG("deleting standalone cache file '{}'", Path.ToUtf8());
- std::error_code Ec;
- fs::remove(FilePath, Ec);
- if (Ec)
- {
- ZEN_WARN("delete expired z$ standalone file '{}' FAILED, reason: '{}'", Path.ToUtf8(), Ec.message());
- continue;
- }
- }
- }
- DeletedSize += Entry.Location.Size();
- }
- }
-
- TotalChunkCount = IndexSnapshot.size();
-
- std::vector<BlockStoreLocation> ChunkLocations;
- BlockStore::ChunkIndexArray KeepChunkIndexes;
- std::vector<IoHash> ChunkIndexToChunkHash;
- ChunkLocations.reserve(TotalChunkCount);
- ChunkLocations.reserve(TotalChunkCount);
- ChunkIndexToChunkHash.reserve(TotalChunkCount);
- {
- TotalChunkCount = 0;
- for (const auto& Entry : IndexSnapshot)
- {
- size_t EntryIndex = Entry.second;
- const DiskLocation& DiskLocation = PayloadsSnapshot[EntryIndex].Location;
-
- if (DiskLocation.Flags & DiskLocation::kStandaloneFile)
- {
- continue;
- }
- const IoHash& Key = Entry.first;
- BlockStoreLocation Location = DiskLocation.GetBlockLocation(m_Configuration.PayloadAlignment);
- size_t ChunkIndex = ChunkLocations.size();
- ChunkLocations.push_back(Location);
- ChunkIndexToChunkHash.push_back(Key);
- if (ExpiredCacheKeys.contains(Key))
- {
- continue;
- }
- KeepChunkIndexes.push_back(ChunkIndex);
- }
- }
- TotalChunkCount = ChunkLocations.size();
- size_t DeleteCount = TotalChunkCount - KeepChunkIndexes.size();
-
- const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
- if (!PerformDelete)
- {
- m_BlockStore.ReclaimSpace(BlockStoreState, ChunkLocations, KeepChunkIndexes, m_Configuration.PayloadAlignment, true);
- GcStorageSize CurrentTotalSize = StorageSize();
- ZEN_DEBUG("garbage collect from '{}' DISABLED, found {} chunks of total {} ({}/{})",
- m_BucketDir,
- DeleteCount,
- TotalChunkCount,
- NiceBytes(CurrentTotalSize.DiskSize),
- NiceBytes(CurrentTotalSize.MemorySize));
- return;
- }
-
- m_BlockStore.ReclaimSpace(
- BlockStoreState,
- ChunkLocations,
- KeepChunkIndexes,
- m_Configuration.PayloadAlignment,
- false,
- [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& RemovedChunks) {
- std::vector<DiskIndexEntry> LogEntries;
- LogEntries.reserve(MovedChunks.size() + RemovedChunks.size());
- {
- RwLock::ExclusiveLockScope IndexLock(m_IndexLock);
- Stopwatch Timer;
- const auto ____ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
- for (const auto& Entry : MovedChunks)
- {
- size_t ChunkIndex = Entry.first;
- const BlockStoreLocation& NewLocation = Entry.second;
- const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- size_t EntryIndex = m_Index[ChunkHash];
- BucketPayload& Payload = m_Payloads[EntryIndex];
- if (PayloadsSnapshot[IndexSnapshot[ChunkHash]].Location != m_Payloads[EntryIndex].Location)
- {
- // Entry has been updated while GC was running, ignore the move
- continue;
- }
- Payload.Location = DiskLocation(NewLocation, m_Configuration.PayloadAlignment, Payload.Location.GetFlags());
- LogEntries.push_back({.Key = ChunkHash, .Location = Payload.Location});
- }
- for (const size_t ChunkIndex : RemovedChunks)
- {
- const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- size_t EntryIndex = m_Index[ChunkHash];
- BucketPayload& Payload = m_Payloads[EntryIndex];
- if (PayloadsSnapshot[IndexSnapshot[ChunkHash]].Location != Payload.Location)
- {
- // Entry has been updated while GC was running, ignore the delete
- continue;
- }
- const DiskLocation& OldDiskLocation = Payload.Location;
- LogEntries.push_back({.Key = ChunkHash,
- .Location = DiskLocation(OldDiskLocation.GetBlockLocation(m_Configuration.PayloadAlignment),
- m_Configuration.PayloadAlignment,
- OldDiskLocation.GetFlags() | DiskLocation::kTombStone)});
-
- RemoveMemCachedData(IndexLock, Payload);
- RemoveMetaData(IndexLock, Payload);
-
- m_Index.erase(ChunkHash);
- DeletedChunks.insert(ChunkHash);
- }
- }
-
- m_SlogFile.Append(LogEntries);
- m_SlogFile.Flush();
- },
- [&]() { return GcCtx.ClaimGCReserve(); });
-}
-
ZenCacheDiskLayer::BucketStats
ZenCacheDiskLayer::CacheBucket::Stats()
{
@@ -2835,30 +2375,6 @@ ZenCacheDiskLayer::CacheBucket::EnumerateBucketContents(
}
void
-ZenCacheDiskLayer::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Z$::CollectGarbage");
-
- std::vector<CacheBucket*> Buckets;
- {
- RwLock::SharedLockScope _(m_Lock);
- Buckets.reserve(m_Buckets.size());
- for (auto& Kv : m_Buckets)
- {
- Buckets.push_back(Kv.second.get());
- }
- }
- for (CacheBucket* Bucket : Buckets)
- {
- Bucket->CollectGarbage(GcCtx);
- }
- if (!m_IsMemCacheTrimming)
- {
- MemCacheTrim(Buckets, GcCtx.CacheExpireTime());
- }
-}
-
-void
ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value, std::span<IoHash> References)
{
ZEN_TRACE_CPU("Z$::Bucket::PutStandaloneCacheValue");
@@ -4519,26 +4035,6 @@ ZenCacheDiskLayer::ScrubStorage(ScrubContext& Ctx)
}
}
-void
-ZenCacheDiskLayer::GatherReferences(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Z$::GatherReferences");
-
- std::vector<CacheBucket*> Buckets;
- {
- RwLock::SharedLockScope _(m_Lock);
- Buckets.reserve(m_Buckets.size());
- for (auto& Kv : m_Buckets)
- {
- Buckets.push_back(Kv.second.get());
- }
- }
- for (CacheBucket* Bucket : Buckets)
- {
- Bucket->GatherReferences(GcCtx);
- }
-}
-
GcStorageSize
ZenCacheDiskLayer::StorageSize() const
{
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index ac8b70c1c..d30bd93cc 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -127,14 +127,12 @@ ZenCacheNamespace::ZenCacheNamespace(GcManager& Gc, JobQueue& JobQueue, const st
m_DiskLayer.DiscoverBuckets();
- m_Gc.AddGcContributor(this);
m_Gc.AddGcStorage(this);
}
ZenCacheNamespace::~ZenCacheNamespace()
{
m_Gc.RemoveGcStorage(this);
- m_Gc.RemoveGcContributor(this);
}
struct ZenCacheNamespace::PutBatchHandle
@@ -307,26 +305,6 @@ ZenCacheNamespace::ScrubStorage(ScrubContext& Ctx)
m_DiskLayer.ScrubStorage(Ctx);
}
-void
-ZenCacheNamespace::GatherReferences(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Z$::ZenCacheNamespace::GatherReferences");
-
- Stopwatch Timer;
- const auto Guard =
- MakeGuard([&] { ZEN_DEBUG("cache gathered all references from '{}' in {}", m_RootDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
-
- m_DiskLayer.GatherReferences(GcCtx);
-}
-
-void
-ZenCacheNamespace::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Z$::Namespace::CollectGarbage");
-
- m_DiskLayer.CollectGarbage(GcCtx);
-}
-
GcStorageSize
ZenCacheNamespace::StorageSize() const
{
@@ -1452,186 +1430,7 @@ TEST_CASE("cachestore.size")
}
}
-TEST_CASE("cachestore.gc")
-{
- using namespace testutils;
-
- auto JobQueue = MakeJobQueue(1, "testqueue");
-
- SUBCASE("gather references does NOT add references for expired cache entries")
- {
- ScopedTemporaryDirectory TempDir;
- std::vector<IoHash> Cids{CreateKey(1), CreateKey(2), CreateKey(3)};
-
- const auto CollectAndFilter = [](GcManager& Gc,
- GcClock::TimePoint Time,
- GcClock::Duration MaxDuration,
- std::span<const IoHash> Cids,
- std::vector<IoHash>& OutKeep) {
- GcContext GcCtx(Time - MaxDuration, Time - MaxDuration);
- Gc.CollectGarbage(GcCtx);
- OutKeep.clear();
- GcCtx.FilterCids(Cids, [&OutKeep](const IoHash& Hash) { OutKeep.push_back(Hash); });
- };
-
- {
- GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {});
- const auto Bucket = "teardrinker"sv;
-
- // Create a cache record
- const IoHash Key = CreateKey(42);
- CbObjectWriter Record;
- Record << "Key"sv
- << "SomeRecord"sv;
-
- for (size_t Idx = 0; auto& Cid : Cids)
- {
- Record.AddBinaryAttachment(fmt::format("attachment-{}", Idx++), Cid);
- }
-
- IoBuffer Buffer = Record.Save().GetBuffer().AsIoBuffer();
- Buffer.SetContentType(ZenContentType::kCbObject);
-
- Zcs.Put(Bucket, Key, {.Value = Buffer}, Cids);
-
- std::vector<IoHash> Keep;
-
- // Collect garbage with 1 hour max cache duration
- {
- CollectAndFilter(Gc, GcClock::Now(), std::chrono::hours(1), Cids, Keep);
- CHECK_EQ(Cids.size(), Keep.size());
- }
-
- // Move forward in time
- {
- CollectAndFilter(Gc, GcClock::Now() + std::chrono::hours(2), std::chrono::hours(1), Cids, Keep);
- CHECK_EQ(0, Keep.size());
- }
- }
-
- // Expect timestamps to be serialized
- {
- GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {});
- std::vector<IoHash> Keep;
-
- // Collect garbage with 1 hour max cache duration
- {
- CollectAndFilter(Gc, GcClock::Now(), std::chrono::hours(1), Cids, Keep);
- CHECK_EQ(3, Keep.size());
- }
-
- // Move forward in time
- {
- CollectAndFilter(Gc, GcClock::Now() + std::chrono::hours(2), std::chrono::hours(1), Cids, Keep);
- CHECK_EQ(0, Keep.size());
- }
- }
- }
-
- SUBCASE("gc removes standalone values")
- {
- ScopedTemporaryDirectory TempDir;
- GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {});
- const auto Bucket = "fortysixandtwo"sv;
- const GcClock::TimePoint CurrentTime = GcClock::Now();
-
- std::vector<IoHash> Keys{CreateKey(1), CreateKey(2), CreateKey(3)};
-
- for (const auto& Key : Keys)
- {
- IoBuffer Value = CreateRandomBlob(128 << 10);
- Zcs.Put(Bucket, Key, {.Value = Value}, {});
- }
-
- {
- GcContext GcCtx(CurrentTime - std::chrono::hours(46), CurrentTime - std::chrono::hours(46));
-
- Gc.CollectGarbage(GcCtx);
-
- for (const auto& Key : Keys)
- {
- ZenCacheValue CacheValue;
- const bool Exists = Zcs.Get(Bucket, Key, CacheValue);
- CHECK(Exists);
- }
- }
-
- // Move forward in time and collect again
- {
- GcContext GcCtx(CurrentTime + std::chrono::minutes(2), CurrentTime + std::chrono::minutes(2));
- Gc.CollectGarbage(GcCtx);
-
- for (const auto& Key : Keys)
- {
- ZenCacheValue CacheValue;
- const bool Exists = Zcs.Get(Bucket, Key, CacheValue);
- CHECK(!Exists);
- }
-
- CHECK_EQ(0, Zcs.StorageSize().DiskSize);
- }
- }
-
- SUBCASE("gc removes small objects")
- {
- ScopedTemporaryDirectory TempDir;
- GcManager Gc;
- {
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {});
- const auto Bucket = "rightintwo"sv;
-
- std::vector<IoHash> Keys{CreateKey(1), CreateKey(2), CreateKey(3)};
-
- for (const auto& Key : Keys)
- {
- IoBuffer Value = CreateRandomBlob(128);
- Zcs.Put(Bucket, Key, {.Value = Value}, {});
- }
-
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(2), GcClock::Now() - std::chrono::hours(2));
- GcCtx.CollectSmallObjects(true);
-
- Gc.CollectGarbage(GcCtx);
-
- for (const auto& Key : Keys)
- {
- ZenCacheValue CacheValue;
- const bool Exists = Zcs.Get(Bucket, Key, CacheValue);
- CHECK(Exists);
- }
- }
-
- // Move forward in time and collect again
- {
- GcContext GcCtx(GcClock::Now() + std::chrono::minutes(2), GcClock::Now() + std::chrono::minutes(2));
- GcCtx.CollectSmallObjects(true);
-
- Zcs.Flush();
- Gc.CollectGarbage(GcCtx);
-
- for (const auto& Key : Keys)
- {
- ZenCacheValue CacheValue;
- const bool Exists = Zcs.Get(Bucket, Key, CacheValue);
- CHECK(!Exists);
- }
- // GC could not remove the currently written block so size will not be zero
- CHECK_NE(0, Zcs.StorageSize().DiskSize);
- }
- }
- {
- // Unreferenced blocks will be pruned so size should now be zero
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", {});
- CHECK_EQ(0, Zcs.StorageSize().DiskSize);
- }
- }
-}
-
-TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * doctest::skip(true))
+TEST_CASE("cachestore.threadedinsert") // * doctest::skip(true))
{
// for (uint32_t i = 0; i < 100; ++i)
{
@@ -1699,39 +1498,24 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // *
}
}
- auto DoGC = [](GcManager& Gc,
- ZenCacheNamespace& Zcs,
- std::unordered_map<IoHash, std::string, IoHash::Hasher>& GcChunkHashes,
- const std::vector<IoHash>& KeepHashes) {
- if (GCV2::Enabled)
+ auto DoGC = [](GcManager& Gc, ZenCacheNamespace& Zcs, std::unordered_map<IoHash, std::string, IoHash::Hasher>& GcChunkHashes) {
+ GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24),
+ .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24),
+ .CollectSmallObjects = true,
+ .IsDeleteMode = true,
+ .CompactBlockUsageThresholdPercent = 100};
+ Gc.CollectGarbage(Settings);
+ // Cheating as we don't get the list of deleted hashes back from this call
+ std::unordered_map<IoHash, std::string, IoHash::Hasher> RemainingChunkHashes;
+ for (const auto& It : GcChunkHashes)
{
- GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24),
- .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24),
- .CollectSmallObjects = true,
- .IsDeleteMode = true,
- .CompactBlockUsageThresholdPercent = 100};
- Gc.CollectGarbage(Settings);
- // Cheating as we don't get the list of deleted hashes back from this call
- std::unordered_map<IoHash, std::string, IoHash::Hasher> RemainingChunkHashes;
- for (const auto& It : GcChunkHashes)
+ ZenCacheValue Tmp;
+ if (Zcs.Get(It.second, It.first, Tmp))
{
- ZenCacheValue Tmp;
- if (Zcs.Get(It.second, It.first, Tmp))
- {
- RemainingChunkHashes.insert(It);
- }
+ RemainingChunkHashes.insert(It);
}
- GcChunkHashes.swap(RemainingChunkHashes);
- }
- else
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- GcCtx.AddRetainedCids(KeepHashes);
- Zcs.CollectGarbage(GcCtx);
- const HashKeySet& Deleted = GcCtx.DeletedCids();
- Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
}
+ GcChunkHashes.swap(RemainingChunkHashes);
};
const uint64_t TotalSize = Zcs.StorageSize().DiskSize;
@@ -1813,32 +1597,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // *
GcChunkHashes[Chunk.first] = Chunk.second.Bucket;
}
}
- std::vector<IoHash> KeepHashes;
- KeepHashes.reserve(GcChunkHashes.size());
- for (const auto& Entry : GcChunkHashes)
- {
- KeepHashes.push_back(Entry.first);
- }
- size_t C = 0;
- while (C < KeepHashes.size())
- {
- if (C % 155 == 0)
- {
- if (C < KeepHashes.size() - 1)
- {
- KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- if (C + 3 < KeepHashes.size() - 1)
- {
- KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- }
- C++;
- }
-
- DoGC(Gc, Zcs, GcChunkHashes, KeepHashes);
+ DoGC(Gc, Zcs, GcChunkHashes);
}
while (WorkCompleted < NewChunks.size() + Chunks.size())
@@ -1856,32 +1615,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // *
GcChunkHashes[Chunk.first] = Chunk.second.Bucket;
}
}
- std::vector<IoHash> KeepHashes;
- KeepHashes.reserve(GcChunkHashes.size());
- for (const auto& Entry : GcChunkHashes)
- {
- KeepHashes.push_back(Entry.first);
- }
- size_t C = 0;
- while (C < KeepHashes.size())
- {
- if (C % 155 == 0)
- {
- if (C < KeepHashes.size() - 1)
- {
- KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- if (C + 3 < KeepHashes.size() - 1)
- {
- KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- }
- C++;
- }
-
- DoGC(Gc, Zcs, GcChunkHashes, KeepHashes);
+ DoGC(Gc, Zcs, GcChunkHashes);
}
}
{
diff --git a/src/zenstore/cas.cpp b/src/zenstore/cas.cpp
index 871558a52..bff221fc7 100644
--- a/src/zenstore/cas.cpp
+++ b/src/zenstore/cas.cpp
@@ -62,7 +62,6 @@ public:
WorkerThreadPool* OptionalWorkerPool) override;
virtual void Flush() override;
virtual void ScrubStorage(ScrubContext& Ctx) override;
- virtual void GarbageCollect(GcContext& GcCtx) override;
virtual CidStoreSize TotalSize() const override;
private:
@@ -459,16 +458,6 @@ CasImpl::ScrubStorage(ScrubContext& Ctx)
m_LargeStrategy.ScrubStorage(Ctx);
}
-void
-CasImpl::GarbageCollect(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Cas::GarbageCollect");
-
- m_SmallStrategy.CollectGarbage(GcCtx);
- m_TinyStrategy.CollectGarbage(GcCtx);
- m_LargeStrategy.CollectGarbage(GcCtx);
-}
-
CidStoreSize
CasImpl::TotalSize() const
{
diff --git a/src/zenstore/cas.h b/src/zenstore/cas.h
index 169f4d58c..bedbc6a9a 100644
--- a/src/zenstore/cas.h
+++ b/src/zenstore/cas.h
@@ -11,7 +11,6 @@
namespace zen {
-class GcContext;
class GcManager;
class ScrubContext;
@@ -51,7 +50,6 @@ public:
WorkerThreadPool* OptionalWorkerPool) = 0;
virtual void Flush() = 0;
virtual void ScrubStorage(ScrubContext& Ctx) = 0;
- virtual void GarbageCollect(GcContext& GcCtx) = 0;
virtual CidStoreSize TotalSize() const = 0;
protected:
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index a5d70a991..7f1300177 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -527,167 +527,6 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx)
ZEN_INFO("scrubbed {} chunks ({}) in '{}'", ChunkCount.load(), NiceBytes(ChunkBytes.load()), m_RootDirectory / m_ContainerBaseName);
}
-void
-CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("CasContainer::CollectGarbage");
-
- if (GcCtx.SkipCid())
- {
- return;
- }
-
- // It collects all the blocks that we want to delete chunks from. For each such
- // block we keep a list of chunks to retain and a list of chunks to delete.
- //
- // If there is a block that we are currently writing to, that block is omitted
- // from the garbage collection.
- //
- // Next it will iterate over all blocks that we want to remove chunks from.
- // If the block is empty after removal of chunks we mark the block as pending
- // delete - we want to delete it as soon as there are no IoBuffers using the
- // block file.
- // Once complete we update the m_LocationMap by removing the chunks.
- //
- // If the block is non-empty we write out the chunks we want to keep to a new
- // block file (creating new block files as needed).
- //
- // We update the index as we complete each new block file. This makes it possible
- // to break the GC if we want to limit time for execution.
- //
- // GC can very parallell to regular operation - it will block while taking
- // a snapshot of the current m_LocationMap state and while moving blocks it will
- // do a blocking operation and update the m_LocationMap after each new block is
- // written and figuring out the path to the next new block.
-
- ZEN_DEBUG("collecting garbage from '{}'", m_RootDirectory / m_ContainerBaseName);
-
- uint64_t WriteBlockTimeUs = 0;
- uint64_t WriteBlockLongestTimeUs = 0;
- uint64_t ReadBlockTimeUs = 0;
- uint64_t ReadBlockLongestTimeUs = 0;
-
- LocationMap_t LocationMap;
- std::vector<BlockStoreDiskLocation> Locations;
- BlockStore::ReclaimSnapshotState BlockStoreState;
- {
- ZEN_TRACE_CPU("CasContainer::CollectGarbage::State");
-
- RwLock::SharedLockScope ___(m_LocationMapLock);
- Stopwatch Timer;
- const auto ____ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- ReadBlockTimeUs += ElapsedUs;
- ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
- });
- LocationMap = m_LocationMap;
- Locations = m_Locations;
- BlockStoreState = m_BlockStore.GetReclaimSnapshotState();
- }
-
- uint64_t TotalChunkCount = LocationMap.size();
-
- std::vector<IoHash> TotalChunkHashes;
- TotalChunkHashes.reserve(TotalChunkCount);
- for (const auto& Entry : LocationMap)
- {
- TotalChunkHashes.push_back(Entry.first);
- }
-
- std::vector<BlockStoreLocation> ChunkLocations;
- BlockStore::ChunkIndexArray KeepChunkIndexes;
- std::vector<IoHash> ChunkIndexToChunkHash;
- ChunkLocations.reserve(TotalChunkCount);
- KeepChunkIndexes.reserve(TotalChunkCount);
- ChunkIndexToChunkHash.reserve(TotalChunkCount);
-
- {
- ZEN_TRACE_CPU("CasContainer::CollectGarbage::Filter");
- GcCtx.FilterCids(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
- auto KeyIt = LocationMap.find(ChunkHash);
- const BlockStoreDiskLocation& DiskLocation = Locations[KeyIt->second];
- BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
- size_t ChunkIndex = ChunkLocations.size();
-
- ChunkLocations.push_back(Location);
- ChunkIndexToChunkHash.push_back(ChunkHash);
- if (Keep)
- {
- KeepChunkIndexes.push_back(ChunkIndex);
- }
- });
- }
-
- const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
- if (!PerformDelete)
- {
- m_BlockStore.ReclaimSpace(BlockStoreState, ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, true);
- return;
- }
-
- std::vector<IoHash> DeletedChunks;
- m_BlockStore.ReclaimSpace(
- BlockStoreState,
- ChunkLocations,
- KeepChunkIndexes,
- m_PayloadAlignment,
- false,
- [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& RemovedChunks) {
- std::vector<CasDiskIndexEntry> LogEntries;
- LogEntries.reserve(MovedChunks.size() + RemovedChunks.size());
- {
- RwLock::ExclusiveLockScope __(m_LocationMapLock);
- Stopwatch Timer;
- const auto ____ = MakeGuard([&] {
- uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
- WriteBlockTimeUs += ElapsedUs;
- WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
- });
- for (const auto& Entry : MovedChunks)
- {
- size_t ChunkIndex = Entry.first;
- const BlockStoreLocation& NewLocation = Entry.second;
- const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- size_t LocationIndex = m_LocationMap[ChunkHash];
- BlockStoreDiskLocation& Location = m_Locations[LocationIndex];
- if (Locations[LocationMap[ChunkHash]] != Location)
- {
- // Entry has been updated while GC was running, ignore the move
- continue;
- }
- Location = {NewLocation, m_PayloadAlignment};
- LogEntries.push_back({.Key = ChunkHash, .Location = Location});
- }
- for (const size_t ChunkIndex : RemovedChunks)
- {
- const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- size_t LocationIndex = m_LocationMap[ChunkHash];
- const BlockStoreDiskLocation& Location = Locations[LocationIndex];
- if (Locations[LocationMap[ChunkHash]] != Location)
- {
- // Entry has been updated while GC was running, ignore the delete
- continue;
- }
- LogEntries.push_back({.Key = ChunkHash, .Location = Location, .Flags = CasDiskIndexEntry::kTombstone});
- m_LocationMap.erase(ChunkHash);
- DeletedChunks.push_back(ChunkHash);
- }
- }
-
- m_CasLog.Append(LogEntries);
- m_CasLog.Flush();
- },
- [&GcCtx]() { return GcCtx.ClaimGCReserve(); });
-
- if (!DeletedChunks.empty())
- {
- // Clean up m_Locations vectors
- RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock);
- CompactIndex(IndexLock);
- }
- GcCtx.AddDeletedCids(DeletedChunks);
-}
-
class CasContainerStoreCompactor : public GcStoreCompactor
{
public:
@@ -1456,412 +1295,7 @@ TEST_CASE("compactcas.compact.totalsize")
}
}
-TEST_CASE("compactcas.gc.basic")
-{
- ScopedTemporaryDirectory TempDir;
-
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true);
-
- IoBuffer Chunk = CreateRandomBlob(128);
- IoHash ChunkHash = IoHash::HashBuffer(Chunk);
-
- const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash);
- CHECK(InsertResult.New);
- Cas.Flush();
-
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
-
- Cas.CollectGarbage(GcCtx);
-
- CHECK(!Cas.HaveChunk(ChunkHash));
-}
-
-TEST_CASE("compactcas.gc.removefile")
-{
- ScopedTemporaryDirectory TempDir;
-
- IoBuffer Chunk = CreateRandomBlob(128);
- IoHash ChunkHash = IoHash::HashBuffer(Chunk);
- {
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, true);
-
- const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash);
- CHECK(InsertResult.New);
- const CasStore::InsertResult InsertResultDup = Cas.InsertChunk(Chunk, ChunkHash);
- CHECK(!InsertResultDup.New);
- Cas.Flush();
- }
-
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, false);
-
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
-
- Cas.CollectGarbage(GcCtx);
-
- CHECK(!Cas.HaveChunk(ChunkHash));
-}
-
-TEST_CASE("compactcas.gc.compact")
-{
- {
- ScopedTemporaryDirectory TempDir;
-
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "cb", 2048, 1 << 4, true);
-
- uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5};
- std::vector<IoBuffer> Chunks;
- Chunks.reserve(9);
- for (uint64_t Size : ChunkSizes)
- {
- Chunks.push_back(CreateRandomBlob(Size));
- }
-
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(9);
- for (const IoBuffer& Chunk : Chunks)
- {
- ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
- }
-
- CHECK(Cas.InsertChunk(Chunks[0], ChunkHashes[0]).New);
- CHECK(Cas.InsertChunk(Chunks[1], ChunkHashes[1]).New);
- CHECK(Cas.InsertChunk(Chunks[2], ChunkHashes[2]).New);
- CHECK(Cas.InsertChunk(Chunks[3], ChunkHashes[3]).New);
- CHECK(Cas.InsertChunk(Chunks[4], ChunkHashes[4]).New);
- CHECK(Cas.InsertChunk(Chunks[5], ChunkHashes[5]).New);
- CHECK(Cas.InsertChunk(Chunks[6], ChunkHashes[6]).New);
- CHECK(Cas.InsertChunk(Chunks[7], ChunkHashes[7]).New);
- CHECK(Cas.InsertChunk(Chunks[8], ChunkHashes[8]).New);
-
- CHECK(Cas.HaveChunk(ChunkHashes[0]));
- CHECK(Cas.HaveChunk(ChunkHashes[1]));
- CHECK(Cas.HaveChunk(ChunkHashes[2]));
- CHECK(Cas.HaveChunk(ChunkHashes[3]));
- CHECK(Cas.HaveChunk(ChunkHashes[4]));
- CHECK(Cas.HaveChunk(ChunkHashes[5]));
- CHECK(Cas.HaveChunk(ChunkHashes[6]));
- CHECK(Cas.HaveChunk(ChunkHashes[7]));
- CHECK(Cas.HaveChunk(ChunkHashes[8]));
-
- auto ValidateChunkExists = [&](size_t Index) {
- IoBuffer Chunk = Cas.FindChunk(ChunkHashes[Index]);
- bool Exists = !!Chunk;
- CHECK(Exists);
- IoHash Hash = IoHash::HashBuffer(Chunk);
- if (ChunkHashes[Index] != Hash)
- {
- CHECK(fmt::format("{}", ChunkHashes[Index]) == fmt::format("{}", Hash));
- }
- };
-
- // Keep first and last
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
-
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[0]);
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- Cas.Flush();
- Cas.CollectGarbage(GcCtx);
-
- CHECK(Cas.HaveChunk(ChunkHashes[0]));
- CHECK(!Cas.HaveChunk(ChunkHashes[1]));
- CHECK(!Cas.HaveChunk(ChunkHashes[2]));
- CHECK(!Cas.HaveChunk(ChunkHashes[3]));
- CHECK(!Cas.HaveChunk(ChunkHashes[4]));
- CHECK(!Cas.HaveChunk(ChunkHashes[5]));
- CHECK(!Cas.HaveChunk(ChunkHashes[6]));
- CHECK(!Cas.HaveChunk(ChunkHashes[7]));
- CHECK(Cas.HaveChunk(ChunkHashes[8]));
-
- ValidateChunkExists(0);
- ValidateChunkExists(8);
-
- Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
- Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
- Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
- Cas.InsertChunk(Chunks[4], ChunkHashes[4]);
- Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
- Cas.InsertChunk(Chunks[6], ChunkHashes[6]);
- Cas.InsertChunk(Chunks[7], ChunkHashes[7]);
- }
-
- // Keep last
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- Cas.Flush();
- Cas.CollectGarbage(GcCtx);
-
- CHECK(!Cas.HaveChunk(ChunkHashes[0]));
- CHECK(!Cas.HaveChunk(ChunkHashes[1]));
- CHECK(!Cas.HaveChunk(ChunkHashes[2]));
- CHECK(!Cas.HaveChunk(ChunkHashes[3]));
- CHECK(!Cas.HaveChunk(ChunkHashes[4]));
- CHECK(!Cas.HaveChunk(ChunkHashes[5]));
- CHECK(!Cas.HaveChunk(ChunkHashes[6]));
- CHECK(!Cas.HaveChunk(ChunkHashes[7]));
- CHECK(Cas.HaveChunk(ChunkHashes[8]));
-
- ValidateChunkExists(8);
-
- Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
- Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
- Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
- Cas.InsertChunk(Chunks[4], ChunkHashes[4]);
- Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
- Cas.InsertChunk(Chunks[6], ChunkHashes[6]);
- Cas.InsertChunk(Chunks[7], ChunkHashes[7]);
- }
-
- // Keep mixed
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[1]);
- KeepChunks.push_back(ChunkHashes[4]);
- KeepChunks.push_back(ChunkHashes[7]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- Cas.Flush();
- Cas.CollectGarbage(GcCtx);
-
- CHECK(!Cas.HaveChunk(ChunkHashes[0]));
- CHECK(Cas.HaveChunk(ChunkHashes[1]));
- CHECK(!Cas.HaveChunk(ChunkHashes[2]));
- CHECK(!Cas.HaveChunk(ChunkHashes[3]));
- CHECK(Cas.HaveChunk(ChunkHashes[4]));
- CHECK(!Cas.HaveChunk(ChunkHashes[5]));
- CHECK(!Cas.HaveChunk(ChunkHashes[6]));
- CHECK(Cas.HaveChunk(ChunkHashes[7]));
- CHECK(!Cas.HaveChunk(ChunkHashes[8]));
-
- ValidateChunkExists(1);
- ValidateChunkExists(4);
- ValidateChunkExists(7);
-
- Cas.InsertChunk(Chunks[0], ChunkHashes[0]);
- Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
- Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
- Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
- Cas.InsertChunk(Chunks[6], ChunkHashes[6]);
- Cas.InsertChunk(Chunks[8], ChunkHashes[8]);
- }
-
- // Keep multiple at end
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[6]);
- KeepChunks.push_back(ChunkHashes[7]);
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- Cas.Flush();
- Cas.CollectGarbage(GcCtx);
-
- CHECK(!Cas.HaveChunk(ChunkHashes[0]));
- CHECK(!Cas.HaveChunk(ChunkHashes[1]));
- CHECK(!Cas.HaveChunk(ChunkHashes[2]));
- CHECK(!Cas.HaveChunk(ChunkHashes[3]));
- CHECK(!Cas.HaveChunk(ChunkHashes[4]));
- CHECK(!Cas.HaveChunk(ChunkHashes[5]));
- CHECK(Cas.HaveChunk(ChunkHashes[6]));
- CHECK(Cas.HaveChunk(ChunkHashes[7]));
- CHECK(Cas.HaveChunk(ChunkHashes[8]));
-
- ValidateChunkExists(6);
- ValidateChunkExists(7);
- ValidateChunkExists(8);
-
- Cas.InsertChunk(Chunks[0], ChunkHashes[0]);
- Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
- Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
- Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
- Cas.InsertChunk(Chunks[4], ChunkHashes[4]);
- Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
- }
-
- // Keep every other
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[0]);
- KeepChunks.push_back(ChunkHashes[2]);
- KeepChunks.push_back(ChunkHashes[4]);
- KeepChunks.push_back(ChunkHashes[6]);
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- Cas.Flush();
- Cas.CollectGarbage(GcCtx);
-
- CHECK(Cas.HaveChunk(ChunkHashes[0]));
- CHECK(!Cas.HaveChunk(ChunkHashes[1]));
- CHECK(Cas.HaveChunk(ChunkHashes[2]));
- CHECK(!Cas.HaveChunk(ChunkHashes[3]));
- CHECK(Cas.HaveChunk(ChunkHashes[4]));
- CHECK(!Cas.HaveChunk(ChunkHashes[5]));
- CHECK(Cas.HaveChunk(ChunkHashes[6]));
- CHECK(!Cas.HaveChunk(ChunkHashes[7]));
- CHECK(Cas.HaveChunk(ChunkHashes[8]));
-
- ValidateChunkExists(0);
- ValidateChunkExists(2);
- ValidateChunkExists(4);
- ValidateChunkExists(6);
- ValidateChunkExists(8);
-
- Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
- Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
- Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
- Cas.InsertChunk(Chunks[7], ChunkHashes[7]);
- }
-
- // Verify that we nicely appended blocks even after all GC operations
- ValidateChunkExists(0);
- ValidateChunkExists(1);
- ValidateChunkExists(2);
- ValidateChunkExists(3);
- ValidateChunkExists(4);
- ValidateChunkExists(5);
- ValidateChunkExists(6);
- ValidateChunkExists(7);
- ValidateChunkExists(8);
- }
-}
-
-TEST_CASE("compactcas.gc.deleteblockonopen")
-{
- ScopedTemporaryDirectory TempDir;
-
- uint64_t ChunkSizes[20] = {128, 541, 311, 181, 218, 37, 4, 397, 5, 92, 551, 721, 31, 92, 16, 99, 131, 41, 541, 84};
- std::vector<IoBuffer> Chunks;
- Chunks.reserve(20);
- for (uint64_t Size : ChunkSizes)
- {
- Chunks.push_back(CreateRandomBlob(Size));
- }
-
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(20);
- for (const IoBuffer& Chunk : Chunks)
- {
- ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
- }
-
- {
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "test", 1024, 16, true);
-
- for (size_t i = 0; i < 20; i++)
- {
- CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New);
- }
-
- // GC every other block
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- for (size_t i = 0; i < 20; i += 2)
- {
- KeepChunks.push_back(ChunkHashes[i]);
- }
- GcCtx.AddRetainedCids(KeepChunks);
-
- Cas.Flush();
- Cas.CollectGarbage(GcCtx);
-
- for (size_t i = 0; i < 20; i += 2)
- {
- CHECK(Cas.HaveChunk(ChunkHashes[i]));
- CHECK(!Cas.HaveChunk(ChunkHashes[i + 1]));
- CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i])));
- }
- }
- }
- {
- // Re-open
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "test", 1024, 16, false);
-
- for (size_t i = 0; i < 20; i += 2)
- {
- CHECK(Cas.HaveChunk(ChunkHashes[i]));
- CHECK(!Cas.HaveChunk(ChunkHashes[i + 1]));
- CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i])));
- }
- }
-}
-
-TEST_CASE("compactcas.gc.handleopeniobuffer")
-{
- ScopedTemporaryDirectory TempDir;
-
- uint64_t ChunkSizes[20] = {128, 541, 311, 181, 218, 37, 4, 397, 5, 92, 551, 721, 31, 92, 16, 99, 131, 41, 541, 84};
- std::vector<IoBuffer> Chunks;
- Chunks.reserve(20);
- for (const uint64_t& Size : ChunkSizes)
- {
- Chunks.push_back(CreateRandomBlob(Size));
- }
-
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(20);
- for (const IoBuffer& Chunk : Chunks)
- {
- ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
- }
-
- GcManager Gc;
- CasContainerStrategy Cas(Gc);
- Cas.Initialize(TempDir.Path(), "test", 1024, 16, true);
-
- for (size_t i = 0; i < 20; i++)
- {
- CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New);
- }
-
- IoBuffer RetainChunk = Cas.FindChunk(ChunkHashes[5]);
- Cas.Flush();
-
- // GC everything
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- Cas.CollectGarbage(GcCtx);
-
- for (size_t i = 0; i < 20; i++)
- {
- CHECK(!Cas.HaveChunk(ChunkHashes[i]));
- }
-
- CHECK(ChunkHashes[5] == IoHash::HashBuffer(RetainChunk));
-}
-
-TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType)
+TEST_CASE("compactcas.threadedinsert")
{
// for (uint32_t i = 0; i < 100; ++i)
{
@@ -2004,56 +1438,41 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType)
const tsl::robin_set<IoHash, IoHash::Hasher>& ChunksToDelete,
const std::vector<IoHash>& KeepHashes,
tsl::robin_set<IoHash, IoHash::Hasher>& GcChunkHashes) {
- if (GCV2::Enabled)
- {
- std::atomic_bool IsCancelledFlag = false;
- GcCtx Ctx = {.Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24),
- .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24),
- .CollectSmallObjects = true,
- .IsDeleteMode = true,
- .CompactBlockUsageThresholdPercent = 100},
- .IsCancelledFlag = IsCancelledFlag};
- GcReferenceStoreStats PrunerStats;
- GcReferencePruner* Pruner = Cas.CreateReferencePruner(Ctx, PrunerStats);
- CHECK(Pruner);
- HashKeySet Deleted;
- GcStats Stats;
- GcStoreCompactor* Compactor =
- Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> {
- std::vector<IoHash> Unreferenced;
- HashKeySet Retain;
- Retain.AddHashesToSet(KeepHashes);
- for (const IoHash& ChunkHash : References)
+ std::atomic_bool IsCancelledFlag = false;
+ GcCtx Ctx = {.Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24),
+ .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24),
+ .CollectSmallObjects = true,
+ .IsDeleteMode = true,
+ .CompactBlockUsageThresholdPercent = 100},
+ .IsCancelledFlag = IsCancelledFlag};
+ GcReferenceStoreStats PrunerStats;
+ GcReferencePruner* Pruner = Cas.CreateReferencePruner(Ctx, PrunerStats);
+ CHECK(Pruner);
+ HashKeySet Deleted;
+ GcStats Stats;
+ GcStoreCompactor* Compactor =
+ Pruner->RemoveUnreferencedData(Ctx, Stats, [&](std::span<IoHash> References) -> std::span<IoHash> {
+ std::vector<IoHash> Unreferenced;
+ HashKeySet Retain;
+ Retain.AddHashesToSet(KeepHashes);
+ for (const IoHash& ChunkHash : References)
+ {
+ if (!Retain.ContainsHash(ChunkHash))
{
- if (!Retain.ContainsHash(ChunkHash))
- {
- Unreferenced.push_back(ChunkHash);
- }
+ Unreferenced.push_back(ChunkHash);
}
- Deleted.AddHashesToSet(Unreferenced);
- return Unreferenced;
- });
- if (Compactor)
- {
- Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) {
- CHECK(ChunksToDelete.contains(ChunkHash));
- GcChunkHashes.erase(ChunkHash);
- });
- GcCompactStoreStats CompactStats;
- Compactor->CompactStore(Ctx, CompactStats, []() { return 0; });
- }
- }
- else
+ }
+ Deleted.AddHashesToSet(Unreferenced);
+ return Unreferenced;
+ });
+ if (Compactor)
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- GcCtx.AddRetainedCids(KeepHashes);
- Cas.CollectGarbage(GcCtx);
- const HashKeySet& Deleted = GcCtx.DeletedCids();
Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) {
CHECK(ChunksToDelete.contains(ChunkHash));
GcChunkHashes.erase(ChunkHash);
});
+ GcCompactStoreStats CompactStats;
+ Compactor->CompactStore(Ctx, CompactStats, []() { return 0; });
}
};
diff --git a/src/zenstore/compactcas.h b/src/zenstore/compactcas.h
index db6b4c914..44567e7a0 100644
--- a/src/zenstore/compactcas.h
+++ b/src/zenstore/compactcas.h
@@ -69,7 +69,6 @@ struct CasContainerStrategy final : public GcStorage, public GcReferenceStore
// GcStorage
virtual void ScrubStorage(ScrubContext& ScrubCtx) override;
- virtual void CollectGarbage(GcContext& GcCtx) override;
virtual GcStorageSize StorageSize() const override;
virtual std::string GetGcName(GcCtx& Ctx) override;
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 71eead9b2..57b42beb2 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1073,101 +1073,6 @@ FileCasStrategy::ScrubStorage(ScrubContext& Ctx)
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
-void
-FileCasStrategy::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("FileCas::CollectGarbage");
-
- ZEN_ASSERT(m_IsInitialized);
-
- if (GcCtx.SkipCid())
- {
- return;
- }
-
- ZEN_DEBUG("collecting garbage from {}", m_RootDirectory);
-
- std::vector<IoHash> ChunksToDelete;
- std::atomic<uint64_t> ChunksToDeleteBytes{0};
- std::atomic<uint64_t> ChunkCount{0}, ChunkBytes{0};
-
- std::vector<IoHash> CandidateCas;
- CandidateCas.resize(1);
-
- uint64_t DeletedCount = 0;
- uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed);
-
- Stopwatch TotalTimer;
- const auto _ = MakeGuard([&] {
- ZEN_DEBUG("garbage collect for '{}' DONE after {}, deleted {} out of {} files, removed {} out of {}",
- m_RootDirectory,
- NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()),
- DeletedCount,
- ChunkCount.load(),
- NiceBytes(OldTotalSize - m_TotalSize.load(std::memory_order::relaxed)),
- NiceBytes(OldTotalSize));
- });
-
- {
- ZEN_TRACE_CPU("FileCas::CollectGarbage::Filter");
- IterateChunks([&](const IoHash& Hash, uint64_t Size) {
- bool KeepThis = false;
- CandidateCas[0] = Hash;
- GcCtx.FilterCids(CandidateCas, [&](const IoHash& Hash) {
- ZEN_UNUSED(Hash);
- KeepThis = true;
- });
-
- if (!KeepThis)
- {
- ChunksToDelete.push_back(Hash);
- ChunksToDeleteBytes.fetch_add(Size);
- }
-
- ++ChunkCount;
- ChunkBytes.fetch_add(Size);
- });
- }
-
- // TODO, any entires we did not encounter during our IterateChunks should be removed from the index
-
- if (ChunksToDelete.empty())
- {
- ZEN_DEBUG("gc for '{}' SKIPPED, nothing to delete", m_RootDirectory);
- return;
- }
-
- ZEN_DEBUG("deleting file CAS garbage for '{}': {} out of {} chunks ({})",
- m_RootDirectory,
- ChunksToDelete.size(),
- ChunkCount.load(),
- NiceBytes(ChunksToDeleteBytes));
-
- if (GcCtx.IsDeletionMode() == false)
- {
- ZEN_DEBUG("NOTE: not actually deleting anything since deletion is disabled");
-
- return;
- }
-
- for (const IoHash& Hash : ChunksToDelete)
- {
- ZEN_TRACE("deleting chunk {}", Hash);
-
- std::error_code Ec;
- DeleteChunk(Hash, Ec);
-
- if (Ec)
- {
- ZEN_WARN("gc for '{}' failed to delete file for chunk {}: '{}'", m_RootDirectory, Hash, Ec.message());
- continue;
- }
- DeletedCount++;
- }
-
- GcCtx.AddDeletedCids(ChunksToDelete);
-}
-
GcStorageSize
FileCasStrategy::StorageSize() const
{
@@ -1834,81 +1739,6 @@ TEST_CASE("cas.file.move")
# endif
}
-TEST_CASE("cas.file.gc")
-{
- // specifying an absolute path here can be helpful when using procmon to dig into things
- ScopedTemporaryDirectory TempDir; // {"d:\\filecas_testdir"};
-
- GcManager Gc;
- FileCasStrategy FileCas(Gc);
- FileCas.Initialize(TempDir.Path() / "cas", /* IsNewStore */ true);
-
- const int kIterationCount = 100;
- std::vector<IoHash> Keys{kIterationCount};
-
- auto InsertChunks = [&] {
- for (int i = 0; i < kIterationCount; ++i)
- {
- CbObjectWriter Cbo;
- Cbo << "id" << i;
- CbObject Obj = Cbo.Save();
-
- IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer();
- IoHash Hash = IoHash::HashBuffer(ObjBuffer);
-
- FileCas.InsertChunk(ObjBuffer, Hash);
-
- Keys[i] = Hash;
- }
- };
-
- // Drop everything
-
- {
- InsertChunks();
-
- GcContext Ctx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- FileCas.CollectGarbage(Ctx);
-
- for (const IoHash& Key : Keys)
- {
- IoBuffer Chunk = FileCas.FindChunk(Key);
-
- CHECK(!Chunk);
- }
- }
-
- // Keep roughly half of the chunks
-
- {
- InsertChunks();
-
- GcContext Ctx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
-
- for (const IoHash& Key : Keys)
- {
- if (Key.Hash[0] & 1)
- {
- Ctx.AddRetainedCids(std::vector<IoHash>{Key});
- }
- }
-
- FileCas.CollectGarbage(Ctx);
-
- for (const IoHash& Key : Keys)
- {
- if (Key.Hash[0] & 1)
- {
- CHECK(FileCas.FindChunk(Key));
- }
- else
- {
- CHECK(!FileCas.FindChunk(Key));
- }
- }
- }
-}
-
#endif
void
diff --git a/src/zenstore/filecas.h b/src/zenstore/filecas.h
index 07fc36954..ff7126325 100644
--- a/src/zenstore/filecas.h
+++ b/src/zenstore/filecas.h
@@ -44,7 +44,6 @@ struct FileCasStrategy final : public GcStorage, public GcReferenceStore
WorkerThreadPool* OptionalWorkerPool);
void Flush();
virtual void ScrubStorage(ScrubContext& ScrubCtx) override;
- virtual void CollectGarbage(GcContext& GcCtx) override;
virtual GcStorageSize StorageSize() const override;
virtual std::string GetGcName(GcCtx& Ctx) override;
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 1276b9b4c..981ba15cb 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -196,152 +196,6 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object)
//////////////////////////////////////////////////////////////////////////
-struct GcContext::GcState
-{
- using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>;
-
- CacheKeyContexts m_ExpiredCacheKeys;
- HashKeySet m_RetainedCids;
- HashKeySet m_DeletedCids;
- GcClock::TimePoint m_CacheExpireTime;
- GcClock::TimePoint m_ProjectStoreExpireTime;
- bool m_DeletionMode = true;
- bool m_CollectSmallObjects = false;
- bool m_SkipCid = false;
-
- std::filesystem::path DiskReservePath;
-};
-
-GcContext::GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime)
-: m_State(std::make_unique<GcState>())
-{
- m_State->m_CacheExpireTime = CacheExpireTime;
- m_State->m_ProjectStoreExpireTime = ProjectStoreExpireTime;
-}
-
-GcContext::~GcContext()
-{
-}
-
-void
-GcContext::AddRetainedCids(std::span<const IoHash> Cids)
-{
- m_State->m_RetainedCids.AddHashesToSet(Cids);
-}
-
-void
-GcContext::SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys)
-{
- m_State->m_ExpiredCacheKeys[CacheKeyContext] = std::move(ExpiredKeys);
-}
-
-void
-GcContext::IterateCids(std::function<void(const IoHash&)> Callback)
-{
- m_State->m_RetainedCids.IterateHashes([&](const IoHash& Hash) { Callback(Hash); });
-}
-
-void
-GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc)
-{
- m_State->m_RetainedCids.FilterHashes(Cid, [&](const IoHash& Hash) { KeepFunc(Hash); });
-}
-
-void
-GcContext::FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc)
-{
- m_State->m_RetainedCids.FilterHashes(Cid, std::move(FilterFunc));
-}
-
-void
-GcContext::AddDeletedCids(std::span<const IoHash> Cas)
-{
- m_State->m_DeletedCids.AddHashesToSet(Cas);
-}
-
-const HashKeySet&
-GcContext::DeletedCids()
-{
- return m_State->m_DeletedCids;
-}
-
-std::span<const IoHash>
-GcContext::ExpiredCacheKeys(const std::string& CacheKeyContext) const
-{
- return m_State->m_ExpiredCacheKeys[CacheKeyContext];
-}
-
-bool
-GcContext::SkipCid() const
-{
- return m_State->m_SkipCid;
-}
-
-void
-GcContext::SetSkipCid(bool NewState)
-{
- m_State->m_SkipCid = NewState;
-}
-
-bool
-GcContext::IsDeletionMode() const
-{
- return m_State->m_DeletionMode;
-}
-
-void
-GcContext::SetDeletionMode(bool NewState)
-{
- m_State->m_DeletionMode = NewState;
-}
-
-bool
-GcContext::CollectSmallObjects() const
-{
- return m_State->m_CollectSmallObjects;
-}
-
-void
-GcContext::CollectSmallObjects(bool NewState)
-{
- m_State->m_CollectSmallObjects = NewState;
-}
-
-GcClock::TimePoint
-GcContext::CacheExpireTime() const
-{
- return m_State->m_CacheExpireTime;
-}
-
-GcClock::TimePoint
-GcContext::ProjectStoreExpireTime() const
-{
- return m_State->m_ProjectStoreExpireTime;
-}
-
-void
-GcContext::DiskReservePath(const std::filesystem::path& Path)
-{
- m_State->DiskReservePath = Path;
-}
-
-uint64_t
-GcContext::ClaimGCReserve()
-{
- if (!std::filesystem::is_regular_file(m_State->DiskReservePath))
- {
- return 0;
- }
- uint64_t ReclaimedSize = std::filesystem::file_size(m_State->DiskReservePath);
- if (std::filesystem::remove(m_State->DiskReservePath))
- {
- return ReclaimedSize;
- }
- return 0;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
GcManager::GcManager() : m_Log(logging::Get("gc"))
{
}
@@ -1287,20 +1141,6 @@ GcManager::SetCancelGC(bool CancelFlag)
}
void
-GcManager::AddGcContributor(GcContributor* Contributor)
-{
- RwLock::ExclusiveLockScope _(m_Lock);
- m_GcContribs.push_back(Contributor);
-}
-
-void
-GcManager::RemoveGcContributor(GcContributor* Contributor)
-{
- RwLock::ExclusiveLockScope _(m_Lock);
- std::erase_if(m_GcContribs, [&](GcContributor* $) { return $ == Contributor; });
-}
-
-void
GcManager::AddGcStorage(GcStorage* Storage)
{
ZEN_ASSERT(Storage != nullptr);
@@ -1327,58 +1167,6 @@ GcManager::ScrubStorage(ScrubContext& GcCtx)
}
GcStorageSize
-GcManager::CollectGarbage(GcContext& GcCtx)
-{
- ZEN_TRACE_CPU("Gc::CollectGarbage");
-
- GcStorageSize GCTotalSizeDiff;
-
- RwLock::SharedLockScope _(m_Lock);
-
- // First gather reference set
- {
- ZEN_TRACE_CPU("Gc::CollectGarbage::GatherReferences");
- Stopwatch Timer;
- const auto Guard = MakeGuard([&] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
- for (GcContributor* Contributor : m_GcContribs)
- {
- if (CheckGCCancel())
- {
- return GCTotalSizeDiff;
- }
- Contributor->GatherReferences(GcCtx);
- }
- }
-
- // Then trim storage
- {
- ZEN_TRACE_CPU("Gc::CollectGarbage::CollectGarbage");
-
- Stopwatch Timer;
- const auto Guard = MakeGuard([&] {
- ZEN_INFO("collected garbage in {}. Removed {} disk space, {} memory",
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
- NiceBytes(GCTotalSizeDiff.DiskSize),
- NiceBytes(GCTotalSizeDiff.MemorySize));
- });
- for (GcStorage* Storage : m_GcStorage)
- {
- if (CheckGCCancel())
- {
- break;
- }
-
- const auto PreSize = Storage->StorageSize();
- Storage->CollectGarbage(GcCtx);
- const auto PostSize = Storage->StorageSize();
- GCTotalSizeDiff.DiskSize += PreSize.DiskSize > PostSize.DiskSize ? PreSize.DiskSize - PostSize.DiskSize : 0;
- GCTotalSizeDiff.MemorySize += PreSize.MemorySize > PostSize.MemorySize ? PreSize.MemorySize - PostSize.MemorySize : 0;
- }
- }
- return GCTotalSizeDiff;
-}
-
-GcStorageSize
GcManager::TotalStorageSize() const
{
ZEN_TRACE_CPU("Gc::TotalStorageSize");
@@ -2319,14 +2107,10 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
try
{
- GcContext GcCtx(CacheExpireTime, ProjectStoreExpireTime);
- GcCtx.SetDeletionMode(Delete);
- GcCtx.SetSkipCid(SkipCid);
- GcCtx.CollectSmallObjects(CollectSmallObjects);
- GcCtx.DiskReservePath(m_Config.RootDirectory / "reserve.gc");
+ const std::filesystem::path DiskReservePath = m_Config.RootDirectory / "reserve.gc";
auto ReclaimDiskReserve = [&]() {
- std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+ std::error_code Ec = CreateGCReserve(DiskReservePath, m_Config.DiskReserveSize);
if (Ec)
{
ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'",
@@ -2344,7 +2128,20 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
if (m_AreDiskWritesBlocked.load())
{
// We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC
- uint64_t ReleasedSpace = GcCtx.ClaimGCReserve();
+ auto ClaimDiskReserve = [&]() -> uint64_t {
+ if (!std::filesystem::is_regular_file(DiskReservePath))
+ {
+ return 0;
+ }
+ uint64_t ReclaimedSize = std::filesystem::file_size(DiskReservePath);
+ if (std::filesystem::remove(DiskReservePath))
+ {
+ return ReclaimedSize;
+ }
+ return 0;
+ };
+
+ uint64_t ReleasedSpace = ClaimDiskReserve();
if (ReleasedSpace == 0)
{
ZEN_WARN(
@@ -2364,23 +2161,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
GcStorageSize Diff;
switch (UseGCVersion)
{
- case GcVersion::kV1:
- ZEN_INFO(
- "GCV1: Garbage collection STARTING, small objects gc {}, {} CAS. Cache cutoff time {}, project store cutoff time "
- "{}",
- GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv,
- SkipCid ? "skip"sv : "include"sv,
- CacheExpireTime,
- ProjectStoreExpireTime);
- Diff = m_GcManager.CollectGarbage(GcCtx);
- if (SkipCid)
- {
- m_LastLightweightGCV2Result.reset();
- }
- else
- {
- m_LastFullGCV2Result.reset();
- }
+ case GcVersion::kV1_Deprecated:
+ ZEN_WARN("GCV1: Depreated - no GC will be executed");
break;
case GcVersion::kV2:
{
@@ -2394,7 +2176,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
.Verbose = Verbose,
.SingleThread = SingleThreaded,
.CompactBlockUsageThresholdPercent = CompactBlockUsageThresholdPercent,
- .DiskReservePath = m_Config.RootDirectory / "reserve.gc",
+ .DiskReservePath = DiskReservePath,
.AttachmentRangeMin = AttachmentRangeMin,
.AttachmentRangeMax = AttachmentRangeMax,
.StoreCacheAttachmentMetaData = StoreCacheAttachmentMetaData,
@@ -2609,248 +2391,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
#if ZEN_WITH_TESTS
-namespace gc::impl {
- static CompressedBuffer Compress(IoBuffer Buffer)
- {
- return CompressedBuffer::Compress(SharedBuffer::MakeView(Buffer.GetData(), Buffer.GetSize()));
- }
-} // namespace gc::impl
-
-TEST_CASE("gc.basic")
-{
- using namespace gc::impl;
-
- ScopedTemporaryDirectory TempDir;
-
- CidStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path() / "cas";
-
- GcManager Gc;
- CidStore CidStore(Gc);
-
- CidStore.Initialize(CasConfig);
-
- IoBuffer Chunk = CreateRandomBlob(128);
- auto CompressedChunk = Compress(Chunk);
-
- const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash());
- CHECK(InsertResult.New);
-
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
-
- CidStore.Flush();
- Gc.CollectGarbage(GcCtx);
-
- CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash()));
-}
-
-TEST_CASE("gc.full")
-{
- using namespace gc::impl;
-
- ScopedTemporaryDirectory TempDir;
-
- CidStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path() / "cas";
-
- GcManager Gc;
- std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc);
-
- CasStore->Initialize(CasConfig);
-
- uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5};
- IoBuffer Chunks[9] = {CreateRandomBlob(ChunkSizes[0]),
- CreateRandomBlob(ChunkSizes[1]),
- CreateRandomBlob(ChunkSizes[2]),
- CreateRandomBlob(ChunkSizes[3]),
- CreateRandomBlob(ChunkSizes[4]),
- CreateRandomBlob(ChunkSizes[5]),
- CreateRandomBlob(ChunkSizes[6]),
- CreateRandomBlob(ChunkSizes[7]),
- CreateRandomBlob(ChunkSizes[8])};
- IoHash ChunkHashes[9] = {
- IoHash::HashBuffer(Chunks[0].Data(), Chunks[0].Size()),
- IoHash::HashBuffer(Chunks[1].Data(), Chunks[1].Size()),
- IoHash::HashBuffer(Chunks[2].Data(), Chunks[2].Size()),
- IoHash::HashBuffer(Chunks[3].Data(), Chunks[3].Size()),
- IoHash::HashBuffer(Chunks[4].Data(), Chunks[4].Size()),
- IoHash::HashBuffer(Chunks[5].Data(), Chunks[5].Size()),
- IoHash::HashBuffer(Chunks[6].Data(), Chunks[6].Size()),
- IoHash::HashBuffer(Chunks[7].Data(), Chunks[7].Size()),
- IoHash::HashBuffer(Chunks[8].Data(), Chunks[8].Size()),
- };
-
- CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
- CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
- CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
- CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
- CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
- CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
- CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
- CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
- CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
-
- CidStoreSize InitialSize = CasStore->TotalSize();
-
- // Keep first and last
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
-
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[0]);
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- CasStore->Flush();
- Gc.CollectGarbage(GcCtx);
-
- CHECK(CasStore->ContainsChunk(ChunkHashes[0]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[7]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
-
- CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
- }
-
- CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
- CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
- CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
- CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
- CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
- CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
- CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
-
- // Keep last
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- CasStore->Flush();
- Gc.CollectGarbage(GcCtx);
-
- CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[7]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
-
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
-
- CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
- CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
- CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
- CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
- CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
- CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
- CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
- }
-
- // Keep mixed
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[1]);
- KeepChunks.push_back(ChunkHashes[4]);
- KeepChunks.push_back(ChunkHashes[7]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- CasStore->Flush();
- Gc.CollectGarbage(GcCtx);
-
- CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[1]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[4]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[7]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[8]));
-
- CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1])));
- CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4])));
- CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
-
- CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
- CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
- CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
- CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
- CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
- CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
- }
-
- // Keep multiple at end
- {
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
- std::vector<IoHash> KeepChunks;
- KeepChunks.push_back(ChunkHashes[6]);
- KeepChunks.push_back(ChunkHashes[7]);
- KeepChunks.push_back(ChunkHashes[8]);
- GcCtx.AddRetainedCids(KeepChunks);
-
- CasStore->Flush();
- Gc.CollectGarbage(GcCtx);
-
- CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
- CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[6]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[7]));
- CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
-
- CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6])));
- CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
-
- CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
- CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
- CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
- CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
- CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
- CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
- }
-
- // Verify that we nicely appended blocks even after all GC operations
- CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0])));
- CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1])));
- CHECK(ChunkHashes[2] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[2])));
- CHECK(ChunkHashes[3] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[3])));
- CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4])));
- CHECK(ChunkHashes[5] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[5])));
- CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6])));
- CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
-
- auto FinalSize = CasStore->TotalSize();
-
- CHECK_LE(InitialSize.TinySize, FinalSize.TinySize);
- CHECK_GE(InitialSize.TinySize + (1u << 28), FinalSize.TinySize);
-}
-
TEST_CASE("gc.diskusagewindow")
{
- using namespace gc::impl;
-
DiskUsageWindow Stats;
Stats.Append({.SampleTime = 0, .DiskUsage = 0}); // 0 0
Stats.Append({.SampleTime = 10, .DiskUsage = 10}); // 1 10
@@ -3008,35 +2550,6 @@ TEST_CASE("gc.diskusagewindow")
}
}
-TEST_CASE("scrub.basic")
-{
- using namespace gc::impl;
-
- ScopedTemporaryDirectory TempDir;
-
- CidStoreConfiguration CasConfig;
- CasConfig.RootDirectory = TempDir.Path() / "cas";
-
- GcManager Gc;
- CidStore CidStore(Gc);
-
- CidStore.Initialize(CasConfig);
-
- IoBuffer Chunk = CreateRandomBlob(128);
- auto CompressedChunk = Compress(Chunk);
-
- const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash());
- CHECK(InsertResult.New);
-
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
- GcCtx.CollectSmallObjects(true);
-
- CidStore.Flush();
- Gc.CollectGarbage(GcCtx);
-
- CHECK(!CidStore.ContainsChunk(CompressedChunk.DecodeRawHash()));
-}
-
TEST_CASE("gc.keepunusedreferences")
{
// Order is important, this is the order the hashes would be sorted by FilterReferences
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h
index a98ca5375..d4c2be73f 100644
--- a/src/zenstore/include/zenstore/blockstore.h
+++ b/src/zenstore/include/zenstore/blockstore.h
@@ -127,13 +127,12 @@ public:
typedef std::vector<std::pair<size_t, BlockStoreLocation>> MovedChunksArray;
typedef std::vector<size_t> ChunkIndexArray;
- typedef std::function<void(const MovedChunksArray& MovedChunks, const ChunkIndexArray& RemovedChunks)> ReclaimCallback;
- typedef std::function<bool(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback;
- typedef std::function<uint64_t()> ClaimDiskReserveCallback;
- typedef std::function<bool(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback;
- typedef std::function<bool(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback;
- typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback;
- typedef std::function<bool(uint32_t BlockIndex, std::span<const size_t> ChunkIndexes)> IterateChunksCallback;
+ typedef std::function<bool(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback;
+ typedef std::function<uint64_t()> ClaimDiskReserveCallback;
+ typedef std::function<bool(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback;
+ typedef std::function<bool(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback;
+ typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback;
+ typedef std::function<bool(uint32_t BlockIndex, std::span<const size_t> ChunkIndexes)> IterateChunksCallback;
struct BlockUsageInfo
{
@@ -162,16 +161,6 @@ public:
IoBuffer TryGetChunk(const BlockStoreLocation& Location) const;
void Flush(bool ForceNewBlock);
- ReclaimSnapshotState GetReclaimSnapshotState();
- void ReclaimSpace(
- const ReclaimSnapshotState& Snapshot,
- const std::vector<BlockStoreLocation>& ChunkLocations,
- const ChunkIndexArray& KeepChunkIndexes,
- uint32_t PayloadAlignment,
- bool DryRun,
- const ReclaimCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&) {},
- const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; });
-
bool IterateChunks(const std::span<const BlockStoreLocation>& ChunkLocations, const IterateChunksCallback& Callback);
bool IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index f38776b6e..f8ce8641c 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -183,8 +183,6 @@ public:
bool DropBucket(std::string_view Bucket);
void Flush();
void ScrubStorage(ScrubContext& Ctx);
- void GatherReferences(GcContext& GcCtx);
- void CollectGarbage(GcContext& GcCtx);
void DiscoverBuckets();
GcStorageSize StorageSize() const;
@@ -231,8 +229,6 @@ public:
bool Drop();
void Flush();
void ScrubStorage(ScrubContext& Ctx);
- void GatherReferences(GcContext& GcCtx);
- void CollectGarbage(GcContext& GcCtx);
RwLock::SharedLockScope GetGcReferencerLock();
bool GetReferences(GcCtx& Ctx, bool StateIsAlreadyLocked, std::vector<IoHash>& OutReferences);
diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h
index 9160db667..50e40042a 100644
--- a/src/zenstore/include/zenstore/cache/structuredcachestore.h
+++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h
@@ -49,7 +49,7 @@ class JobQueue;
*/
-class ZenCacheNamespace final : public GcStorage, public GcContributor
+class ZenCacheNamespace final : public GcStorage
{
public:
struct Configuration
@@ -104,12 +104,8 @@ public:
bool Drop();
void Flush();
- // GcContributor
- virtual void GatherReferences(GcContext& GcCtx) override;
-
// GcStorage
virtual void ScrubStorage(ScrubContext& ScrubCtx) override;
- virtual void CollectGarbage(GcContext& GcCtx) override;
virtual GcStorageSize StorageSize() const override;
Configuration GetConfig() const { return m_Configuration; }
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index b79f1b9df..e191a0930 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -283,64 +283,6 @@ public:
//////// End GC V2
-/** Garbage Collection context object
- */
-class GcContext
-{
-public:
- GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime);
- ~GcContext();
-
- void AddRetainedCids(std::span<const IoHash> Cid);
- void SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys);
-
- void IterateCids(std::function<void(const IoHash&)> Callback);
-
- void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc);
- void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc);
-
- void AddDeletedCids(std::span<const IoHash> Cas);
- const HashKeySet& DeletedCids();
-
- std::span<const IoHash> ExpiredCacheKeys(const std::string& CacheKeyContext) const;
-
- bool SkipCid() const;
- void SetSkipCid(bool NewState);
-
- bool IsDeletionMode() const;
- void SetDeletionMode(bool NewState);
-
- bool CollectSmallObjects() const;
- void CollectSmallObjects(bool NewState);
-
- GcClock::TimePoint CacheExpireTime() const;
- GcClock::TimePoint ProjectStoreExpireTime() const;
-
- void DiskReservePath(const std::filesystem::path& Path);
- uint64_t ClaimGCReserve();
-
-private:
- struct GcState;
-
- std::unique_ptr<GcState> m_State;
-};
-
-/** GC root contributor
-
- Higher level data structures provide roots for the garbage collector,
- which ultimately determine what is garbage and what data we need to
- retain.
-
- */
-class GcContributor
-{
-public:
- virtual void GatherReferences(GcContext& GcCtx) = 0;
-
-protected:
- virtual ~GcContributor() {}
-};
-
struct GcStorageSize
{
uint64_t DiskSize{};
@@ -353,7 +295,6 @@ class GcStorage
{
public:
virtual void ScrubStorage(ScrubContext& ScrubCtx) = 0;
- virtual void CollectGarbage(GcContext& GcCtx) = 0;
virtual GcStorageSize StorageSize() const = 0;
protected:
@@ -392,14 +333,10 @@ public:
//////// End GC V2
- void AddGcContributor(GcContributor* Contributor);
- void RemoveGcContributor(GcContributor* Contributor);
-
void AddGcStorage(GcStorage* Contributor);
void RemoveGcStorage(GcStorage* Contributor);
- GcStorageSize CollectGarbage(GcContext& GcCtx);
- void ScrubStorage(ScrubContext& GcCtx);
+ void ScrubStorage(ScrubContext& GcCtx);
GcStorageSize TotalStorageSize() const;
@@ -407,14 +344,13 @@ public:
void SetDiskWriteBlocker(const DiskWriteBlocker* Monitor) { m_DiskWriteBlocker = Monitor; }
private:
- bool CheckGCCancel() { return m_CancelGC.load(); }
- LoggerRef Log() { return m_Log; }
- LoggerRef m_Log;
- mutable RwLock m_Lock;
- std::vector<GcContributor*> m_GcContribs;
- std::vector<GcStorage*> m_GcStorage;
- CidStore* m_CidStore = nullptr;
- const DiskWriteBlocker* m_DiskWriteBlocker = nullptr;
+ bool CheckGCCancel() { return m_CancelGC.load(); }
+ LoggerRef Log() { return m_Log; }
+ LoggerRef m_Log;
+ mutable RwLock m_Lock;
+ std::vector<GcStorage*> m_GcStorage;
+ CidStore* m_CidStore = nullptr;
+ const DiskWriteBlocker* m_DiskWriteBlocker = nullptr;
std::vector<GcReferencer*> m_GcReferencers;
std::vector<GcReferenceLocker*> m_GcReferencerLockers;
@@ -432,7 +368,7 @@ enum class GcSchedulerStatus : uint32_t
enum class GcVersion : uint32_t
{
- kV1,
+ kV1_Deprecated,
kV2
};
@@ -449,7 +385,7 @@ struct GcSchedulerConfig
uint64_t DiskSizeSoftLimit = 0;
uint64_t MinimumFreeDiskSpaceToAllowWrites = 1ul << 28;
std::chrono::seconds LightweightInterval{};
- GcVersion UseGCVersion = GcVersion::kV1;
+ GcVersion UseGCVersion = GcVersion::kV2;
uint32_t CompactBlockUsageThresholdPercent = 90;
bool Verbose = false;
bool SingleThreaded = false;