aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/filecas.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-11-21 15:06:25 +0100
committerGitHub <[email protected]>2023-11-21 15:06:25 +0100
commit05178f7c18a48b21b9e260de282a86b91df26955 (patch)
tree25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/filecas.cpp
parentzen run command (#552) (diff)
downloadzen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz
zen-05178f7c18a48b21b9e260de282a86b91df26955.zip
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving the actual disk work out of deleting items in the index. - Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats. - Make compacting of stores non-parallel so it does not eat all the disk I/O when running GC.
Diffstat (limited to 'src/zenstore/filecas.cpp')
-rw-r--r--src/zenstore/filecas.cpp113
1 files changed, 61 insertions, 52 deletions
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index a72619e4b..6e432bc9d 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1331,35 +1331,31 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir)
return Entries;
};
-class FileCasStoreCompactor : public GcReferenceStoreCompactor
+class FileCasStoreCompactor : public GcStoreCompactor
{
public:
FileCasStoreCompactor(FileCasStrategy& Owner, std::vector<IoHash>&& ReferencesToClean)
: m_FileCasStrategy(Owner)
, m_ReferencesToClean(std::move(ReferencesToClean))
{
+ m_ReferencesToClean.shrink_to_fit();
}
- virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats)
+ virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>&)
{
- Stopwatch Timer;
- const auto _ = MakeGuard([&] {
- if (!Ctx.Settings.Verbose)
- {
- return;
- }
- ZEN_INFO("GCV2: filecas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}",
- m_FileCasStrategy.m_RootDirectory,
- Stats.Count,
- Stats.Pruned,
- Stats.Compacted,
- NiceBytes(Stats.RemovedDisk),
- NiceBytes(Stats.RemovedMemory),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
- });
- std::vector<IoHash> ReferencedCleaned;
- ReferencedCleaned.reserve(m_ReferencesToClean.size());
+ Stopwatch Timer;
+ const auto _ = MakeGuard([&] {
+ if (!Ctx.Settings.Verbose)
+ {
+ return;
+ }
+ ZEN_INFO("GCV2: filecas [COMPACT] '{}': RemovedDisk: {} in {}",
+ m_FileCasStrategy.m_RootDirectory,
+ NiceBytes(Stats.RemovedDisk),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+ size_t Skipped = 0;
for (const IoHash& ChunkHash : m_ReferencesToClean)
{
FileCasStrategy::ShardingHelper Name(m_FileCasStrategy.m_RootDirectory.c_str(), ChunkHash);
@@ -1372,7 +1368,9 @@ public:
}
if (Ctx.Settings.IsDeleteMode)
{
- ZEN_DEBUG("deleting CAS payload file '{}'", Name.ShardedPath.ToUtf8());
+ ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Deleting CAS payload file '{}'",
+ m_FileCasStrategy.m_RootDirectory,
+ Name.ShardedPath.ToUtf8());
std::error_code Ec;
uint64_t SizeOnDisk = std::filesystem::file_size(Name.ShardedPath.c_str(), Ec);
if (Ec)
@@ -1382,7 +1380,10 @@ public:
bool Existed = std::filesystem::remove(Name.ShardedPath.c_str(), Ec);
if (Ec)
{
- ZEN_WARN("failed deleting CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message());
+ ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed deleting CAS payload file '{}'. Reason '{}'",
+ m_FileCasStrategy.m_RootDirectory,
+ Name.ShardedPath.ToUtf8(),
+ Ec.message());
continue;
}
if (!Existed)
@@ -1397,18 +1398,28 @@ public:
bool Existed = std::filesystem::is_regular_file(Name.ShardedPath.c_str(), Ec);
if (Ec)
{
- ZEN_WARN("failed checking CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message());
+ ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed checking CAS payload file '{}'. Reason '{}'",
+ m_FileCasStrategy.m_RootDirectory,
+ Name.ShardedPath.ToUtf8(),
+ Ec.message());
continue;
}
if (!Existed)
{
continue;
}
+ Skipped++;
}
- ReferencedCleaned.push_back(ChunkHash);
}
}
- Stats.Compacted += ReferencedCleaned.size();
+
+ if (Skipped > 0)
+ {
+ ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Skipped deleting of {} eligible files", m_FileCasStrategy.m_RootDirectory, Skipped);
+ }
+
+ m_ReferencesToClean.clear();
+ m_ReferencesToClean.shrink_to_fit();
}
private:
@@ -1421,9 +1432,7 @@ class FileCasReferencePruner : public GcReferencePruner
public:
FileCasReferencePruner(FileCasStrategy& Owner, std::vector<IoHash>&& Cids) : m_FileCasStrategy(Owner), m_Cids(std::move(Cids)) {}
- virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx,
- GcReferenceStoreStats& Stats,
- const GetUnusedReferencesFunc& GetUnusedReferences)
+ virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences)
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -1431,23 +1440,29 @@ public:
{
return;
}
- ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}",
+ ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Unreferenced: {}, FreedMemory: {} in {}",
m_FileCasStrategy.m_RootDirectory,
- Stats.Count,
- Stats.Pruned,
- Stats.Compacted,
- NiceBytes(Stats.RemovedDisk),
- NiceBytes(Stats.RemovedMemory),
+ Stats.CheckedCount,
+ Stats.FoundCount,
+ NiceBytes(Stats.FreedMemory),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+ Stats.CheckedCount = m_Cids.size();
if (UnusedCids.empty())
{
// Nothing to collect
return nullptr;
}
+ Stats.FoundCount += UnusedCids.size();
+
+ if (!Ctx.Settings.IsDeleteMode)
+ {
+ return nullptr;
+ }
+
std::vector<IoHash> PrunedReferences;
PrunedReferences.reserve(UnusedCids.size());
{
@@ -1459,19 +1474,21 @@ public:
{
continue;
}
- if (Ctx.Settings.IsDeleteMode)
- {
- uint64_t FileSize = It->second.Size;
- m_FileCasStrategy.m_Index.erase(It);
- m_FileCasStrategy.m_CasLog.Append(
- {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize});
- m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed);
- }
+ uint64_t FileSize = It->second.Size;
+ m_FileCasStrategy.m_Index.erase(It);
+ m_FileCasStrategy.m_CasLog.Append(
+ {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize});
+ m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed);
PrunedReferences.push_back(ChunkHash);
+ Stats.DeletedCount++;
}
}
- Stats.Pruned += PrunedReferences.size();
+ if (PrunedReferences.empty())
+ {
+ return nullptr;
+ }
+
return new FileCasStoreCompactor(m_FileCasStrategy, std::move(PrunedReferences));
}
@@ -1487,7 +1504,7 @@ FileCasStrategy::GetGcName(GcCtx&)
}
GcReferencePruner*
-FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats)
+FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&)
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -1495,14 +1512,7 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats)
{
return;
}
- ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}",
- m_RootDirectory,
- Stats.Count,
- Stats.Pruned,
- Stats.Compacted,
- NiceBytes(Stats.RemovedDisk),
- NiceBytes(Stats.RemovedMemory),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}' in {}", m_RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
std::vector<IoHash> CidsToCheck;
{
@@ -1517,7 +1527,6 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats)
CidsToCheck.push_back(It.first);
}
}
- Stats.Count += CidsToCheck.size();
return new FileCasReferencePruner(*this, std::move(CidsToCheck));
}