aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2023-11-21 15:06:25 +0100
committer GitHub <[email protected]> 2023-11-21 15:06:25 +0100
commit 05178f7c18a48b21b9e260de282a86b91df26955 (patch)
tree 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/compactcas.cpp
parent zen run command (#552) (diff)
download zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz
zen-05178f7c18a48b21b9e260de282a86b91df26955.zip
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving out the actual disk work from deleting items in the index. - Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats - Make Compacting of stores non-parallel to not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/compactcas.cpp')
-rw-r--r-- src/zenstore/compactcas.cpp 219
1 files changed, 106 insertions, 113 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 00a018948..7b8e930b3 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -553,21 +553,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
GcCtx.AddDeletedCids(DeletedChunks);
}
-class CasContainerStoreCompactor : public GcReferenceStoreCompactor
+class CasContainerStoreCompactor : public GcStoreCompactor
{
public:
- CasContainerStoreCompactor(CasContainerStrategy& Owner,
- BlockStoreCompactState&& CompactState,
- std::vector<IoHash>&& CompactStateKeys,
- std::vector<IoHash>&& PrunedKeys)
- : m_CasContainerStrategy(Owner)
- , m_CompactState(std::move(CompactState))
- , m_CompactStateKeys(std::move(CompactStateKeys))
- , m_PrunedKeys(std::move(PrunedKeys))
- {
- }
+ CasContainerStoreCompactor(CasContainerStrategy& Owner) : m_CasContainerStrategy(Owner) {}
- virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats)
+ virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -575,60 +566,109 @@ public:
{
return;
}
- ZEN_INFO("GCV2: compactcas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}",
+ ZEN_INFO("GCV2: compactcas [COMPACT] '{}': RemovedDisk: {} in {}",
m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName,
- Stats.Count,
- Stats.Pruned,
- Stats.Compacted,
NiceBytes(Stats.RemovedDisk),
- NiceBytes(Stats.RemovedMemory),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
- if (Ctx.Settings.IsDeleteMode && Ctx.Settings.CollectSmallObjects)
+ if (Ctx.Settings.CollectSmallObjects)
{
- // Compact block store
- m_CasContainerStrategy.m_BlockStore.CompactBlocks(
- m_CompactState,
- m_CasContainerStrategy.m_PayloadAlignment,
- [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) {
- std::vector<CasDiskIndexEntry> MovedEntries;
- RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock);
- for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray)
+ std::unordered_map<uint32_t, uint64_t> BlockUsage;
+ {
+ RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock);
+ for (const auto& Entry : m_CasContainerStrategy.m_LocationMap)
+ {
+ size_t Index = Entry.second;
+ const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index];
+
+ uint32_t BlockIndex = Loc.GetBlockIndex();
+ uint64_t ChunkSize = RoundUp(Loc.GetSize(), m_CasContainerStrategy.m_PayloadAlignment);
+ auto It = BlockUsage.find(BlockIndex);
+ if (It == BlockUsage.end())
+ {
+ BlockUsage.insert_or_assign(BlockIndex, ChunkSize);
+ }
+ else
{
- size_t ChunkIndex = Moved.first;
- const IoHash& Key = m_CompactStateKeys[ChunkIndex];
+ It->second += ChunkSize;
+ }
+ }
+ }
- if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); It != m_CasContainerStrategy.m_LocationMap.end())
- {
- BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second];
- const BlockStoreLocation& OldLocation = m_CompactState.GetLocation(ChunkIndex);
- if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation)
- {
- // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a
- // later time
- continue;
- }
+ {
+ BlockStoreCompactState BlockCompactState;
+ std::vector<IoHash> BlockCompactStateKeys;
+
+ std::vector<uint32_t> BlocksToCompact =
+ m_CasContainerStrategy.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent);
+ BlockCompactState.IncludeBlocks(BlocksToCompact);
- const BlockStoreLocation& NewLocation = Moved.second;
- Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment);
- MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location});
+ {
+ RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock);
+ for (const auto& Entry : m_CasContainerStrategy.m_LocationMap)
+ {
+ size_t Index = Entry.second;
+ const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index];
+
+ if (!BlockCompactState.AddKeepLocation(Loc.Get(m_CasContainerStrategy.m_PayloadAlignment)))
+ {
+ continue;
}
+ BlockCompactStateKeys.push_back(Entry.first);
}
- m_CasContainerStrategy.m_CasLog.Append(MovedEntries);
- Stats.RemovedDisk += FreedDiskSpace;
- },
- [&]() { return 0; });
+ }
- Stats.Compacted +=
- m_PrunedKeys.size(); // Slightly missleading, it might not be compacted if the block is the currently writing block
+ if (Ctx.Settings.IsDeleteMode)
+ {
+ ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': compacting {} blocks",
+ m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName,
+ BlocksToCompact.size());
+
+ m_CasContainerStrategy.m_BlockStore.CompactBlocks(
+ BlockCompactState,
+ m_CasContainerStrategy.m_PayloadAlignment,
+ [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) {
+ std::vector<CasDiskIndexEntry> MovedEntries;
+ RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock);
+ for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray)
+ {
+ size_t ChunkIndex = Moved.first;
+ const IoHash& Key = BlockCompactStateKeys[ChunkIndex];
+
+ if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key);
+ It != m_CasContainerStrategy.m_LocationMap.end())
+ {
+ BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second];
+ const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex);
+ if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation)
+ {
+ // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d
+ // at a later time
+ continue;
+ }
+ const BlockStoreLocation& NewLocation = Moved.second;
+
+ Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment);
+ MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location});
+ }
+ }
+ m_CasContainerStrategy.m_CasLog.Append(MovedEntries);
+ Stats.RemovedDisk += FreedDiskSpace;
+ },
+ ClaimDiskReserveCallback);
+ }
+ else
+ {
+ ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': skipped compacting of {} eligible blocks",
+ m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName,
+ BlocksToCompact.size());
+ }
+ }
}
}
- CasContainerStrategy& m_CasContainerStrategy;
- BlockStoreCompactState m_CompactState;
- std::vector<IoHash> m_CompactStateKeys;
- std::vector<IoHash> m_PrunedKeys;
+ CasContainerStrategy& m_CasContainerStrategy;
};
class CasContainerReferencePruner : public GcReferencePruner
@@ -640,9 +680,7 @@ public:
{
}
- virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx,
- GcReferenceStoreStats& Stats,
- const GetUnusedReferencesFunc& GetUnusedReferences)
+ virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences)
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -650,17 +688,17 @@ public:
{
return;
}
- ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}",
+ ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Checked: {}, Deleted: {}, FreedMemory: {} in {}",
m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName,
- Stats.Count,
- Stats.Pruned,
- Stats.Compacted,
- NiceBytes(Stats.RemovedDisk),
- NiceBytes(Stats.RemovedMemory),
+ Stats.CheckedCount,
+ Stats.DeletedCount,
+ NiceBytes(Stats.FreedMemory),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids);
+ Stats.CheckedCount = m_Cids.size();
+ Stats.FoundCount = UnusedCids.size();
if (UnusedCids.empty())
{
@@ -668,19 +706,11 @@ public:
return nullptr;
}
- BlockStoreCompactState CompactState;
- BlockStore::ReclaimSnapshotState BlockSnapshotState;
- std::vector<IoHash> CompactStateKeys;
- std::vector<CasDiskIndexEntry> ExpiredEntries;
+ std::vector<CasDiskIndexEntry> ExpiredEntries;
ExpiredEntries.reserve(UnusedCids.size());
- tsl::robin_set<IoHash, IoHash::Hasher> UnusedKeys;
{
RwLock::ExclusiveLockScope __(m_CasContainerStrategy.m_LocationMapLock);
- if (Ctx.Settings.CollectSmallObjects)
- {
- BlockSnapshotState = m_CasContainerStrategy.m_BlockStore.GetReclaimSnapshotState();
- }
for (const IoHash& Cid : UnusedCids)
{
@@ -689,59 +719,28 @@ public:
{
continue;
}
- CasDiskIndexEntry ExpiredEntry = {.Key = Cid,
- .Location = m_CasContainerStrategy.m_Locations[It->second],
- .Flags = CasDiskIndexEntry::kTombstone};
- const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second];
- BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment);
if (Ctx.Settings.CollectSmallObjects)
{
- UnusedKeys.insert(Cid);
- uint32_t BlockIndex = BlockLocation.BlockIndex;
- bool IsActiveWriteBlock = BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex);
- if (!IsActiveWriteBlock)
- {
- CompactState.IncludeBlock(BlockIndex);
- }
+ CasDiskIndexEntry ExpiredEntry = {.Key = Cid,
+ .Location = m_CasContainerStrategy.m_Locations[It->second],
+ .Flags = CasDiskIndexEntry::kTombstone};
ExpiredEntries.push_back(ExpiredEntry);
}
}
- // Get all locations we need to keep for affected blocks
- if (Ctx.Settings.CollectSmallObjects && !UnusedKeys.empty())
- {
- for (const auto& Entry : m_CasContainerStrategy.m_LocationMap)
- {
- const IoHash& Key = Entry.first;
- if (UnusedKeys.contains(Key))
- {
- continue;
- }
- const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[Entry.second];
- BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment);
- if (CompactState.AddKeepLocation(BlockLocation))
- {
- CompactStateKeys.push_back(Key);
- }
- }
- }
-
if (Ctx.Settings.IsDeleteMode)
{
for (const CasDiskIndexEntry& Entry : ExpiredEntries)
{
m_CasContainerStrategy.m_LocationMap.erase(Entry.Key);
+ Stats.DeletedCount++;
}
m_CasContainerStrategy.m_CasLog.Append(ExpiredEntries);
m_CasContainerStrategy.m_CasLog.Flush();
}
}
- Stats.Pruned += UnusedKeys.size();
- return new CasContainerStoreCompactor(m_CasContainerStrategy,
- std::move(CompactState),
- std::move(CompactStateKeys),
- std::vector<IoHash>(UnusedKeys.begin(), UnusedKeys.end()));
+ return new CasContainerStoreCompactor(m_CasContainerStrategy);
}
private:
@@ -756,7 +755,7 @@ CasContainerStrategy::GetGcName(GcCtx&)
}
GcReferencePruner*
-CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats)
+CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&)
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -764,13 +763,8 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S
{
return;
}
- ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}",
+ ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}' in {}",
m_RootDirectory / m_ContainerBaseName,
- Stats.Count,
- Stats.Pruned,
- Stats.Compacted,
- NiceBytes(Stats.RemovedDisk),
- NiceBytes(Stats.RemovedMemory),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
@@ -787,7 +781,6 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S
CidsToCheck.push_back(It.first);
}
}
- Stats.Count += CidsToCheck.size();
return new CasContainerReferencePruner(*this, std::move(CidsToCheck));
}