aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/compactcas.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-04 14:37:49 +0200
committerGitHub <[email protected]>2023-10-04 14:37:49 +0200
commit387b6d99e6ef3958a6fd78b22c48bb8a85b53bda (patch)
treefd6a5e07e9785a10606f35f92b2f205af87fff1f /src/zenstore/compactcas.cpp
parentadded CHANGELOG.md note for websocket removal (diff)
downloadzen-387b6d99e6ef3958a6fd78b22c48bb8a85b53bda.tar.xz
zen-387b6d99e6ef3958a6fd78b22c48bb8a85b53bda.zip
refactor compactcas index (#443)
- Bugfix: Fix scrub messing up payload and access time in disk cache bucket when compacting index - Improvement: Split up disk cache bucket index into hash lookup and payload array to improve performance - Improvement: Reserve space up front for compact binary output when saving cache bucket manifest to improve performance
Diffstat (limited to 'src/zenstore/compactcas.cpp')
-rw-r--r--src/zenstore/compactcas.cpp73
1 files changed, 57 insertions, 16 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index ce2e53527..e6383c3a1 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -174,7 +174,8 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
m_CasLog.Append(IndexEntry);
{
RwLock::ExclusiveLockScope _(m_LocationMapLock);
- m_LocationMap.emplace(ChunkHash, DiskLocation);
+ m_LocationMap.emplace(ChunkHash, m_Locations.size());
+ m_Locations.push_back(DiskLocation);
}
});
@@ -201,7 +202,7 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
{
return IoBuffer();
}
- const BlockStoreLocation& Location = KeyIt->second.Get(m_PayloadAlignment);
+ const BlockStoreLocation& Location = m_Locations[KeyIt->second].Get(m_PayloadAlignment);
IoBuffer Chunk = m_BlockStore.TryGetChunk(Location);
return Chunk;
@@ -258,7 +259,7 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx)
for (const auto& Entry : m_LocationMap)
{
const IoHash& ChunkHash = Entry.first;
- const BlockStoreDiskLocation& DiskLocation = Entry.second;
+ const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second];
BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
ChunkLocations.push_back(Location);
@@ -355,7 +356,7 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx)
std::vector<CasDiskIndexEntry> LogEntries;
LogEntries.reserve(BadKeys.size());
{
- RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock);
for (const IoHash& ChunkHash : BadKeys)
{
const auto KeyIt = m_LocationMap.find(ChunkHash);
@@ -364,9 +365,13 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx)
// Might have been GC'd
continue;
}
- LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
+ LogEntries.push_back(
+ {.Key = KeyIt->first, .Location = m_Locations[KeyIt->second], .Flags = CasDiskIndexEntry::kTombstone});
m_LocationMap.erase(KeyIt);
}
+
+ // Clean up m_Locations vectors
+ CompactIndex(IndexLock);
}
m_CasLog.Append(LogEntries);
}
@@ -423,8 +428,9 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
uint64_t ReadBlockTimeUs = 0;
uint64_t ReadBlockLongestTimeUs = 0;
- LocationMap_t LocationMap;
- BlockStore::ReclaimSnapshotState BlockStoreState;
+ LocationMap_t LocationMap;
+ std::vector<BlockStoreDiskLocation> Locations;
+ BlockStore::ReclaimSnapshotState BlockStoreState;
{
ZEN_TRACE_CPU("CasContainer::CollectGarbage::State");
@@ -436,6 +442,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
});
LocationMap = m_LocationMap;
+ Locations = m_Locations;
BlockStoreState = m_BlockStore.GetReclaimSnapshotState();
}
@@ -459,7 +466,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
ZEN_TRACE_CPU("CasContainer::CollectGarbage::Filter");
GcCtx.FilterCids(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
auto KeyIt = LocationMap.find(ChunkHash);
- const BlockStoreDiskLocation& DiskLocation = KeyIt->second;
+ const BlockStoreDiskLocation& DiskLocation = Locations[KeyIt->second];
BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
size_t ChunkIndex = ChunkLocations.size();
@@ -499,7 +506,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const size_t ChunkIndex : RemovedChunks)
{
const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex];
- const BlockStoreDiskLocation& OldDiskLocation = LocationMap[ChunkHash];
+ const BlockStoreDiskLocation& OldDiskLocation = Locations[LocationMap[ChunkHash]];
LogEntries.push_back({.Key = ChunkHash, .Location = OldDiskLocation, .Flags = CasDiskIndexEntry::kTombstone});
DeletedChunks.push_back(ChunkHash);
}
@@ -521,15 +528,41 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
m_LocationMap.erase(Entry.Key);
continue;
}
- m_LocationMap[Entry.Key] = Entry.Location;
+ m_Locations[m_LocationMap[Entry.Key]] = Entry.Location;
}
}
},
[&GcCtx]() { return GcCtx.CollectSmallObjects(); });
+ if (!DeletedChunks.empty())
+ {
+ // Clean up m_Locations vectors
+ RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock);
+ CompactIndex(IndexLock);
+ }
GcCtx.AddDeletedCids(DeletedChunks);
}
+// Rebuilds m_LocationMap and m_Locations so that m_Locations holds exactly one
+// entry per key currently present in m_LocationMap. Each key is assigned a fresh
+// index into the rebuilt vector, so slots whose keys were erased from the map
+// are not carried over.
+//
+// The unnamed lock parameter is not used in the body. Callers in this file pass
+// an exclusive scope taken on m_LocationMapLock, so it presumably exists to make
+// call sites show they hold that lock - confirm against the class declaration.
+void
+CasContainerStrategy::CompactIndex(RwLock::ExclusiveLockScope&)
+{
+    ZEN_TRACE_CPU("CasContainer::CompactIndex");
+
+    const size_t EntryCount = m_LocationMap.size();
+    LocationMap_t LocationMap;
+    std::vector<BlockStoreDiskLocation> Locations;
+    Locations.reserve(EntryCount);
+    LocationMap.reserve(EntryCount);
+    // Bind by const reference: the entries are only read here, so copying each
+    // key/value pair on every iteration would be wasted work.
+    for (const auto& Entry : m_LocationMap)
+    {
+        const size_t EntryIndex = Locations.size();
+        Locations.push_back(m_Locations[Entry.second]);
+        LocationMap.insert({Entry.first, EntryIndex});
+    }
+    // Install the rebuilt containers; the previous contents end up in the locals
+    // and are destroyed when this function returns.
+    m_LocationMap.swap(LocationMap);
+    m_Locations.swap(Locations);
+}
+
GcStorageSize
CasContainerStrategy::StorageSize() const
{
@@ -592,7 +625,7 @@ CasContainerStrategy::MakeIndexSnapshot()
{
CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++];
IndexEntry.Key = Entry.first;
- IndexEntry.Location = Entry.second;
+ IndexEntry.Location = m_Locations[Entry.second];
}
}
@@ -673,7 +706,8 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint
ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", IndexPath, InvalidEntryReason);
continue;
}
- m_LocationMap[Entry.Key] = Entry.Location;
+ m_LocationMap[Entry.Key] = m_Locations.size();
+ m_Locations.push_back(Entry.Location);
}
OutVersion = CasDiskIndexHeader::CurrentVersion;
@@ -733,9 +767,11 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski
ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LogPath, InvalidEntryReason);
return;
}
- m_LocationMap[Record.Key] = Record.Location;
+ m_LocationMap[Record.Key] = m_Locations.size();
+ m_Locations.push_back(Record.Location);
},
SkipEntryCount);
+
return LogEntryCount;
}
return 0;
@@ -749,6 +785,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
// Add .running file and delete on clean on close to detect bad termination
m_LocationMap.clear();
+ m_Locations.clear();
std::filesystem::path BasePath = GetBasePath(m_RootDirectory, m_ContainerBaseName);
@@ -797,11 +834,12 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
std::vector<CasDiskIndexEntry> BadEntries;
for (const auto& Entry : m_LocationMap)
{
- const BlockStoreDiskLocation& DiskLocation = Entry.second;
- auto BlockIt = BlockSizes.find(DiskLocation.GetBlockIndex());
+ const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second];
+ uint32_t BlockIndex = DiskLocation.GetBlockIndex();
+ auto BlockIt = BlockSizes.find(BlockIndex);
if (BlockIt == BlockSizes.end())
{
- ZEN_WARN("Unknown block {} for entry {} in '{}'", DiskLocation.GetBlockIndex(), Entry.first.ToHexString(), BasePath);
+ ZEN_WARN("Unknown block {} for entry {} in '{}'", BlockIndex, Entry.first.ToHexString(), BasePath);
}
else
{
@@ -835,6 +873,9 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
{
m_LocationMap.erase(BadEntry.Key);
}
+
+ RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock);
+ CompactIndex(IndexLock);
}
m_BlockStore.Prune(KnownLocations);