diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-04 14:37:49 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-04 14:37:49 +0200 |
| commit | 387b6d99e6ef3958a6fd78b22c48bb8a85b53bda (patch) | |
| tree | fd6a5e07e9785a10606f35f92b2f205af87fff1f /src | |
| parent | added CHANGELOG.md note for websocket removal (diff) | |
| download | zen-387b6d99e6ef3958a6fd78b22c48bb8a85b53bda.tar.xz zen-387b6d99e6ef3958a6fd78b22c48bb8a85b53bda.zip | |
refactor compactcas index (#443)
- Bugfix: Fix scrub messing up payload and access time in disk cache bucket when compacting index
- Improvement: Split up disk cache bucket index into hash lookup and payload array to improve performance
- Improvement: Reserve space up front for compact binary output when saving cache bucket manifest to improve performance
Diffstat (limited to 'src')
| -rw-r--r-- | src/zencore/include/zencore/compactbinarybuilder.h | 1 | ||||
| -rw-r--r-- | src/zenserver/cache/cachedisklayer.cpp | 13 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 73 | ||||
| -rw-r--r-- | src/zenstore/compactcas.h | 8 |
4 files changed, 73 insertions, 22 deletions
diff --git a/src/zencore/include/zencore/compactbinarybuilder.h b/src/zencore/include/zencore/compactbinarybuilder.h index 53f00ae4e..e8d981fcb 100644 --- a/src/zencore/include/zencore/compactbinarybuilder.h +++ b/src/zencore/include/zencore/compactbinarybuilder.h @@ -441,6 +441,7 @@ private: class CbObjectWriter : public CbWriter { public: + explicit CbObjectWriter(int64_t InitialSize) : CbWriter(InitialSize) { BeginObject(); } CbObjectWriter() { BeginObject(); } ZENCORE_API CbObject Save() diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp index 9883e2119..7ce713de9 100644 --- a/src/zenserver/cache/cachedisklayer.cpp +++ b/src/zenserver/cache/cachedisklayer.cpp @@ -884,7 +884,14 @@ ZenCacheDiskLayer::CacheBucket::MakeManifest(IndexMap&& Index, std::vector<Acces ZEN_TRACE_CPU("Z$::Disk::Bucket::MakeManifest"); - CbObjectWriter Writer; + size_t ItemCount = m_Index.size(); + + // This tends to overestimate a little bit but it is still way more accurate than what we get with exponential growth + // And we don't need to reallocate theunderying buffer in almost every case + const size_t EstimatedSizePerItem = 54u; + const size_t ReserveSize = ItemCount == 0 ? 
48u : RoundUp(32u + (ItemCount * EstimatedSizePerItem), 128); + CbObjectWriter Writer(ReserveSize); + Writer << "BucketId"sv << m_BucketId; Writer << "Version"sv << CurrentDiskBucketVersion; @@ -1213,8 +1220,8 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) for (auto It : m_Index) { size_t EntryIndex = Payloads.size(); - Payloads.push_back(m_Payloads[EntryIndex]); - AccessTimes.push_back(m_AccessTimes[EntryIndex]); + Payloads.push_back(m_Payloads[It.second]); + AccessTimes.push_back(m_AccessTimes[It.second]); Index.insert({It.first, EntryIndex}); } m_Index.swap(Index); diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index ce2e53527..e6383c3a1 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -174,7 +174,8 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const m_CasLog.Append(IndexEntry); { RwLock::ExclusiveLockScope _(m_LocationMapLock); - m_LocationMap.emplace(ChunkHash, DiskLocation); + m_LocationMap.emplace(ChunkHash, m_Locations.size()); + m_Locations.push_back(DiskLocation); } }); @@ -201,7 +202,7 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { return IoBuffer(); } - const BlockStoreLocation& Location = KeyIt->second.Get(m_PayloadAlignment); + const BlockStoreLocation& Location = m_Locations[KeyIt->second].Get(m_PayloadAlignment); IoBuffer Chunk = m_BlockStore.TryGetChunk(Location); return Chunk; @@ -258,7 +259,7 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx) for (const auto& Entry : m_LocationMap) { const IoHash& ChunkHash = Entry.first; - const BlockStoreDiskLocation& DiskLocation = Entry.second; + const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second]; BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); ChunkLocations.push_back(Location); @@ -355,7 +356,7 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx) std::vector<CasDiskIndexEntry> LogEntries; LogEntries.reserve(BadKeys.size()); { - 
RwLock::ExclusiveLockScope __(m_LocationMapLock); + RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock); for (const IoHash& ChunkHash : BadKeys) { const auto KeyIt = m_LocationMap.find(ChunkHash); @@ -364,9 +365,13 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx) // Might have been GC'd continue; } - LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + LogEntries.push_back( + {.Key = KeyIt->first, .Location = m_Locations[KeyIt->second], .Flags = CasDiskIndexEntry::kTombstone}); m_LocationMap.erase(KeyIt); } + + // Clean up m_Locations vectors + CompactIndex(IndexLock); } m_CasLog.Append(LogEntries); } @@ -423,8 +428,9 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) uint64_t ReadBlockTimeUs = 0; uint64_t ReadBlockLongestTimeUs = 0; - LocationMap_t LocationMap; - BlockStore::ReclaimSnapshotState BlockStoreState; + LocationMap_t LocationMap; + std::vector<BlockStoreDiskLocation> Locations; + BlockStore::ReclaimSnapshotState BlockStoreState; { ZEN_TRACE_CPU("CasContainer::CollectGarbage::State"); @@ -436,6 +442,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); }); LocationMap = m_LocationMap; + Locations = m_Locations; BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); } @@ -459,7 +466,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) ZEN_TRACE_CPU("CasContainer::CollectGarbage::Filter"); GcCtx.FilterCids(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { auto KeyIt = LocationMap.find(ChunkHash); - const BlockStoreDiskLocation& DiskLocation = KeyIt->second; + const BlockStoreDiskLocation& DiskLocation = Locations[KeyIt->second]; BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); size_t ChunkIndex = ChunkLocations.size(); @@ -499,7 +506,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) for (const size_t ChunkIndex : RemovedChunks) { const IoHash& ChunkHash 
= ChunkIndexToChunkHash[ChunkIndex]; - const BlockStoreDiskLocation& OldDiskLocation = LocationMap[ChunkHash]; + const BlockStoreDiskLocation& OldDiskLocation = Locations[LocationMap[ChunkHash]]; LogEntries.push_back({.Key = ChunkHash, .Location = OldDiskLocation, .Flags = CasDiskIndexEntry::kTombstone}); DeletedChunks.push_back(ChunkHash); } @@ -521,15 +528,41 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_LocationMap.erase(Entry.Key); continue; } - m_LocationMap[Entry.Key] = Entry.Location; + m_Locations[m_LocationMap[Entry.Key]] = Entry.Location; } } }, [&GcCtx]() { return GcCtx.CollectSmallObjects(); }); + if (!DeletedChunks.empty()) + { + // Clean up m_Locations vectors + RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock); + CompactIndex(IndexLock); + } GcCtx.AddDeletedCids(DeletedChunks); } +void +CasContainerStrategy::CompactIndex(RwLock::ExclusiveLockScope&) +{ + ZEN_TRACE_CPU("CasContainer::CompactIndex"); + + size_t EntryCount = m_LocationMap.size(); + LocationMap_t LocationMap; + std::vector<BlockStoreDiskLocation> Locations; + Locations.reserve(EntryCount); + LocationMap.reserve(EntryCount); + for (auto It : m_LocationMap) + { + size_t EntryIndex = Locations.size(); + Locations.push_back(m_Locations[It.second]); + LocationMap.insert({It.first, EntryIndex}); + } + m_LocationMap.swap(LocationMap); + m_Locations.swap(Locations); +} + GcStorageSize CasContainerStrategy::StorageSize() const { @@ -592,7 +625,7 @@ CasContainerStrategy::MakeIndexSnapshot() { CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++]; IndexEntry.Key = Entry.first; - IndexEntry.Location = Entry.second; + IndexEntry.Location = m_Locations[Entry.second]; } } @@ -673,7 +706,8 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", IndexPath, InvalidEntryReason); continue; } - m_LocationMap[Entry.Key] = Entry.Location; + m_LocationMap[Entry.Key] = m_Locations.size(); + 
m_Locations.push_back(Entry.Location); } OutVersion = CasDiskIndexHeader::CurrentVersion; @@ -733,9 +767,11 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LogPath, InvalidEntryReason); return; } - m_LocationMap[Record.Key] = Record.Location; + m_LocationMap[Record.Key] = m_Locations.size(); + m_Locations.push_back(Record.Location); }, SkipEntryCount); + return LogEntryCount; } return 0; @@ -749,6 +785,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) // Add .running file and delete on clean on close to detect bad termination m_LocationMap.clear(); + m_Locations.clear(); std::filesystem::path BasePath = GetBasePath(m_RootDirectory, m_ContainerBaseName); @@ -797,11 +834,12 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) std::vector<CasDiskIndexEntry> BadEntries; for (const auto& Entry : m_LocationMap) { - const BlockStoreDiskLocation& DiskLocation = Entry.second; - auto BlockIt = BlockSizes.find(DiskLocation.GetBlockIndex()); + const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second]; + uint32_t BlockIndex = DiskLocation.GetBlockIndex(); + auto BlockIt = BlockSizes.find(BlockIndex); if (BlockIt == BlockSizes.end()) { - ZEN_WARN("Unknown block {} for entry {} in '{}'", DiskLocation.GetBlockIndex(), Entry.first.ToHexString(), BasePath); + ZEN_WARN("Unknown block {} for entry {} in '{}'", BlockIndex, Entry.first.ToHexString(), BasePath); } else { @@ -835,6 +873,9 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) { m_LocationMap.erase(BadEntry.Key); } + + RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock); + CompactIndex(IndexLock); } m_BlockStore.Prune(KnownLocations); diff --git a/src/zenstore/compactcas.h b/src/zenstore/compactcas.h index eff9cc135..c0cbbac32 100644 --- a/src/zenstore/compactcas.h +++ b/src/zenstore/compactcas.h @@ -77,6 +77,7 @@ private: uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& 
OutVersion); uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntryCount); void OpenContainer(bool IsNewStore); + void CompactIndex(RwLock::ExclusiveLockScope&); spdlog::logger& Log() { return m_Log; } @@ -91,9 +92,10 @@ private: std::filesystem::path m_BlocksBasePath; BlockStore m_BlockStore; - RwLock m_LocationMapLock; - typedef tsl::robin_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher> LocationMap_t; - LocationMap_t m_LocationMap; + RwLock m_LocationMapLock; + typedef tsl::robin_map<IoHash, size_t, IoHash::Hasher> LocationMap_t; + LocationMap_t m_LocationMap; + std::vector<BlockStoreDiskLocation> m_Locations; }; void compactcas_forcelink(); |