diff options
| author | Dan Engelbrecht <[email protected]> | 2024-11-27 12:52:25 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-11-27 12:52:25 +0100 |
| commit | 1bd5cb6fb09dfa56a7b317a56a9914609f2b4535 (patch) | |
| tree | 4cc24c2553486c73567707346a860d7164abd121 /src | |
| parent | 5.5.14-pre4 (diff) | |
| download | zen-1bd5cb6fb09dfa56a7b317a56a9914609f2b4535.tar.xz zen-1bd5cb6fb09dfa56a7b317a56a9914609f2b4535.zip | |
use plain sorted array instead of map of vectors (#237)
* use plain sorted array instead of map of vectors
* reserve vectors up front (5% performance increase)
* skip the batch read of chunks when there is only a single chunk (1% performance gain)
Diffstat (limited to 'src')
| -rw-r--r-- | src/zenstore/blockstore.cpp | 44 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 11 |
2 files changed, 33 insertions, 22 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index fcf934344..5f2f5cba0 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -803,7 +803,7 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, const size_t BlockSize = BlockFile->FileSize(); const size_t RangeCount = GetNextRange(BlockSize, ChunkIndexes, LocationIndexOffset); - if (RangeCount > 0) + if (RangeCount > 1) { size_t LastChunkIndex = ChunkIndexes[LocationIndexOffset + RangeCount - 1]; const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex]; @@ -870,31 +870,39 @@ BlockStore::IterateChunks(const std::span<const BlockStoreLocation>& ChunkLocati ZEN_LOG_SCOPE("iterating chunks from '{}'", m_BlocksBasePath); - tsl::robin_map<uint32_t, size_t> BlockIndexToBlockChunks; - std::vector<std::vector<size_t>> BlocksChunks; - + std::vector<size_t> ChunkOrder(ChunkLocations.size()); for (size_t ChunkIndex = 0; ChunkIndex < ChunkLocations.size(); ++ChunkIndex) { - const BlockStoreLocation& Location = ChunkLocations[ChunkIndex]; - if (auto It = BlockIndexToBlockChunks.find(Location.BlockIndex); It != BlockIndexToBlockChunks.end()) - { - BlocksChunks[It->second].push_back(ChunkIndex); - } - else - { - BlockIndexToBlockChunks.insert(std::make_pair(Location.BlockIndex, BlocksChunks.size())); - BlocksChunks.push_back(std::vector<size_t>({ChunkIndex})); - } + ChunkOrder[ChunkIndex] = ChunkIndex; } - for (auto& BlockChunks : BlocksChunks) + std::sort(ChunkOrder.begin(), ChunkOrder.end(), [&ChunkLocations](const size_t Lhs, const size_t Rhs) { + return ChunkLocations[Lhs].BlockIndex < ChunkLocations[Rhs].BlockIndex; + }); + size_t RangeStart = 0; + size_t RangeEnd = 0; + const std::span<size_t> ChunkIndexRange(ChunkOrder); + while (RangeStart < ChunkOrder.size()) { - ZEN_ASSERT(!BlockChunks.empty()); - uint32_t BlockIndex = ChunkLocations[BlockChunks[0]].BlockIndex; - if (!Callback(BlockIndex, BlockChunks)) + const size_t ChunkIndex = 
ChunkOrder[RangeStart]; + const uint32_t BlockIndex = ChunkLocations[ChunkIndex].BlockIndex; + RangeEnd++; + while (RangeEnd < ChunkOrder.size()) + { + const size_t NextChunkIndex = ChunkOrder[RangeEnd]; + if (ChunkLocations[NextChunkIndex].BlockIndex != BlockIndex) + { + break; + } + ++RangeEnd; + } + + if (!Callback(BlockIndex, ChunkIndexRange.subspan(RangeStart, RangeEnd - RangeStart))) { return false; } + + RangeStart = RangeEnd; } return true; } diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 792854af6..9982e7571 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -308,9 +308,10 @@ CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes, WorkerThreadPool* OptionalWorkerPool, uint64_t LargeSizeLimit) { - if (ChunkHashes.size() < 3) + const size_t ChunkCount = ChunkHashes.size(); + if (ChunkCount < 3) { - for (size_t ChunkIndex = 0; ChunkIndex < ChunkHashes.size(); ChunkIndex++) + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) { IoBuffer Chunk = FindChunk(ChunkHashes[ChunkIndex]); if (!AsyncCallback(ChunkIndex, Chunk)) @@ -322,8 +323,10 @@ CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes, } std::vector<size_t> FoundChunkIndexes; std::vector<BlockStoreLocation> FoundChunkLocations; - RwLock::SharedLockScope _(m_LocationMapLock); - for (size_t ChunkIndex = 0; ChunkIndex < ChunkHashes.size(); ChunkIndex++) + FoundChunkIndexes.reserve(ChunkCount); + FoundChunkLocations.reserve(ChunkCount); + RwLock::SharedLockScope _(m_LocationMapLock); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) { if (auto KeyIt = m_LocationMap.find(ChunkHashes[ChunkIndex]); KeyIt != m_LocationMap.end()) { |