diff options
| author | Dan Engelbrecht <[email protected]> | 2025-12-15 13:20:21 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-12-15 13:20:21 +0100 |
| commit | a715d3ab7701e6257730a73c62567052d21c9771 (patch) | |
| tree | 1f6b1de9c7cf11ec1403187d77d74a3b1af52a39 /src/zenremotestore/builds/buildstorageoperations.cpp | |
| parent | show download source data (#689) (diff) | |
| download | zen-a715d3ab7701e6257730a73c62567052d21c9771.tar.xz zen-a715d3ab7701e6257730a73c62567052d21c9771.zip | |
oplog download size (#690)
- Bugfix: Upload of oplogs could reference multiple blocks for the same chunk, causing redundant block downloads
- Improvement: Use the improved block reuse selection function from zen builds upload in zen oplog-export to reduce oplog download size
Diffstat (limited to 'src/zenremotestore/builds/buildstorageoperations.cpp')
| -rw-r--r-- | src/zenremotestore/builds/buildstorageoperations.cpp | 207 |
1 file changed, 17 insertions, 190 deletions
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index b8dd18bb5..6c370f975 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -4998,7 +4998,11 @@ BuildsOperationUploadFolder::Execute() } else { - ReuseBlockIndexes = FindReuseBlocks(PrepBuildResult.KnownBlocks, + ReuseBlockIndexes = FindReuseBlocks(m_LogOutput, + m_Options.BlockReuseMinPercentLimit, + m_Options.IsVerbose, + m_ReuseBlocksStats, + PrepBuildResult.KnownBlocks, LocalContent.ChunkedContent.ChunkHashes, BlockChunkIndexes, NewBlockChunkIndexes); @@ -5027,13 +5031,13 @@ BuildsOperationUploadFolder::Execute() const double AcceptedByteCountPercent = m_FindBlocksStats.PotentialChunkByteCount > 0 - ? (100.0 * m_FindBlocksStats.AcceptedRawByteCount / m_FindBlocksStats.PotentialChunkByteCount) + ? (100.0 * m_ReuseBlocksStats.AcceptedRawByteCount / m_FindBlocksStats.PotentialChunkByteCount) : 0.0; const double AcceptedReduntantByteCountPercent = - m_FindBlocksStats.AcceptedByteCount > 0 - ? (100.0 * m_FindBlocksStats.AcceptedReduntantByteCount) / - (m_FindBlocksStats.AcceptedByteCount + m_FindBlocksStats.AcceptedReduntantByteCount) + m_ReuseBlocksStats.AcceptedByteCount > 0 + ? 
(100.0 * m_ReuseBlocksStats.AcceptedReduntantByteCount) / + (m_ReuseBlocksStats.AcceptedByteCount + m_ReuseBlocksStats.AcceptedReduntantByteCount) : 0.0; if (!m_Options.IsQuiet) { @@ -5050,18 +5054,18 @@ BuildsOperationUploadFolder::Execute() NiceBytes(m_FindBlocksStats.FoundBlockByteCount), NiceTimeSpanMs(m_FindBlocksStats.FindBlockTimeMS), - m_FindBlocksStats.AcceptedChunkCount, - NiceBytes(m_FindBlocksStats.AcceptedRawByteCount), + m_ReuseBlocksStats.AcceptedChunkCount, + NiceBytes(m_ReuseBlocksStats.AcceptedRawByteCount), m_FindBlocksStats.AcceptedBlockCount, AcceptedByteCountPercent, - m_FindBlocksStats.AcceptedReduntantChunkCount, - NiceBytes(m_FindBlocksStats.AcceptedReduntantByteCount), + m_ReuseBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(m_ReuseBlocksStats.AcceptedReduntantByteCount), AcceptedReduntantByteCountPercent, - m_FindBlocksStats.RejectedChunkCount, - NiceBytes(m_FindBlocksStats.RejectedByteCount), - m_FindBlocksStats.RejectedBlockCount, + m_ReuseBlocksStats.RejectedChunkCount, + NiceBytes(m_ReuseBlocksStats.RejectedByteCount), + m_ReuseBlocksStats.RejectedBlockCount, m_FindBlocksStats.NewBlocksChunkCount, NiceBytes(m_FindBlocksStats.NewBlocksChunkByteCount), @@ -5497,7 +5501,7 @@ BuildsOperationUploadFolder::Execute() {{"totalSize", double(m_LocalFolderScanStats.FoundFileByteCount.load())}, {"reusedRatio", AcceptedByteCountPercent / 100.0}, {"reusedBlockCount", double(m_FindBlocksStats.AcceptedBlockCount)}, - {"reusedBlockByteCount", double(m_FindBlocksStats.AcceptedRawByteCount)}, + {"reusedBlockByteCount", double(m_ReuseBlocksStats.AcceptedRawByteCount)}, {"newBlockCount", double(m_FindBlocksStats.NewBlocksCount)}, {"newBlockByteCount", double(m_FindBlocksStats.NewBlocksChunkByteCount)}, {"uploadedCount", double(m_UploadStats.BlockCount.load() + m_UploadStats.ChunkCount.load())}, @@ -5589,183 +5593,6 @@ BuildsOperationUploadFolder::IsAcceptedFile(const std::string_view& RelativePath return true; } -std::vector<size_t> 
-BuildsOperationUploadFolder::FindReuseBlocks(const std::vector<ChunkBlockDescription>& KnownBlocks, - std::span<const IoHash> ChunkHashes, - std::span<const uint32_t> ChunkIndexes, - std::vector<uint32_t>& OutUnusedChunkIndexes) -{ - ZEN_TRACE_CPU("FindReuseBlocks"); - - // Find all blocks with a usage level higher than MinPercentLimit - // Pick out the blocks with usage higher or equal to MinPercentLimit - // Sort them with highest size usage - most usage first - // Make a list of all chunks and mark them as not found - // For each block, recalculate the block has usage percent based on the chunks marked as not found - // If the block still reaches MinPercentLimit, keep it and remove the matching chunks from the not found list - // Repeat for following all remaining block that initially matched MinPercentLimit - - std::vector<size_t> FilteredReuseBlockIndexes; - - uint32_t ChunkCount = gsl::narrow<uint32_t>(ChunkHashes.size()); - std::vector<bool> ChunkFound(ChunkCount, false); - - if (ChunkCount > 0) - { - if (!KnownBlocks.empty()) - { - Stopwatch ReuseTimer; - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - ChunkHashToChunkIndex.reserve(ChunkIndexes.size()); - for (uint32_t ChunkIndex : ChunkIndexes) - { - ChunkHashToChunkIndex.insert_or_assign(ChunkHashes[ChunkIndex], ChunkIndex); - } - - std::vector<size_t> BlockSizes(KnownBlocks.size(), 0); - std::vector<size_t> BlockUseSize(KnownBlocks.size(), 0); - - std::vector<size_t> ReuseBlockIndexes; - - for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) - { - const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - if (KnownBlock.BlockHash != IoHash::Zero && KnownBlock.ChunkRawHashes.size() == KnownBlock.ChunkCompressedLengths.size()) - { - size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); - if (BlockAttachmentCount == 0) - { - continue; - } - size_t ReuseSize = 0; - size_t BlockSize = 0; - size_t FoundAttachmentCount = 
0; - size_t BlockChunkCount = KnownBlock.ChunkRawHashes.size(); - for (size_t BlockChunkIndex = 0; BlockChunkIndex < BlockChunkCount; BlockChunkIndex++) - { - const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; - const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; - BlockSize += BlockChunkSize; - if (ChunkHashToChunkIndex.contains(BlockChunkHash)) - { - ReuseSize += BlockChunkSize; - FoundAttachmentCount++; - } - } - - size_t ReusePercent = (ReuseSize * 100) / BlockSize; - - if (ReusePercent >= m_Options.BlockReuseMinPercentLimit) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Reusing block {}. {} attachments found, usage level: {}%", - KnownBlock.BlockHash, - FoundAttachmentCount, - ReusePercent); - } - ReuseBlockIndexes.push_back(KnownBlockIndex); - - BlockSizes[KnownBlockIndex] = BlockSize; - BlockUseSize[KnownBlockIndex] = ReuseSize; - } - else if (FoundAttachmentCount > 0) - { - // if (m_Options.IsVerbose) - //{ - // ZEN_OPERATION_LOG_INFO(m_LogOutput, "Skipping block {}. 
{} attachments found, usage level: {}%", - // KnownBlock.BlockHash, - // FoundAttachmentCount, ReusePercent); - //} - m_FindBlocksStats.RejectedBlockCount++; - m_FindBlocksStats.RejectedChunkCount += FoundAttachmentCount; - m_FindBlocksStats.RejectedByteCount += ReuseSize; - } - } - } - - if (!ReuseBlockIndexes.empty()) - { - std::sort(ReuseBlockIndexes.begin(), ReuseBlockIndexes.end(), [&](size_t Lhs, size_t Rhs) { - return BlockUseSize[Lhs] > BlockUseSize[Rhs]; - }); - - for (size_t KnownBlockIndex : ReuseBlockIndexes) - { - std::vector<uint32_t> FoundChunkIndexes; - size_t BlockSize = 0; - size_t AdjustedReuseSize = 0; - size_t AdjustedRawReuseSize = 0; - const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (size_t BlockChunkIndex = 0; BlockChunkIndex < KnownBlock.ChunkRawHashes.size(); BlockChunkIndex++) - { - const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; - const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; - BlockSize += BlockChunkSize; - if (auto It = ChunkHashToChunkIndex.find(BlockChunkHash); It != ChunkHashToChunkIndex.end()) - { - const uint32_t ChunkIndex = It->second; - if (!ChunkFound[ChunkIndex]) - { - FoundChunkIndexes.push_back(ChunkIndex); - AdjustedReuseSize += KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; - AdjustedRawReuseSize += KnownBlock.ChunkRawLengths[BlockChunkIndex]; - } - } - } - - size_t ReusePercent = (AdjustedReuseSize * 100) / BlockSize; - - if (ReusePercent >= m_Options.BlockReuseMinPercentLimit) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Reusing block {}. 
{} attachments found, usage level: {}%", - KnownBlock.BlockHash, - FoundChunkIndexes.size(), - ReusePercent); - } - FilteredReuseBlockIndexes.push_back(KnownBlockIndex); - - for (uint32_t ChunkIndex : FoundChunkIndexes) - { - ChunkFound[ChunkIndex] = true; - } - m_FindBlocksStats.AcceptedChunkCount += FoundChunkIndexes.size(); - m_FindBlocksStats.AcceptedByteCount += AdjustedReuseSize; - m_FindBlocksStats.AcceptedRawByteCount += AdjustedRawReuseSize; - m_FindBlocksStats.AcceptedReduntantChunkCount += KnownBlock.ChunkRawHashes.size() - FoundChunkIndexes.size(); - m_FindBlocksStats.AcceptedReduntantByteCount += BlockSize - AdjustedReuseSize; - } - else - { - // if (m_Options.IsVerbose) - //{ - // ZEN_OPERATION_LOG_INFO(m_LogOutput, "Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash, - // ReusePercent); - //} - m_FindBlocksStats.RejectedBlockCount++; - m_FindBlocksStats.RejectedChunkCount += FoundChunkIndexes.size(); - m_FindBlocksStats.RejectedByteCount += AdjustedReuseSize; - } - } - } - } - OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - m_FindBlocksStats.AcceptedChunkCount); - for (uint32_t ChunkIndex : ChunkIndexes) - { - if (!ChunkFound[ChunkIndex]) - { - OutUnusedChunkIndexes.push_back(ChunkIndex); - } - } - } - return FilteredReuseBlockIndexes; -} - void BuildsOperationUploadFolder::ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, |