aboutsummaryrefslogtreecommitdiff
path: root/src/zenremotestore/builds/buildstorageoperations.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2025-12-15 13:20:21 +0100
committer: GitHub Enterprise <[email protected]> 2025-12-15 13:20:21 +0100
commit: a715d3ab7701e6257730a73c62567052d21c9771 (patch)
tree: 1f6b1de9c7cf11ec1403187d77d74a3b1af52a39 /src/zenremotestore/builds/buildstorageoperations.cpp
parent: show download source data (#689) (diff)
download: zen-a715d3ab7701e6257730a73c62567052d21c9771.tar.xz
zen-a715d3ab7701e6257730a73c62567052d21c9771.zip
oplog download size (#690)
- Bugfix: Upload of oplogs could reference multiple blocks for the same chunk, causing redundant downloads of blocks
- Improvement: Use the improved block reuse selection function from zen builds upload in zen oplog-export to reduce oplog download size
Diffstat (limited to 'src/zenremotestore/builds/buildstorageoperations.cpp')
-rw-r--r--src/zenremotestore/builds/buildstorageoperations.cpp207
1 files changed, 17 insertions, 190 deletions
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp
index b8dd18bb5..6c370f975 100644
--- a/src/zenremotestore/builds/buildstorageoperations.cpp
+++ b/src/zenremotestore/builds/buildstorageoperations.cpp
@@ -4998,7 +4998,11 @@ BuildsOperationUploadFolder::Execute()
}
else
{
- ReuseBlockIndexes = FindReuseBlocks(PrepBuildResult.KnownBlocks,
+ ReuseBlockIndexes = FindReuseBlocks(m_LogOutput,
+ m_Options.BlockReuseMinPercentLimit,
+ m_Options.IsVerbose,
+ m_ReuseBlocksStats,
+ PrepBuildResult.KnownBlocks,
LocalContent.ChunkedContent.ChunkHashes,
BlockChunkIndexes,
NewBlockChunkIndexes);
@@ -5027,13 +5031,13 @@ BuildsOperationUploadFolder::Execute()
const double AcceptedByteCountPercent =
m_FindBlocksStats.PotentialChunkByteCount > 0
- ? (100.0 * m_FindBlocksStats.AcceptedRawByteCount / m_FindBlocksStats.PotentialChunkByteCount)
+ ? (100.0 * m_ReuseBlocksStats.AcceptedRawByteCount / m_FindBlocksStats.PotentialChunkByteCount)
: 0.0;
const double AcceptedReduntantByteCountPercent =
- m_FindBlocksStats.AcceptedByteCount > 0
- ? (100.0 * m_FindBlocksStats.AcceptedReduntantByteCount) /
- (m_FindBlocksStats.AcceptedByteCount + m_FindBlocksStats.AcceptedReduntantByteCount)
+ m_ReuseBlocksStats.AcceptedByteCount > 0
+ ? (100.0 * m_ReuseBlocksStats.AcceptedReduntantByteCount) /
+ (m_ReuseBlocksStats.AcceptedByteCount + m_ReuseBlocksStats.AcceptedReduntantByteCount)
: 0.0;
if (!m_Options.IsQuiet)
{
@@ -5050,18 +5054,18 @@ BuildsOperationUploadFolder::Execute()
NiceBytes(m_FindBlocksStats.FoundBlockByteCount),
NiceTimeSpanMs(m_FindBlocksStats.FindBlockTimeMS),
- m_FindBlocksStats.AcceptedChunkCount,
- NiceBytes(m_FindBlocksStats.AcceptedRawByteCount),
+ m_ReuseBlocksStats.AcceptedChunkCount,
+ NiceBytes(m_ReuseBlocksStats.AcceptedRawByteCount),
m_FindBlocksStats.AcceptedBlockCount,
AcceptedByteCountPercent,
- m_FindBlocksStats.AcceptedReduntantChunkCount,
- NiceBytes(m_FindBlocksStats.AcceptedReduntantByteCount),
+ m_ReuseBlocksStats.AcceptedReduntantChunkCount,
+ NiceBytes(m_ReuseBlocksStats.AcceptedReduntantByteCount),
AcceptedReduntantByteCountPercent,
- m_FindBlocksStats.RejectedChunkCount,
- NiceBytes(m_FindBlocksStats.RejectedByteCount),
- m_FindBlocksStats.RejectedBlockCount,
+ m_ReuseBlocksStats.RejectedChunkCount,
+ NiceBytes(m_ReuseBlocksStats.RejectedByteCount),
+ m_ReuseBlocksStats.RejectedBlockCount,
m_FindBlocksStats.NewBlocksChunkCount,
NiceBytes(m_FindBlocksStats.NewBlocksChunkByteCount),
@@ -5497,7 +5501,7 @@ BuildsOperationUploadFolder::Execute()
{{"totalSize", double(m_LocalFolderScanStats.FoundFileByteCount.load())},
{"reusedRatio", AcceptedByteCountPercent / 100.0},
{"reusedBlockCount", double(m_FindBlocksStats.AcceptedBlockCount)},
- {"reusedBlockByteCount", double(m_FindBlocksStats.AcceptedRawByteCount)},
+ {"reusedBlockByteCount", double(m_ReuseBlocksStats.AcceptedRawByteCount)},
{"newBlockCount", double(m_FindBlocksStats.NewBlocksCount)},
{"newBlockByteCount", double(m_FindBlocksStats.NewBlocksChunkByteCount)},
{"uploadedCount", double(m_UploadStats.BlockCount.load() + m_UploadStats.ChunkCount.load())},
@@ -5589,183 +5593,6 @@ BuildsOperationUploadFolder::IsAcceptedFile(const std::string_view& RelativePath
return true;
}
-std::vector<size_t>
-BuildsOperationUploadFolder::FindReuseBlocks(const std::vector<ChunkBlockDescription>& KnownBlocks,
- std::span<const IoHash> ChunkHashes,
- std::span<const uint32_t> ChunkIndexes,
- std::vector<uint32_t>& OutUnusedChunkIndexes)
-{
- ZEN_TRACE_CPU("FindReuseBlocks");
-
- // Find all blocks with a usage level higher than MinPercentLimit
- // Pick out the blocks with usage higher or equal to MinPercentLimit
- // Sort them with highest size usage - most usage first
- // Make a list of all chunks and mark them as not found
- // For each block, recalculate the block has usage percent based on the chunks marked as not found
- // If the block still reaches MinPercentLimit, keep it and remove the matching chunks from the not found list
- // Repeat for following all remaining block that initially matched MinPercentLimit
-
- std::vector<size_t> FilteredReuseBlockIndexes;
-
- uint32_t ChunkCount = gsl::narrow<uint32_t>(ChunkHashes.size());
- std::vector<bool> ChunkFound(ChunkCount, false);
-
- if (ChunkCount > 0)
- {
- if (!KnownBlocks.empty())
- {
- Stopwatch ReuseTimer;
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- ChunkHashToChunkIndex.reserve(ChunkIndexes.size());
- for (uint32_t ChunkIndex : ChunkIndexes)
- {
- ChunkHashToChunkIndex.insert_or_assign(ChunkHashes[ChunkIndex], ChunkIndex);
- }
-
- std::vector<size_t> BlockSizes(KnownBlocks.size(), 0);
- std::vector<size_t> BlockUseSize(KnownBlocks.size(), 0);
-
- std::vector<size_t> ReuseBlockIndexes;
-
- for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++)
- {
- const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex];
- if (KnownBlock.BlockHash != IoHash::Zero && KnownBlock.ChunkRawHashes.size() == KnownBlock.ChunkCompressedLengths.size())
- {
- size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size();
- if (BlockAttachmentCount == 0)
- {
- continue;
- }
- size_t ReuseSize = 0;
- size_t BlockSize = 0;
- size_t FoundAttachmentCount = 0;
- size_t BlockChunkCount = KnownBlock.ChunkRawHashes.size();
- for (size_t BlockChunkIndex = 0; BlockChunkIndex < BlockChunkCount; BlockChunkIndex++)
- {
- const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex];
- const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex];
- BlockSize += BlockChunkSize;
- if (ChunkHashToChunkIndex.contains(BlockChunkHash))
- {
- ReuseSize += BlockChunkSize;
- FoundAttachmentCount++;
- }
- }
-
- size_t ReusePercent = (ReuseSize * 100) / BlockSize;
-
- if (ReusePercent >= m_Options.BlockReuseMinPercentLimit)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Reusing block {}. {} attachments found, usage level: {}%",
- KnownBlock.BlockHash,
- FoundAttachmentCount,
- ReusePercent);
- }
- ReuseBlockIndexes.push_back(KnownBlockIndex);
-
- BlockSizes[KnownBlockIndex] = BlockSize;
- BlockUseSize[KnownBlockIndex] = ReuseSize;
- }
- else if (FoundAttachmentCount > 0)
- {
- // if (m_Options.IsVerbose)
- //{
- // ZEN_OPERATION_LOG_INFO(m_LogOutput, "Skipping block {}. {} attachments found, usage level: {}%",
- // KnownBlock.BlockHash,
- // FoundAttachmentCount, ReusePercent);
- //}
- m_FindBlocksStats.RejectedBlockCount++;
- m_FindBlocksStats.RejectedChunkCount += FoundAttachmentCount;
- m_FindBlocksStats.RejectedByteCount += ReuseSize;
- }
- }
- }
-
- if (!ReuseBlockIndexes.empty())
- {
- std::sort(ReuseBlockIndexes.begin(), ReuseBlockIndexes.end(), [&](size_t Lhs, size_t Rhs) {
- return BlockUseSize[Lhs] > BlockUseSize[Rhs];
- });
-
- for (size_t KnownBlockIndex : ReuseBlockIndexes)
- {
- std::vector<uint32_t> FoundChunkIndexes;
- size_t BlockSize = 0;
- size_t AdjustedReuseSize = 0;
- size_t AdjustedRawReuseSize = 0;
- const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex];
- for (size_t BlockChunkIndex = 0; BlockChunkIndex < KnownBlock.ChunkRawHashes.size(); BlockChunkIndex++)
- {
- const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex];
- const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex];
- BlockSize += BlockChunkSize;
- if (auto It = ChunkHashToChunkIndex.find(BlockChunkHash); It != ChunkHashToChunkIndex.end())
- {
- const uint32_t ChunkIndex = It->second;
- if (!ChunkFound[ChunkIndex])
- {
- FoundChunkIndexes.push_back(ChunkIndex);
- AdjustedReuseSize += KnownBlock.ChunkCompressedLengths[BlockChunkIndex];
- AdjustedRawReuseSize += KnownBlock.ChunkRawLengths[BlockChunkIndex];
- }
- }
- }
-
- size_t ReusePercent = (AdjustedReuseSize * 100) / BlockSize;
-
- if (ReusePercent >= m_Options.BlockReuseMinPercentLimit)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Reusing block {}. {} attachments found, usage level: {}%",
- KnownBlock.BlockHash,
- FoundChunkIndexes.size(),
- ReusePercent);
- }
- FilteredReuseBlockIndexes.push_back(KnownBlockIndex);
-
- for (uint32_t ChunkIndex : FoundChunkIndexes)
- {
- ChunkFound[ChunkIndex] = true;
- }
- m_FindBlocksStats.AcceptedChunkCount += FoundChunkIndexes.size();
- m_FindBlocksStats.AcceptedByteCount += AdjustedReuseSize;
- m_FindBlocksStats.AcceptedRawByteCount += AdjustedRawReuseSize;
- m_FindBlocksStats.AcceptedReduntantChunkCount += KnownBlock.ChunkRawHashes.size() - FoundChunkIndexes.size();
- m_FindBlocksStats.AcceptedReduntantByteCount += BlockSize - AdjustedReuseSize;
- }
- else
- {
- // if (m_Options.IsVerbose)
- //{
- // ZEN_OPERATION_LOG_INFO(m_LogOutput, "Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash,
- // ReusePercent);
- //}
- m_FindBlocksStats.RejectedBlockCount++;
- m_FindBlocksStats.RejectedChunkCount += FoundChunkIndexes.size();
- m_FindBlocksStats.RejectedByteCount += AdjustedReuseSize;
- }
- }
- }
- }
- OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - m_FindBlocksStats.AcceptedChunkCount);
- for (uint32_t ChunkIndex : ChunkIndexes)
- {
- if (!ChunkFound[ChunkIndex])
- {
- OutUnusedChunkIndexes.push_back(ChunkIndex);
- }
- }
- }
- return FilteredReuseBlockIndexes;
-}
-
void
BuildsOperationUploadFolder::ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content,
const ChunkedContentLookup& Lookup,