From 0a0b35f685caa02fa386b1105ba626c29084ff42 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 4 Nov 2025 11:00:58 +0100 Subject: refactor MergeChunkedFolderContents to not reallocate Result.ChunkedContent.ChunkOrders for each path added (#626) --- src/zenremotestore/chunking/chunkedcontent.cpp | 44 ++++++++++++++++++++------ 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index ea67e3d94..5e6cb9ba5 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -33,7 +33,6 @@ namespace { { ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); InOutChunkedContent.ChunkCounts.push_back(gsl::narrow(ChunkSequence.size())); - InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); for (uint32_t ChunkedSequenceIndex : ChunkSequence) { @@ -122,6 +121,8 @@ namespace { { ChunkSizes.push_back(Source.Size); } + OutChunkedContent.ChunkedContent.ChunkOrders.reserve(OutChunkedContent.ChunkedContent.ChunkOrders.size() + + Chunked.Info.ChunkSequence.size()); AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, @@ -606,6 +607,19 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays) { const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); tsl::robin_set BaseOverlayPaths = BuildOverlayPaths(Overlays); + + struct SequenceToAdd + { + IoHash RawHash; + std::span OriginalChunkOrder; + }; + + std::vector SequencesToAdd; + + size_t ChunkCountsAdded = 0; + size_t ChunkOrdersAdded = 0; + + ChunkingStatistics Stats; for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) { std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); @@ -626,24 +640,36 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddChunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - OverlayContent.ChunkedContent.ChunkHashes, - OverlayContent.ChunkedContent.ChunkRawSizes); + + SequencesToAdd.push_back(SequenceToAdd{.RawHash = RawHash, .OriginalChunkOrder = OriginalChunkOrder}); + + ChunkCountsAdded++; + ChunkOrdersAdded += OriginalChunkOrder.size(); + Stats.UniqueSequencesFound++; } } } } + + Result.ChunkedContent.ChunkCounts.reserve(Result.ChunkedContent.ChunkCounts.size() + ChunkCountsAdded); + Result.ChunkedContent.ChunkOrders.reserve(Result.ChunkedContent.ChunkOrders.size() + ChunkOrdersAdded); + for (SequenceToAdd& NewSequence : SequencesToAdd) + { + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + NewSequence.RawHash, + NewSequence.OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); + } }; tsl::robin_map MergedChunkHashToChunkIndex; -- cgit v1.2.3