diff options
| author | Dan Engelbrecht <[email protected]> | 2025-11-04 11:00:58 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-11-04 11:00:58 +0100 |
| commit | 0a0b35f685caa02fa386b1105ba626c29084ff42 (patch) | |
| tree | 06a6a651c12747eee556920b91b8e798e9ff71dd /src | |
| parent | add `--verbose-http` option to builds commands (#630) (diff) | |
| download | zen-0a0b35f685caa02fa386b1105ba626c29084ff42.tar.xz zen-0a0b35f685caa02fa386b1105ba626c29084ff42.zip | |
refactor MergeChunkedFolderContents to not reallocate Result.ChunkedContent.ChunkOrders for each path added (#626)
Diffstat (limited to 'src')
| -rw-r--r-- | src/zenremotestore/chunking/chunkedcontent.cpp | 44 |
1 file changed, 35 insertions, 9 deletions
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index ea67e3d94..5e6cb9ba5 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -33,7 +33,6 @@ namespace { { ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); InOutChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); - InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); for (uint32_t ChunkedSequenceIndex : ChunkSequence) { @@ -122,6 +121,8 @@ namespace { { ChunkSizes.push_back(Source.Size); } + OutChunkedContent.ChunkedContent.ChunkOrders.reserve(OutChunkedContent.ChunkedContent.ChunkOrders.size() + + Chunked.Info.ChunkSequence.size()); AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, @@ -606,6 +607,19 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu std::span<const ChunkedFolderContent> Overlays) { const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); tsl::robin_set<std::string> BaseOverlayPaths = BuildOverlayPaths(Overlays); + + struct SequenceToAdd + { + IoHash RawHash; + std::span<const uint32_t> OriginalChunkOrder; + }; + + std::vector<SequenceToAdd> SequencesToAdd; + + size_t ChunkCountsAdded = 0; + size_t ChunkOrdersAdded = 0; + + ChunkingStatistics Stats; for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) { std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); @@ -626,24 +640,36 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); const uint32_t OrderIndexOffset = 
OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; std::span<const uint32_t> OriginalChunkOrder = std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddChunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - OverlayContent.ChunkedContent.ChunkHashes, - OverlayContent.ChunkedContent.ChunkRawSizes); + + SequencesToAdd.push_back(SequenceToAdd{.RawHash = RawHash, .OriginalChunkOrder = OriginalChunkOrder}); + + ChunkCountsAdded++; + ChunkOrdersAdded += OriginalChunkOrder.size(); + Stats.UniqueSequencesFound++; } } } } + + Result.ChunkedContent.ChunkCounts.reserve(Result.ChunkedContent.ChunkCounts.size() + ChunkCountsAdded); + Result.ChunkedContent.ChunkOrders.reserve(Result.ChunkedContent.ChunkOrders.size() + ChunkOrdersAdded); + for (SequenceToAdd& NewSequence : SequencesToAdd) + { + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + NewSequence.RawHash, + NewSequence.OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); + } }; tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MergedChunkHashToChunkIndex; |