diff options
| author | Dan Engelbrecht <[email protected]> | 2025-03-03 17:53:11 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-03-03 17:53:11 +0100 |
| commit | 1270bfeffbc81b1e4940c5c454ee6acde43e696a (patch) | |
| tree | 9ff53df6b43f2806fb5701b4d10ad37696a1c203 /src/zenutil | |
| parent | builds download incremental (#290) (diff) | |
| download | zen-1270bfeffbc81b1e4940c5c454ee6acde43e696a.tar.xz zen-1270bfeffbc81b1e4940c5c454ee6acde43e696a.zip | |
refactor use chunk sequence download (#291)
* work on chunk sequences on download, not paths
* write chunk sequences to a .tmp file and move it into place when complete
* cleanup
* Added on-the-fly validation to `zen builds download` of files built from smaller chunks as each file is completed
Added `--verify` option to `zen builds upload` to verify all uploaded data once entire upload is complete
Added `--verify` option to `zen builds download` to verify all files in target folder once entire download is complete
Fixed/improved progress updates
Multithreaded part validation
* added rates to Write Chunks task
* b/s -> bits/s
* don't validate partial content as complete payload
* handle legacy C# builds
Diffstat (limited to 'src/zenutil')
| -rw-r--r-- | src/zenutil/chunkedcontent.cpp | 78 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkedcontent.h | 53 |
2 files changed, 84 insertions, 47 deletions
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 6dc2a20d8..1552ea823 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -599,10 +599,10 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderIndexOffset = OverlayLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; std::span<const uint32_t> OriginalChunkOrder = std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); AddCunkSequence(Stats, @@ -667,9 +667,9 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = 
BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; ChunkingStatistics Stats; std::span<const uint32_t> OriginalChunkOrder = std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); @@ -777,46 +777,40 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { struct ChunkLocationReference { - uint32_t ChunkIndex; - ChunkedContentLookup::ChunkLocation Location; + uint32_t ChunkIndex; + ChunkedContentLookup::ChunkSequenceLocation Location; }; ChunkedContentLookup Result; { const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size()); - Result.RawHashToSequenceRawHashIndex.reserve(SequenceRawHashesCount); - Result.SequenceRawHashIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); + Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); uint32_t OrderOffset = 0; for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceRawHashIndex++) { - Result.RawHashToSequenceRawHashIndex.insert( - {Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); - Result.SequenceRawHashIndexChunkOrderOffset.push_back(OrderOffset); + Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; } } std::vector<ChunkLocationReference> Locations; Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); - for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) { - if (Content.RawSizes[PathIndex] > 0) + const uint32_t OrderOffset = 
Result.SequenceIndexChunkOrderOffset[SequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) { - const IoHash& RawHash = Content.RawHashes[PathIndex]; - uint32_t SequenceRawHashIndex = Result.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderOffset = Result.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - uint64_t LocationOffset = 0; - for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) - { - uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; - Locations.push_back(ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkLocation{PathIndex, LocationOffset}}); + Locations.push_back( + ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}}); - LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - ZEN_ASSERT(LocationOffset == Content.RawSizes[PathIndex]); + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } } @@ -829,18 +823,18 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { return false; } - if (Lhs.Location.PathIndex < Rhs.Location.PathIndex) + if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex) { return true; } - if (Lhs.Location.PathIndex > Rhs.Location.PathIndex) + if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex) { return false; } return Lhs.Location.Offset < Rhs.Location.Offset; }); - Result.ChunkLocations.reserve(Locations.size()); + Result.ChunkSequenceLocations.reserve(Locations.size()); const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size()); 
Result.ChunkHashToChunkIndex.reserve(ChunkCount); size_t RangeOffset = 0; @@ -850,14 +844,30 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) uint32_t Count = 0; while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) { - Result.ChunkLocations.push_back(Locations[RangeOffset + Count].Location); + Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); Count++; } - Result.ChunkLocationOffset.push_back(RangeOffset); - Result.ChunkLocationCounts.push_back(Count); + Result.ChunkSequenceLocationOffset.push_back(RangeOffset); + Result.ChunkSequenceLocationCounts.push_back(Count); RangeOffset += Count; } + Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); + ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); + const uint32_t SequenceIndex = SequenceIndexIt->second; + if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) + { + Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; + } + } + } + return Result; } diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index 15c687462..309341550 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -122,32 +122,59 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, struct ChunkedContentLookup { - struct ChunkLocation + struct ChunkSequenceLocation { - uint32_t PathIndex; + uint32_t SequenceIndex; uint64_t Offset; }; tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; - std::vector<uint32_t> 
SequenceRawHashIndexChunkOrderOffset; - std::vector<ChunkLocation> ChunkLocations; - std::vector<size_t> ChunkLocationOffset; // ChunkLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex - std::vector<uint32_t> ChunkLocationCounts; // ChunkLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex; + std::vector<uint32_t> SequenceIndexChunkOrderOffset; + std::vector<ChunkSequenceLocation> ChunkSequenceLocations; + std::vector<size_t> + ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash }; ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); inline std::pair<size_t, uint32_t> -GetChunkLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) { - return std::make_pair(Lookup.ChunkLocationOffset[ChunkIndex], Lookup.ChunkLocationCounts[ChunkIndex]); + return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]); } -inline std::span<const ChunkedContentLookup::ChunkLocation> -GetChunkLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +inline std::span<const ChunkedContentLookup::ChunkSequenceLocation> +GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) { - std::pair<size_t, uint32_t> Range = GetChunkLocationRange(Lookup, ChunkIndex); - return std::span<const ChunkedContentLookup::ChunkLocation>(Lookup.ChunkLocations).subspan(Range.first, Range.second); + std::pair<size_t, uint32_t> Range = 
GetChunkSequenceLocationRange(Lookup, ChunkIndex); + return std::span<const ChunkedContentLookup::ChunkSequenceLocation>(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second); +} + +inline uint32_t +GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex) +{ + return Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; +} + +inline uint32_t +GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash); + return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex); } namespace compactbinary_helpers { |