aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunkedcontent.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-03-03 17:53:11 +0100
committerGitHub Enterprise <[email protected]>2025-03-03 17:53:11 +0100
commit1270bfeffbc81b1e4940c5c454ee6acde43e696a (patch)
tree9ff53df6b43f2806fb5701b4d10ad37696a1c203 /src/zenutil/chunkedcontent.cpp
parentbuilds download incremental (#290) (diff)
downloadzen-1270bfeffbc81b1e4940c5c454ee6acde43e696a.tar.xz
zen-1270bfeffbc81b1e4940c5c454ee6acde43e696a.zip
refactor use chunk sequence download (#291)
* work on chunk sequences on download, not paths * write chunk sequences to .tmp file and move when complete * cleanup * Added on-the-fly validation to `zen builds download` of files built from smaller chunks as each file is completed. Added `--verify` option to `zen builds upload` to verify all uploaded data once the entire upload is complete. Added `--verify` option to `zen builds download` to verify all files in the target folder once the entire download is complete. Fixed/improved progress updates. Multithreaded part validation. * added rates to Write Chunks task * b/s -> bits/s * don't validate partial content as complete payload * handle legacy C# builds
Diffstat (limited to 'src/zenutil/chunkedcontent.cpp')
-rw-r--r--src/zenutil/chunkedcontent.cpp78
1 files changed, 44 insertions, 34 deletions
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
index 6dc2a20d8..1552ea823 100644
--- a/src/zenutil/chunkedcontent.cpp
+++ b/src/zenutil/chunkedcontent.cpp
@@ -599,10 +599,10 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu
{
RawHashToSequenceRawHashIndex.insert(
{RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceRawHashIndex.at(RawHash);
- const uint32_t OrderIndexOffset = OverlayLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- ChunkingStatistics Stats;
+ const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash);
+ const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
+ const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
+ ChunkingStatistics Stats;
std::span<const uint32_t> OriginalChunkOrder =
std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
AddCunkSequence(Stats,
@@ -667,9 +667,9 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
RawHashToSequenceRawHashIndex.insert(
{RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceRawHashIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
+ const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash);
+ const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
+ const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
ChunkingStatistics Stats;
std::span<const uint32_t> OriginalChunkOrder =
std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
@@ -777,46 +777,40 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
struct ChunkLocationReference
{
- uint32_t ChunkIndex;
- ChunkedContentLookup::ChunkLocation Location;
+ uint32_t ChunkIndex;
+ ChunkedContentLookup::ChunkSequenceLocation Location;
};
ChunkedContentLookup Result;
{
const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size());
- Result.RawHashToSequenceRawHashIndex.reserve(SequenceRawHashesCount);
- Result.SequenceRawHashIndexChunkOrderOffset.reserve(SequenceRawHashesCount);
+ Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount);
+ Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount);
uint32_t OrderOffset = 0;
for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size();
SequenceRawHashIndex++)
{
- Result.RawHashToSequenceRawHashIndex.insert(
- {Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex});
- Result.SequenceRawHashIndexChunkOrderOffset.push_back(OrderOffset);
+ Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex});
+ Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset);
OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
}
}
std::vector<ChunkLocationReference> Locations;
Locations.reserve(Content.ChunkedContent.ChunkOrders.size());
- for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
+ for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++)
{
- if (Content.RawSizes[PathIndex] > 0)
+ const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex];
+ const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex];
+ uint64_t LocationOffset = 0;
+ for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++)
{
- const IoHash& RawHash = Content.RawHashes[PathIndex];
- uint32_t SequenceRawHashIndex = Result.RawHashToSequenceRawHashIndex.at(RawHash);
- const uint32_t OrderOffset = Result.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- uint64_t LocationOffset = 0;
- for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++)
- {
- uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
+ uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
- Locations.push_back(ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkLocation{PathIndex, LocationOffset}});
+ Locations.push_back(
+ ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}});
- LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
- }
- ZEN_ASSERT(LocationOffset == Content.RawSizes[PathIndex]);
+ LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
}
}
@@ -829,18 +823,18 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
return false;
}
- if (Lhs.Location.PathIndex < Rhs.Location.PathIndex)
+ if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex)
{
return true;
}
- if (Lhs.Location.PathIndex > Rhs.Location.PathIndex)
+ if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex)
{
return false;
}
return Lhs.Location.Offset < Rhs.Location.Offset;
});
- Result.ChunkLocations.reserve(Locations.size());
+ Result.ChunkSequenceLocations.reserve(Locations.size());
const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size());
Result.ChunkHashToChunkIndex.reserve(ChunkCount);
size_t RangeOffset = 0;
@@ -850,14 +844,30 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
uint32_t Count = 0;
while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)
{
- Result.ChunkLocations.push_back(Locations[RangeOffset + Count].Location);
+ Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location);
Count++;
}
- Result.ChunkLocationOffset.push_back(RangeOffset);
- Result.ChunkLocationCounts.push_back(Count);
+ Result.ChunkSequenceLocationOffset.push_back(RangeOffset);
+ Result.ChunkSequenceLocationCounts.push_back(Count);
RangeOffset += Count;
}
+ Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1);
+ for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
+ {
+ if (Content.RawSizes[PathIndex] > 0)
+ {
+ const IoHash& RawHash = Content.RawHashes[PathIndex];
+ auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash);
+ ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end());
+ const uint32_t SequenceIndex = SequenceIndexIt->second;
+ if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1)
+ {
+ Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex;
+ }
+ }
+ }
+
return Result;
}