about summary refs log tree commit diff
path: root/src/zenutil/chunkedcontent.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2025-06-04 08:59:44 +0200
committer GitHub Enterprise <[email protected]> 2025-06-04 08:59:44 +0200
commit 937510356143f83ecd15d0a9f58b611c7418ed61 (patch)
tree d0540f89c30a46f1fd3a041a20d7bed417fcb877 /src/zenutil/chunkedcontent.cpp
parent fixed size chunking for encrypted files (#410) (diff)
download zen-937510356143f83ecd15d0a9f58b611c7418ed61.tar.xz
zen-937510356143f83ecd15d0a9f58b611c7418ed61.zip
faster scavenge (#417)
- Improvement: Multithreaded scavenge pass for zen builds download
- Improvement: Optimized check for modified files when verifying state of scavenged paths
Diffstat (limited to 'src/zenutil/chunkedcontent.cpp')
-rw-r--r-- src/zenutil/chunkedcontent.cpp 91
1 files changed, 63 insertions, 28 deletions
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
index 4bec4901a..c7532e098 100644
--- a/src/zenutil/chunkedcontent.cpp
+++ b/src/zenutil/chunkedcontent.cpp
@@ -662,7 +662,9 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const Chu
}
ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
+ const ChunkedContentLookup& BaseContentLookup,
+ std::span<const std::filesystem::path> DeletedPaths)
{
ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
@@ -676,14 +678,18 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
DeletedPathSet.insert(PathCompareString(DeletedPath));
}
- const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- const size_t ExpectedCount = BaseContent.Paths.size() - DeletedPaths.size();
- Result.Paths.reserve(ExpectedCount);
- Result.RawSizes.reserve(ExpectedCount);
- Result.Attributes.reserve(ExpectedCount);
- Result.RawHashes.reserve(ExpectedCount);
+ const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size();
+ std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1);
+
+ const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size();
+ Result.Paths.reserve(ExpectedPathCount);
+ Result.RawSizes.reserve(ExpectedPathCount);
+ Result.Attributes.reserve(ExpectedPathCount);
+ Result.RawHashes.reserve(ExpectedPathCount);
+
+ Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
+ Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++)
@@ -703,20 +709,33 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
{
RawHashToSequenceRawHashIndex.insert(
{RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- ChunkingStatistics Stats;
+ const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash);
+ const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
+ const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
+
std::span<const uint32_t> OriginalChunkOrder =
std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
- AddChunkSequence(Stats,
- Result.ChunkedContent,
- ChunkHashToChunkIndex,
- RawHash,
- OriginalChunkOrder,
- BaseContent.ChunkedContent.ChunkHashes,
- BaseContent.ChunkedContent.ChunkRawSizes);
- Stats.UniqueSequencesFound++;
+
+ Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size()));
+
+ for (uint32_t OldChunkIndex : OriginalChunkOrder)
+ {
+ if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1)
+ {
+ Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex);
+ }
+ else
+ {
+ const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
+ NewChunkIndexes[OldChunkIndex] = NewChunkIndex;
+ const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex];
+ const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex];
+ Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
+ Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize);
+ Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
+ }
+ }
+ Result.ChunkedContent.SequenceRawHashes.push_back(RawHash);
}
}
}
@@ -726,6 +745,19 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span
}
ChunkedFolderContent
+DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
+{
+ ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
+ ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
+ if (DeletedPaths.size() == BaseContent.Paths.size())
+ {
+ return {};
+ }
+ const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
+ return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths);
+}
+
+ChunkedFolderContent
ChunkFolderContent(ChunkingStatistics& Stats,
WorkerThreadPool& WorkerPool,
const std::filesystem::path& RootPath,
@@ -815,8 +847,9 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
struct ChunkLocationReference
{
- uint32_t ChunkIndex = (uint32_t)-1;
- ChunkedContentLookup::ChunkSequenceLocation Location;
+ uint32_t ChunkIndex = (uint32_t)-1;
+ uint32_t SequenceIndex = (uint32_t)-1;
+ uint64_t Offset = (uint64_t)-1;
};
ChunkedContentLookup Result;
@@ -845,8 +878,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
- Locations.push_back(
- ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}});
+ Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset});
LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
}
@@ -861,15 +893,15 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
{
return false;
}
- if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex)
+ if (Lhs.SequenceIndex < Rhs.SequenceIndex)
{
return true;
}
- if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex)
+ if (Lhs.SequenceIndex > Rhs.SequenceIndex)
{
return false;
}
- return Lhs.Location.Offset < Rhs.Location.Offset;
+ return Lhs.Offset < Rhs.Offset;
});
Result.ChunkSequenceLocations.reserve(Locations.size());
@@ -882,7 +914,10 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
uint32_t Count = 0;
while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex))
{
- Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location);
+ const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count];
+ Result.ChunkSequenceLocations.push_back(
+ ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex,
+ .Offset = LocationReference.Offset});
Count++;
}
Result.ChunkSequenceLocationOffset.push_back(RangeOffset);