diff options
| author | Dan Engelbrecht <[email protected]> | 2025-11-24 10:06:52 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-11-24 10:06:52 +0100 |
| commit | 6dcdddbf733b0aa323ffb7ecbe56c04b15c6c16a (patch) | |
| tree | 78685156e98214e4e1125501a8c09cac37bc45f4 /src/zenremotestore/chunking/chunkedcontent.cpp | |
| parent | changelog (#661) (diff) | |
| download | zen-6dcdddbf733b0aa323ffb7ecbe56c04b15c6c16a.tar.xz zen-6dcdddbf733b0aa323ffb7ecbe56c04b15c6c16a.zip | |
update state when wildcard (#657)
* add --append option and improve state handling when using downloads for `zen builds download`
Diffstat (limited to 'src/zenremotestore/chunking/chunkedcontent.cpp')
| -rw-r--r-- | src/zenremotestore/chunking/chunkedcontent.cpp | 569 |
1 file changed, 430 insertions, 139 deletions
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index ac979a64b..5f1876908 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -3,6 +3,7 @@ #include <zenremotestore/chunking/chunkedcontent.h> #include <zencore/compactbinaryutil.h> +#include <zencore/compositebuffer.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> @@ -15,6 +16,12 @@ #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenutil/wildcard.h> +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <numeric> +#endif // ZEN_WITH_TESTS + ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_set.h> #include <gsl/gsl-lite.hpp> @@ -170,6 +177,31 @@ namespace { std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BuildHashLookup(std::span<const IoHash> Hashes) + { + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup; + Lookup.reserve(Hashes.size()); + for (uint32_t Index = 0; Index < Hashes.size(); Index++) + { + bool IsNew = Lookup.insert_or_assign(Hashes[Index], Index).second; + ZEN_ASSERT(IsNew); + } + return Lookup; + } + + std::vector<uint32_t> BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts) + { + std::vector<uint32_t> ChunkOffsets; + ChunkOffsets.reserve(ChunkCounts.size()); + uint32_t Offset = 0; + for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++) + { + ChunkOffsets.push_back(Offset); + Offset += ChunkCounts[SequenceIndex]; + } + return ChunkOffsets; + } + } // namespace std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; @@ -539,9 +571,9 @@ SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbW } ChunkedFolderContent 
-LoadChunkedFolderContentToCompactBinary(CbObjectView Input) +LoadChunkedFolderContentFromCompactBinary(CbObjectView Input) { - ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary"); + ZEN_TRACE_CPU("LoadChunkedFolderContentFromCompactBinary"); ChunkedFolderContent Content; Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); Content.Paths = compactbinary_helpers::ReadArray<std::filesystem::path>("paths"sv, Input); @@ -836,30 +868,135 @@ CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderConten return true; }; -static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> -BuildHashLookup(std::span<const IoHash> Hashes) +ChunkedFolderContent +ApplyChunkedContentOverlay(const ChunkedFolderContent& Base, + const ChunkedFolderContent& Overlay, + std::span<const std::string> OverlayIncludeWildcards, + std::span<const std::string> OverlayExcludeWildcards) { - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup; - Lookup.reserve(Hashes.size()); - for (uint32_t Index = 0; Index < Hashes.size(); Index++) + ChunkedFolderContent Result = {.Platform = Base.Platform}; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseSequenceHashToSequenceIndex = + BuildHashLookup(Base.ChunkedContent.SequenceRawHashes); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseChunkHashToChunkIndex = BuildHashLookup(Base.ChunkedContent.ChunkHashes); + std::vector<uint32_t> BaseSequenceChunkOrderOffset = BuildChunkOrderOffset(Base.ChunkedContent.ChunkCounts); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> OverlaySequenceHashToSequenceIndex = + BuildHashLookup(Overlay.ChunkedContent.SequenceRawHashes); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> OverlayChunkHashToChunkIndex = BuildHashLookup(Overlay.ChunkedContent.ChunkHashes); + std::vector<uint32_t> OverlaySequenceChunkOrderOffset = BuildChunkOrderOffset(Overlay.ChunkedContent.ChunkCounts); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> 
ResultSequenceHashToSequenceIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ResultChunkHashToChunkIndex; + + const size_t EstimatedPathCount = Max(Base.Paths.size(), Overlay.Paths.size()); + + Result.Attributes.reserve(EstimatedPathCount); + Result.Paths.reserve(EstimatedPathCount); + Result.RawSizes.reserve(EstimatedPathCount); + Result.RawHashes.reserve(EstimatedPathCount); + + const size_t EstimatedSequenceCount = + Max(Base.ChunkedContent.SequenceRawHashes.size(), Overlay.ChunkedContent.SequenceRawHashes.size()); + Result.ChunkedContent.SequenceRawHashes.reserve(EstimatedSequenceCount); + + const size_t EstimatedChunkCount = Max(Base.ChunkedContent.ChunkHashes.size(), Overlay.ChunkedContent.ChunkHashes.size()); + Result.ChunkedContent.ChunkHashes.reserve(EstimatedChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(EstimatedChunkCount); + + const size_t EstimatedChunkOrderCount = Max(Base.ChunkedContent.ChunkOrders.size(), Overlay.ChunkedContent.ChunkOrders.size()); + Result.ChunkedContent.ChunkOrders.reserve(EstimatedChunkOrderCount); + + auto AddPath = [&Result, &ResultSequenceHashToSequenceIndex, &ResultChunkHashToChunkIndex]( + const ChunkedFolderContent& Source, + uint32_t SourcePathIndex, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SourceSequenceHashToSequenceIndex, + const std::vector<uint32_t>& SourceSequenceChunkOrderOffset) { + Result.Attributes.push_back(Source.Attributes[SourcePathIndex]); + Result.Paths.push_back(Source.Paths[SourcePathIndex]); + Result.RawSizes.push_back(Source.RawSizes[SourcePathIndex]); + Result.RawHashes.push_back(Source.RawHashes[SourcePathIndex]); + if (Source.RawSizes[SourcePathIndex] > 0) + { + if (!ResultSequenceHashToSequenceIndex.contains(Source.RawHashes[SourcePathIndex])) + { + const uint32_t ResultSequenceIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size()); + ResultSequenceHashToSequenceIndex.insert_or_assign(Source.RawHashes[SourcePathIndex], 
ResultSequenceIndex); + Result.ChunkedContent.SequenceRawHashes.push_back(Source.RawHashes[SourcePathIndex]); + auto SourceSequenceIndexIt = SourceSequenceHashToSequenceIndex.find(Source.RawHashes[SourcePathIndex]); + ZEN_ASSERT(SourceSequenceIndexIt != SourceSequenceHashToSequenceIndex.end()); + const uint32_t SourceSequenceIndex = SourceSequenceIndexIt->second; + const uint32_t ChunkOrderOffset = SourceSequenceChunkOrderOffset[SourceSequenceIndex]; + const uint32_t ChunkCount = Source.ChunkedContent.ChunkCounts[SourceSequenceIndex]; + Result.ChunkedContent.ChunkCounts.push_back(ChunkCount); + + std::span<const uint32_t> SourceChunkIndexes = + std::span<const uint32_t>(Source.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount); + for (uint32_t SourceChunkIndex : SourceChunkIndexes) + { + const IoHash& ChunkHash = Source.ChunkedContent.ChunkHashes[SourceChunkIndex]; + if (auto It = ResultChunkHashToChunkIndex.find(ChunkHash); It != ResultChunkHashToChunkIndex.end()) + { + const uint32_t ResultChunkIndex = It->second; + Result.ChunkedContent.ChunkOrders.push_back(ResultChunkIndex); + } + else + { + const uint32_t ResultChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size()); + Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); + Result.ChunkedContent.ChunkRawSizes.push_back(Source.ChunkedContent.ChunkRawSizes[SourceChunkIndex]); + Result.ChunkedContent.ChunkOrders.push_back(ResultChunkIndex); + ResultChunkHashToChunkIndex.insert_or_assign(ChunkHash, ResultChunkIndex); + } + } + } + } + }; + + if (OverlayIncludeWildcards.empty() && OverlayExcludeWildcards.empty()) { - Lookup.insert_or_assign(Hashes[Index], Index); - } - return Lookup; -} + tsl::robin_set<std::string> OverlayPaths; + OverlayPaths.reserve(Overlay.Paths.size()); + for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++) + { + const std::string PathString = ToLower(Overlay.Paths[OverlayPathIndex].generic_string()); + 
OverlayPaths.insert(PathString); + } + for (uint32_t BasePathIndex = 0; BasePathIndex < Base.Paths.size(); BasePathIndex++) + { + const std::string PathString = ToLower(Base.Paths[BasePathIndex].generic_string()); + if (!OverlayPaths.contains(PathString)) + { + AddPath(Base, BasePathIndex, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset); + } + } -static std::vector<uint32_t> -BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts) -{ - std::vector<uint32_t> ChunkOffsets; - ChunkOffsets.reserve(ChunkCounts.size()); - uint32_t Offset = 0; - for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++) + for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++) + { + AddPath(Overlay, OverlayPathIndex, OverlaySequenceHashToSequenceIndex, OverlaySequenceChunkOrderOffset); + } + } + else { - ChunkOffsets.push_back(Offset); - Offset += ChunkCounts[SequenceIndex]; + for (uint32_t BasePathIndex = 0; BasePathIndex < Base.Paths.size(); BasePathIndex++) + { + const std::string PathString = ToLower(Base.Paths[BasePathIndex].generic_string()); + if (!IncludePath(OverlayIncludeWildcards, OverlayExcludeWildcards, PathString, /*CaseSensitive*/ true)) + { + AddPath(Base, BasePathIndex, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset); + } + } + + for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++) + { + const std::string PathString = ToLower(Overlay.Paths[OverlayPathIndex].generic_string()); + if (IncludePath(OverlayIncludeWildcards, OverlayExcludeWildcards, PathString, /*CaseSensitive*/ true)) + { + AddPath(Overlay, OverlayPathIndex, OverlaySequenceHashToSequenceIndex, OverlaySequenceChunkOrderOffset); + } + } } - return ChunkOffsets; + return Result; } ChunkedFolderContent @@ -1058,9 +1195,85 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) } void +CalculateLocalChunkOrders(const std::span<const uint32_t>& 
AbsoluteChunkOrders, + const std::span<const IoHash> LooseChunkHashes, + const std::span<const uint64_t> LooseChunkRawSizes, + const std::span<const ChunkBlockDescription>& BlockDescriptions, + std::vector<IoHash>& OutLocalChunkHashes, + std::vector<uint64_t>& OutLocalChunkRawSizes, + std::vector<uint32_t>& OutLocalChunkOrders, + bool DoExtraVerify) +{ + ZEN_TRACE_CPU("CalculateLocalChunkOrders"); + + std::vector<IoHash> AbsoluteChunkHashes; + std::vector<uint64_t> AbsoluteChunkRawSizes; + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), LooseChunkRawSizes.begin(), LooseChunkRawSizes.end()); + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), Block.ChunkRawHashes.begin(), Block.ChunkRawHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), Block.ChunkRawLengths.begin(), Block.ChunkRawLengths.end()); + } + OutLocalChunkHashes.reserve(AbsoluteChunkHashes.size()); + OutLocalChunkRawSizes.reserve(AbsoluteChunkRawSizes.size()); + OutLocalChunkOrders.reserve(AbsoluteChunkOrders.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(AbsoluteChunkHashes.size()); + + for (uint32_t AbsoluteChunkOrderIndex = 0; AbsoluteChunkOrderIndex < AbsoluteChunkOrders.size(); AbsoluteChunkOrderIndex++) + { + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + const uint64_t AbsoluteChunkRawSize = AbsoluteChunkRawSizes[AbsoluteChunkIndex]; + + if (auto It = ChunkHashToChunkIndex.find(AbsoluteChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + OutLocalChunkOrders.push_back(LocalChunkIndex); + } + else + { + uint32_t LocalChunkIndex = 
gsl::narrow<uint32_t>(OutLocalChunkHashes.size()); + OutLocalChunkHashes.push_back(AbsoluteChunkHash); + OutLocalChunkRawSizes.push_back(AbsoluteChunkRawSize); + OutLocalChunkOrders.push_back(LocalChunkIndex); + ChunkHashToChunkIndex.insert_or_assign(AbsoluteChunkHash, LocalChunkIndex); + } + if (DoExtraVerify) + { + const uint32_t LocalChunkIndex = OutLocalChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + const uint64_t& LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + ZEN_ASSERT(LocalChunkRawSize == AbsoluteChunkRawSize); + } + } + if (DoExtraVerify) + { + for (uint32_t OrderIndex = 0; OrderIndex < OutLocalChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = OutLocalChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = AbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = AbsoluteChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } + } +} + +void ValidateChunkedFolderContent(const ChunkedFolderContent& Content, std::span<const ChunkBlockDescription> BlockDescriptions, - std::span<const IoHash> LooseChunks) + std::span<const IoHash> LooseChunks, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards) { size_t TotalKnownChunkCount = LooseChunks.size(); for (const ChunkBlockDescription& BlockDescription : BlockDescriptions) @@ -1101,6 +1314,8 @@ ValidateChunkedFolderContent(const ChunkedFolderContent& Content, std::span<const uint32_t> ChunkIndexes = std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount); + bool VerifyIfChunkExists = 
IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), true); + IoHashStream Hasher; uint64_t SizeSum = 0; for (uint32_t ChunkIndex : ChunkIndexes) @@ -1116,9 +1331,13 @@ ValidateChunkedFolderContent(const ChunkedFolderContent& Content, { throw std::runtime_error("Chunked folder content contains zero chunk hash"); } - if (!KnownChunks.contains(ChunkRawHash)) + + if (VerifyIfChunkExists) { - throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash)); + if (!KnownChunks.contains(ChunkRawHash)) + { + throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash)); + } } SizeSum += ChunkSize; } @@ -1186,35 +1405,41 @@ chunkedcontent_forcelink() { } -namespace chunked_test_utils { - struct ChunkedFile +namespace chunkedcontent_testutils { + ChunkedFile CreateChunkedFile(FastRandom& Random, const size_t FinalSize, size_t ChunkingSize, const IoBuffer& LastUsedBuffer) { - IoHash RawHash; - std::vector<IoHash> ChunkHashes; - std::vector<uint64_t> ChunkSizes; - std::vector<IoBuffer> Chunks; - }; - - ChunkedFile CreateChunkedFile(FastRandom& Random, size_t Size, size_t ChunkingSize) - { - size_t ChunkCount = (Size + (ChunkingSize - 1)) / ChunkingSize; + size_t ChunkCount = (FinalSize + (ChunkingSize - 1)) / ChunkingSize; std::vector<IoHash> ChunkHashes; std::vector<uint64_t> ChunkSizes; std::vector<IoBuffer> Chunks; ChunkHashes.reserve(ChunkCount); ChunkSizes.reserve(ChunkCount); + size_t SizeLeft = FinalSize; IoHashStream HashStream; - while (Size > 0) + while (SizeLeft > 0) { - size_t ChunkSize = Min(Size, ChunkingSize); - IoBuffer ChunkBuffer = CreateRandomBlob(Random, ChunkSize); + size_t ChunkSize = Min(SizeLeft, ChunkingSize); + IoBuffer ChunkBuffer; + if (LastUsedBuffer && FinalSize == SizeLeft) + { + ChunkSize = Min(ChunkSize, LastUsedBuffer.GetSize()); + ChunkBuffer = IoBuffer(LastUsedBuffer, 0, ChunkSize); + } + else + { + ChunkBuffer = 
CreateRandomBlob(Random, ChunkSize); + } HashStream.Append(ChunkBuffer); ChunkHashes.push_back(IoHash::HashBuffer(ChunkBuffer)); ChunkSizes.push_back(ChunkSize); Chunks.emplace_back(std::move(ChunkBuffer)); - Size -= ChunkSize; + SizeLeft -= ChunkSize; } + ZEN_ASSERT(std::accumulate(ChunkSizes.begin(), ChunkSizes.end(), uint64_t(0)) == FinalSize); + ZEN_ASSERT(std::accumulate(Chunks.begin(), Chunks.end(), uint64_t(0), [](uint64_t Current, const IoBuffer& B) { + return Current + B.GetSize(); + }) == FinalSize); return ChunkedFile{.RawHash = HashStream.GetHash(), .ChunkHashes = std::move(ChunkHashes), .ChunkSizes = std::move(ChunkSizes), @@ -1233,6 +1458,8 @@ namespace chunked_test_utils { Result.RawHashes.reserve(PathAndSizes.size()); ChunkPayloads.reserve(PathAndSizes.size()); + IoBuffer LastChunkGenerated; + tsl::robin_map<IoHash, uint32_t> SequenceToIndex; tsl::robin_map<IoHash, uint32_t> ChunkToIndex; for (size_t PathIndex = 0; PathIndex < PathAndSizes.size(); PathIndex++) @@ -1246,7 +1473,8 @@ namespace chunked_test_utils { if (Size > 0) { - ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize); + ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize, LastChunkGenerated); + LastChunkGenerated = File.Chunks.back(); Result.RawHashes.push_back(File.RawHash); if (auto SequenceIt = SequenceToIndex.find(File.RawHash); SequenceIt == SequenceToIndex.end()) @@ -1259,12 +1487,13 @@ namespace chunked_test_utils { const IoHash& ChunkHash = File.ChunkHashes[ChunkIndex]; if (auto ChunkIt = ChunkToIndex.find(ChunkHash); ChunkIt == ChunkToIndex.end()) { - const uint32_t ChunkedContentChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkOrders.size()); + const uint32_t ChunkedContentChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size()); Result.ChunkedContent.ChunkOrders.push_back(gsl::narrow<uint32_t>(ChunkedContentChunkIndex)); Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); 
Result.ChunkedContent.ChunkRawSizes.push_back(File.ChunkSizes[ChunkIndex]); ChunkPayloads.push_back(std::move(File.Chunks[ChunkIndex])); + ChunkToIndex.insert_or_assign(ChunkHash, ChunkedContentChunkIndex); } else { @@ -1281,67 +1510,7 @@ namespace chunked_test_utils { } return Result; } -# if 0 - void ValidateChunkedFolderContent(const ChunkedFolderContent& Content, std::span<const IoBuffer> Chunks) - { - std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts); - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes); - std::vector<size_t> SequenceUseCount(Content.ChunkedContent.SequenceRawHashes.size(), 0); - std::vector<size_t> ChunkUseCount(Content.ChunkedContent.ChunkHashes.size(), 0); - for (size_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) - { - const std::filesystem::path& Path = Content.Paths[PathIndex]; - ZEN_ASSERT(!Path.empty()); - const uint64_t RawSize = Content.RawSizes[PathIndex]; - const IoHash RawHash = Content.RawHashes[PathIndex]; - if (RawSize > 0) - { - if (auto It = SequenceIndexLookup.find(RawHash); It != SequenceIndexLookup.end()) - { - const uint32_t SourceSequenceIndex = It->second; - SequenceUseCount[SourceSequenceIndex]++; - const uint32_t ChunkOrderOffset = ChunkOrderOffsets[SourceSequenceIndex]; - const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex]; - - std::span<const uint32_t> ChunkIndexes = - std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount); - - IoHashStream Hasher; - uint64_t SizeSum = 0; - for (uint32_t ChunkIndex : ChunkIndexes) - { - ChunkUseCount[ChunkIndex]++; - const IoBuffer& ChunkBuffer = Chunks[ChunkIndex]; - const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; - SizeSum += ChunkSize; - 
CHECK_EQ(ChunkRawHash, IoHash::HashBuffer(ChunkBuffer)); - Hasher.Append(ChunkBuffer); - } - CHECK_EQ(RawHash, Hasher.GetHash()); - CHECK_EQ(SizeSum, RawSize); - } - else - { - CHECK(false); - } - } - else - { - CHECK(RawHash == IoHash::Zero); - } - } - for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++) - { - CHECK(SequenceUseCount[SequenceIndex] > 0); - } - for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++) - { - CHECK(ChunkUseCount[ChunkIndex] > 0); - } - } -# endif // 0 std::vector<IoBuffer> GetChunkPayloads(std::span<const IoHash> BaseHashes, std::span<const IoBuffer> BaseChunks, std::span<const IoHash> OverlayHashes, @@ -1382,41 +1551,9 @@ namespace chunked_test_utils { return Result; } - bool IncludePath(std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - const std::filesystem::path& Path) - { - const std::string PathString = Path.generic_string(); - bool IncludePath = true; - if (!IncludeWildcards.empty()) - { - IncludePath = false; - for (const std::string& IncludeWildcard : IncludeWildcards) - { - if (MatchWildcard(IncludeWildcard, PathString, /*CaseSensitive*/ false)) - { - IncludePath = true; - break; - } - } - if (!IncludePath) - { - return false; - } - } - for (const std::string& ExcludeWildcard : ExcludeWildcards) - { - if (MatchWildcard(ExcludeWildcard, PathString, /*CaseSensitive*/ false)) - { - return false; - } - } - return true; - } - -} // namespace chunked_test_utils +} // namespace chunkedcontent_testutils -TEST_CASE("DeletePathsFromContent") +TEST_CASE("chunkedcontent.DeletePathsFromContent") { FastRandom BaseRandom; @@ -1448,8 +1585,8 @@ TEST_CASE("DeletePathsFromContent") {BasePaths[9], BaseSizes[9]}, {BasePaths[10], BaseSizes[10]}}; - ChunkedFolderContent Base = chunked_test_utils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks); - ValidateChunkedFolderContent(Base, {}, 
Base.ChunkedContent.ChunkHashes); + ChunkedFolderContent Base = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks); + ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map<IoHash, size_t, IoHash::Hasher> BaseChunksLookup; for (size_t Index = 0; Index < BaseChunks.size(); Index++) @@ -1463,7 +1600,7 @@ TEST_CASE("DeletePathsFromContent") std::vector<std::filesystem::path> DeletedPaths; for (const std::filesystem::path& RemotePath : Base.Paths) { - if (!chunked_test_utils::IncludePath(IncludeWildcards, ExcludeWildcards, RemotePath)) + if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), true)) { DeletedPaths.push_back(RemotePath); } @@ -1477,7 +1614,161 @@ TEST_CASE("DeletePathsFromContent") InlineRemoveUnusedHashes(FilteredChunks, FilteredContent.ChunkedContent.ChunkHashes); - ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks); + ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks, {}, {}); +} + +TEST_CASE("chunkedcontent.ApplyChunkedContentOverlay") +{ + FastRandom BaseRandom; + + std::vector<IoBuffer> BaseChunks; + + const std::string BasePaths[11] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t BaseSizes[11] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + std::pair<const std::string, uint64_t> BasePathAndSizes[11] = {{BasePaths[0], BaseSizes[0]}, + {BasePaths[1], BaseSizes[1]}, + {BasePaths[2], BaseSizes[2]}, + {BasePaths[3], BaseSizes[3]}, + {BasePaths[4], BaseSizes[4]}, + {BasePaths[5], BaseSizes[5]}, + 
{BasePaths[6], BaseSizes[6]}, + {BasePaths[7], BaseSizes[7]}, + {BasePaths[8], BaseSizes[8]}, + {BasePaths[9], BaseSizes[9]}, + {BasePaths[10], BaseSizes[10]}}; + + const std::string OverlayPaths[6] = {{"file_1"}, + {"file_4"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.self"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.sym"}}; + const uint64_t OverlaySizes[6] = {7u * 1024u, 1249, 17u * 1024u, 9u * 1024u, 0, 17u * 1024u}; + + std::pair<const std::string, uint64_t> OverlayPathAndSizes[6] = {{OverlayPaths[0], OverlaySizes[0]}, + {OverlayPaths[1], OverlaySizes[1]}, + {OverlayPaths[2], OverlaySizes[2]}, + {OverlayPaths[3], OverlaySizes[3]}, + {OverlayPaths[4], OverlaySizes[4]}, + {OverlayPaths[5], OverlaySizes[5]}}; + + ChunkedFolderContent Base = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks); + ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes, {}, {}); + tsl::robin_map<std::string, uint32_t> BasePathLookup = chunkedcontent_testutils::BuildPathLookup(Base.Paths); + + std::vector<IoBuffer> OverlayChunks; + ChunkedFolderContent Overlay = + chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, OverlayPathAndSizes, 4u * 1024u, OverlayChunks); + ValidateChunkedFolderContent(Overlay, {}, Overlay.ChunkedContent.ChunkHashes, {}, {}); + + tsl::robin_map<std::string, uint32_t> OverlayPathLookup = chunkedcontent_testutils::BuildPathLookup(Overlay.Paths); + + auto PathMatchesBase = + [&](const std::string& Path, const ChunkedFolderContent& MergedContent, tsl::robin_map<std::string, uint32_t> MergedPathLookup) { + return MergedContent.RawHashes[MergedPathLookup.at(Path)] == Base.RawHashes[BasePathLookup.at(Path)]; + }; + + auto PathMatchesOverlay = + [&](const std::string& Path, const ChunkedFolderContent& MergedContent, tsl::robin_map<std::string, uint32_t> MergedPathLookup) { + return MergedContent.RawHashes[MergedPathLookup.at(Path)] == 
Overlay.RawHashes[OverlayPathLookup.at(Path)]; + }; + + { + ChunkedFolderContent AllMergedContent = ApplyChunkedContentOverlay(Base, Overlay, {}, {}); + CHECK_EQ(AllMergedContent.Paths.size(), 14); + + std::vector<IoBuffer> AllMergedChunks = chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes, + BaseChunks, + Overlay.ChunkedContent.ChunkHashes, + OverlayChunks, + AllMergedContent.ChunkedContent.ChunkHashes); + ValidateChunkedFolderContent(AllMergedContent, {}, AllMergedContent.ChunkedContent.ChunkHashes, {}, {}); + + tsl::robin_map<std::string, uint32_t> AllMergedPathLookup = chunkedcontent_testutils::BuildPathLookup(AllMergedContent.Paths); + CHECK(PathMatchesBase("file_2.exe", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("file_3.txt", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_1.exe", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_2.pdb", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_3.dll", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_1.txt", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_2.json", AllMergedContent, AllMergedPathLookup)); + + CHECK(PathMatchesOverlay("file_1", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesOverlay("file_4", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesOverlay("dir_1/dir1_file_2.pdb", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", AllMergedContent, AllMergedPathLookup)); + CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_2.sym", AllMergedContent, 
AllMergedPathLookup)); + } + + { + ChunkedFolderContent ReplaceExecutablesContent = + ApplyChunkedContentOverlay(Base, Overlay, std::vector<std::string>{"*.exe", "*.self"}, {}); + CHECK_EQ(ReplaceExecutablesContent.Paths.size(), 10); + + std::vector<IoBuffer> ReplaceExecutablesChunks = + chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes, + BaseChunks, + Overlay.ChunkedContent.ChunkHashes, + OverlayChunks, + ReplaceExecutablesContent.ChunkedContent.ChunkHashes); + ValidateChunkedFolderContent(ReplaceExecutablesContent, {}, ReplaceExecutablesContent.ChunkedContent.ChunkHashes, {}, {}); + + tsl::robin_map<std::string, uint32_t> ReplaceExecutablesPathLookup = + chunkedcontent_testutils::BuildPathLookup(ReplaceExecutablesContent.Paths); + CHECK(PathMatchesBase("file_1", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesBase("file_3.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesBase("dir_1/dir1_file_2.pdb", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + + CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_2.pdb", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_3.dll", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_1.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_2.json", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + + CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); + } + + { + ChunkedFolderContent ReplaceDir1ExecutablesContent = 
ApplyChunkedContentOverlay(Base, + Overlay, + std::vector<std::string>{"dir_1/*.exe", "dir_2/*"}, + std::vector<std::string>{"dir_2/*.sym"}); + CHECK_EQ(ReplaceDir1ExecutablesContent.Paths.size(), 7); + + std::vector<IoBuffer> ReplaceDir1Chunks = + chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes, + BaseChunks, + Overlay.ChunkedContent.ChunkHashes, + OverlayChunks, + ReplaceDir1ExecutablesContent.ChunkedContent.ChunkHashes); + ValidateChunkedFolderContent(ReplaceDir1ExecutablesContent, {}, ReplaceDir1ExecutablesContent.ChunkedContent.ChunkHashes, {}, {}); + + tsl::robin_map<std::string, uint32_t> ReplaceDir1ExecutablesPathLookup = + chunkedcontent_testutils::BuildPathLookup(ReplaceDir1ExecutablesContent.Paths); + + CHECK(PathMatchesBase("file_1", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); + CHECK(PathMatchesBase("file_2.exe", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); + CHECK(PathMatchesBase("file_3.txt", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); + CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); + + CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); + CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); + } } #endif // ZEN_WITH_TESTS |