aboutsummaryrefslogtreecommitdiff
path: root/src/zenremotestore/chunking/chunkedcontent.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2025-11-24 10:06:52 +0100
committer: GitHub Enterprise <[email protected]> 2025-11-24 10:06:52 +0100
commit: 6dcdddbf733b0aa323ffb7ecbe56c04b15c6c16a (patch)
tree: 78685156e98214e4e1125501a8c09cac37bc45f4 /src/zenremotestore/chunking/chunkedcontent.cpp
parent: changelog (#661) (diff)
download: zen-6dcdddbf733b0aa323ffb7ecbe56c04b15c6c16a.tar.xz
zen-6dcdddbf733b0aa323ffb7ecbe56c04b15c6c16a.zip
update state when wildcard (#657)
* add --append option and improve state handling when using downloads for `zen builds download`
Diffstat (limited to 'src/zenremotestore/chunking/chunkedcontent.cpp')
-rw-r--r-- src/zenremotestore/chunking/chunkedcontent.cpp 569
1 files changed, 430 insertions, 139 deletions
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index ac979a64b..5f1876908 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -3,6 +3,7 @@
#include <zenremotestore/chunking/chunkedcontent.h>
#include <zencore/compactbinaryutil.h>
+#include <zencore/compositebuffer.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
@@ -15,6 +16,12 @@
#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zenutil/wildcard.h>
+#if ZEN_WITH_TESTS
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <numeric>
+#endif // ZEN_WITH_TESTS
+
ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_set.h>
#include <gsl/gsl-lite.hpp>
@@ -170,6 +177,31 @@ namespace {
std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); }  // Comparison key for a path: its generic_string() passed through ToLower().
+    // Builds a reverse index over Hashes: each hash maps to its position in the span.
+    // Every hash is expected to occur only once; a repeated hash trips the ZEN_ASSERT.
+    tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BuildHashLookup(std::span<const IoHash> Hashes)
+    {
+        tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> HashToIndex;
+        HashToIndex.reserve(Hashes.size());
+        uint32_t Position = 0;
+        for (const IoHash& Hash : Hashes)
+        {
+            // insert_or_assign reports through .second whether the key was newly inserted.
+            const auto InsertResult = HashToIndex.insert_or_assign(Hash, Position);
+            ZEN_ASSERT(InsertResult.second);
+            Position++;
+        }
+        return HashToIndex;
+    }
+
+    // Turns per-sequence chunk counts into start offsets (an exclusive running sum):
+    // entry i is the sum of ChunkCounts[0..i), so the first entry is always 0.
+    std::vector<uint32_t> BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts)
+    {
+        std::vector<uint32_t> Offsets;
+        Offsets.reserve(ChunkCounts.size());
+        uint32_t RunningTotal = 0;
+        for (const uint32_t Count : ChunkCounts)
+        {
+            Offsets.push_back(RunningTotal);
+            RunningTotal += Count;
+        }
+        return Offsets;
+    }
+
} // namespace
std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv};  // One name per SourcePlatform value (array sized by SourcePlatform::_Count); presumably ordered to match the enum - verify against its declaration.
@@ -539,9 +571,9 @@ SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbW
}
ChunkedFolderContent
-LoadChunkedFolderContentToCompactBinary(CbObjectView Input)
+LoadChunkedFolderContentFromCompactBinary(CbObjectView Input)
{
- ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary");
+ ZEN_TRACE_CPU("LoadChunkedFolderContentFromCompactBinary");
ChunkedFolderContent Content;
Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
Content.Paths = compactbinary_helpers::ReadArray<std::filesystem::path>("paths"sv, Input);
@@ -836,30 +868,135 @@ CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderConten
return true;
};
-static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>
-BuildHashLookup(std::span<const IoHash> Hashes)
+ChunkedFolderContent  // Returns a new content set built from Base with Overlay applied on top; both inputs are taken by const reference.
+ApplyChunkedContentOverlay(const ChunkedFolderContent& Base,
+ const ChunkedFolderContent& Overlay,
+ std::span<const std::string> OverlayIncludeWildcards,  // both wildcard lists empty: every Overlay path is used and replaces a Base path with the same lower-cased name
+ std::span<const std::string> OverlayExcludeWildcards)  // otherwise IncludePath() decides: matching Base paths are dropped and only matching Overlay paths are added
{
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup;
- Lookup.reserve(Hashes.size());
- for (uint32_t Index = 0; Index < Hashes.size(); Index++)
+ ChunkedFolderContent Result = {.Platform = Base.Platform};  // the result carries Base's platform
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseSequenceHashToSequenceIndex =  // sequence raw hash -> index into Base's sequence arrays
+ BuildHashLookup(Base.ChunkedContent.SequenceRawHashes);
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseChunkHashToChunkIndex = BuildHashLookup(Base.ChunkedContent.ChunkHashes);  // NOTE(review): not read again in this function
+ std::vector<uint32_t> BaseSequenceChunkOrderOffset = BuildChunkOrderOffset(Base.ChunkedContent.ChunkCounts);  // start of each Base sequence's run inside ChunkOrders
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> OverlaySequenceHashToSequenceIndex =  // sequence raw hash -> index into Overlay's sequence arrays
+ BuildHashLookup(Overlay.ChunkedContent.SequenceRawHashes);
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> OverlayChunkHashToChunkIndex = BuildHashLookup(Overlay.ChunkedContent.ChunkHashes);  // NOTE(review): not read again in this function
+ std::vector<uint32_t> OverlaySequenceChunkOrderOffset = BuildChunkOrderOffset(Overlay.ChunkedContent.ChunkCounts);  // start of each Overlay sequence's run inside ChunkOrders
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ResultSequenceHashToSequenceIndex;  // filled by AddPath so a sequence is copied into Result only once
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ResultChunkHashToChunkIndex;  // filled by AddPath so a chunk is copied into Result only once
+
+ const size_t EstimatedPathCount = Max(Base.Paths.size(), Overlay.Paths.size());  // reserve hint only: the larger of the two inputs
+
+ Result.Attributes.reserve(EstimatedPathCount);
+ Result.Paths.reserve(EstimatedPathCount);
+ Result.RawSizes.reserve(EstimatedPathCount);
+ Result.RawHashes.reserve(EstimatedPathCount);
+
+ const size_t EstimatedSequenceCount =
+ Max(Base.ChunkedContent.SequenceRawHashes.size(), Overlay.ChunkedContent.SequenceRawHashes.size());
+ Result.ChunkedContent.SequenceRawHashes.reserve(EstimatedSequenceCount);
+
+ const size_t EstimatedChunkCount = Max(Base.ChunkedContent.ChunkHashes.size(), Overlay.ChunkedContent.ChunkHashes.size());
+ Result.ChunkedContent.ChunkHashes.reserve(EstimatedChunkCount);
+ Result.ChunkedContent.ChunkRawSizes.reserve(EstimatedChunkCount);
+
+ const size_t EstimatedChunkOrderCount = Max(Base.ChunkedContent.ChunkOrders.size(), Overlay.ChunkedContent.ChunkOrders.size());
+ Result.ChunkedContent.ChunkOrders.reserve(EstimatedChunkOrderCount);
+
+ auto AddPath = [&Result, &ResultSequenceHashToSequenceIndex, &ResultChunkHashToChunkIndex](  // appends one path of Source to Result, copying its sequence and chunks when not already present
+ const ChunkedFolderContent& Source,
+ uint32_t SourcePathIndex,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SourceSequenceHashToSequenceIndex,
+ const std::vector<uint32_t>& SourceSequenceChunkOrderOffset) {
+ Result.Attributes.push_back(Source.Attributes[SourcePathIndex]);
+ Result.Paths.push_back(Source.Paths[SourcePathIndex]);
+ Result.RawSizes.push_back(Source.RawSizes[SourcePathIndex]);
+ Result.RawHashes.push_back(Source.RawHashes[SourcePathIndex]);
+ if (Source.RawSizes[SourcePathIndex] > 0)  // zero-sized files add no sequence or chunk data
+ {
+ if (!ResultSequenceHashToSequenceIndex.contains(Source.RawHashes[SourcePathIndex]))  // a sequence shared by several paths is copied once
+ {
+ const uint32_t ResultSequenceIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size());
+ ResultSequenceHashToSequenceIndex.insert_or_assign(Source.RawHashes[SourcePathIndex], ResultSequenceIndex);
+ Result.ChunkedContent.SequenceRawHashes.push_back(Source.RawHashes[SourcePathIndex]);
+ auto SourceSequenceIndexIt = SourceSequenceHashToSequenceIndex.find(Source.RawHashes[SourcePathIndex]);
+ ZEN_ASSERT(SourceSequenceIndexIt != SourceSequenceHashToSequenceIndex.end());  // a non-empty file must have a sequence in its source
+ const uint32_t SourceSequenceIndex = SourceSequenceIndexIt->second;
+ const uint32_t ChunkOrderOffset = SourceSequenceChunkOrderOffset[SourceSequenceIndex];
+ const uint32_t ChunkCount = Source.ChunkedContent.ChunkCounts[SourceSequenceIndex];
+ Result.ChunkedContent.ChunkCounts.push_back(ChunkCount);
+
+ std::span<const uint32_t> SourceChunkIndexes =  // this sequence's run of chunk indexes inside Source's ChunkOrders
+ std::span<const uint32_t>(Source.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount);
+ for (uint32_t SourceChunkIndex : SourceChunkIndexes)
+ {
+ const IoHash& ChunkHash = Source.ChunkedContent.ChunkHashes[SourceChunkIndex];
+ if (auto It = ResultChunkHashToChunkIndex.find(ChunkHash); It != ResultChunkHashToChunkIndex.end())  // chunk already in Result: reuse its index
+ {
+ const uint32_t ResultChunkIndex = It->second;
+ Result.ChunkedContent.ChunkOrders.push_back(ResultChunkIndex);
+ }
+ else  // first use of this chunk: append it to Result and remember its new index
+ {
+ const uint32_t ResultChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
+ Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
+ Result.ChunkedContent.ChunkRawSizes.push_back(Source.ChunkedContent.ChunkRawSizes[SourceChunkIndex]);
+ Result.ChunkedContent.ChunkOrders.push_back(ResultChunkIndex);
+ ResultChunkHashToChunkIndex.insert_or_assign(ChunkHash, ResultChunkIndex);
+ }
+ }
+ }
+ }
+ };
+
+ if (OverlayIncludeWildcards.empty() && OverlayExcludeWildcards.empty())  // no filters: the whole Overlay is applied
{
- Lookup.insert_or_assign(Hashes[Index], Index);
- }
- return Lookup;
-}
+ tsl::robin_set<std::string> OverlayPaths;  // lower-cased Overlay paths, used to detect Base paths that Overlay replaces
+ OverlayPaths.reserve(Overlay.Paths.size());
+ for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++)
+ {
+ const std::string PathString = ToLower(Overlay.Paths[OverlayPathIndex].generic_string());
+ OverlayPaths.insert(PathString);
+ }
+ for (uint32_t BasePathIndex = 0; BasePathIndex < Base.Paths.size(); BasePathIndex++)
+ {
+ const std::string PathString = ToLower(Base.Paths[BasePathIndex].generic_string());
+ if (!OverlayPaths.contains(PathString))  // keep a Base path only when Overlay has no path with the same lower-cased name
+ {
+ AddPath(Base, BasePathIndex, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset);
+ }
+ }
-static std::vector<uint32_t>
-BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts)
-{
- std::vector<uint32_t> ChunkOffsets;
- ChunkOffsets.reserve(ChunkCounts.size());
- uint32_t Offset = 0;
- for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++)
+ for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++)  // then every Overlay path is added
+ {
+ AddPath(Overlay, OverlayPathIndex, OverlaySequenceHashToSequenceIndex, OverlaySequenceChunkOrderOffset);
+ }
+ }
+ else  // filters given: IncludePath() selects which paths are taken from Overlay
{
- ChunkOffsets.push_back(Offset);
- Offset += ChunkCounts[SequenceIndex];
+ for (uint32_t BasePathIndex = 0; BasePathIndex < Base.Paths.size(); BasePathIndex++)
+ {
+ const std::string PathString = ToLower(Base.Paths[BasePathIndex].generic_string());
+ if (!IncludePath(OverlayIncludeWildcards, OverlayExcludeWildcards, PathString, /*CaseSensitive*/ true))  // path is lower-cased before the case-sensitive match; presumably the wildcards are expected in lower case - confirm with callers
+ {
+ AddPath(Base, BasePathIndex, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset);  // Base paths outside the filter are kept
+ }
+ }
+
+ for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++)
+ {
+ const std::string PathString = ToLower(Overlay.Paths[OverlayPathIndex].generic_string());
+ if (IncludePath(OverlayIncludeWildcards, OverlayExcludeWildcards, PathString, /*CaseSensitive*/ true))
+ {
+ AddPath(Overlay, OverlayPathIndex, OverlaySequenceHashToSequenceIndex, OverlaySequenceChunkOrderOffset);  // Overlay paths inside the filter are added
+ }
+ }
}
- return ChunkOffsets;
+ return Result;
}
ChunkedFolderContent
@@ -1058,9 +1195,85 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
}
+// Re-indexes a chunk order list that refers to the "absolute" chunk table (the loose chunks followed by
+// the chunks of every block description, in order) into a compact local chunk table in which each
+// distinct chunk hash appears once, in order of first use.
+//
+// AbsoluteChunkOrders                     - indexes into the absolute chunk table.
+// LooseChunkHashes / LooseChunkRawSizes   - first part of the absolute chunk table.
+// BlockDescriptions                       - ChunkRawHashes / ChunkRawLengths of each block are appended after the loose chunks.
+// OutLocalChunkHashes / OutLocalChunkRawSizes - receive each distinct referenced chunk.
+// OutLocalChunkOrders                     - receives, per entry of AbsoluteChunkOrders, the index into the local table.
+// DoExtraVerify                           - re-checks every produced entry against the absolute table with ZEN_ASSERT.
+//
+// NOTE(review): the verification indexes the outputs from position zero, so the output vectors are
+// presumably expected to be empty on entry - confirm with callers.
void
+CalculateLocalChunkOrders(const std::span<const uint32_t>&              AbsoluteChunkOrders,
+                          const std::span<const IoHash>                 LooseChunkHashes,
+                          const std::span<const uint64_t>               LooseChunkRawSizes,
+                          const std::span<const ChunkBlockDescription>& BlockDescriptions,
+                          std::vector<IoHash>&                          OutLocalChunkHashes,
+                          std::vector<uint64_t>&                        OutLocalChunkRawSizes,
+                          std::vector<uint32_t>&                        OutLocalChunkOrders,
+                          bool                                          DoExtraVerify)
+{
+    ZEN_TRACE_CPU("CalculateLocalChunkOrders");
+
+    // Size the absolute table once so the appends below do not reallocate repeatedly.
+    size_t AbsoluteChunkCount = LooseChunkHashes.size();
+    for (const ChunkBlockDescription& Block : BlockDescriptions)
+    {
+        AbsoluteChunkCount += Block.ChunkRawHashes.size();
+    }
+
+    std::vector<IoHash>   AbsoluteChunkHashes;
+    std::vector<uint64_t> AbsoluteChunkRawSizes;
+    AbsoluteChunkHashes.reserve(AbsoluteChunkCount);
+    AbsoluteChunkRawSizes.reserve(AbsoluteChunkCount);
+    AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end());
+    AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), LooseChunkRawSizes.begin(), LooseChunkRawSizes.end());
+    for (const ChunkBlockDescription& Block : BlockDescriptions)
+    {
+        AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), Block.ChunkRawHashes.begin(), Block.ChunkRawHashes.end());
+        AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), Block.ChunkRawLengths.begin(), Block.ChunkRawLengths.end());
+    }
+    OutLocalChunkHashes.reserve(AbsoluteChunkHashes.size());
+    OutLocalChunkRawSizes.reserve(AbsoluteChunkRawSizes.size());
+    OutLocalChunkOrders.reserve(AbsoluteChunkOrders.size());
+
+    // Chunk hash -> index in the local table, for chunks already emitted.
+    tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
+    ChunkHashToChunkIndex.reserve(AbsoluteChunkHashes.size());
+
+    for (uint32_t AbsoluteChunkOrderIndex = 0; AbsoluteChunkOrderIndex < AbsoluteChunkOrders.size(); AbsoluteChunkOrderIndex++)
+    {
+        const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[AbsoluteChunkOrderIndex];
+        // An order entry outside the absolute table would otherwise be an out-of-range read.
+        ZEN_ASSERT(AbsoluteChunkIndex < AbsoluteChunkHashes.size());
+        ZEN_ASSERT(AbsoluteChunkIndex < AbsoluteChunkRawSizes.size());
+        const IoHash&  AbsoluteChunkHash    = AbsoluteChunkHashes[AbsoluteChunkIndex];
+        const uint64_t AbsoluteChunkRawSize = AbsoluteChunkRawSizes[AbsoluteChunkIndex];
+
+        if (auto It = ChunkHashToChunkIndex.find(AbsoluteChunkHash); It != ChunkHashToChunkIndex.end())
+        {
+            // Chunk already in the local table: reuse its index.
+            const uint32_t LocalChunkIndex = It->second;
+            OutLocalChunkOrders.push_back(LocalChunkIndex);
+        }
+        else
+        {
+            // First use of this chunk: append it to the local table.
+            uint32_t LocalChunkIndex = gsl::narrow<uint32_t>(OutLocalChunkHashes.size());
+            OutLocalChunkHashes.push_back(AbsoluteChunkHash);
+            OutLocalChunkRawSizes.push_back(AbsoluteChunkRawSize);
+            OutLocalChunkOrders.push_back(LocalChunkIndex);
+            ChunkHashToChunkIndex.insert_or_assign(AbsoluteChunkHash, LocalChunkIndex);
+        }
+        if (DoExtraVerify)
+        {
+            const uint32_t  LocalChunkIndex   = OutLocalChunkOrders[AbsoluteChunkOrderIndex];
+            const IoHash&   LocalChunkHash    = OutLocalChunkHashes[LocalChunkIndex];
+            const uint64_t& LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex];
+            ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
+            ZEN_ASSERT(LocalChunkRawSize == AbsoluteChunkRawSize);
+        }
+    }
+    if (DoExtraVerify)
+    {
+        // Second pass over the finished output: every local entry must still describe the same chunk
+        // as the corresponding absolute entry.
+        for (uint32_t OrderIndex = 0; OrderIndex < OutLocalChunkOrders.size(); OrderIndex++)
+        {
+            uint32_t     LocalChunkIndex   = OutLocalChunkOrders[OrderIndex];
+            const IoHash LocalChunkHash    = OutLocalChunkHashes[LocalChunkIndex];
+            uint64_t     LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex];
+
+            uint32_t     VerifyChunkIndex   = AbsoluteChunkOrders[OrderIndex];
+            const IoHash VerifyChunkHash    = AbsoluteChunkHashes[VerifyChunkIndex];
+            uint64_t     VerifyChunkRawSize = AbsoluteChunkRawSizes[VerifyChunkIndex];
+
+            ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
+            ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize);
+        }
+    }
+}
+
+void
ValidateChunkedFolderContent(const ChunkedFolderContent& Content,
std::span<const ChunkBlockDescription> BlockDescriptions,
- std::span<const IoHash> LooseChunks)
+ std::span<const IoHash> LooseChunks,
+ std::span<const std::string> IncludeWildcards,
+ std::span<const std::string> ExcludeWildcards)
{
size_t TotalKnownChunkCount = LooseChunks.size();
for (const ChunkBlockDescription& BlockDescription : BlockDescriptions)
@@ -1101,6 +1314,8 @@ ValidateChunkedFolderContent(const ChunkedFolderContent& Content,
std::span<const uint32_t> ChunkIndexes =
std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount);
+ bool VerifyIfChunkExists = IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), true);
+
IoHashStream Hasher;
uint64_t SizeSum = 0;
for (uint32_t ChunkIndex : ChunkIndexes)
@@ -1116,9 +1331,13 @@ ValidateChunkedFolderContent(const ChunkedFolderContent& Content,
{
throw std::runtime_error("Chunked folder content contains zero chunk hash");
}
- if (!KnownChunks.contains(ChunkRawHash))
+
+ if (VerifyIfChunkExists)
{
- throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash));
+ if (!KnownChunks.contains(ChunkRawHash))
+ {
+ throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash));
+ }
}
SizeSum += ChunkSize;
}
@@ -1186,35 +1405,41 @@ chunkedcontent_forcelink()
{
}
-namespace chunked_test_utils {
- struct ChunkedFile
+namespace chunkedcontent_testutils {
+ ChunkedFile CreateChunkedFile(FastRandom& Random, const size_t FinalSize, size_t ChunkingSize, const IoBuffer& LastUsedBuffer)
{
- IoHash RawHash;
- std::vector<IoHash> ChunkHashes;
- std::vector<uint64_t> ChunkSizes;
- std::vector<IoBuffer> Chunks;
- };
-
- ChunkedFile CreateChunkedFile(FastRandom& Random, size_t Size, size_t ChunkingSize)
- {
- size_t ChunkCount = (Size + (ChunkingSize - 1)) / ChunkingSize;
+ size_t ChunkCount = (FinalSize + (ChunkingSize - 1)) / ChunkingSize;
std::vector<IoHash> ChunkHashes;
std::vector<uint64_t> ChunkSizes;
std::vector<IoBuffer> Chunks;
ChunkHashes.reserve(ChunkCount);
ChunkSizes.reserve(ChunkCount);
+ size_t SizeLeft = FinalSize;
IoHashStream HashStream;
- while (Size > 0)
+ while (SizeLeft > 0)
{
- size_t ChunkSize = Min(Size, ChunkingSize);
- IoBuffer ChunkBuffer = CreateRandomBlob(Random, ChunkSize);
+ size_t ChunkSize = Min(SizeLeft, ChunkingSize);
+ IoBuffer ChunkBuffer;
+ if (LastUsedBuffer && FinalSize == SizeLeft)
+ {
+ ChunkSize = Min(ChunkSize, LastUsedBuffer.GetSize());
+ ChunkBuffer = IoBuffer(LastUsedBuffer, 0, ChunkSize);
+ }
+ else
+ {
+ ChunkBuffer = CreateRandomBlob(Random, ChunkSize);
+ }
HashStream.Append(ChunkBuffer);
ChunkHashes.push_back(IoHash::HashBuffer(ChunkBuffer));
ChunkSizes.push_back(ChunkSize);
Chunks.emplace_back(std::move(ChunkBuffer));
- Size -= ChunkSize;
+ SizeLeft -= ChunkSize;
}
+ ZEN_ASSERT(std::accumulate(ChunkSizes.begin(), ChunkSizes.end(), uint64_t(0)) == FinalSize);
+ ZEN_ASSERT(std::accumulate(Chunks.begin(), Chunks.end(), uint64_t(0), [](uint64_t Current, const IoBuffer& B) {
+ return Current + B.GetSize();
+ }) == FinalSize);
return ChunkedFile{.RawHash = HashStream.GetHash(),
.ChunkHashes = std::move(ChunkHashes),
.ChunkSizes = std::move(ChunkSizes),
@@ -1233,6 +1458,8 @@ namespace chunked_test_utils {
Result.RawHashes.reserve(PathAndSizes.size());
ChunkPayloads.reserve(PathAndSizes.size());
+ IoBuffer LastChunkGenerated;
+
tsl::robin_map<IoHash, uint32_t> SequenceToIndex;
tsl::robin_map<IoHash, uint32_t> ChunkToIndex;
for (size_t PathIndex = 0; PathIndex < PathAndSizes.size(); PathIndex++)
@@ -1246,7 +1473,8 @@ namespace chunked_test_utils {
if (Size > 0)
{
- ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize);
+ ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize, LastChunkGenerated);
+ LastChunkGenerated = File.Chunks.back();
Result.RawHashes.push_back(File.RawHash);
if (auto SequenceIt = SequenceToIndex.find(File.RawHash); SequenceIt == SequenceToIndex.end())
@@ -1259,12 +1487,13 @@ namespace chunked_test_utils {
const IoHash& ChunkHash = File.ChunkHashes[ChunkIndex];
if (auto ChunkIt = ChunkToIndex.find(ChunkHash); ChunkIt == ChunkToIndex.end())
{
- const uint32_t ChunkedContentChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkOrders.size());
+ const uint32_t ChunkedContentChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
Result.ChunkedContent.ChunkOrders.push_back(gsl::narrow<uint32_t>(ChunkedContentChunkIndex));
Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
Result.ChunkedContent.ChunkRawSizes.push_back(File.ChunkSizes[ChunkIndex]);
ChunkPayloads.push_back(std::move(File.Chunks[ChunkIndex]));
+ ChunkToIndex.insert_or_assign(ChunkHash, ChunkedContentChunkIndex);
}
else
{
@@ -1281,67 +1510,7 @@ namespace chunked_test_utils {
}
return Result;
}
-# if 0
- void ValidateChunkedFolderContent(const ChunkedFolderContent& Content, std::span<const IoBuffer> Chunks)
- {
- std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts);
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes);
- std::vector<size_t> SequenceUseCount(Content.ChunkedContent.SequenceRawHashes.size(), 0);
- std::vector<size_t> ChunkUseCount(Content.ChunkedContent.ChunkHashes.size(), 0);
- for (size_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
- {
- const std::filesystem::path& Path = Content.Paths[PathIndex];
- ZEN_ASSERT(!Path.empty());
- const uint64_t RawSize = Content.RawSizes[PathIndex];
- const IoHash RawHash = Content.RawHashes[PathIndex];
- if (RawSize > 0)
- {
- if (auto It = SequenceIndexLookup.find(RawHash); It != SequenceIndexLookup.end())
- {
- const uint32_t SourceSequenceIndex = It->second;
- SequenceUseCount[SourceSequenceIndex]++;
- const uint32_t ChunkOrderOffset = ChunkOrderOffsets[SourceSequenceIndex];
- const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex];
-
- std::span<const uint32_t> ChunkIndexes =
- std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount);
-
- IoHashStream Hasher;
- uint64_t SizeSum = 0;
- for (uint32_t ChunkIndex : ChunkIndexes)
- {
- ChunkUseCount[ChunkIndex]++;
- const IoBuffer& ChunkBuffer = Chunks[ChunkIndex];
- const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
- const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
- SizeSum += ChunkSize;
- CHECK_EQ(ChunkRawHash, IoHash::HashBuffer(ChunkBuffer));
- Hasher.Append(ChunkBuffer);
- }
- CHECK_EQ(RawHash, Hasher.GetHash());
- CHECK_EQ(SizeSum, RawSize);
- }
- else
- {
- CHECK(false);
- }
- }
- else
- {
- CHECK(RawHash == IoHash::Zero);
- }
- }
- for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++)
- {
- CHECK(SequenceUseCount[SequenceIndex] > 0);
- }
- for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++)
- {
- CHECK(ChunkUseCount[ChunkIndex] > 0);
- }
- }
-# endif // 0
std::vector<IoBuffer> GetChunkPayloads(std::span<const IoHash> BaseHashes,
std::span<const IoBuffer> BaseChunks,
std::span<const IoHash> OverlayHashes,
@@ -1382,41 +1551,9 @@ namespace chunked_test_utils {
return Result;
}
- bool IncludePath(std::span<const std::string> IncludeWildcards,
- std::span<const std::string> ExcludeWildcards,
- const std::filesystem::path& Path)
- {
- const std::string PathString = Path.generic_string();
- bool IncludePath = true;
- if (!IncludeWildcards.empty())
- {
- IncludePath = false;
- for (const std::string& IncludeWildcard : IncludeWildcards)
- {
- if (MatchWildcard(IncludeWildcard, PathString, /*CaseSensitive*/ false))
- {
- IncludePath = true;
- break;
- }
- }
- if (!IncludePath)
- {
- return false;
- }
- }
- for (const std::string& ExcludeWildcard : ExcludeWildcards)
- {
- if (MatchWildcard(ExcludeWildcard, PathString, /*CaseSensitive*/ false))
- {
- return false;
- }
- }
- return true;
- }
-
-} // namespace chunked_test_utils
+} // namespace chunkedcontent_testutils
-TEST_CASE("DeletePathsFromContent")
+TEST_CASE("chunkedcontent.DeletePathsFromContent")
{
FastRandom BaseRandom;
@@ -1448,8 +1585,8 @@ TEST_CASE("DeletePathsFromContent")
{BasePaths[9], BaseSizes[9]},
{BasePaths[10], BaseSizes[10]}};
- ChunkedFolderContent Base = chunked_test_utils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks);
- ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes);
+ ChunkedFolderContent Base = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks);
+ ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes, {}, {});
tsl::robin_map<IoHash, size_t, IoHash::Hasher> BaseChunksLookup;
for (size_t Index = 0; Index < BaseChunks.size(); Index++)
@@ -1463,7 +1600,7 @@ TEST_CASE("DeletePathsFromContent")
std::vector<std::filesystem::path> DeletedPaths;
for (const std::filesystem::path& RemotePath : Base.Paths)
{
- if (!chunked_test_utils::IncludePath(IncludeWildcards, ExcludeWildcards, RemotePath))
+ if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), true))
{
DeletedPaths.push_back(RemotePath);
}
@@ -1477,7 +1614,161 @@ TEST_CASE("DeletePathsFromContent")
InlineRemoveUnusedHashes(FilteredChunks, FilteredContent.ChunkedContent.ChunkHashes);
- ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks);
+ ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks, {}, {});
+}
+
+// Exercises ApplyChunkedContentOverlay() in three modes: no wildcards (every overlay path is applied),
+// include-only wildcards, and include plus exclude wildcards. Each merged result is validated and the
+// raw hash of selected paths is compared with the side (base or overlay) it is expected to come from.
+TEST_CASE("chunkedcontent.ApplyChunkedContentOverlay")
+{
+    FastRandom BaseRandom;
+
+    std::vector<IoBuffer> BaseChunks;
+
+    const std::string BasePaths[11] = {{"file_1"},
+                                       {"file_2.exe"},
+                                       {"file_3.txt"},
+                                       {"dir_1/dir1_file_1.exe"},
+                                       {"dir_1/dir1_file_2.pdb"},
+                                       {"dir_1/dir1_file_3.txt"},
+                                       {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
+                                       {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
+                                       {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
+                                       {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
+                                       {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
+    const uint64_t    BaseSizes[11] =
+        {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
+
+    std::pair<const std::string, uint64_t> BasePathAndSizes[11] = {{BasePaths[0], BaseSizes[0]},
+                                                                   {BasePaths[1], BaseSizes[1]},
+                                                                   {BasePaths[2], BaseSizes[2]},
+                                                                   {BasePaths[3], BaseSizes[3]},
+                                                                   {BasePaths[4], BaseSizes[4]},
+                                                                   {BasePaths[5], BaseSizes[5]},
+                                                                   {BasePaths[6], BaseSizes[6]},
+                                                                   {BasePaths[7], BaseSizes[7]},
+                                                                   {BasePaths[8], BaseSizes[8]},
+                                                                   {BasePaths[9], BaseSizes[9]},
+                                                                   {BasePaths[10], BaseSizes[10]}};
+
+    // Three overlay paths also exist in the base (file_1, dir_1/dir1_file_1.exe, dir_1/dir1_file_2.pdb);
+    // the other three are new.
+    const std::string OverlayPaths[6] = {{"file_1"},
+                                         {"file_4"},
+                                         {"dir_1/dir1_file_1.exe"},
+                                         {"dir_1/dir1_file_2.pdb"},
+                                         {"dir_2/dir2_dir1/dir2_dir1_file_1.self"},
+                                         {"dir_2/dir2_dir1/dir2_dir1_file_2.sym"}};
+    const uint64_t    OverlaySizes[6] = {7u * 1024u, 1249, 17u * 1024u, 9u * 1024u, 0, 17u * 1024u};
+
+    std::pair<const std::string, uint64_t> OverlayPathAndSizes[6] = {{OverlayPaths[0], OverlaySizes[0]},
+                                                                     {OverlayPaths[1], OverlaySizes[1]},
+                                                                     {OverlayPaths[2], OverlaySizes[2]},
+                                                                     {OverlayPaths[3], OverlaySizes[3]},
+                                                                     {OverlayPaths[4], OverlaySizes[4]},
+                                                                     {OverlayPaths[5], OverlaySizes[5]}};
+
+    ChunkedFolderContent Base = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks);
+    ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes, {}, {});
+    tsl::robin_map<std::string, uint32_t> BasePathLookup = chunkedcontent_testutils::BuildPathLookup(Base.Paths);
+
+    std::vector<IoBuffer> OverlayChunks;
+    ChunkedFolderContent  Overlay =
+        chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, OverlayPathAndSizes, 4u * 1024u, OverlayChunks);
+    ValidateChunkedFolderContent(Overlay, {}, Overlay.ChunkedContent.ChunkHashes, {}, {});
+
+    tsl::robin_map<std::string, uint32_t> OverlayPathLookup = chunkedcontent_testutils::BuildPathLookup(Overlay.Paths);
+
+    // True when Path in the merged content has the same raw hash as Path in Base.
+    // The lookup is taken by const reference so it is not copied on every CHECK.
+    auto PathMatchesBase = [&](const std::string&                           Path,
+                               const ChunkedFolderContent&                  MergedContent,
+                               const tsl::robin_map<std::string, uint32_t>& MergedPathLookup) {
+        return MergedContent.RawHashes[MergedPathLookup.at(Path)] == Base.RawHashes[BasePathLookup.at(Path)];
+    };
+
+    // True when Path in the merged content has the same raw hash as Path in Overlay.
+    auto PathMatchesOverlay = [&](const std::string&                           Path,
+                                  const ChunkedFolderContent&                  MergedContent,
+                                  const tsl::robin_map<std::string, uint32_t>& MergedPathLookup) {
+        return MergedContent.RawHashes[MergedPathLookup.at(Path)] == Overlay.RawHashes[OverlayPathLookup.at(Path)];
+    };
+
+    {
+        // No wildcards: 11 base paths - 3 replaced by the overlay + 6 overlay paths = 14.
+        ChunkedFolderContent AllMergedContent = ApplyChunkedContentOverlay(Base, Overlay, {}, {});
+        CHECK_EQ(AllMergedContent.Paths.size(), 14);
+
+        std::vector<IoBuffer> AllMergedChunks = chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes,
+                                                                                           BaseChunks,
+                                                                                           Overlay.ChunkedContent.ChunkHashes,
+                                                                                           OverlayChunks,
+                                                                                           AllMergedContent.ChunkedContent.ChunkHashes);
+        ValidateChunkedFolderContent(AllMergedContent, {}, AllMergedContent.ChunkedContent.ChunkHashes, {}, {});
+
+        tsl::robin_map<std::string, uint32_t> AllMergedPathLookup = chunkedcontent_testutils::BuildPathLookup(AllMergedContent.Paths);
+        CHECK(PathMatchesBase("file_2.exe", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("file_3.txt", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_1.exe", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_2.pdb", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_3.dll", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_1.txt", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_2.json", AllMergedContent, AllMergedPathLookup));
+
+        CHECK(PathMatchesOverlay("file_1", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesOverlay("file_4", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesOverlay("dir_1/dir1_file_2.pdb", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", AllMergedContent, AllMergedPathLookup));
+        CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_2.sym", AllMergedContent, AllMergedPathLookup));
+    }
+
+    {
+        // Include "*.exe" and "*.self": 3 matching base paths are dropped (8 remain) and
+        // 2 matching overlay paths are added = 10.
+        ChunkedFolderContent ReplaceExecutablesContent =
+            ApplyChunkedContentOverlay(Base, Overlay, std::vector<std::string>{"*.exe", "*.self"}, {});
+        CHECK_EQ(ReplaceExecutablesContent.Paths.size(), 10);
+
+        std::vector<IoBuffer> ReplaceExecutablesChunks =
+            chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes,
+                                                       BaseChunks,
+                                                       Overlay.ChunkedContent.ChunkHashes,
+                                                       OverlayChunks,
+                                                       ReplaceExecutablesContent.ChunkedContent.ChunkHashes);
+        ValidateChunkedFolderContent(ReplaceExecutablesContent, {}, ReplaceExecutablesContent.ChunkedContent.ChunkHashes, {}, {});
+
+        tsl::robin_map<std::string, uint32_t> ReplaceExecutablesPathLookup =
+            chunkedcontent_testutils::BuildPathLookup(ReplaceExecutablesContent.Paths);
+        CHECK(PathMatchesBase("file_1", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesBase("file_3.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesBase("dir_1/dir1_file_2.pdb", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+
+        CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_2.pdb", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_3.dll", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_1.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_2.json", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+
+        CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+        CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", ReplaceExecutablesContent, ReplaceExecutablesPathLookup));
+    }
+
+    {
+        // Include "dir_1/*.exe" and "dir_2/*", exclude "dir_2/*.sym": 6 matching base paths are dropped
+        // (5 remain) and 2 overlay paths are added (the .sym file is excluded) = 7.
+        ChunkedFolderContent ReplaceDir1ExecutablesContent = ApplyChunkedContentOverlay(Base,
+                                                                                        Overlay,
+                                                                                        std::vector<std::string>{"dir_1/*.exe", "dir_2/*"},
+                                                                                        std::vector<std::string>{"dir_2/*.sym"});
+        CHECK_EQ(ReplaceDir1ExecutablesContent.Paths.size(), 7);
+
+        std::vector<IoBuffer> ReplaceDir1Chunks =
+            chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes,
+                                                       BaseChunks,
+                                                       Overlay.ChunkedContent.ChunkHashes,
+                                                       OverlayChunks,
+                                                       ReplaceDir1ExecutablesContent.ChunkedContent.ChunkHashes);
+        ValidateChunkedFolderContent(ReplaceDir1ExecutablesContent, {}, ReplaceDir1ExecutablesContent.ChunkedContent.ChunkHashes, {}, {});
+
+        tsl::robin_map<std::string, uint32_t> ReplaceDir1ExecutablesPathLookup =
+            chunkedcontent_testutils::BuildPathLookup(ReplaceDir1ExecutablesContent.Paths);
+
+        CHECK(PathMatchesBase("file_1", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup));
+        CHECK(PathMatchesBase("file_2.exe", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup));
+        CHECK(PathMatchesBase("file_3.txt", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup));
+        CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup));
+
+        CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup));
+        CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup));
+    }
+}
#endif // ZEN_WITH_TESTS