aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-11-18 16:34:17 +0100
committerGitHub Enterprise <[email protected]>2025-11-18 16:34:17 +0100
commit2f9b8b2000b71650ff5a2b72dae3a5312e670465 (patch)
treec58814f89b7cddc94db237f630b018e4d7982733 /src
parentretain xmake state between runs (#656) (diff)
downloadzen-2f9b8b2000b71650ff5a2b72dae3a5312e670465.tar.xz
zen-2f9b8b2000b71650ff5a2b72dae3a5312e670465.zip
loose chunk filtering bug when using wildcards (#654)
* fix filtering of loose chunks when downloading with a filter add tests * changelog * move InlineRemoveUnusedHashes * remove extra braces
Diffstat (limited to 'src')
-rw-r--r--src/zen/cmds/builds_cmd.cpp22
-rw-r--r--src/zencore/include/zencore/testutils.h15
-rw-r--r--src/zencore/testutils.cpp43
-rw-r--r--src/zenremotestore/chunking/chunkedcontent.cpp461
-rw-r--r--src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h12
-rw-r--r--src/zenremotestore/zenremotestore.cpp2
6 files changed, 529 insertions, 26 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index 14686a173..fcda6e809 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -1267,22 +1267,13 @@ namespace {
if (!DeletedPaths.empty())
{
OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths);
-
- tsl::robin_set<IoHash, IoHash::Hasher> UsedLooseChunkHashes;
- UsedLooseChunkHashes.insert(OutRemoteContent.RawHashes.begin(), OutRemoteContent.RawHashes.end());
- for (auto It = OutLooseChunkHashes.begin(); It != OutLooseChunkHashes.end();)
- {
- if (!UsedLooseChunkHashes.contains(*It))
- {
- It = OutLooseChunkHashes.erase(It);
- }
- else
- {
- It++;
- }
- }
+ InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes);
}
}
+
+#if ZEN_BUILD_DEBUG
+ ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes);
+#endif // ZEN_BUILD_DEBUG
};
OutPartContents.resize(1);
@@ -1846,6 +1837,9 @@ namespace {
Options.IncludeWildcards,
Options.ExcludeWildcards,
LocalFolderContent);
+#if ZEN_BUILD_DEBUG
+ ValidateChunkedFolderContent(LocalContent, {}, LocalContent.ChunkedContent.ChunkHashes);
+#endif // ZEN_BUILD_DEBUG
}
else
{
diff --git a/src/zencore/include/zencore/testutils.h b/src/zencore/include/zencore/testutils.h
index 45fde4eda..e2a4f8346 100644
--- a/src/zencore/include/zencore/testutils.h
+++ b/src/zencore/include/zencore/testutils.h
@@ -35,6 +35,21 @@ struct ScopedCurrentDirectoryChange
IoBuffer CreateRandomBlob(uint64_t Size);
IoBuffer CreateSemiRandomBlob(uint64_t Size);
+/**
+ * Small deterministic pseudo random generator used by the test utilities.
+ * Two instances holding the same Seed produce the same sequence of values
+ * (the constants match the SplitMix64 mixer).
+ */
+struct FastRandom
+{
+    uint64_t Seed = 0x7CEBF54E45B9F5D1;
+
+    /** Advances Seed by a fixed increment and returns a scrambled copy of the new state. */
+    inline uint64_t Next()
+    {
+        Seed += UINT64_C(0x9E3779B97F4A7C15);
+
+        uint64_t Mixed = Seed;
+        Mixed ^= Mixed >> 30;
+        Mixed *= UINT64_C(0xBF58476D1CE4E5B9);
+        Mixed ^= Mixed >> 27;
+        Mixed *= UINT64_C(0x94D049BB133111EB);
+        Mixed ^= Mixed >> 31;
+        return Mixed;
+    }
+};
+
+IoBuffer CreateRandomBlob(FastRandom& Random, uint64_t Size);
+IoBuffer CreateSemiRandomBlob(FastRandom& Random, uint64_t Size);
+
struct FalseType
{
static const bool Enabled = false;
diff --git a/src/zencore/testutils.cpp b/src/zencore/testutils.cpp
index 9f50de032..5bc2841ae 100644
--- a/src/zencore/testutils.cpp
+++ b/src/zencore/testutils.cpp
@@ -46,22 +46,41 @@ ScopedTemporaryDirectory::~ScopedTemporaryDirectory()
IoBuffer
CreateRandomBlob(uint64_t Size)
{
- static uint64_t Seed{0x7CEBF54E45B9F5D1};
- auto Next = [](uint64_t& seed) {
- uint64_t z = (seed += UINT64_C(0x9E3779B97F4A7C15));
- z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9);
- z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB);
- return z ^ (z >> 31);
- };
+ static FastRandom Rand{.Seed = 0x7CEBF54E45B9F5D1};
+ return CreateRandomBlob(Rand, Size);
+};
+
+/**
+ * Creates a buffer of Size bytes that is only partly random: one random part of less than
+ * 64 KiB is generated with CreateRandomBlob(uint64_t) and written repeatedly, and whatever
+ * tail is left over is filled from a second, separately generated random blob.
+ *
+ * @param Size Number of bytes in the returned buffer; zero yields an empty buffer.
+ * @return The filled buffer.
+ */
+IoBuffer
+CreateSemiRandomBlob(uint64_t Size)
+{
+    IoBuffer Result(Size);
+    if (Size == 0)
+    {
+        // PartSize would be zero here, so the repeat loop condition below (Remain.GetSize() >= PartSize)
+        // could never become false
+        return Result;
+    }
+
+    const size_t PartCount = (Size / (1u * 1024u * 64)) + 1;
+    const size_t PartSize  = Size / PartCount;
+    auto         Part      = CreateRandomBlob(PartSize);
+
+    // NOTE(review): CopyFrom appears to return the part of the view that has not been written yet - confirm
+    auto Remain = Result.GetMutableView().CopyFrom(Part.GetView());
+    while (Remain.GetSize() >= PartSize)
+    {
+        Remain = Remain.CopyFrom(Part.GetView());
+    }
+    if (Remain.GetSize() > 0)
+    {
+        auto RemainBuffer = CreateRandomBlob(Remain.GetSize());
+        Remain.CopyFrom(RemainBuffer.GetView());
+    }
+    return Result;
+}
+IoBuffer
+CreateRandomBlob(FastRandom& Rand, uint64_t Size)
+{
IoBuffer Data(Size);
uint64_t* DataPtr = reinterpret_cast<uint64_t*>(Data.MutableData());
while (Size > sizeof(uint64_t))
{
- *DataPtr++ = Next(Seed);
+ *DataPtr++ = Rand.Next();
Size -= sizeof(uint64_t);
}
- uint64_t ByteNext = Next(Seed);
+ uint64_t ByteNext = Rand.Next();
uint8_t* ByteDataPtr = reinterpret_cast<uint8_t*>(DataPtr);
while (Size > 0)
{
@@ -73,12 +92,12 @@ CreateRandomBlob(uint64_t Size)
};
IoBuffer
-CreateSemiRandomBlob(uint64_t Size)
+CreateSemiRandomBlob(FastRandom& Rand, uint64_t Size)
{
IoBuffer Result(Size);
const size_t PartCount = (Size / (1u * 1024u * 64)) + 1;
const size_t PartSize = Size / PartCount;
- auto Part = CreateRandomBlob(PartSize);
+ auto Part = CreateRandomBlob(Rand, PartSize);
auto Remain = Result.GetMutableView().CopyFrom(Part.GetView());
while (Remain.GetSize() >= PartSize)
{
@@ -86,7 +105,7 @@ CreateSemiRandomBlob(uint64_t Size)
}
if (Remain.GetSize() > 0)
{
- auto RemainBuffer = CreateRandomBlob(Remain.GetSize());
+ auto RemainBuffer = CreateRandomBlob(Rand, Remain.GetSize());
Remain.CopyFrom(RemainBuffer.GetView());
}
return Result;
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index af1f06cec..ac979a64b 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -10,14 +10,21 @@
#include <zencore/scopeguard.h>
#include <zencore/timer.h>
#include <zencore/trace.h>
+#include <zenremotestore/chunking/chunkblock.h>
#include <zenremotestore/chunking/chunkedfile.h>
#include <zenremotestore/chunking/chunkingcontroller.h>
+#include <zenutil/wildcard.h>
ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_set.h>
#include <gsl/gsl-lite.hpp>
ZEN_THIRD_PARTY_INCLUDES_END
+#if ZEN_WITH_TESTS
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+#endif // ZEN_WITH_TESTS
+
namespace zen {
using namespace std::literals;
@@ -715,6 +722,7 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
+ Result.ChunkedContent.ChunkOrders.reserve(BaseChunkCount);
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
RawHashToSequenceRawHashIndex.reserve(ExpectedPathCount);
@@ -828,6 +836,32 @@ CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderConten
return true;
};
+/**
+ * Maps every hash in Hashes to its position in the span.
+ * If a hash occurs more than once the position of its last occurrence is stored.
+ */
+static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>
+BuildHashLookup(std::span<const IoHash> Hashes)
+{
+    tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> HashToIndex;
+    HashToIndex.reserve(Hashes.size());
+
+    uint32_t Position = 0;
+    for (const IoHash& Hash : Hashes)
+    {
+        HashToIndex.insert_or_assign(Hash, Position);
+        Position++;
+    }
+    return HashToIndex;
+}
+
+/**
+ * Returns one offset per entry of ChunkCounts: the sum of all counts that precede it.
+ * The result has the same length as ChunkCounts and its first element (if any) is zero.
+ */
+static std::vector<uint32_t>
+BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts)
+{
+    std::vector<uint32_t> Offsets;
+    Offsets.reserve(ChunkCounts.size());
+
+    uint32_t RunningTotal = 0;
+    for (const uint32_t Count : ChunkCounts)
+    {
+        Offsets.push_back(RunningTotal);
+        RunningTotal += Count;
+    }
+    return Offsets;
+}
+
ChunkedFolderContent
ChunkFolderContent(ChunkingStatistics& Stats,
WorkerThreadPool& WorkerPool,
@@ -979,6 +1013,8 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size());
Result.ChunkHashToChunkIndex.reserve(ChunkCount);
size_t RangeOffset = 0;
+ Result.ChunkSequenceLocationOffset.reserve(ChunkCount);
+ Result.ChunkSequenceLocationCounts.reserve(ChunkCount);
for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++)
{
Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex});
@@ -1021,4 +1057,429 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content)
return Result;
}
+/**
+ * Checks Content for internal consistency and throws std::runtime_error on the first problem found.
+ *
+ * Verified properties:
+ *  - the per-path, per-sequence and per-chunk arrays have matching lengths
+ *  - no path is empty
+ *  - every file with a non-zero size refers to a known sequence; each chunk of that sequence has a non-zero
+ *    size and hash, is listed in LooseChunks or in one of BlockDescriptions, and the chunk sizes add up to
+ *    the file size
+ *  - files with zero size carry IoHash::Zero as raw hash
+ *  - every sequence and every chunk is referenced at least once
+ *
+ * @param Content           The chunked folder content to check.
+ * @param BlockDescriptions Blocks whose ChunkRawHashes count as known chunks.
+ * @param LooseChunks       Additional chunk hashes that count as known chunks.
+ * @throws std::runtime_error describing the first inconsistency.
+ */
+void
+ValidateChunkedFolderContent(const ChunkedFolderContent&            Content,
+                             std::span<const ChunkBlockDescription> BlockDescriptions,
+                             std::span<const IoHash>                LooseChunks)
+{
+    const size_t PathCount       = Content.Paths.size();
+    const size_t SequenceCount   = Content.ChunkedContent.SequenceRawHashes.size();
+    const size_t TotalChunkCount = Content.ChunkedContent.ChunkHashes.size();
+    const size_t ChunkOrderCount = Content.ChunkedContent.ChunkOrders.size();
+
+    // The loops below index these arrays in parallel, so reject mismatching lengths before touching them
+    if (Content.RawSizes.size() != PathCount || Content.RawHashes.size() != PathCount)
+    {
+        throw std::runtime_error(fmt::format("Chunked folder content has {} paths but {} raw sizes and {} raw hashes",
+                                             PathCount,
+                                             Content.RawSizes.size(),
+                                             Content.RawHashes.size()));
+    }
+    if (Content.ChunkedContent.ChunkCounts.size() != SequenceCount)
+    {
+        throw std::runtime_error(fmt::format("Chunked folder content has {} sequences but {} chunk counts",
+                                             SequenceCount,
+                                             Content.ChunkedContent.ChunkCounts.size()));
+    }
+    if (Content.ChunkedContent.ChunkRawSizes.size() != TotalChunkCount)
+    {
+        throw std::runtime_error(fmt::format("Chunked folder content has {} chunk hashes but {} chunk raw sizes",
+                                             TotalChunkCount,
+                                             Content.ChunkedContent.ChunkRawSizes.size()));
+    }
+
+    size_t TotalKnownChunkCount = LooseChunks.size();
+    for (const ChunkBlockDescription& BlockDescription : BlockDescriptions)
+    {
+        TotalKnownChunkCount += BlockDescription.ChunkRawHashes.size();
+    }
+
+    tsl::robin_set<IoHash, IoHash::Hasher> KnownChunks;
+    KnownChunks.reserve(TotalKnownChunkCount);
+    KnownChunks.insert(LooseChunks.begin(), LooseChunks.end());
+    for (const ChunkBlockDescription& BlockDescription : BlockDescriptions)
+    {
+        KnownChunks.insert(BlockDescription.ChunkRawHashes.begin(), BlockDescription.ChunkRawHashes.end());
+    }
+
+    std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts);
+    tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes);
+    std::vector<size_t> SequenceUseCount(SequenceCount, 0);
+    std::vector<size_t> ChunkUseCount(TotalChunkCount, 0);
+    for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
+    {
+        const std::filesystem::path& Path = Content.Paths[PathIndex];
+        if (Path.empty())
+        {
+            throw std::runtime_error("Chunked folder content contains empty path");
+        }
+        const uint64_t RawSize = Content.RawSizes[PathIndex];
+        const IoHash   RawHash = Content.RawHashes[PathIndex];
+        if (RawSize == 0)
+        {
+            if (RawHash != IoHash::Zero)
+            {
+                throw std::runtime_error(
+                    fmt::format("Chunked folder content references zero size sequence with non-zero hash '{}'", RawHash));
+            }
+            continue;
+        }
+
+        auto It = SequenceIndexLookup.find(RawHash);
+        if (It == SequenceIndexLookup.end())
+        {
+            throw std::runtime_error(fmt::format("Chunked folder content references unknown sequence hash '{}'", RawHash));
+        }
+
+        const uint32_t SourceSequenceIndex = It->second;
+        SequenceUseCount[SourceSequenceIndex]++;
+        const uint32_t ChunkOrderOffset   = ChunkOrderOffsets[SourceSequenceIndex];
+        const uint32_t SequenceChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex];
+
+        // Taking a subspan outside of ChunkOrders is undefined behaviour, so check the range first
+        if (uint64_t(ChunkOrderOffset) + SequenceChunkCount > ChunkOrderCount)
+        {
+            throw std::runtime_error(fmt::format("Chunked folder content sequence '{}' needs chunk orders {}..{} but only {} exist",
+                                                 RawHash,
+                                                 ChunkOrderOffset,
+                                                 uint64_t(ChunkOrderOffset) + SequenceChunkCount,
+                                                 ChunkOrderCount));
+        }
+
+        std::span<const uint32_t> ChunkIndexes =
+            std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, SequenceChunkCount);
+
+        uint64_t SizeSum = 0;
+        for (uint32_t ChunkIndex : ChunkIndexes)
+        {
+            if (ChunkIndex >= TotalChunkCount)
+            {
+                throw std::runtime_error(
+                    fmt::format("Chunked folder content references chunk index {} but only {} chunks exist", ChunkIndex, TotalChunkCount));
+            }
+            ChunkUseCount[ChunkIndex]++;
+            const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
+            if (ChunkSize == 0)
+            {
+                throw std::runtime_error("Chunked folder content contains zero size chunk");
+            }
+            const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
+            if (ChunkRawHash == IoHash::Zero)
+            {
+                throw std::runtime_error("Chunked folder content contains zero chunk hash");
+            }
+            if (!KnownChunks.contains(ChunkRawHash))
+            {
+                throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash));
+            }
+            SizeSum += ChunkSize;
+        }
+        if (SizeSum != RawSize)
+        {
+            throw std::runtime_error(
+                fmt::format("Chunked folder content sequence size {} does not match expected size '{}'", SizeSum, RawSize));
+        }
+    }
+
+    for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++)
+    {
+        if (SequenceUseCount[SequenceIndex] == 0)
+        {
+            throw std::runtime_error(
+                fmt::format("Chunked folder has unused sequence '{}'", Content.ChunkedContent.SequenceRawHashes[SequenceIndex]));
+        }
+    }
+    for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++)
+    {
+        if (ChunkUseCount[ChunkIndex] == 0)
+        {
+            throw std::runtime_error(fmt::format("Chunked folder has unused chunk '{}'", Content.ChunkedContent.ChunkHashes[ChunkIndex]));
+        }
+    }
+}
+
+/**
+ * Removes every hash from InOutHashes that does not occur in UsedHashes.
+ * The hashes that remain keep their relative order.
+ *
+ * @param InOutHashes Hashes to filter in place.
+ * @param UsedHashes  Hashes that are allowed to stay.
+ */
+void
+InlineRemoveUnusedHashes(std::vector<IoHash>& InOutHashes, std::span<const IoHash> UsedHashes)
+{
+    tsl::robin_set<IoHash, IoHash::Hasher> UsedChunkHashes;
+    UsedChunkHashes.reserve(UsedHashes.size());
+    UsedChunkHashes.insert(UsedHashes.begin(), UsedHashes.end());
+
+    // One compacting pass; erasing element by element would shift the tail once per removed hash
+    std::erase_if(InOutHashes, [&UsedChunkHashes](const IoHash& Hash) { return !UsedChunkHashes.contains(Hash); });
+}
+
+#if ZEN_WITH_TESTS
+
+// Intentionally empty. zenremotestore_forcelinktests() calls this function; presumably that reference
+// keeps this translation unit, and the test cases in it, from being dropped at link time - TODO confirm
+void
+chunkedcontent_forcelink()
+{
+}
+
+namespace chunked_test_utils {
+ // A generated test file split into chunks, as produced by CreateChunkedFile()
+ struct ChunkedFile
+ {
+ // Hash over all chunk buffers appended in order
+ IoHash RawHash;
+ // Hash of each individual chunk buffer, in file order
+ std::vector<IoHash> ChunkHashes;
+ // Size in bytes of each chunk, parallel to ChunkHashes
+ std::vector<uint64_t> ChunkSizes;
+ // Payload of each chunk, parallel to ChunkHashes
+ std::vector<IoBuffer> Chunks;
+ };
+
+    /**
+     * Generates Size bytes of random data from Random, cut into consecutive chunks of ChunkingSize bytes
+     * (the last chunk may be shorter). Returns the chunk payloads together with the hash and size of each
+     * chunk and the hash of all chunk buffers appended in order.
+     *
+     * @param Random       Generator that supplies the chunk data.
+     * @param Size         Total number of bytes to generate; zero yields no chunks.
+     * @param ChunkingSize Maximum size of a chunk in bytes, must be greater than zero.
+     */
+    ChunkedFile CreateChunkedFile(FastRandom& Random, size_t Size, size_t ChunkingSize)
+    {
+        // A zero chunking size would divide by zero below and the loop could never consume Size
+        ZEN_ASSERT(ChunkingSize > 0);
+
+        const size_t          ChunkCount = (Size + (ChunkingSize - 1)) / ChunkingSize;
+        std::vector<IoHash>   ChunkHashes;
+        std::vector<uint64_t> ChunkSizes;
+        std::vector<IoBuffer> Chunks;
+        ChunkHashes.reserve(ChunkCount);
+        ChunkSizes.reserve(ChunkCount);
+        Chunks.reserve(ChunkCount);
+
+        IoHashStream HashStream;
+        while (Size > 0)
+        {
+            const size_t ChunkSize   = Min(Size, ChunkingSize);
+            IoBuffer     ChunkBuffer = CreateRandomBlob(Random, ChunkSize);
+            HashStream.Append(ChunkBuffer);
+            ChunkHashes.push_back(IoHash::HashBuffer(ChunkBuffer));
+            ChunkSizes.push_back(ChunkSize);
+            Chunks.emplace_back(std::move(ChunkBuffer));
+            Size -= ChunkSize;
+        }
+        return ChunkedFile{.RawHash     = HashStream.GetHash(),
+                           .ChunkHashes = std::move(ChunkHashes),
+                           .ChunkSizes  = std::move(ChunkSizes),
+                           .Chunks      = std::move(Chunks)};
+    }
+
+    /**
+     * Builds a ChunkedFolderContent with one generated file per entry of PathAndSizes.
+     *
+     * Files with a non-zero size are generated with CreateChunkedFile(); files of size zero get IoHash::Zero
+     * as raw hash and no sequence. A sequence is registered once per distinct file hash and a chunk once per
+     * distinct chunk hash; repeated chunks only add another entry to ChunkOrders.
+     *
+     * @param Random        Generator that supplies the file data.
+     * @param PathAndSizes  Path and size in bytes of each file to create.
+     * @param ChunkingSize  Maximum chunk size passed on to CreateChunkedFile().
+     * @param ChunkPayloads Receives one payload per newly registered chunk. When it is empty on entry its
+     *                      indices match Result.ChunkedContent.ChunkHashes.
+     */
+    ChunkedFolderContent CreateChunkedFolderContent(FastRandom&                                             Random,
+                                                    std::span<const std::pair<const std::string, uint64_t>> PathAndSizes,
+                                                    uint64_t                                                ChunkingSize,
+                                                    std::vector<IoBuffer>&                                  ChunkPayloads)
+    {
+        ChunkedFolderContent Result;
+        Result.Paths.reserve(PathAndSizes.size());
+        Result.RawSizes.reserve(PathAndSizes.size());
+        Result.Attributes.reserve(PathAndSizes.size());
+        Result.RawHashes.reserve(PathAndSizes.size());
+        ChunkPayloads.reserve(PathAndSizes.size());
+
+        tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceToIndex;
+        tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkToIndex;
+        for (const auto& [Path, Size] : PathAndSizes)
+        {
+            Result.Paths.push_back(Path);
+            Result.RawSizes.push_back(Size);
+            Result.Attributes.push_back(0);
+
+            if (Size == 0)
+            {
+                Result.RawHashes.push_back(IoHash::Zero);
+                continue;
+            }
+
+            ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize);
+            Result.RawHashes.push_back(File.RawHash);
+
+            if (SequenceToIndex.find(File.RawHash) != SequenceToIndex.end())
+            {
+                // Identical file content was already registered as a sequence
+                continue;
+            }
+
+            SequenceToIndex.insert_or_assign(File.RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size()));
+            Result.ChunkedContent.SequenceRawHashes.push_back(File.RawHash);
+            Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(File.ChunkHashes.size()));
+            for (size_t ChunkIndex = 0; ChunkIndex < File.ChunkHashes.size(); ChunkIndex++)
+            {
+                const IoHash& ChunkHash = File.ChunkHashes[ChunkIndex];
+                if (auto ChunkIt = ChunkToIndex.find(ChunkHash); ChunkIt != ChunkToIndex.end())
+                {
+                    // Known chunk, only reference it from this sequence
+                    Result.ChunkedContent.ChunkOrders.push_back(ChunkIt->second);
+                    continue;
+                }
+
+                // The index of a new chunk is its position in ChunkHashes. ChunkOrders grows for every
+                // reference, so its size stops matching as soon as any chunk is reused
+                const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
+                ChunkToIndex.insert_or_assign(ChunkHash, NewChunkIndex);
+                Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
+                Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
+                Result.ChunkedContent.ChunkRawSizes.push_back(File.ChunkSizes[ChunkIndex]);
+                ChunkPayloads.push_back(std::move(File.Chunks[ChunkIndex]));
+            }
+        }
+        return Result;
+    }
+# if 0
+ void ValidateChunkedFolderContent(const ChunkedFolderContent& Content, std::span<const IoBuffer> Chunks)
+ {
+ std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts);
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes);
+ std::vector<size_t> SequenceUseCount(Content.ChunkedContent.SequenceRawHashes.size(), 0);
+ std::vector<size_t> ChunkUseCount(Content.ChunkedContent.ChunkHashes.size(), 0);
+ for (size_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
+ {
+ const std::filesystem::path& Path = Content.Paths[PathIndex];
+ ZEN_ASSERT(!Path.empty());
+ const uint64_t RawSize = Content.RawSizes[PathIndex];
+ const IoHash RawHash = Content.RawHashes[PathIndex];
+ if (RawSize > 0)
+ {
+ if (auto It = SequenceIndexLookup.find(RawHash); It != SequenceIndexLookup.end())
+ {
+ const uint32_t SourceSequenceIndex = It->second;
+ SequenceUseCount[SourceSequenceIndex]++;
+ const uint32_t ChunkOrderOffset = ChunkOrderOffsets[SourceSequenceIndex];
+ const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex];
+
+ std::span<const uint32_t> ChunkIndexes =
+ std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount);
+
+ IoHashStream Hasher;
+ uint64_t SizeSum = 0;
+ for (uint32_t ChunkIndex : ChunkIndexes)
+ {
+ ChunkUseCount[ChunkIndex]++;
+ const IoBuffer& ChunkBuffer = Chunks[ChunkIndex];
+ const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
+ const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
+ SizeSum += ChunkSize;
+ CHECK_EQ(ChunkRawHash, IoHash::HashBuffer(ChunkBuffer));
+ Hasher.Append(ChunkBuffer);
+ }
+ CHECK_EQ(RawHash, Hasher.GetHash());
+ CHECK_EQ(SizeSum, RawSize);
+ }
+ else
+ {
+ CHECK(false);
+ }
+ }
+ else
+ {
+ CHECK(RawHash == IoHash::Zero);
+ }
+ }
+
+ for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++)
+ {
+ CHECK(SequenceUseCount[SequenceIndex] > 0);
+ }
+ for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++)
+ {
+ CHECK(ChunkUseCount[ChunkIndex] > 0);
+ }
+ }
+# endif // 0
+    /**
+     * Collects the payload for each hash in WantedHashes, in the same order.
+     * BaseHashes/BaseChunks are searched first and OverlayHashes/OverlayChunks second. A hash found in
+     * neither fails a CHECK and adds no payload to the result.
+     */
+    std::vector<IoBuffer> GetChunkPayloads(std::span<const IoHash>   BaseHashes,
+                                           std::span<const IoBuffer> BaseChunks,
+                                           std::span<const IoHash>   OverlayHashes,
+                                           std::span<const IoBuffer> OverlayChunks,
+                                           std::span<const IoHash>   WantedHashes)
+    {
+        const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseIndexByHash    = BuildHashLookup(BaseHashes);
+        const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> OverlayIndexByHash = BuildHashLookup(OverlayHashes);
+
+        std::vector<IoBuffer> Payloads;
+        Payloads.reserve(WantedHashes.size());
+        for (const IoHash& Wanted : WantedHashes)
+        {
+            if (auto BaseIt = BaseIndexByHash.find(Wanted); BaseIt != BaseIndexByHash.end())
+            {
+                Payloads.push_back(BaseChunks[BaseIt->second]);
+                continue;
+            }
+            if (auto OverlayIt = OverlayIndexByHash.find(Wanted); OverlayIt != OverlayIndexByHash.end())
+            {
+                Payloads.push_back(OverlayChunks[OverlayIt->second]);
+                continue;
+            }
+            CHECK(false);
+        }
+        return Payloads;
+    }
+
+    /**
+     * Maps the generic_string() form of every path in Paths to its position in the span.
+     * If a path occurs more than once the position of its last occurrence is stored.
+     */
+    tsl::robin_map<std::string, uint32_t> BuildPathLookup(std::span<const std::filesystem::path> Paths)
+    {
+        tsl::robin_map<std::string, uint32_t> Result;
+        Result.reserve(Paths.size());
+        for (size_t Index = 0; Index < Paths.size(); Index++)
+        {
+            // Checked conversion, as used for the other index conversions in this file, instead of a silent truncation
+            Result.insert_or_assign(Paths[Index].generic_string(), gsl::narrow<uint32_t>(Index));
+        }
+        return Result;
+    }
+
+    /**
+     * Decides whether Path passes the wildcard filter, matching case-insensitively against the
+     * generic_string() form of the path.
+     *
+     * A path passes when it matches at least one of IncludeWildcards (or IncludeWildcards is empty)
+     * and matches none of ExcludeWildcards.
+     */
+    bool IncludePath(std::span<const std::string> IncludeWildcards,
+                     std::span<const std::string> ExcludeWildcards,
+                     const std::filesystem::path& Path)
+    {
+        const std::string PathString = Path.generic_string();
+
+        const auto MatchesAny = [&PathString](std::span<const std::string> Wildcards) {
+            for (const std::string& Wildcard : Wildcards)
+            {
+                if (MatchWildcard(Wildcard, PathString, /*CaseSensitive*/ false))
+                {
+                    return true;
+                }
+            }
+            return false;
+        };
+
+        if (!IncludeWildcards.empty() && !MatchesAny(IncludeWildcards))
+        {
+            return false;
+        }
+        return !MatchesAny(ExcludeWildcards);
+    }
+
+} // namespace chunked_test_utils
+
+// Builds a small folder content, deletes every path hit by the exclude wildcards and verifies that the
+// filtered content still validates against the loose chunk list filtered with InlineRemoveUnusedHashes()
+TEST_CASE("DeletePathsFromContent")
+{
+    FastRandom BaseRandom;
+
+    // Path and size in bytes of every file in the base content; only the two *.pdb files match an exclude wildcard
+    const std::pair<const std::string, uint64_t> BasePathAndSizes[] = {{"file_1", 6u * 1024u},
+                                                                       {"file_2.exe", 0u},
+                                                                       {"file_3.txt", 798u},
+                                                                       {"dir_1/dir1_file_1.exe", 19u * 1024u},
+                                                                       {"dir_1/dir1_file_2.pdb", 7u * 1024u},
+                                                                       {"dir_1/dir1_file_3.txt", 93u},
+                                                                       {"dir_2/dir2_dir1/dir2_dir1_file_1.exe", 31u * 1024u},
+                                                                       {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb", 17u * 1024u},
+                                                                       {"dir_2/dir2_dir1/dir2_dir1_file_3.dll", 13u * 1024u},
+                                                                       {"dir_2/dir2_dir2/dir2_dir2_file_1.txt", 2u * 1024u},
+                                                                       {"dir_2/dir2_dir2/dir2_dir2_file_2.json", 3u * 1024u}};
+
+    std::vector<IoBuffer> BaseChunks;
+    ChunkedFolderContent  Base = chunked_test_utils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks);
+    ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes);
+
+    const std::vector<std::string> IncludeWildcards;
+    const std::vector<std::string> ExcludeWildcards = {"*.map*", "*.pdb*", "*optional*", "*Manifest_*", "*pakchunk10sm6*"};
+
+    std::vector<std::filesystem::path> DeletedPaths;
+    for (const std::filesystem::path& RemotePath : Base.Paths)
+    {
+        if (!chunked_test_utils::IncludePath(IncludeWildcards, ExcludeWildcards, RemotePath))
+        {
+            DeletedPaths.push_back(RemotePath);
+        }
+    }
+
+    CHECK(DeletedPaths.size() == 2);
+
+    ChunkedFolderContent FilteredContent = DeletePathsFromChunkedContent(Base, DeletedPaths);
+
+    std::vector<IoHash> FilteredChunks = Base.ChunkedContent.ChunkHashes;
+
+    InlineRemoveUnusedHashes(FilteredChunks, FilteredContent.ChunkedContent.ChunkHashes);
+
+    // The deleted files have a non-zero size, so filtering has to drop at least their chunks
+    CHECK(FilteredChunks.size() < Base.ChunkedContent.ChunkHashes.size());
+
+    ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks);
+}
+
+#endif // ZEN_WITH_TESTS
+
} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
index eb6ba19aa..227d877d8 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
@@ -196,4 +196,16 @@ GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& Ra
return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex);
}
+struct ChunkBlockDescription;
+
+// Checks Content for internal consistency and throws std::runtime_error on the first problem found.
+// Every chunk referenced by Content has to be listed in LooseChunks or in the ChunkRawHashes of one of
+// BlockDescriptions, and every sequence and chunk in Content has to be referenced at least once.
+void ValidateChunkedFolderContent(const ChunkedFolderContent& Content,
+ std::span<const ChunkBlockDescription> BlockDescriptions,
+ std::span<const IoHash> LooseChunks);
+
+// Removes every hash from InOutHashes that does not occur in UsedHashes
+void InlineRemoveUnusedHashes(std::vector<IoHash>& InOutHashes, std::span<const IoHash> UsedHashes);
+
+#if ZEN_WITH_TESTS
+void chunkedcontent_forcelink();
+#endif // ZEN_WITH_TESTS
+
} // namespace zen
diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp
index c019bc71d..03baf1cb8 100644
--- a/src/zenremotestore/zenremotestore.cpp
+++ b/src/zenremotestore/zenremotestore.cpp
@@ -2,6 +2,7 @@
#include <zenremotestore/zenremotestore.h>
+#include <zenremotestore/chunking/chunkedcontent.h>
#include <zenremotestore/chunking/chunkedfile.h>
#include <zenremotestore/projectstore/remoteprojectstore.h>
@@ -13,6 +14,7 @@ void
zenremotestore_forcelinktests()
{
chunkblock_forcelink();
+ chunkedcontent_forcelink();
chunkedfile_forcelink();
remoteprojectstore_forcelink();
}