diff options
| author | Dan Engelbrecht <[email protected]> | 2025-11-18 16:34:17 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-11-18 16:34:17 +0100 |
| commit | 2f9b8b2000b71650ff5a2b72dae3a5312e670465 (patch) | |
| tree | c58814f89b7cddc94db237f630b018e4d7982733 /src | |
| parent | retain xmake state between runs (#656) (diff) | |
| download | zen-2f9b8b2000b71650ff5a2b72dae3a5312e670465.tar.xz zen-2f9b8b2000b71650ff5a2b72dae3a5312e670465.zip | |
loose chunk filtering bug when using wildcards (#654)
* fix filtering of loose chunks when downloading with a filter
add tests
* changelog
* move InlineRemoveUnusedHashes
* remove extra braces
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/builds_cmd.cpp | 22 | ||||
| -rw-r--r-- | src/zencore/include/zencore/testutils.h | 15 | ||||
| -rw-r--r-- | src/zencore/testutils.cpp | 43 | ||||
| -rw-r--r-- | src/zenremotestore/chunking/chunkedcontent.cpp | 461 | ||||
| -rw-r--r-- | src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h | 12 | ||||
| -rw-r--r-- | src/zenremotestore/zenremotestore.cpp | 2 |
6 files changed, 529 insertions, 26 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 14686a173..fcda6e809 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -1267,22 +1267,13 @@ namespace { if (!DeletedPaths.empty()) { OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths); - - tsl::robin_set<IoHash, IoHash::Hasher> UsedLooseChunkHashes; - UsedLooseChunkHashes.insert(OutRemoteContent.RawHashes.begin(), OutRemoteContent.RawHashes.end()); - for (auto It = OutLooseChunkHashes.begin(); It != OutLooseChunkHashes.end();) - { - if (!UsedLooseChunkHashes.contains(*It)) - { - It = OutLooseChunkHashes.erase(It); - } - else - { - It++; - } - } + InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes); } } + +#if ZEN_BUILD_DEBUG + ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes); +#endif // ZEN_BUILD_DEBUG }; OutPartContents.resize(1); @@ -1846,6 +1837,9 @@ namespace { Options.IncludeWildcards, Options.ExcludeWildcards, LocalFolderContent); +#if ZEN_BUILD_DEBUG + ValidateChunkedFolderContent(LocalContent, {}, LocalContent.ChunkedContent.ChunkHashes); +#endif // ZEN_BUILD_DEBUG } else { diff --git a/src/zencore/include/zencore/testutils.h b/src/zencore/include/zencore/testutils.h index 45fde4eda..e2a4f8346 100644 --- a/src/zencore/include/zencore/testutils.h +++ b/src/zencore/include/zencore/testutils.h @@ -35,6 +35,21 @@ struct ScopedCurrentDirectoryChange IoBuffer CreateRandomBlob(uint64_t Size); IoBuffer CreateSemiRandomBlob(uint64_t Size); +struct FastRandom +{ + uint64_t Seed = 0x7CEBF54E45B9F5D1; + inline uint64_t Next() + { + uint64_t z = (Seed += UINT64_C(0x9E3779B97F4A7C15)); + z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); + return z ^ (z >> 31); + } +}; + +IoBuffer CreateRandomBlob(FastRandom& Random, uint64_t Size); +IoBuffer CreateSemiRandomBlob(FastRandom& Random, uint64_t Size); + struct FalseType { static const bool Enabled = false; diff --git a/src/zencore/testutils.cpp b/src/zencore/testutils.cpp index 9f50de032..5bc2841ae 100644 --- a/src/zencore/testutils.cpp +++ b/src/zencore/testutils.cpp @@ -46,22 +46,41 @@ ScopedTemporaryDirectory::~ScopedTemporaryDirectory() IoBuffer CreateRandomBlob(uint64_t Size) { - static uint64_t Seed{0x7CEBF54E45B9F5D1}; - auto Next = [](uint64_t& seed) { - uint64_t z = (seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); - }; + static FastRandom Rand{.Seed = 0x7CEBF54E45B9F5D1}; + return CreateRandomBlob(Rand, Size); +}; + +IoBuffer +CreateSemiRandomBlob(uint64_t Size) +{ + IoBuffer Result(Size); + const size_t PartCount = (Size / (1u * 1024u * 64)) + 1; + const size_t PartSize = Size / PartCount; + auto Part = CreateRandomBlob(PartSize); + auto Remain = Result.GetMutableView().CopyFrom(Part.GetView()); + while (Remain.GetSize() >= PartSize) + { + Remain = Remain.CopyFrom(Part.GetView()); + } + if (Remain.GetSize() > 0) + { + auto RemainBuffer = CreateRandomBlob(Remain.GetSize()); + Remain.CopyFrom(RemainBuffer.GetView()); + } + return Result; +}; +IoBuffer +CreateRandomBlob(FastRandom& Rand, uint64_t Size) +{ IoBuffer Data(Size); uint64_t* DataPtr = reinterpret_cast<uint64_t*>(Data.MutableData()); while (Size > sizeof(uint64_t)) { - *DataPtr++ = Next(Seed); + *DataPtr++ = Rand.Next(); Size -= sizeof(uint64_t); } - uint64_t ByteNext = Next(Seed); + uint64_t ByteNext = Rand.Next(); uint8_t* ByteDataPtr = reinterpret_cast<uint8_t*>(DataPtr); while (Size > 0) { @@ -73,12 +92,12 @@ CreateRandomBlob(uint64_t Size) }; IoBuffer -CreateSemiRandomBlob(uint64_t Size) +CreateSemiRandomBlob(FastRandom& Rand, uint64_t Size) { IoBuffer Result(Size); const size_t PartCount = (Size / (1u * 1024u * 64)) + 1; const size_t PartSize = Size / PartCount; - auto Part = CreateRandomBlob(PartSize); + auto Part = CreateRandomBlob(Rand, PartSize); auto Remain = Result.GetMutableView().CopyFrom(Part.GetView()); while (Remain.GetSize() >= PartSize) { @@ -86,7 +105,7 @@ CreateSemiRandomBlob(uint64_t Size) } if (Remain.GetSize() > 0) { - auto RemainBuffer = CreateRandomBlob(Remain.GetSize()); + auto RemainBuffer = CreateRandomBlob(Rand, Remain.GetSize()); Remain.CopyFrom(RemainBuffer.GetView()); } return Result; diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index af1f06cec..ac979a64b 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -10,14 +10,21 @@ #include <zencore/scopeguard.h> #include <zencore/timer.h> #include <zencore/trace.h> +#include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedfile.h> #include <zenremotestore/chunking/chunkingcontroller.h> +#include <zenutil/wildcard.h> ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_set.h> #include <gsl/gsl-lite.hpp> ZEN_THIRD_PARTY_INCLUDES_END +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +#endif // ZEN_WITH_TESTS + namespace zen { using namespace std::literals; @@ -715,6 +722,7 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkOrders.reserve(BaseChunkCount); tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; RawHashToSequenceRawHashIndex.reserve(ExpectedPathCount); @@ -828,6 +836,32 @@ CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderConten return true; }; +static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> +BuildHashLookup(std::span<const IoHash> Hashes) +{ + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Lookup; + Lookup.reserve(Hashes.size()); + for (uint32_t Index = 0; Index < Hashes.size(); Index++) + { + Lookup.insert_or_assign(Hashes[Index], Index); + } + return Lookup; +} + +static std::vector<uint32_t> +BuildChunkOrderOffset(std::span<const uint32_t> ChunkCounts) +{ + std::vector<uint32_t> ChunkOffsets; + ChunkOffsets.reserve(ChunkCounts.size()); + uint32_t Offset = 0; + for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++) + { + ChunkOffsets.push_back(Offset); + Offset += ChunkCounts[SequenceIndex]; + } + return ChunkOffsets; +} + ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, WorkerThreadPool& WorkerPool, @@ -979,6 +1013,8 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size()); Result.ChunkHashToChunkIndex.reserve(ChunkCount); size_t RangeOffset = 0; + Result.ChunkSequenceLocationOffset.reserve(ChunkCount); + Result.ChunkSequenceLocationCounts.reserve(ChunkCount); for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) { Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); @@ -1021,4 +1057,429 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) return Result; } +void +ValidateChunkedFolderContent(const ChunkedFolderContent& Content, + std::span<const ChunkBlockDescription> BlockDescriptions, + std::span<const IoHash> LooseChunks) +{ + size_t TotalKnownChunkCount = LooseChunks.size(); + for (const ChunkBlockDescription& BlockDescription : BlockDescriptions) + { + TotalKnownChunkCount += BlockDescription.ChunkRawHashes.size(); + } + + tsl::robin_set<IoHash, IoHash::Hasher> KnownChunks; + KnownChunks.reserve(TotalKnownChunkCount); + KnownChunks.insert(LooseChunks.begin(), LooseChunks.end()); + for (const ChunkBlockDescription& BlockDescription : BlockDescriptions) + { + KnownChunks.insert(BlockDescription.ChunkRawHashes.begin(), BlockDescription.ChunkRawHashes.end()); + } + + std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes); + std::vector<size_t> SequenceUseCount(Content.ChunkedContent.SequenceRawHashes.size(), 0); + std::vector<size_t> ChunkUseCount(Content.ChunkedContent.ChunkHashes.size(), 0); + for (size_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + const std::filesystem::path& Path = Content.Paths[PathIndex]; + if (Path.empty()) + { + throw std::runtime_error("Chunked folder content contains empty path"); + } + const uint64_t RawSize = Content.RawSizes[PathIndex]; + const IoHash RawHash = Content.RawHashes[PathIndex]; + if (RawSize > 0) + { + if (auto It = SequenceIndexLookup.find(RawHash); It != SequenceIndexLookup.end()) + { + const uint32_t SourceSequenceIndex = It->second; + SequenceUseCount[SourceSequenceIndex]++; + const uint32_t ChunkOrderOffset = ChunkOrderOffsets[SourceSequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex]; + + std::span<const uint32_t> ChunkIndexes = + std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount); + + IoHashStream Hasher; + uint64_t SizeSum = 0; + for (uint32_t ChunkIndex : ChunkIndexes) + { + ChunkUseCount[ChunkIndex]++; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (ChunkSize == 0) + { + throw std::runtime_error("Chunked folder content contains zero size chunk"); + } + const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + if (ChunkRawHash == IoHash::Zero) + { + throw std::runtime_error("Chunked folder content contains zero chunk hash"); + } + if (!KnownChunks.contains(ChunkRawHash)) + { + throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash)); + } + SizeSum += ChunkSize; + } + if (SizeSum != RawSize) + { + throw std::runtime_error( + fmt::format("Chunked folder content sequence size {} does not match expected size '{}'", SizeSum, RawSize)); + } + } + else + { + throw std::runtime_error(fmt::format("Chunked folder content references unknown sequence hash '{}'", RawHash)); + } + } + else + { + if (RawHash != IoHash::Zero) + { + throw std::runtime_error( + fmt::format("Chunked folder content references zero size sequence with non-zero hash '{}'", RawHash)); + } + } + } + + for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++) + { + if (SequenceUseCount[SequenceIndex] == 0) + { + throw std::runtime_error( + fmt::format("Chunked folder has unused sequence '{}'", Content.ChunkedContent.SequenceRawHashes[SequenceIndex])); + } + } + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++) + { + if (ChunkUseCount[ChunkIndex] == 0) + { + throw std::runtime_error(fmt::format("Chunked folder has unused chunk '{}'", Content.ChunkedContent.ChunkHashes[ChunkIndex])); + } + } +} + +void +InlineRemoveUnusedHashes(std::vector<IoHash>& InOutHashes, std::span<const IoHash> UsedHashes) +{ + tsl::robin_set<IoHash, IoHash::Hasher> UsedChunkHashes; + UsedChunkHashes.reserve(UsedHashes.size()); + UsedChunkHashes.insert(UsedHashes.begin(), UsedHashes.end()); + for (auto It = InOutHashes.begin(); It != InOutHashes.end();) + { + if (!UsedChunkHashes.contains(*It)) + { + It = InOutHashes.erase(It); + } + else + { + It++; + } + } +} + +#if ZEN_WITH_TESTS + +void +chunkedcontent_forcelink() +{ +} + +namespace chunked_test_utils { + struct ChunkedFile + { + IoHash RawHash; + std::vector<IoHash> ChunkHashes; + std::vector<uint64_t> ChunkSizes; + std::vector<IoBuffer> Chunks; + }; + + ChunkedFile CreateChunkedFile(FastRandom& Random, size_t Size, size_t ChunkingSize) + { + size_t ChunkCount = (Size + (ChunkingSize - 1)) / ChunkingSize; + std::vector<IoHash> ChunkHashes; + std::vector<uint64_t> ChunkSizes; + std::vector<IoBuffer> Chunks; + ChunkHashes.reserve(ChunkCount); + ChunkSizes.reserve(ChunkCount); + + IoHashStream HashStream; + while (Size > 0) + { + size_t ChunkSize = Min(Size, ChunkingSize); + IoBuffer ChunkBuffer = CreateRandomBlob(Random, ChunkSize); + HashStream.Append(ChunkBuffer); + ChunkHashes.push_back(IoHash::HashBuffer(ChunkBuffer)); + ChunkSizes.push_back(ChunkSize); + Chunks.emplace_back(std::move(ChunkBuffer)); + Size -= ChunkSize; + } + return ChunkedFile{.RawHash = HashStream.GetHash(), + .ChunkHashes = std::move(ChunkHashes), + .ChunkSizes = std::move(ChunkSizes), + .Chunks = std::move(Chunks)}; + } + + ChunkedFolderContent CreateChunkedFolderContent(FastRandom& Random, + std::span<const std::pair<const std::string, uint64_t>> PathAndSizes, + uint64_t ChunkingSize, + std::vector<IoBuffer>& ChunkPayloads) + { + ChunkedFolderContent Result; + Result.Paths.reserve(PathAndSizes.size()); + Result.RawSizes.reserve(PathAndSizes.size()); + Result.Attributes.reserve(PathAndSizes.size()); + Result.RawHashes.reserve(PathAndSizes.size()); + ChunkPayloads.reserve(PathAndSizes.size()); + + tsl::robin_map<IoHash, uint32_t> SequenceToIndex; + tsl::robin_map<IoHash, uint32_t> ChunkToIndex; + for (size_t PathIndex = 0; PathIndex < PathAndSizes.size(); PathIndex++) + { + const std::string& Path = PathAndSizes[PathIndex].first; + const uint64_t Size = PathAndSizes[PathIndex].second; + + Result.Paths.push_back(Path); + Result.RawSizes.push_back(Size); + Result.Attributes.push_back(0); + + if (Size > 0) + { + ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize); + Result.RawHashes.push_back(File.RawHash); + + if (auto SequenceIt = SequenceToIndex.find(File.RawHash); SequenceIt == SequenceToIndex.end()) + { + SequenceToIndex.insert_or_assign(File.RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())); + Result.ChunkedContent.SequenceRawHashes.push_back(File.RawHash); + Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(File.ChunkHashes.size())); + for (size_t ChunkIndex = 0; ChunkIndex < File.ChunkHashes.size(); ChunkIndex++) + { + const IoHash& ChunkHash = File.ChunkHashes[ChunkIndex]; + if (auto ChunkIt = ChunkToIndex.find(ChunkHash); ChunkIt == ChunkToIndex.end()) + { + const uint32_t ChunkedContentChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkOrders.size()); + Result.ChunkedContent.ChunkOrders.push_back(gsl::narrow<uint32_t>(ChunkedContentChunkIndex)); + + Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); + Result.ChunkedContent.ChunkRawSizes.push_back(File.ChunkSizes[ChunkIndex]); + ChunkPayloads.push_back(std::move(File.Chunks[ChunkIndex])); + } + else + { + const uint32_t ChunkedContentChunkIndex = ChunkIt->second; + Result.ChunkedContent.ChunkOrders.push_back(ChunkedContentChunkIndex); + } + } + } + } + else + { + Result.RawHashes.push_back(IoHash::Zero); + } + } + return Result; + } +# if 0 + void ValidateChunkedFolderContent(const ChunkedFolderContent& Content, std::span<const IoBuffer> Chunks) + { + std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes); + std::vector<size_t> SequenceUseCount(Content.ChunkedContent.SequenceRawHashes.size(), 0); + std::vector<size_t> ChunkUseCount(Content.ChunkedContent.ChunkHashes.size(), 0); + for (size_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + const std::filesystem::path& Path = Content.Paths[PathIndex]; + ZEN_ASSERT(!Path.empty()); + const uint64_t RawSize = Content.RawSizes[PathIndex]; + const IoHash RawHash = Content.RawHashes[PathIndex]; + if (RawSize > 0) + { + if (auto It = SequenceIndexLookup.find(RawHash); It != SequenceIndexLookup.end()) + { + const uint32_t SourceSequenceIndex = It->second; + SequenceUseCount[SourceSequenceIndex]++; + const uint32_t ChunkOrderOffset = ChunkOrderOffsets[SourceSequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex]; + + std::span<const uint32_t> ChunkIndexes = + std::span<const uint32_t>(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount); + + IoHashStream Hasher; + uint64_t SizeSum = 0; + for (uint32_t ChunkIndex : ChunkIndexes) + { + ChunkUseCount[ChunkIndex]++; + const IoBuffer& ChunkBuffer = Chunks[ChunkIndex]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + SizeSum += ChunkSize; + CHECK_EQ(ChunkRawHash, IoHash::HashBuffer(ChunkBuffer)); + Hasher.Append(ChunkBuffer); + } + CHECK_EQ(RawHash, Hasher.GetHash()); + CHECK_EQ(SizeSum, RawSize); + } + else + { + CHECK(false); + } + } + else + { + CHECK(RawHash == IoHash::Zero); + } + } + + for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++) + { + CHECK(SequenceUseCount[SequenceIndex] > 0); + } + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++) + { + CHECK(ChunkUseCount[ChunkIndex] > 0); + } + } +# endif // 0 + std::vector<IoBuffer> GetChunkPayloads(std::span<const IoHash> BaseHashes, + std::span<const IoBuffer> BaseChunks, + std::span<const IoHash> OverlayHashes, + std::span<const IoBuffer> OverlayChunks, + std::span<const IoHash> WantedHashes) + { + std::vector<IoBuffer> Result; + Result.reserve(WantedHashes.size()); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseChunkLookup = BuildHashLookup(BaseHashes); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> OverlayChunkLookup = BuildHashLookup(OverlayHashes); + for (const IoHash& ChunkHash : WantedHashes) + { + if (auto It = BaseChunkLookup.find(ChunkHash); It != BaseChunkLookup.end()) + { + Result.push_back(BaseChunks[It->second]); + } + else if (It = OverlayChunkLookup.find(ChunkHash); It != OverlayChunkLookup.end()) + { + Result.push_back(OverlayChunks[It->second]); + } + else + { + CHECK(false); + } + } + return Result; + } + + tsl::robin_map<std::string, uint32_t> BuildPathLookup(std::span<const std::filesystem::path> Paths) + { + tsl::robin_map<std::string, uint32_t> Result; + Result.reserve(Paths.size()); + for (size_t Index = 0; Index < Paths.size(); Index++) + { + const std::filesystem::path& Path = Paths[Index]; + Result.insert_or_assign(Path.generic_string(), Index); + } + return Result; + } + + bool IncludePath(std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + const std::filesystem::path& Path) + { + const std::string PathString = Path.generic_string(); + bool IncludePath = true; + if (!IncludeWildcards.empty()) + { + IncludePath = false; + for (const std::string& IncludeWildcard : IncludeWildcards) + { + if (MatchWildcard(IncludeWildcard, PathString, /*CaseSensitive*/ false)) + { + IncludePath = true; + break; + } + } + if (!IncludePath) + { + return false; + } + } + for (const std::string& ExcludeWildcard : ExcludeWildcards) + { + if (MatchWildcard(ExcludeWildcard, PathString, /*CaseSensitive*/ false)) + { + return false; + } + } + return true; + } + +} // namespace chunked_test_utils + +TEST_CASE("DeletePathsFromContent") +{ + FastRandom BaseRandom; + + std::vector<IoBuffer> BaseChunks; + + const std::string BasePaths[11] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t BaseSizes[11] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + std::pair<const std::string, uint64_t> BasePathAndSizes[11] = {{BasePaths[0], BaseSizes[0]}, + {BasePaths[1], BaseSizes[1]}, + {BasePaths[2], BaseSizes[2]}, + {BasePaths[3], BaseSizes[3]}, + {BasePaths[4], BaseSizes[4]}, + {BasePaths[5], BaseSizes[5]}, + {BasePaths[6], BaseSizes[6]}, + {BasePaths[7], BaseSizes[7]}, + {BasePaths[8], BaseSizes[8]}, + {BasePaths[9], BaseSizes[9]}, + {BasePaths[10], BaseSizes[10]}}; + + ChunkedFolderContent Base = chunked_test_utils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks); + ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes); + + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BaseChunksLookup; + for (size_t Index = 0; Index < BaseChunks.size(); Index++) + { + BaseChunksLookup.insert_or_assign(Base.ChunkedContent.ChunkHashes[Index], Index); + } + + std::vector<std::string> IncludeWildcards; + std::vector<std::string> ExcludeWildcards = {"*.map*", "*.pdb*", "*optional*", "*Manifest_*", "*pakchunk10sm6*"}; + + std::vector<std::filesystem::path> DeletedPaths; + for (const std::filesystem::path& RemotePath : Base.Paths) + { + if (!chunked_test_utils::IncludePath(IncludeWildcards, ExcludeWildcards, RemotePath)) + { + DeletedPaths.push_back(RemotePath); + } + } + + ZEN_ASSERT(DeletedPaths.size() == 2); + + ChunkedFolderContent FilteredContent = DeletePathsFromChunkedContent(Base, DeletedPaths); + + std::vector<IoHash> FilteredChunks = Base.ChunkedContent.ChunkHashes; + + InlineRemoveUnusedHashes(FilteredChunks, FilteredContent.ChunkedContent.ChunkHashes); + + ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks); +} + +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index eb6ba19aa..227d877d8 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -196,4 +196,16 @@ GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& Ra return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex); } +struct ChunkBlockDescription; + +void ValidateChunkedFolderContent(const ChunkedFolderContent& Content, + std::span<const ChunkBlockDescription> BlockDescriptions, + std::span<const IoHash> LooseChunks); + +void InlineRemoveUnusedHashes(std::vector<IoHash>& InOutHashes, std::span<const IoHash> UsedHashes); + +#if ZEN_WITH_TESTS +void chunkedcontent_forcelink(); +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp index c019bc71d..03baf1cb8 100644 --- a/src/zenremotestore/zenremotestore.cpp +++ b/src/zenremotestore/zenremotestore.cpp @@ -2,6 +2,7 @@ #include <zenremotestore/zenremotestore.h> +#include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> #include <zenremotestore/projectstore/remoteprojectstore.h> @@ -13,6 +14,7 @@ void zenremotestore_forcelinktests() { chunkblock_forcelink(); + chunkedcontent_forcelink(); chunkedfile_forcelink(); remoteprojectstore_forcelink(); } |