// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if ZEN_WITH_TESTS # include # include # include #endif // ZEN_WITH_TESTS ZEN_THIRD_PARTY_INCLUDES_START #include #include ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS # include # include #endif // ZEN_WITH_TESTS namespace zen { using namespace std::literals; namespace { void AddChunkSequence(ChunkingStatistics& Stats, ChunkedContentData& InOutChunkedContent, tsl::robin_map& ChunkHashToChunkIndex, const IoHash& RawHash, std::span ChunkSequence, std::span ChunkHashes, std::span ChunkRawSizes) { ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); InOutChunkedContent.ChunkCounts.push_back(gsl::narrow(ChunkSequence.size())); for (uint32_t ChunkedSequenceIndex : ChunkSequence) { const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex]; if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) { uint32_t ChunkIndex = gsl::narrow(It->second); InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); } else { uint32_t ChunkIndex = gsl::narrow(InOutChunkedContent.ChunkHashes.size()); ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex); InOutChunkedContent.ChunkHashes.push_back(ChunkHash); InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]); InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); Stats.UniqueChunksFound++; Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex]; } } InOutChunkedContent.SequenceRawHashes.push_back(RawHash); Stats.UniqueSequencesFound++; } void AddChunkSequence(ChunkingStatistics& Stats, ChunkedContentData& InOutChunkedContent, tsl::robin_map& ChunkHashToChunkIndex, const IoHash& RawHash, const uint64_t RawSize) { InOutChunkedContent.ChunkCounts.push_back(1); if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end()) { uint32_t ChunkIndex = 
gsl::narrow(It->second);
            InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
        }
        else
        {
            // First occurrence of this hash - register it as a new chunk.
            uint32_t ChunkIndex = gsl::narrow(InOutChunkedContent.ChunkHashes.size());
            ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex);
            InOutChunkedContent.ChunkHashes.push_back(RawHash);
            InOutChunkedContent.ChunkRawSizes.push_back(RawSize);
            InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
            Stats.UniqueChunksFound++;
            Stats.UniqueBytesFound += RawSize;
        }
        InOutChunkedContent.SequenceRawHashes.push_back(RawHash);
        Stats.UniqueSequencesFound++;
    }

    // Hashes (and, when the chunking controller handles it, chunks) the file at
    // OutChunkedContent.Paths[PathIndex] below FolderPath and returns its raw hash.
    // - Zero-sized files return IoHash::Zero without touching the cache or the statistics.
    // - Results are fetched from / stored in InChunkingCache keyed on full path, size and modification tick.
    // - The first time a raw hash is seen its chunk sequence is added to OutChunkedContent under Lock.
    // Throws std::runtime_error when the file opened for plain hashing does not have the expected size.
    IoHash HashOneFile(ChunkingStatistics& Stats,
                       const ChunkingController& InChunkingController,
                       ChunkingCache& InChunkingCache,
                       std::span ModificationTicks,
                       ChunkedFolderContent& OutChunkedContent,
                       tsl::robin_map& ChunkHashToChunkIndex,
                       tsl::robin_map& RawHashToSequenceRawHashIndex,
                       RwLock& Lock,
                       const std::filesystem::path& FolderPath,
                       uint32_t PathIndex,
                       std::atomic& AbortFlag)
    {
        ZEN_TRACE_CPU("HashOneFile");

        const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
        const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
        const uint64_t ModificationTick = ModificationTicks[PathIndex];

        if (RawSize == 0)
        {
            return IoHash::Zero;
        }
        else
        {
            std::filesystem::path FullPath = FolderPath / Path;
            FullPath.make_preferred();

            ChunkedInfoWithSource Chunked;

            if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked))
            {
                const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag);
                if (!DidChunking)
                {
                    // The controller did not chunk the file - hash the whole file as a single unit.
                    ZEN_TRACE_CPU("HashOnly");

                    IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath);
                    if (Buffer.GetSize() != RawSize)
                    {
                        throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
                    }

                    Chunked.Info.RawSize = RawSize;
                    Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
                }
                if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked))
                {
                    Stats.FilesStoredInCache++;
                    // An empty ChunkSequence means the file is stored as one chunk.
                    Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size();
                    Stats.BytesStoredInCache += RawSize;
                }
            }
            else
            {
                Stats.FilesFoundInCache++;
                Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size();
                Stats.BytesFoundInCache += RawSize;
            }

            Lock.WithExclusiveLock([&]() {
                // Only the first file with a given raw hash contributes a sequence.
                if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
                {
                    RawHashToSequenceRawHashIndex.insert(
                        {Chunked.Info.RawHash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});

                    // NOTE(review): both AddChunkSequence overloads already increment Stats.UniqueSequencesFound,
                    // so the increments below advance the counter more than once per unique sequence
                    // (three times in the single-chunk branch, twice otherwise) - confirm this is intended.
                    if (Chunked.Info.ChunkSequence.empty())
                    {
                        AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize);
                        Stats.UniqueSequencesFound++;
                    }
                    else
                    {
                        // Collect the size of every chunk source to pass alongside the chunk hashes.
                        std::vector ChunkSizes;
                        ChunkSizes.reserve(Chunked.ChunkSources.size());
                        for (const ChunkSource& Source : Chunked.ChunkSources)
                        {
                            ChunkSizes.push_back(Source.Size);
                        }
                        OutChunkedContent.ChunkedContent.ChunkOrders.reserve(OutChunkedContent.ChunkedContent.ChunkOrders.size() +
                                                                             Chunked.Info.ChunkSequence.size());
                        AddChunkSequence(Stats,
                                         OutChunkedContent.ChunkedContent,
                                         ChunkHashToChunkIndex,
                                         Chunked.Info.RawHash,
                                         Chunked.Info.ChunkSequence,
                                         Chunked.Info.ChunkHashes,
                                         ChunkSizes);
                    }
                    Stats.UniqueSequencesFound++;
                }
            });
            Stats.FilesChunked++;
            return Chunked.Info.RawHash;
        }
    }

    // Key used for path comparisons: the lower-cased generic (forward slash) form of Path.
    std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); }

}  // namespace

// Display names indexed by the numeric value of SourcePlatform.
std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv};

// Returns the display name of Platform.
std::string_view
ToString(SourcePlatform Platform)
{
    return FolderContentSourcePlatformNames[(size_t)Platform];
}

// Parses a platform display name (exact match); returns Default when the name is not recognised.
SourcePlatform
FromString(std::string_view Platform, SourcePlatform Default)
{
    for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++)
    {
        if (Platform == FolderContentSourcePlatformNames[Index])
        {
            return (SourcePlatform)Index;
        }
    }
    return Default;
}

// Returns the SourcePlatform value selected by the ZEN_PLATFORM_* build macros.
SourcePlatform
GetSourceCurrentPlatform()
{
#if ZEN_PLATFORM_WINDOWS
    return
SourcePlatform::Windows; #endif #if ZEN_PLATFORM_MAC return SourcePlatform::MacOS; #endif #if ZEN_PLATFORM_LINUX return SourcePlatform::Linux; #endif } bool FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs) { #if ZEN_PLATFORM_WINDOWS return (Lhs & 0xff) == (Rhs & 0xff); #endif #if ZEN_PLATFORM_MAC return Lhs == Rhs; #endif #if ZEN_PLATFORM_LINUX return Lhs == Rhs; #endif } bool FolderContent::operator==(const FolderContent& Rhs) const { if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) { size_t PathCount = 0; for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) { if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) { return false; } } return true; } return false; } bool FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const { ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual"); tsl::robin_map RhsPathToIndex; const size_t RhsPathCount = Rhs.Paths.size(); RhsPathToIndex.reserve(RhsPathCount); for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) { RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); } const size_t PathCount = Paths.size(); for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) { if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) { const size_t RhsPathIndex = It->second; if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) || (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) { return false; } } else { return false; } } return true; } void FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutPathIndexesOufOfDate) { ZEN_TRACE_CPU("FolderContent::UpdateState"); tsl::robin_map RhsPathToIndex; const uint32_t RhsPathCount = 
gsl::narrow(Rhs.Paths.size());
    // Index Rhs by generic path string for the lookups below.
    RhsPathToIndex.reserve(RhsPathCount);
    for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
    {
        RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex});
    }
    uint32_t PathCount = gsl::narrow(Paths.size());
    // PathIndex only advances when the entry is kept; an erased entry is replaced by its successor.
    for (uint32_t PathIndex = 0; PathIndex < PathCount;)
    {
        if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end())
        {
            const uint32_t RhsPathIndex = It->second;
            if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) ||
                (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex]))
            {
                // Size or timestamp changed - adopt the values from Rhs and report the entry.
                RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex];
                ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex];
                OutPathIndexesOufOfDate.push_back(PathIndex);
            }
            // Attributes are always taken from Rhs and never mark the entry as out of date.
            Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex];
            PathIndex++;
        }
        else
        {
            // Path no longer present in Rhs - drop it from all parallel arrays.
            // Erasing at the current position leaves the indexes reported so far (all lower) valid.
            Paths.erase(Paths.begin() + PathIndex);
            RawSizes.erase(RawSizes.begin() + PathIndex);
            Attributes.erase(Attributes.begin() + PathIndex);
            ModificationTicks.erase(ModificationTicks.begin() + PathIndex);
            PathCount--;
        }
    }
}

// Compares Old against New, matching entries on generic path string.
// Returns the New entries whose size or modification tick differs from the Old entry with the same
// path; Old paths that do not exist in New are appended to OutDeletedPaths.
// Paths that exist only in New are not part of the result (the loop below walks Old only).
FolderContent
GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector& OutDeletedPaths)
{
    ZEN_TRACE_CPU("FolderContent::GetUpdatedContent");

    const uint32_t NewPathCount = gsl::narrow(New.Paths.size());

    FolderContent Result = {.Platform = Old.Platform};
    Result.Paths.reserve(NewPathCount);
    Result.RawSizes.reserve(NewPathCount);
    Result.Attributes.reserve(NewPathCount);
    Result.ModificationTicks.reserve(NewPathCount);

    tsl::robin_map NewPathToIndex;
    NewPathToIndex.reserve(NewPathCount);
    for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++)
    {
        NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex});
    }

    uint32_t OldPathCount = gsl::narrow(Old.Paths.size());
    for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++)
    {
        if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end())
        {
            const uint32_t
NewPathIndex = It->second;
            if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) ||
                (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex]))
            {
                // Changed file - report the New state.
                Result.Paths.push_back(New.Paths[NewPathIndex]);
                Result.RawSizes.push_back(New.RawSizes[NewPathIndex]);
                Result.Attributes.push_back(New.Attributes[NewPathIndex]);
                Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]);
            }
        }
        else
        {
            OutDeletedPaths.push_back(Old.Paths[OldPathIndex]);
        }
    }
    return Result;
}

// Writes Content to Output as the fields "platform", "paths", "rawSizes", "attributes" and
// "modificationTimes".
void
SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output)
{
    ZEN_TRACE_CPU("SaveFolderContentToCompactBinary");
    Output.AddString("platform"sv, ToString(Content.Platform));
    compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output);
    compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output);
    compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output);
    compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output);
}

// Reads a FolderContent from the fields written by SaveFolderContentToCompactBinary.
// An unrecognised platform string falls back to GetSourceCurrentPlatform().
FolderContent
LoadFolderContentToCompactBinary(CbObjectView Input)
{
    ZEN_TRACE_CPU("LoadFolderContentToCompactBinary");
    FolderContent Content;
    Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
    Content.Paths = compactbinary_helpers::ReadArray("paths"sv, Input);
    Content.RawSizes = compactbinary_helpers::ReadArray("rawSizes"sv, Input);
    Content.Attributes = compactbinary_helpers::ReadArray("attributes"sv, Input);
    Content.ModificationTicks = compactbinary_helpers::ReadArray("modificationTimes"sv, Input);
    return Content;
}

// Recursively scans RootPath via GetDirectoryContent on WorkerPool and returns the accepted files,
// sorted by parent directory and then by file name.
// AcceptDirectory / AcceptFile filter what is included, UpdateCallback is invoked every time a wait of
// UpdateIntervalMS on the pending work times out, and Stats receives counters plus the elapsed wall time.
FolderContent
GetFolderContent(GetFolderContentStatistics& Stats,
                 const std::filesystem::path& RootPath,
                 std::function&& AcceptDirectory,
                 std::function&& AcceptFile,
                 WorkerThreadPool& WorkerPool,
                 int32_t UpdateIntervalMS,
                 std::function&& UpdateCallback,
                 std::atomic& AbortFlag)
{
    ZEN_TRACE_CPU("GetFolderContent");

    Stopwatch Timer;
    // Records the elapsed time on every exit path.
    auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });

    FolderContent Content;
    // Visitor receiving one callback per scanned directory; accepted files are gathered locally
    // and appended to the shared FolderContent under m_Lock.
    struct AsyncVisitor : public GetDirectoryContentVisitor
    {
        AsyncVisitor(GetFolderContentStatistics& Stats,
                     std::atomic& AbortFlag,
                     FolderContent& Content,
                     std::function&& AcceptDirectory,
                     std::function&& AcceptFile)
        : m_Stats(Stats)
        , m_AbortFlag(AbortFlag)
        , m_FoundContent(Content)
        , m_AcceptDirectory(std::move(AcceptDirectory))
        , m_AcceptFile(std::move(AcceptFile))
        {
        }
        virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override
        {
            if (!m_AbortFlag)
            {
                // "Found" statistics count every file, whether or not it is accepted below.
                m_Stats.FoundFileCount += Content.FileNames.size();
                for (uint64_t FileSize : Content.FileSizes)
                {
                    m_Stats.FoundFileByteCount += FileSize;
                }
                std::string RelativeDirectoryPath = RelativeRoot.generic_string();
                if (m_AcceptDirectory(RelativeDirectoryPath))
                {
                    std::vector Paths;
                    std::vector RawSizes;
                    std::vector Attributes;
                    std::vector ModificatonTicks;
                    Paths.reserve(Content.FileNames.size());
                    RawSizes.reserve(Content.FileNames.size());
                    Attributes.reserve(Content.FileNames.size());
                    ModificatonTicks.reserve(Content.FileModificationTicks.size());

                    for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++)
                    {
                        const std::filesystem::path& FileName = Content.FileNames[FileIndex];
                        std::string RelativePath = (RelativeRoot / FileName).generic_string();
                        // Normalise any remaining backslashes to forward slashes.
                        std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/');
                        if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex]))
                        {
                            Paths.emplace_back(std::move(RelativePath));
                            RawSizes.emplace_back(Content.FileSizes[FileIndex]);
                            Attributes.emplace_back(Content.FileAttributes[FileIndex]);
                            ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]);

                            m_Stats.AcceptedFileCount++;
                            m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex];
                        }
                    }
                    // Publish this directory's results in one locked step.
                    m_Lock.WithExclusiveLock([&]() {
                        m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end());
                        m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end());
                        m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end());
                        m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(),
                                                                ModificatonTicks.begin(),
                                                                ModificatonTicks.end());
                    });
                }
            }
        }

        GetFolderContentStatistics& m_Stats;
        std::atomic& m_AbortFlag;
        RwLock m_Lock;
        FolderContent& m_FoundContent;
        std::function m_AcceptDirectory;
        std::function m_AcceptFile;
    } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile));

    // The latch starts at 1; that initial count is released right after the scan is kicked off.
    Latch PendingWork(1);
    GetDirectoryContent(RootPath,
                        DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes |
                            DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick,
                        Visitor,
                        WorkerPool,
                        PendingWork);
    PendingWork.CountDown();
    while (!PendingWork.Wait(UpdateIntervalMS))
    {
        UpdateCallback(AbortFlag.load(), PendingWork.Remaining());
    }

    // Build a permutation that sorts the entries by (parent directory, file name).
    std::vector Order;
    size_t PathCount = Content.Paths.size();
    Order.resize(Content.Paths.size());
    std::vector Parents;
    Parents.reserve(PathCount);
    std::vector Filenames;
    Filenames.reserve(PathCount);
    for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++)
    {
        Order[OrderIndex] = OrderIndex;
        Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string());
        Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string());
    }
    std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) {
        const std::string& LhsParent = Parents[Lhs];
        const std::string& RhsParent = Parents[Rhs];
        if (LhsParent < RhsParent)
        {
            return true;
        }
        else if (LhsParent > RhsParent)
        {
            return false;
        }
        return Filenames[Lhs] < Filenames[Rhs];
    });
    // Apply the permutation to all parallel arrays.
    FolderContent OrderedContent;
    OrderedContent.Paths.reserve(PathCount);
    OrderedContent.RawSizes.reserve(PathCount);
    OrderedContent.Attributes.reserve(PathCount);
    OrderedContent.ModificationTicks.reserve(PathCount);
    for (size_t OrderIndex : Order)
    {
        OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex]));
        OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]);
        OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]);
        OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]);
    }
    return OrderedContent;
}

// Checks which of PathsToCheck (relative to Path) exist on disk and returns their current size,
// attributes and modification tick. Each distinct parent directory is scanned once as a task on
// WorkerPool; ProgressCallback, when set, receives (directory count, completed directory count).
FolderContent
GetValidFolderContent(WorkerThreadPool& WorkerPool,
                      GetFolderContentStatistics& FolderScanStats,
                      const std::filesystem::path& Path,
                      std::span PathsToCheck,
                      std::function&& ProgressCallback,
                      uint32_t ProgressUpdateDelayMS,
                      std::atomic& AbortFlag,
                      std::atomic& PauseFlag)
{
    ZEN_TRACE_CPU("GetValidFolderContent");

    FolderContent Result;
    const uint32_t PathCount = gsl::narrow(PathsToCheck.size());

    // One slot per requested path; slots that stay empty are compacted away at the end.
    Result.Paths.resize(PathCount);
    Result.RawSizes.resize(PathCount);
    Result.Attributes.resize(PathCount);
    Result.ModificationTicks.resize(PathCount);

    {
        Stopwatch Timer;
        auto _ = MakeGuard([&FolderScanStats, &Timer]() { FolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });

        // Maps the generic full path of every requested file to its index in PathsToCheck.
        tsl::robin_map PathToPathIndex;
        PathToPathIndex.reserve(PathsToCheck.size());

        std::vector DirectoriesToScan;
        {
            // Collect each parent directory exactly once.
            tsl::robin_set DirectoriesFound;
            for (size_t PathIndex = 0; PathIndex < PathsToCheck.size(); PathIndex++)
            {
                const std::filesystem::path PathToCheck = (Path / PathsToCheck[PathIndex]);
                const std::string LookupPath = PathToCheck.generic_string();
                PathToPathIndex.insert_or_assign(LookupPath, PathIndex);

                std::filesystem::path ParentDirectoryPath = PathToCheck.parent_path();
                const std::string Directory = ParentDirectoryPath.generic_string();
                if (DirectoriesFound.insert(Directory).second)
                {
                    DirectoriesToScan.push_back(ParentDirectoryPath.make_preferred());
                }
            }
        }

        ParallelWork Work(AbortFlag,
                          PauseFlag,
                          ProgressCallback ?
WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog);

        std::atomic CompletedDirectoryCount = 0;

        // Schedule one scan task per directory.
        for (size_t DirectoryIndex = 0; DirectoryIndex < DirectoriesToScan.size(); DirectoryIndex++)
        {
            Work.ScheduleWork(
                WorkerPool,
                [&DirectoriesToScan, DirectoryIndex, &Result, &FolderScanStats, &PathsToCheck, &PathToPathIndex, &CompletedDirectoryCount](
                    std::atomic& AbortFlag) {
                    if (!AbortFlag)
                    {
                        ZEN_TRACE_CPU("GetValidFolderContent_ScanDirectory");

                        const std::filesystem::path ParentDirectoryPath = DirectoriesToScan[DirectoryIndex];
                        try
                        {
                            if (IsDir(ParentDirectoryPath))
                            {
                                DirectoryContent DirContent;
                                GetDirectoryContent(ParentDirectoryPath,
                                                    DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes |
                                                        DirectoryContentFlags::IncludeModificationTick |
                                                        DirectoryContentFlags::IncludeAttributes,
                                                    DirContent);

                                for (size_t FoundIndex = 0; FoundIndex < DirContent.Files.size(); FoundIndex++)
                                {
                                    const std::filesystem::path& FoundPath = DirContent.Files[FoundIndex];
                                    // Only files that were asked for are recorded; each one owns its own result slot.
                                    if (auto It = PathToPathIndex.find(FoundPath.generic_string()); It != PathToPathIndex.end())
                                    {
                                        const size_t PathIndex = It->second;
                                        Result.Paths[PathIndex] = PathsToCheck[PathIndex];
                                        Result.RawSizes[PathIndex] = DirContent.FileSizes[FoundIndex];
                                        Result.ModificationTicks[PathIndex] = DirContent.FileModificationTicks[FoundIndex];
                                        Result.Attributes[PathIndex] = DirContent.FileAttributes[FoundIndex];

                                        FolderScanStats.FoundFileCount++;
                                        FolderScanStats.FoundFileByteCount += Result.RawSizes[PathIndex];
                                        FolderScanStats.AcceptedFileCount++;
                                        FolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathIndex];
                                    }
                                }
                            }
                        }
                        catch (const std::exception& Ex)
                        {
                            // A failing directory is logged and skipped; its files simply stay absent from the result.
                            ZEN_WARN("Failed checking content of folder '{}', reason: {}", ParentDirectoryPath, Ex.what());
                        }
                    }
                    // Counted even when the scan was skipped due to abort.
                    CompletedDirectoryCount++;
                });
        }
        Work.Wait(ProgressUpdateDelayMS, [&](bool, bool, ptrdiff_t) {
            if (ProgressCallback)
            {
                ProgressCallback(DirectoriesToScan.size(), CompletedDirectoryCount.load());
            }
        });
    }

    // Compact the result in place: keep only slots whose path was filled in, preserving order.
    uint32_t WritePathIndex = 0;
    for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++)
    {
        if (!Result.Paths[ReadPathIndex].empty())
        {
            if (WritePathIndex < ReadPathIndex)
            {
                Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]);
                Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex];
                Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex];
                Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex];
            }
            WritePathIndex++;
        }
    }

    Result.Paths.resize(WritePathIndex);
    Result.RawSizes.resize(WritePathIndex);
    Result.Attributes.resize(WritePathIndex);
    Result.ModificationTicks.resize(WritePathIndex);
    return Result;
}

// Writes Content to Output: the per-path arrays at the top level and the chunk tables inside a
// nested "chunkedContent" object.
void
SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output)
{
    ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary");
    Output.AddString("platform"sv, ToString(Content.Platform));
    compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output);
    compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output);
    compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output);
    compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output);

    Output.BeginObject("chunkedContent");
    compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output);
    compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output);
    compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output);
    compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output);
    compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output);
    Output.EndObject();  // chunkedContent
}

// Reads a ChunkedFolderContent from the layout written by SaveChunkedFolderContentToCompactBinary.
ChunkedFolderContent
LoadChunkedFolderContentFromCompactBinary(CbObjectView Input)
{
    ZEN_TRACE_CPU("LoadChunkedFolderContentFromCompactBinary");

    ChunkedFolderContent Content;
    // An unrecognised platform string falls back to the current platform.
    Content.Platform = FromString(Input["platform"sv].AsString(),
GetSourceCurrentPlatform());
    Content.Paths = compactbinary_helpers::ReadArray("paths"sv, Input);
    Content.RawSizes = compactbinary_helpers::ReadArray("rawSizes"sv, Input);
    Content.Attributes = compactbinary_helpers::ReadArray("attributes"sv, Input);
    Content.RawHashes = compactbinary_helpers::ReadArray("rawHashes"sv, Input);

    CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView();
    Content.ChunkedContent.SequenceRawHashes = compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView);
    Content.ChunkedContent.ChunkCounts = compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView);
    Content.ChunkedContent.ChunkOrders = compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView);
    Content.ChunkedContent.ChunkHashes = compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView);
    Content.ChunkedContent.ChunkRawSizes = compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView);
    return Content;
}

// Merges Base with Overlays (applied in order). A path is taken from the last layer that contains it,
// compared via PathCompareString (lower-cased generic path); sequences and chunks are de-duplicated
// across all layers. Overlays must not be empty.
// NOTE(review): Result.Platform is never assigned in this function, unlike the other builders in this
// file that start from {.Platform = ...} - confirm the default value is what callers expect.
ChunkedFolderContent
MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays)
{
    ZEN_TRACE_CPU("MergeChunkedFolderContents");

    ZEN_ASSERT(!Overlays.empty());

    ChunkedFolderContent Result;
    const size_t BasePathCount = Base.Paths.size();
    Result.Paths.reserve(BasePathCount);
    Result.RawSizes.reserve(BasePathCount);
    Result.Attributes.reserve(BasePathCount);
    Result.RawHashes.reserve(BasePathCount);

    const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size();
    Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size());
    Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount);
    Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
    Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
    Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size());

    // Maps each comparison key to the path spelling used in the result. insert() does not overwrite,
    // so the first layer that contains a path (Base first, then overlays in order) decides the spelling.
    tsl::robin_map GenericPathToActualPath;
    for (const std::filesystem::path& Path : Base.Paths)
    {
        GenericPathToActualPath.insert({PathCompareString(Path), Path});
    }
    for (const ChunkedFolderContent& Overlay : Overlays)
    {
        for (const std::filesystem::path& Path : Overlay.Paths)
        {
            GenericPathToActualPath.insert({PathCompareString(Path), Path});
        }
    }

    tsl::robin_map RawHashToSequenceRawHashIndex;

    // Collects the comparison keys of every path contained in the given layers.
    auto BuildOverlayPaths = [](std::span Overlays) -> tsl::robin_set {
        tsl::robin_set Result;
        for (const ChunkedFolderContent& OverlayContent : Overlays)
        {
            for (const std::filesystem::path& Path : OverlayContent.Paths)
            {
                Result.insert(PathCompareString(Path));
            }
        }
        return Result;
    };

    // Adds every path of OverlayContent that is not overridden by one of the later layers passed in
    // Overlays, followed by the chunk sequences those paths introduce.
    auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result,
                                           const ChunkedFolderContent& OverlayContent,
                                           tsl::robin_map& ChunkHashToChunkIndex,
                                           tsl::robin_map& RawHashToSequenceRawHashIndex,
                                           const tsl::robin_map& GenericPathToActualPath,
                                           std::span Overlays) {
        const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent);
        tsl::robin_set BaseOverlayPaths = BuildOverlayPaths(Overlays);

        // Sequence queued for insertion once the required capacity is known.
        struct SequenceToAdd
        {
            IoHash RawHash;
            std::span OriginalChunkOrder;
        };

        std::vector SequencesToAdd;
        size_t ChunkCountsAdded = 0;
        size_t ChunkOrdersAdded = 0;
        ChunkingStatistics Stats;  // local scratch statistics, discarded when the lambda returns

        for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++)
        {
            std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]);
            if (!BaseOverlayPaths.contains(GenericPath))
            {
                // This asset will not be overridden by a later layer - add it

                const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath);
                Result.Paths.push_back(OriginalPath);
                const IoHash& RawHash = OverlayContent.RawHashes[PathIndex];
                Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]);
                Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]);
                Result.RawHashes.push_back(RawHash);

                // Zero-sized files have no sequence.
                if (OverlayContent.RawSizes[PathIndex] > 0)
                {
                    if (!RawHashToSequenceRawHashIndex.contains(RawHash))
                    {
                        RawHashToSequenceRawHashIndex.insert(
                            {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())});
                        // Locate the chunk order slice of this sequence in the source layer.
                        const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash);
                        const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
                        const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
                        std::span OriginalChunkOrder =
                            std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
                        SequencesToAdd.push_back(SequenceToAdd{.RawHash = RawHash, .OriginalChunkOrder = OriginalChunkOrder});
                        ChunkCountsAdded++;
                        ChunkOrdersAdded += OriginalChunkOrder.size();
                        Stats.UniqueSequencesFound++;
                    }
                }
            }
        }
        // Reserve once, then append all queued sequences.
        Result.ChunkedContent.ChunkCounts.reserve(Result.ChunkedContent.ChunkCounts.size() + ChunkCountsAdded);
        Result.ChunkedContent.ChunkOrders.reserve(Result.ChunkedContent.ChunkOrders.size() + ChunkOrdersAdded);
        for (SequenceToAdd& NewSequence : SequencesToAdd)
        {
            AddChunkSequence(Stats,
                             Result.ChunkedContent,
                             ChunkHashToChunkIndex,
                             NewSequence.RawHash,
                             NewSequence.OriginalChunkOrder,
                             OverlayContent.ChunkedContent.ChunkHashes,
                             OverlayContent.ChunkedContent.ChunkRawSizes);
        }
    };

    tsl::robin_map MergedChunkHashToChunkIndex;
    // Base is filtered against all overlays; each overlay only against the overlays that follow it.
    AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays);
    for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++)
    {
        AddContent(Result,
                   Overlays[OverlayIndex],
                   MergedChunkHashToChunkIndex,
                   RawHashToSequenceRawHashIndex,
                   GenericPathToActualPath,
                   Overlays.subspan(OverlayIndex + 1));
    }
    return Result;
}

// Returns a copy of BaseContent without DeletedPaths (matched via PathCompareString), keeping only
// the sequences and chunks still referenced by the remaining paths.
// RawHashToSequenceIndex / SequenceIndexChunkOrderOffset are lookups over BaseContent's sequences.
// When every path is deleted only the platform is carried over.
ChunkedFolderContent
DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
                              const tsl::robin_map& RawHashToSequenceIndex,
                              std::vector SequenceIndexChunkOrderOffset,
                              std::span DeletedPaths)
{
    ZEN_TRACE_CPU("DeletePathsFromChunkedContent");

    ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
    ChunkedFolderContent Result = {.Platform = BaseContent.Platform};
    if (DeletedPaths.size() < BaseContent.Paths.size())
    {
        tsl::robin_set DeletedPathSet;
        DeletedPathSet.reserve(DeletedPaths.size());
        for
(const std::filesystem::path& DeletedPath : DeletedPaths)
        {
            DeletedPathSet.insert(PathCompareString(DeletedPath));
        }

        const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size();
        // Old chunk index -> new chunk index; (uint32_t)-1 marks a chunk that has not been copied yet.
        std::vector NewChunkIndexes(BaseChunkCount, (uint32_t)-1);

        const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size();
        Result.Paths.reserve(ExpectedPathCount);
        Result.RawSizes.reserve(ExpectedPathCount);
        Result.Attributes.reserve(ExpectedPathCount);
        Result.RawHashes.reserve(ExpectedPathCount);

        Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
        Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
        Result.ChunkedContent.ChunkOrders.reserve(BaseChunkCount);

        tsl::robin_map RawHashToSequenceRawHashIndex;
        RawHashToSequenceRawHashIndex.reserve(ExpectedPathCount);
        Result.ChunkedContent.SequenceRawHashes.reserve(ExpectedPathCount);
        for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++)
        {
            const std::filesystem::path& Path = BaseContent.Paths[PathIndex];
            if (!DeletedPathSet.contains(PathCompareString(Path)))
            {
                const IoHash& RawHash = BaseContent.RawHashes[PathIndex];
                const uint64_t RawSize = BaseContent.RawSizes[PathIndex];
                Result.Paths.push_back(Path);
                Result.RawSizes.push_back(RawSize);
                Result.Attributes.push_back(BaseContent.Attributes[PathIndex]);
                Result.RawHashes.push_back(RawHash);
                // Zero-sized files have no sequence.
                if (RawSize > 0)
                {
                    // Copy each surviving sequence only once, even if several paths share it.
                    if (!RawHashToSequenceRawHashIndex.contains(RawHash))
                    {
                        RawHashToSequenceRawHashIndex.insert(
                            {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())});
                        const uint32_t SequenceRawHashIndex = RawHashToSequenceIndex.at(RawHash);
                        const uint32_t OrderIndexOffset = SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
                        const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];

                        std::span OriginalChunkOrder =
                            std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);

                        Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow(OriginalChunkOrder.size()));

                        for (uint32_t OldChunkIndex : OriginalChunkOrder)
                        {
                            if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1)
                            {
                                // Chunk already copied - reference its new index.
                                Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex);
                            }
                            else
                            {
                                // First use of this chunk - copy it and remember the remapped index.
                                const uint32_t NewChunkIndex = gsl::narrow(Result.ChunkedContent.ChunkHashes.size());
                                NewChunkIndexes[OldChunkIndex] = NewChunkIndex;
                                const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex];
                                const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex];
                                Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
                                Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize);
                                Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
                            }
                        }
                        Result.ChunkedContent.SequenceRawHashes.push_back(RawHash);
                    }
                }
            }
        }
    }
    return Result;
}

// Convenience overload that builds the sequence lookups from BaseContent itself.
// Returns a default-constructed content when every path is deleted.
ChunkedFolderContent
DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span DeletedPaths)
{
    ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
    ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
    if (DeletedPaths.size() == BaseContent.Paths.size())
    {
        return {};
    }

    tsl::robin_map BaseSequenceHashToSequenceIndex = BuildHashLookup(BaseContent.ChunkedContent.SequenceRawHashes);
    std::vector BaseSequenceChunkOrderOffset = BuildChunkOrderOffset(BaseContent.ChunkedContent.ChunkCounts);

    return DeletePathsFromChunkedContent(BaseContent, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset, DeletedPaths);
}

// Returns true when Lhs and Rhs contain the same set of paths (generic string form) and every
// shared path has the same raw hash and equal attributes (per FolderContent::AreFileAttributesEqual).
// The order of the paths is irrelevant.
bool
CompareChunkedContent(const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs)
{
    tsl::robin_map RhsPathToIndex;
    const size_t RhsPathCount = Rhs.Paths.size();
    RhsPathToIndex.reserve(RhsPathCount);
    for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
    {
        RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex});
    }
    // Every Lhs path must exist in Rhs with matching hash and attributes.
    const size_t LhsPathCount = Lhs.Paths.size();
    for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++)
    {
        if (auto It = RhsPathToIndex.find(Lhs.Paths[LhsPathIndex].generic_string()); It != RhsPathToIndex.end())
        {
            const size_t RhsPathIndex = It->second;
            if ((Lhs.RawHashes[LhsPathIndex] != Rhs.RawHashes[RhsPathIndex]) ||
                (!FolderContent::AreFileAttributesEqual(Lhs.Attributes[LhsPathIndex], Rhs.Attributes[RhsPathIndex])))
            {
                return false;
            }
        }
        else
        {
            return false;
        }
    }
    // Every Rhs path must exist in Lhs as well.
    tsl::robin_set LhsPathExists;
    LhsPathExists.reserve(LhsPathCount);
    for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++)
    {
        LhsPathExists.insert({Lhs.Paths[LhsPathIndex].generic_string()});
    }
    for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
    {
        if (!LhsPathExists.contains(Rhs.Paths[RhsPathIndex].generic_string()))
        {
            return false;
        }
    }
    return true;
};

// Builds a new content from Base with Overlay applied on top of it; the result inherits Base's platform.
ChunkedFolderContent
ApplyChunkedContentOverlay(const ChunkedFolderContent& Base,
                           const ChunkedFolderContent& Overlay,
                           std::span OverlayIncludeWildcards,
                           std::span OverlayExcludeWildcards)
{
    ChunkedFolderContent Result = {.Platform = Base.Platform};

    // Lookups over the sequences and chunks of both inputs.
    tsl::robin_map BaseSequenceHashToSequenceIndex = BuildHashLookup(Base.ChunkedContent.SequenceRawHashes);
    tsl::robin_map BaseChunkHashToChunkIndex = BuildHashLookup(Base.ChunkedContent.ChunkHashes);
    std::vector BaseSequenceChunkOrderOffset = BuildChunkOrderOffset(Base.ChunkedContent.ChunkCounts);

    tsl::robin_map OverlaySequenceHashToSequenceIndex = BuildHashLookup(Overlay.ChunkedContent.SequenceRawHashes);
    tsl::robin_map OverlayChunkHashToChunkIndex = BuildHashLookup(Overlay.ChunkedContent.ChunkHashes);
    std::vector OverlaySequenceChunkOrderOffset = BuildChunkOrderOffset(Overlay.ChunkedContent.ChunkCounts);

    tsl::robin_map ResultSequenceHashToSequenceIndex;
    tsl::robin_map ResultChunkHashToChunkIndex;

    // Capacity estimates only: the larger of the two inputs.
    const size_t EstimatedPathCount = Max(Base.Paths.size(), Overlay.Paths.size());
    Result.Attributes.reserve(EstimatedPathCount);
    Result.Paths.reserve(EstimatedPathCount);
    Result.RawSizes.reserve(EstimatedPathCount);
    Result.RawHashes.reserve(EstimatedPathCount);

    const size_t EstimatedSequenceCount = Max(Base.ChunkedContent.SequenceRawHashes.size(),
Overlay.ChunkedContent.SequenceRawHashes.size());
    Result.ChunkedContent.SequenceRawHashes.reserve(EstimatedSequenceCount);
    const size_t EstimatedChunkCount = Max(Base.ChunkedContent.ChunkHashes.size(), Overlay.ChunkedContent.ChunkHashes.size());
    Result.ChunkedContent.ChunkHashes.reserve(EstimatedChunkCount);
    Result.ChunkedContent.ChunkRawSizes.reserve(EstimatedChunkCount);
    const size_t EstimatedChunkOrderCount = Max(Base.ChunkedContent.ChunkOrders.size(), Overlay.ChunkedContent.ChunkOrders.size());
    Result.ChunkedContent.ChunkOrders.reserve(EstimatedChunkOrderCount);

    // Appends one path from Source to Result. For a non-empty file whose sequence hash is not
    // yet in Result, the sequence is copied too and its chunk indexes are remapped to Result
    // chunk indexes (chunks are added to Result the first time their hash is seen).
    auto AddPath = [&Result, &ResultSequenceHashToSequenceIndex, &ResultChunkHashToChunkIndex](
                       const ChunkedFolderContent& Source,
                       uint32_t SourcePathIndex,
                       const tsl::robin_map& SourceSequenceHashToSequenceIndex,
                       const std::vector& SourceSequenceChunkOrderOffset) {
        Result.Attributes.push_back(Source.Attributes[SourcePathIndex]);
        Result.Paths.push_back(Source.Paths[SourcePathIndex]);
        Result.RawSizes.push_back(Source.RawSizes[SourcePathIndex]);
        Result.RawHashes.push_back(Source.RawHashes[SourcePathIndex]);
        // Zero-size files carry no sequence.
        if (Source.RawSizes[SourcePathIndex] > 0)
        {
            if (!ResultSequenceHashToSequenceIndex.contains(Source.RawHashes[SourcePathIndex]))
            {
                const uint32_t ResultSequenceIndex = gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size());
                ResultSequenceHashToSequenceIndex.insert_or_assign(Source.RawHashes[SourcePathIndex], ResultSequenceIndex);
                Result.ChunkedContent.SequenceRawHashes.push_back(Source.RawHashes[SourcePathIndex]);
                // A non-empty file must have a sequence in its own source content.
                auto SourceSequenceIndexIt = SourceSequenceHashToSequenceIndex.find(Source.RawHashes[SourcePathIndex]);
                ZEN_ASSERT(SourceSequenceIndexIt != SourceSequenceHashToSequenceIndex.end());
                const uint32_t SourceSequenceIndex = SourceSequenceIndexIt->second;
                const uint32_t ChunkOrderOffset = SourceSequenceChunkOrderOffset[SourceSequenceIndex];
                const uint32_t ChunkCount = Source.ChunkedContent.ChunkCounts[SourceSequenceIndex];
                Result.ChunkedContent.ChunkCounts.push_back(ChunkCount);
                std::span
                    SourceChunkIndexes = std::span(Source.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount);
                for (uint32_t SourceChunkIndex : SourceChunkIndexes)
                {
                    const IoHash& ChunkHash = Source.ChunkedContent.ChunkHashes[SourceChunkIndex];
                    if (auto It = ResultChunkHashToChunkIndex.find(ChunkHash); It != ResultChunkHashToChunkIndex.end())
                    {
                        // Chunk already present in Result - reference it.
                        const uint32_t ResultChunkIndex = It->second;
                        Result.ChunkedContent.ChunkOrders.push_back(ResultChunkIndex);
                    }
                    else
                    {
                        // First use of this chunk - copy hash and size, then reference it.
                        const uint32_t ResultChunkIndex = gsl::narrow(Result.ChunkedContent.ChunkHashes.size());
                        Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
                        Result.ChunkedContent.ChunkRawSizes.push_back(Source.ChunkedContent.ChunkRawSizes[SourceChunkIndex]);
                        Result.ChunkedContent.ChunkOrders.push_back(ResultChunkIndex);
                        ResultChunkHashToChunkIndex.insert_or_assign(ChunkHash, ResultChunkIndex);
                    }
                }
            }
        }
    };

    if (OverlayIncludeWildcards.empty() && OverlayExcludeWildcards.empty())
    {
        // No filtering: the overlay wins for every path it contains.
        tsl::robin_set OverlayPaths;
        OverlayPaths.reserve(Overlay.Paths.size());
        for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++)
        {
            const std::string PathString = ToLower(Overlay.Paths[OverlayPathIndex].generic_string());
            OverlayPaths.insert(PathString);
        }
        // Keep Base paths that the overlay does not replace ...
        for (uint32_t BasePathIndex = 0; BasePathIndex < Base.Paths.size(); BasePathIndex++)
        {
            const std::string PathString = ToLower(Base.Paths[BasePathIndex].generic_string());
            if (!OverlayPaths.contains(PathString))
            {
                AddPath(Base, BasePathIndex, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset);
            }
        }
        // ... then append every overlay path.
        for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++)
        {
            AddPath(Overlay, OverlayPathIndex, OverlaySequenceHashToSequenceIndex, OverlaySequenceChunkOrderOffset);
        }
    }
    else
    {
        // Filtered: paths accepted by IncludePath belong to the overlay, all others to Base.
        // NOTE(review): the path is passed through ToLower and then matched with CaseSensitive = true -
        // presumably the wildcards are expected in lower case; confirm.
        for (uint32_t BasePathIndex = 0; BasePathIndex < Base.Paths.size(); BasePathIndex++)
        {
            const std::string PathString = ToLower(Base.Paths[BasePathIndex].generic_string());
            if (!IncludePath(OverlayIncludeWildcards, OverlayExcludeWildcards,
                             PathString, /*CaseSensitive*/ true))
            {
                AddPath(Base, BasePathIndex, BaseSequenceHashToSequenceIndex, BaseSequenceChunkOrderOffset);
            }
        }
        for (uint32_t OverlayPathIndex = 0; OverlayPathIndex < Overlay.Paths.size(); OverlayPathIndex++)
        {
            const std::string PathString = ToLower(Overlay.Paths[OverlayPathIndex].generic_string());
            if (IncludePath(OverlayIncludeWildcards, OverlayExcludeWildcards, PathString, /*CaseSensitive*/ true))
            {
                AddPath(Overlay, OverlayPathIndex, OverlaySequenceHashToSequenceIndex, OverlaySequenceChunkOrderOffset);
            }
        }
    }
    return Result;
}

// Hashes and chunks every file listed in Content and returns the resulting ChunkedFolderContent.
//
//   Stats                - receives ElapsedWallTimeUS (written by the guard below) and
//                          FilesProcessed; also handed to HashOneFile
//   WorkerPool           - pool on which one work item per path is scheduled
//   RootPath             - passed to HashOneFile as the folder the relative paths live in
//   Content              - paths, sizes, attributes and modification ticks; the three
//                          per-path arrays are asserted to match Paths in length
//   InChunkingController,
//   InChunkingCache      - forwarded to HashOneFile
//   UpdateIntervalMS,
//   UpdateCallback       - forwarded to ParallelWork::Wait; the callback is invoked with
//                          (IsAborted, IsPaused, Work.PendingWork().Remaining())
//   AbortFlag, PauseFlag - handed to ParallelWork
//
// Result.RawHashes starts as IoHash::Zero for every path and is overwritten per path by the
// scheduled work items.
ChunkedFolderContent
ChunkFolderContent(ChunkingStatistics& Stats,
                   WorkerThreadPool& WorkerPool,
                   const std::filesystem::path& RootPath,
                   const FolderContent& Content,
                   const ChunkingController& InChunkingController,
                   ChunkingCache& InChunkingCache,
                   int32_t UpdateIntervalMS,
                   std::function&& UpdateCallback,
                   std::atomic& AbortFlag,
                   std::atomic& PauseFlag)
{
    ZEN_TRACE_CPU("ChunkFolderContent");
    Stopwatch Timer;
    // Records the elapsed wall time into Stats (presumably when the guard leaves scope - see MakeGuard).
    auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
    ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size());
    ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size());
    ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size());
    ChunkedFolderContent Result = {.Platform = Content.Platform,
                                   .Paths = Content.Paths,
                                   .RawSizes = Content.RawSizes,
                                   .Attributes = Content.Attributes};
    const size_t ItemCount = Result.Paths.size();
    Result.RawHashes.resize(ItemCount, IoHash::Zero);
    Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount);  // Up to 1 per file, maybe less
    Result.ChunkedContent.ChunkCounts.reserve(ItemCount);        // Up to one per file
    Result.ChunkedContent.ChunkOrders.reserve(ItemCount);        // At least 1 per file, maybe more
    Result.ChunkedContent.ChunkHashes.reserve(ItemCount);        // At least 1 per file, maybe more
    Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount);      // At least 1 per file, maybe more
    tsl::robin_map ChunkHashToChunkIndex;
    tsl::robin_map
RawHashToChunkSequenceIndex;
    // NOTE(review): RawHashToChunkSequenceIndex is only reserved here and never passed on; the map
    // handed to HashOneFile is RawHashToSequenceRawHashIndex declared in the scope below.
    RawHashToChunkSequenceIndex.reserve(ItemCount);
    ChunkHashToChunkIndex.reserve(ItemCount);
    {
        // Processing order; currently the identity order because the sort below is commented out.
        std::vector Order;
        Order.resize(ItemCount);
        for (uint32_t I = 0; I < ItemCount; I++)
        {
            Order[I] = I;
        }
        // Handle the biggest files first so we don't end up with one straggling large file at the end
        // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs];
        //});
        tsl::robin_map RawHashToSequenceRawHashIndex;
        RawHashToSequenceRawHashIndex.reserve(ItemCount);
        // Lock is shared with HashOneFile and also guards the RawHashes write below.
        RwLock Lock;
        ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
        for (uint32_t PathIndex : Order)
        {
            // Stop scheduling once the work has been aborted.
            if (Work.IsAborted())
            {
                break;
            }
            Work.ScheduleWork(WorkerPool,  // GetSyncWorkerPool()
                              [&, PathIndex](std::atomic& AbortFlag) {
                                  if (!AbortFlag)
                                  {
                                      IoHash RawHash = HashOneFile(Stats,
                                                                   InChunkingController,
                                                                   InChunkingCache,
                                                                   Content.ModificationTicks,
                                                                   Result,
                                                                   ChunkHashToChunkIndex,
                                                                   RawHashToSequenceRawHashIndex,
                                                                   Lock,
                                                                   RootPath,
                                                                   PathIndex,
                                                                   AbortFlag);
                                      Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; });
                                      Stats.FilesProcessed++;
                                  }
                              });
        }
        // The PendingWork argument supplied by Wait is ignored; the remaining count is read from Work instead.
        Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
            ZEN_UNUSED(PendingWork);
            UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining());
        });
    }
    return Result;
}

// Builds a hash -> position map for Hashes.
// Asserts that every hash occurs only once (insert_or_assign must report a fresh insertion).
tsl::robin_map
BuildHashLookup(std::span Hashes)
{
    tsl::robin_map Lookup;
    Lookup.reserve(Hashes.size());
    for (uint32_t Index = 0; Index < Hashes.size(); Index++)
    {
        bool IsNew = Lookup.insert_or_assign(Hashes[Index], Index).second;
        ZEN_ASSERT(IsNew);
    }
    return Lookup;
}

// Returns, for every sequence, the index of its first entry in the flat ChunkOrders array:
// element i is the sum of ChunkCounts[0..i-1].
std::vector
BuildChunkOrderOffset(std::span ChunkCounts)
{
    std::vector ChunkOffsets;
    ChunkOffsets.reserve(ChunkCounts.size());
    uint32_t Offset = 0;
    for (uint32_t SequenceIndex = 0; SequenceIndex < ChunkCounts.size(); SequenceIndex++)
    {
        ChunkOffsets.push_back(Offset);
        Offset += ChunkCounts[SequenceIndex];
    }
    return ChunkOffsets;
}

// Builds the lookup tables for Content: sequence hash -> sequence index, per-sequence chunk-order
// offsets, chunk hash -> chunk index, for every chunk the list of (sequence, byte offset)
// locations where it occurs, for every sequence the first path that uses it, and a hash of the
// lower-cased file extension of every path.
ChunkedContentLookup
BuildChunkedContentLookup(const
ChunkedFolderContent& Content) { ZEN_TRACE_CPU("BuildChunkedContentLookup"); struct ChunkLocationReference { uint32_t ChunkIndex = (uint32_t)-1; uint32_t SequenceIndex = (uint32_t)-1; uint64_t Offset = (uint64_t)-1; }; ChunkedContentLookup Result; { Result.SequenceIndexChunkOrderOffset = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts); Result.RawHashToSequenceIndex = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes); } std::vector Locations; Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) { const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex]; const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; uint64_t LocationOffset = 0; for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) { uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset}); LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } } std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) { if (Lhs.ChunkIndex < Rhs.ChunkIndex) { return true; } if (Lhs.ChunkIndex > Rhs.ChunkIndex) { return false; } if (Lhs.SequenceIndex < Rhs.SequenceIndex) { return true; } if (Lhs.SequenceIndex > Rhs.SequenceIndex) { return false; } return Lhs.Offset < Rhs.Offset; }); Result.ChunkSequenceLocations.reserve(Locations.size()); const uint32_t ChunkCount = gsl::narrow(Content.ChunkedContent.ChunkHashes.size()); Result.ChunkHashToChunkIndex.reserve(ChunkCount); size_t RangeOffset = 0; Result.ChunkSequenceLocationOffset.reserve(ChunkCount); Result.ChunkSequenceLocationCounts.reserve(ChunkCount); for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) { 
Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); uint32_t Count = 0; while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) { const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count]; Result.ChunkSequenceLocations.push_back( ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex, .Offset = LocationReference.Offset}); Count++; } Result.ChunkSequenceLocationOffset.push_back(RangeOffset); Result.ChunkSequenceLocationCounts.push_back(Count); RangeOffset += Count; } Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); Result.PathExtensionHash.resize(Content.Paths.size()); for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) { std::string LowercaseExtension = Content.Paths[PathIndex].extension().string(); std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), [](char c) { return (char)::tolower(c); }); Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension); if (Content.RawSizes[PathIndex] > 0) { const IoHash& RawHash = Content.RawHashes[PathIndex]; auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); const uint32_t SequenceIndex = SequenceIndexIt->second; if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) { Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; } } } return Result; } void CalculateLocalChunkOrders(const std::span& AbsoluteChunkOrders, const std::span LooseChunkHashes, const std::span LooseChunkRawSizes, const std::span& BlockDescriptions, std::vector& OutLocalChunkHashes, std::vector& OutLocalChunkRawSizes, std::vector& OutLocalChunkOrders, bool DoExtraVerify) { ZEN_TRACE_CPU("CalculateLocalChunkOrders"); std::vector AbsoluteChunkHashes; 
std::vector AbsoluteChunkRawSizes;
    // Absolute chunk list = loose chunks first, then the chunks of every block in order.
    AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end());
    AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), LooseChunkRawSizes.begin(), LooseChunkRawSizes.end());
    for (const ChunkBlockDescription& Block : BlockDescriptions)
    {
        AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), Block.ChunkRawHashes.begin(), Block.ChunkRawHashes.end());
        AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), Block.ChunkRawLengths.begin(), Block.ChunkRawLengths.end());
    }
    OutLocalChunkHashes.reserve(AbsoluteChunkHashes.size());
    OutLocalChunkRawSizes.reserve(AbsoluteChunkRawSizes.size());
    OutLocalChunkOrders.reserve(AbsoluteChunkOrders.size());
    // Chunk hash -> index in the local (deduplicated) chunk table.
    tsl::robin_map ChunkHashToChunkIndex;
    ChunkHashToChunkIndex.reserve(AbsoluteChunkHashes.size());
    for (uint32_t AbsoluteChunkOrderIndex = 0; AbsoluteChunkOrderIndex < AbsoluteChunkOrders.size(); AbsoluteChunkOrderIndex++)
    {
        const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[AbsoluteChunkOrderIndex];
        const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex];
        const uint64_t AbsoluteChunkRawSize = AbsoluteChunkRawSizes[AbsoluteChunkIndex];
        if (auto It = ChunkHashToChunkIndex.find(AbsoluteChunkHash); It != ChunkHashToChunkIndex.end())
        {
            // Hash already has a local slot - reuse it.
            const uint32_t LocalChunkIndex = It->second;
            OutLocalChunkOrders.push_back(LocalChunkIndex);
        }
        else
        {
            // First occurrence - append to the local chunk table.
            uint32_t LocalChunkIndex = gsl::narrow(OutLocalChunkHashes.size());
            OutLocalChunkHashes.push_back(AbsoluteChunkHash);
            OutLocalChunkRawSizes.push_back(AbsoluteChunkRawSize);
            OutLocalChunkOrders.push_back(LocalChunkIndex);
            ChunkHashToChunkIndex.insert_or_assign(AbsoluteChunkHash, LocalChunkIndex);
        }
        if (DoExtraVerify)
        {
            // NOTE(review): indexing OutLocalChunkOrders with AbsoluteChunkOrderIndex presumes the
            // output vectors were empty on entry; confirm with callers.
            const uint32_t LocalChunkIndex = OutLocalChunkOrders[AbsoluteChunkOrderIndex];
            const IoHash& LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex];
            const uint64_t& LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex];
            ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
            ZEN_ASSERT(LocalChunkRawSize == AbsoluteChunkRawSize);
        }
    }
    if (DoExtraVerify)
    {
        // Second pass over the finished tables: every local order entry must resolve to the same
        // hash and size as the absolute order entry at the same position.
        for (uint32_t OrderIndex = 0; OrderIndex < OutLocalChunkOrders.size(); OrderIndex++)
        {
            uint32_t LocalChunkIndex = OutLocalChunkOrders[OrderIndex];
            const IoHash LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex];
            uint64_t LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex];
            uint32_t VerifyChunkIndex = AbsoluteChunkOrders[OrderIndex];
            const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex];
            uint64_t VerifyChunkRawSize = AbsoluteChunkRawSizes[VerifyChunkIndex];
            ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
            ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize);
        }
    }
}

// Checks the internal consistency of Content and throws std::runtime_error on the first problem.
//
//   Content            - the content to validate
//   BlockDescriptions,
//   LooseChunks        - together define the set of known chunk hashes
//   IncludeWildcards,
//   ExcludeWildcards   - only paths accepted by IncludePath() have their chunks checked against
//                        the known chunk set
//
// Throws when: a path is empty; a non-empty file references an unknown sequence hash; a chunk has
// zero size or a zero hash; a checked chunk is not a known chunk; the chunk sizes of a sequence
// do not add up to the file's raw size; a zero-size file has a non-zero hash; a sequence or a
// chunk is not used by any path.
void
ValidateChunkedFolderContent(const ChunkedFolderContent& Content,
                             std::span BlockDescriptions,
                             std::span LooseChunks,
                             std::span IncludeWildcards,
                             std::span ExcludeWildcards)
{
    // Collect every chunk hash that is available either loose or inside a block.
    size_t TotalKnownChunkCount = LooseChunks.size();
    for (const ChunkBlockDescription& BlockDescription : BlockDescriptions)
    {
        TotalKnownChunkCount += BlockDescription.ChunkRawHashes.size();
    }
    tsl::robin_set KnownChunks;
    KnownChunks.reserve(TotalKnownChunkCount);
    KnownChunks.insert(LooseChunks.begin(), LooseChunks.end());
    for (const ChunkBlockDescription& BlockDescription : BlockDescriptions)
    {
        KnownChunks.insert(BlockDescription.ChunkRawHashes.begin(), BlockDescription.ChunkRawHashes.end());
    }
    std::vector ChunkOrderOffsets = BuildChunkOrderOffset(Content.ChunkedContent.ChunkCounts);
    tsl::robin_map SequenceIndexLookup = BuildHashLookup(Content.ChunkedContent.SequenceRawHashes);
    // Reference counters used at the end to detect unused sequences and chunks.
    std::vector SequenceUseCount(Content.ChunkedContent.SequenceRawHashes.size(), 0);
    std::vector ChunkUseCount(Content.ChunkedContent.ChunkHashes.size(), 0);
    for (size_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
    {
        const std::filesystem::path& Path = Content.Paths[PathIndex];
        if (Path.empty())
        {
            throw std::runtime_error("Chunked folder content contains empty path");
        }
        const uint64_t RawSize = Content.RawSizes[PathIndex];
        const IoHash RawHash
= Content.RawHashes[PathIndex]; if (RawSize > 0) { if (auto It = SequenceIndexLookup.find(RawHash); It != SequenceIndexLookup.end()) { const uint32_t SourceSequenceIndex = It->second; SequenceUseCount[SourceSequenceIndex]++; const uint32_t ChunkOrderOffset = ChunkOrderOffsets[SourceSequenceIndex]; const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SourceSequenceIndex]; std::span ChunkIndexes = std::span(Content.ChunkedContent.ChunkOrders).subspan(ChunkOrderOffset, ChunkCount); bool VerifyIfChunkExists = IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), true); IoHashStream Hasher; uint64_t SizeSum = 0; for (uint32_t ChunkIndex : ChunkIndexes) { ChunkUseCount[ChunkIndex]++; const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; if (ChunkSize == 0) { throw std::runtime_error("Chunked folder content contains zero size chunk"); } const IoHash& ChunkRawHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; if (ChunkRawHash == IoHash::Zero) { throw std::runtime_error("Chunked folder content contains zero chunk hash"); } if (VerifyIfChunkExists) { if (!KnownChunks.contains(ChunkRawHash)) { throw std::runtime_error(fmt::format("Chunked folder content references an unknown chunk '{}'", ChunkRawHash)); } } SizeSum += ChunkSize; } if (SizeSum != RawSize) { throw std::runtime_error( fmt::format("Chunked folder content sequence size {} does not match expected size '{}'", SizeSum, RawSize)); } } else { throw std::runtime_error(fmt::format("Chunked folder content references unknown sequence hash '{}'", RawHash)); } } else { if (RawHash != IoHash::Zero) { throw std::runtime_error( fmt::format("Chunked folder content references zero size sequence with non-zero hash '{}'", RawHash)); } } } for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceUseCount.size(); SequenceIndex++) { if (SequenceUseCount[SequenceIndex] == 0) { throw std::runtime_error( fmt::format("Chunked folder has unused sequence '{}'", 
Content.ChunkedContent.SequenceRawHashes[SequenceIndex])); } } for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkUseCount.size(); ChunkIndex++) { if (ChunkUseCount[ChunkIndex] == 0) { throw std::runtime_error(fmt::format("Chunked folder has unused chunk '{}'", Content.ChunkedContent.ChunkHashes[ChunkIndex])); } } } void InlineRemoveUnusedHashes(std::vector& InOutHashes, std::span UsedHashes) { tsl::robin_set UsedChunkHashes; UsedChunkHashes.reserve(UsedHashes.size()); UsedChunkHashes.insert(UsedHashes.begin(), UsedHashes.end()); for (auto It = InOutHashes.begin(); It != InOutHashes.end();) { if (!UsedChunkHashes.contains(*It)) { It = InOutHashes.erase(It); } else { It++; } } } #if ZEN_WITH_TESTS void chunkedcontent_forcelink() { } namespace chunkedcontent_testutils { ChunkedFile CreateChunkedFile(FastRandom& Random, const size_t FinalSize, size_t ChunkingSize, const IoBuffer& LastUsedBuffer) { size_t ChunkCount = (FinalSize + (ChunkingSize - 1)) / ChunkingSize; std::vector ChunkHashes; std::vector ChunkSizes; std::vector Chunks; ChunkHashes.reserve(ChunkCount); ChunkSizes.reserve(ChunkCount); size_t SizeLeft = FinalSize; IoHashStream HashStream; while (SizeLeft > 0) { size_t ChunkSize = Min(SizeLeft, ChunkingSize); IoBuffer ChunkBuffer; if (LastUsedBuffer && FinalSize == SizeLeft) { ChunkSize = Min(ChunkSize, LastUsedBuffer.GetSize()); ChunkBuffer = IoBuffer(LastUsedBuffer, 0, ChunkSize); } else { ChunkBuffer = CreateRandomBlob(Random, ChunkSize); } HashStream.Append(ChunkBuffer); ChunkHashes.push_back(IoHash::HashBuffer(ChunkBuffer)); ChunkSizes.push_back(ChunkSize); Chunks.emplace_back(std::move(ChunkBuffer)); SizeLeft -= ChunkSize; } ZEN_ASSERT(std::accumulate(ChunkSizes.begin(), ChunkSizes.end(), uint64_t(0)) == FinalSize); ZEN_ASSERT(std::accumulate(Chunks.begin(), Chunks.end(), uint64_t(0), [](uint64_t Current, const IoBuffer& B) { return Current + B.GetSize(); }) == FinalSize); return ChunkedFile{.RawHash = HashStream.GetHash(), .ChunkHashes = 
std::move(ChunkHashes), .ChunkSizes = std::move(ChunkSizes), .Chunks = std::move(Chunks)};
    }

    // Test helper: builds a ChunkedFolderContent for the given (path, size) pairs.
    //
    // Every non-empty file is generated with CreateChunkedFile using ChunkingSize; the last
    // chunk of the previously generated file is handed in as LastUsedBuffer, so the first chunk
    // of a file is a view of the leading bytes of that buffer. Sequences and chunks are
    // deduplicated by hash. The payload of every newly added chunk is appended to
    // ChunkPayloads in the same order as Result.ChunkedContent.ChunkHashes. Zero-size files get
    // IoHash::Zero and no sequence. Attributes are set to 0 for every path.
    ChunkedFolderContent CreateChunkedFolderContent(FastRandom& Random,
                                                    std::span> PathAndSizes,
                                                    uint64_t ChunkingSize,
                                                    std::vector& ChunkPayloads)
    {
        ChunkedFolderContent Result;
        Result.Paths.reserve(PathAndSizes.size());
        Result.RawSizes.reserve(PathAndSizes.size());
        Result.Attributes.reserve(PathAndSizes.size());
        Result.RawHashes.reserve(PathAndSizes.size());
        ChunkPayloads.reserve(PathAndSizes.size());
        IoBuffer LastChunkGenerated;
        tsl::robin_map SequenceToIndex;
        tsl::robin_map ChunkToIndex;
        for (size_t PathIndex = 0; PathIndex < PathAndSizes.size(); PathIndex++)
        {
            const std::string& Path = PathAndSizes[PathIndex].first;
            const uint64_t Size = PathAndSizes[PathIndex].second;
            Result.Paths.push_back(Path);
            Result.RawSizes.push_back(Size);
            Result.Attributes.push_back(0);
            if (Size > 0)
            {
                ChunkedFile File = CreateChunkedFile(Random, Size, ChunkingSize, LastChunkGenerated);
                // Copied before any chunk payload is moved out of File below.
                LastChunkGenerated = File.Chunks.back();
                Result.RawHashes.push_back(File.RawHash);
                if (auto SequenceIt = SequenceToIndex.find(File.RawHash); SequenceIt == SequenceToIndex.end())
                {
                    // New sequence: register it and map each of its chunks.
                    SequenceToIndex.insert_or_assign(File.RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size()));
                    Result.ChunkedContent.SequenceRawHashes.push_back(File.RawHash);
                    Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow(File.ChunkHashes.size()));
                    for (size_t ChunkIndex = 0; ChunkIndex < File.ChunkHashes.size(); ChunkIndex++)
                    {
                        const IoHash& ChunkHash = File.ChunkHashes[ChunkIndex];
                        if (auto ChunkIt = ChunkToIndex.find(ChunkHash); ChunkIt == ChunkToIndex.end())
                        {
                            // New chunk: add hash, size and payload.
                            const uint32_t ChunkedContentChunkIndex = gsl::narrow(Result.ChunkedContent.ChunkHashes.size());
                            Result.ChunkedContent.ChunkOrders.push_back(gsl::narrow(ChunkedContentChunkIndex));
                            Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
                            Result.ChunkedContent.ChunkRawSizes.push_back(File.ChunkSizes[ChunkIndex]);
                            ChunkPayloads.push_back(std::move(File.Chunks[ChunkIndex]));
                            ChunkToIndex.insert_or_assign(ChunkHash, ChunkedContentChunkIndex);
                        }
                        else
                        {
                            // Known chunk: only reference it.
                            const uint32_t ChunkedContentChunkIndex = ChunkIt->second;
                            Result.ChunkedContent.ChunkOrders.push_back(ChunkedContentChunkIndex);
                        }
                    }
                }
            }
            else
            {
                Result.RawHashes.push_back(IoHash::Zero);
            }
        }
        return Result;
    }

    // Test helper: returns the payload for every hash in WantedHashes, looked up first among the
    // base chunks and then among the overlay chunks. A hash found in neither fails the running
    // test via CHECK(false) and contributes no payload.
    std::vector GetChunkPayloads(std::span BaseHashes,
                                 std::span BaseChunks,
                                 std::span OverlayHashes,
                                 std::span OverlayChunks,
                                 std::span WantedHashes)
    {
        std::vector Result;
        Result.reserve(WantedHashes.size());
        tsl::robin_map BaseChunkLookup = BuildHashLookup(BaseHashes);
        tsl::robin_map OverlayChunkLookup = BuildHashLookup(OverlayHashes);
        for (const IoHash& ChunkHash : WantedHashes)
        {
            if (auto It = BaseChunkLookup.find(ChunkHash); It != BaseChunkLookup.end())
            {
                Result.push_back(BaseChunks[It->second]);
            }
            else if (It = OverlayChunkLookup.find(ChunkHash); It != OverlayChunkLookup.end())
            {
                Result.push_back(OverlayChunks[It->second]);
            }
            else
            {
                CHECK(false);
            }
        }
        return Result;
    }

    // Test helper: maps each path's generic_string() to its index in Paths.
    // If the same string occurs twice, the later index wins (insert_or_assign).
    tsl::robin_map BuildPathLookup(std::span Paths)
    {
        tsl::robin_map Result;
        Result.reserve(Paths.size());
        for (size_t Index = 0; Index < Paths.size(); Index++)
        {
            const std::filesystem::path& Path = Paths[Index];
            Result.insert_or_assign(Path.generic_string(), Index);
        }
        return Result;
    }
}  // namespace chunkedcontent_testutils

// Builds an 11-file base content, deletes every path rejected by the exclude wildcards and
// validates the filtered content against the remaining chunk hashes.
TEST_CASE("chunkedcontent.DeletePathsFromContent")
{
    FastRandom BaseRandom;
    std::vector BaseChunks;
    const std::string BasePaths[11] = {{"file_1"},
                                       {"file_2.exe"},
                                       {"file_3.txt"},
                                       {"dir_1/dir1_file_1.exe"},
                                       {"dir_1/dir1_file_2.pdb"},
                                       {"dir_1/dir1_file_3.txt"},
                                       {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
                                       {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
                                       {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
                                       {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
                                       {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
    // Sizes line up with BasePaths by index; index 1 is an empty file.
    const uint64_t BaseSizes[11] =
        {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
    std::pair BasePathAndSizes[11] = {{BasePaths[0], BaseSizes[0]}, {BasePaths[1], BaseSizes[1]},
{BasePaths[2], BaseSizes[2]}, {BasePaths[3], BaseSizes[3]}, {BasePaths[4], BaseSizes[4]}, {BasePaths[5], BaseSizes[5]}, {BasePaths[6], BaseSizes[6]}, {BasePaths[7], BaseSizes[7]}, {BasePaths[8], BaseSizes[8]}, {BasePaths[9], BaseSizes[9]}, {BasePaths[10], BaseSizes[10]}}; ChunkedFolderContent Base = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks); ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map BaseChunksLookup; for (size_t Index = 0; Index < BaseChunks.size(); Index++) { BaseChunksLookup.insert_or_assign(Base.ChunkedContent.ChunkHashes[Index], Index); } std::vector IncludeWildcards; std::vector ExcludeWildcards = {"*.map*", "*.pdb*", "*optional*", "*Manifest_*", "*pakchunk10sm6*"}; std::vector DeletedPaths; for (const std::filesystem::path& RemotePath : Base.Paths) { if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), true)) { DeletedPaths.push_back(RemotePath); } } ZEN_ASSERT(DeletedPaths.size() == 2); ChunkedFolderContent FilteredContent = DeletePathsFromChunkedContent(Base, DeletedPaths); std::vector FilteredChunks = Base.ChunkedContent.ChunkHashes; InlineRemoveUnusedHashes(FilteredChunks, FilteredContent.ChunkedContent.ChunkHashes); ValidateChunkedFolderContent(FilteredContent, {}, FilteredChunks, {}, {}); } TEST_CASE("chunkedcontent.ApplyChunkedContentOverlay") { FastRandom BaseRandom; std::vector BaseChunks; const std::string BasePaths[11] = {{"file_1"}, {"file_2.exe"}, {"file_3.txt"}, {"dir_1/dir1_file_1.exe"}, {"dir_1/dir1_file_2.pdb"}, {"dir_1/dir1_file_3.txt"}, {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; const uint64_t BaseSizes[11] = {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; 
std::pair BasePathAndSizes[11] = {{BasePaths[0], BaseSizes[0]}, {BasePaths[1], BaseSizes[1]}, {BasePaths[2], BaseSizes[2]}, {BasePaths[3], BaseSizes[3]}, {BasePaths[4], BaseSizes[4]}, {BasePaths[5], BaseSizes[5]}, {BasePaths[6], BaseSizes[6]}, {BasePaths[7], BaseSizes[7]}, {BasePaths[8], BaseSizes[8]}, {BasePaths[9], BaseSizes[9]}, {BasePaths[10], BaseSizes[10]}}; const std::string OverlayPaths[6] = {{"file_1"}, {"file_4"}, {"dir_1/dir1_file_1.exe"}, {"dir_1/dir1_file_2.pdb"}, {"dir_2/dir2_dir1/dir2_dir1_file_1.self"}, {"dir_2/dir2_dir1/dir2_dir1_file_2.sym"}}; const uint64_t OverlaySizes[6] = {7u * 1024u, 1249, 17u * 1024u, 9u * 1024u, 0, 17u * 1024u}; std::pair OverlayPathAndSizes[6] = {{OverlayPaths[0], OverlaySizes[0]}, {OverlayPaths[1], OverlaySizes[1]}, {OverlayPaths[2], OverlaySizes[2]}, {OverlayPaths[3], OverlaySizes[3]}, {OverlayPaths[4], OverlaySizes[4]}, {OverlayPaths[5], OverlaySizes[5]}}; ChunkedFolderContent Base = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, BasePathAndSizes, 4u * 1024u, BaseChunks); ValidateChunkedFolderContent(Base, {}, Base.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map BasePathLookup = chunkedcontent_testutils::BuildPathLookup(Base.Paths); std::vector OverlayChunks; ChunkedFolderContent Overlay = chunkedcontent_testutils::CreateChunkedFolderContent(BaseRandom, OverlayPathAndSizes, 4u * 1024u, OverlayChunks); ValidateChunkedFolderContent(Overlay, {}, Overlay.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map OverlayPathLookup = chunkedcontent_testutils::BuildPathLookup(Overlay.Paths); auto PathMatchesBase = [&](const std::string& Path, const ChunkedFolderContent& MergedContent, tsl::robin_map MergedPathLookup) { return MergedContent.RawHashes[MergedPathLookup.at(Path)] == Base.RawHashes[BasePathLookup.at(Path)]; }; auto PathMatchesOverlay = [&](const std::string& Path, const ChunkedFolderContent& MergedContent, tsl::robin_map MergedPathLookup) { return MergedContent.RawHashes[MergedPathLookup.at(Path)] == 
Overlay.RawHashes[OverlayPathLookup.at(Path)]; }; { ChunkedFolderContent AllMergedContent = ApplyChunkedContentOverlay(Base, Overlay, {}, {}); CHECK_EQ(AllMergedContent.Paths.size(), 14); std::vector AllMergedChunks = chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes, BaseChunks, Overlay.ChunkedContent.ChunkHashes, OverlayChunks, AllMergedContent.ChunkedContent.ChunkHashes); ValidateChunkedFolderContent(AllMergedContent, {}, AllMergedContent.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map AllMergedPathLookup = chunkedcontent_testutils::BuildPathLookup(AllMergedContent.Paths); CHECK(PathMatchesBase("file_2.exe", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("file_3.txt", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_1.exe", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_2.pdb", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_3.dll", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_1.txt", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_2.json", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesOverlay("file_1", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesOverlay("file_4", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesOverlay("dir_1/dir1_file_2.pdb", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", AllMergedContent, AllMergedPathLookup)); CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_2.sym", AllMergedContent, AllMergedPathLookup)); } { ChunkedFolderContent ReplaceExecutablesContent = 
ApplyChunkedContentOverlay(Base, Overlay, std::vector{"*.exe", "*.self"}, {}); CHECK_EQ(ReplaceExecutablesContent.Paths.size(), 10); std::vector ReplaceExecutablesChunks = chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes, BaseChunks, Overlay.ChunkedContent.ChunkHashes, OverlayChunks, ReplaceExecutablesContent.ChunkedContent.ChunkHashes); ValidateChunkedFolderContent(ReplaceExecutablesContent, {}, ReplaceExecutablesContent.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map ReplaceExecutablesPathLookup = chunkedcontent_testutils::BuildPathLookup(ReplaceExecutablesContent.Paths); CHECK(PathMatchesBase("file_1", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("file_3.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("dir_1/dir1_file_2.pdb", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_2.pdb", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir1/dir2_dir1_file_3.dll", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_1.txt", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesBase("dir_2/dir2_dir2/dir2_dir2_file_2.json", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", ReplaceExecutablesContent, ReplaceExecutablesPathLookup)); } { ChunkedFolderContent ReplaceDir1ExecutablesContent = ApplyChunkedContentOverlay(Base, Overlay, std::vector{"dir_1/*.exe", "dir_2/*"}, std::vector{"dir_2/*.sym"}); CHECK_EQ(ReplaceDir1ExecutablesContent.Paths.size(), 7); std::vector ReplaceDir1Chunks = 
chunkedcontent_testutils::GetChunkPayloads(Base.ChunkedContent.ChunkHashes, BaseChunks, Overlay.ChunkedContent.ChunkHashes, OverlayChunks, ReplaceDir1ExecutablesContent.ChunkedContent.ChunkHashes); ValidateChunkedFolderContent(ReplaceDir1ExecutablesContent, {}, ReplaceDir1ExecutablesContent.ChunkedContent.ChunkHashes, {}, {}); tsl::robin_map ReplaceDir1ExecutablesPathLookup = chunkedcontent_testutils::BuildPathLookup(ReplaceDir1ExecutablesContent.Paths); CHECK(PathMatchesBase("file_1", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); CHECK(PathMatchesBase("file_2.exe", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); CHECK(PathMatchesBase("file_3.txt", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); CHECK(PathMatchesBase("dir_1/dir1_file_3.txt", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); CHECK(PathMatchesOverlay("dir_1/dir1_file_1.exe", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); CHECK(PathMatchesOverlay("dir_2/dir2_dir1/dir2_dir1_file_1.self", ReplaceDir1ExecutablesContent, ReplaceDir1ExecutablesPathLookup)); } } #endif // ZEN_WITH_TESTS } // namespace zen