// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include "chunking.h" ZEN_THIRD_PARTY_INCLUDES_START #include #include ZEN_THIRD_PARTY_INCLUDES_END namespace zen { namespace { struct ChunkedHeader { static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd static constexpr uint32_t CurrentVersion = 1; uint32_t Magic = ExpectedMagic; uint32_t Version = CurrentVersion; uint32_t ChunkSequenceLength; uint32_t ChunkHashCount; uint64_t ChunkSequenceOffset; uint64_t ChunkHashesOffset; uint64_t RawSize = 0; IoHash RawHash; }; } // namespace IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info) { ZEN_TRACE_CPU("SerializeChunkedInfo"); size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); IoBuffer HeaderData(HeaderSize); ChunkedHeader Header; Header.ChunkSequenceLength = gsl::narrow(Info.ChunkSequence.size()); Header.ChunkHashCount = gsl::narrow(Info.ChunkHashes.size()); Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16); Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16); Header.RawSize = Info.RawSize; Header.RawHash = Info.RawHash; MutableMemoryView WriteView = HeaderData.GetMutableView(); { MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header)); HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header))); } { MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength); ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize())); } { MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount); ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize())); } return HeaderData; } ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer) { ZEN_TRACE_CPU("DeserializeChunkedInfo"); MemoryView View = Buffer.GetView(); ChunkedHeader Header; { MutableMemoryView HeaderWriteView(&Header, sizeof(Header)); HeaderWriteView.CopyFrom(View.Left(sizeof(Header))); } if (Header.Magic != ChunkedHeader::ExpectedMagic) { return {}; } if (Header.Version != ChunkedHeader::CurrentVersion) { return {}; } ChunkedInfo Info; Info.RawSize = Header.RawSize; Info.RawHash = Header.RawHash; Info.ChunkSequence.resize(Header.ChunkSequenceLength); Info.ChunkHashes.resize(Header.ChunkHashCount); { MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength); ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize())); } { MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount); ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize())); } return Info; } void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk) { ZEN_TRACE_CPU("Reconstruct"); BasicFile Reconstructed; Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); uint64_t Offset = 0; for (uint32_t SequenceIndex : Info.ChunkSequence) { IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]); ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset); Offset += Chunk.GetSize(); } } ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params, std::atomic* BytesProcessed, std::atomic* AbortFlag) { ZEN_TRACE_CPU("ChunkData"); ChunkedInfoWithSource Result; tsl::robin_map FoundChunks; ZenChunkHelper Chunker; Chunker.SetUseThreshold(Params.UseThreshold); Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); size_t End = Offset + Size; const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize); BasicFileBuffer RawBuffer(RawData, ScanBufferSize); MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); ZEN_ASSERT(!SliceView.IsEmpty()); size_t SliceSize = SliceView.GetSize(); IoHashStream RawHashStream; while (Offset < End) { if (AbortFlag != nullptr && AbortFlag->load()) { return {}; } size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); if (ScanLength == ZenChunkHelper::kNoBoundaryFound) { if (Offset + SliceSize == End) { ScanLength = SliceSize; } else { SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); SliceSize = SliceView.GetSize(); Chunker.Reset(); continue; } } uint32_t ChunkLength = gsl::narrow(ScanLength); // +HashedLength); MemoryView ChunkView = SliceView.Left(ScanLength); RawHashStream.Append(ChunkView); IoHash ChunkHash = IoHash::HashBuffer(ChunkView); SliceView.RightChopInline(ScanLength); if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end()) { Result.Info.ChunkSequence.push_back(It->second); } else { uint32_t ChunkIndex = gsl::narrow(Result.Info.ChunkHashes.size()); FoundChunks.insert_or_assign(ChunkHash, ChunkIndex); Result.Info.ChunkHashes.push_back(ChunkHash); Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength}); Result.Info.ChunkSequence.push_back(ChunkIndex); } SliceSize = SliceView.GetSize(); Offset += ChunkLength; if (BytesProcessed != nullptr) { BytesProcessed->fetch_add(ChunkLength); } } Result.Info.RawSize = Size; Result.Info.RawHash = RawHashStream.GetHash(); return Result; } } // namespace zen #if ZEN_WITH_TESTS # include # include # include # include # include # include # include # include # include # include "chunking.h" ZEN_THIRD_PARTY_INCLUDES_START # include # include ZEN_THIRD_PARTY_INCLUDES_END namespace zen { # if 0 TEST_CASE("chunkedfile.findparams") { # if 1 DirectoryContent SourceContent1; GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1); const std::vector& SourceFiles1 = SourceContent1.Files; DirectoryContent SourceContent2; GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2); const std::vector& SourceFiles2 = SourceContent2.Files; # else std::filesystem::path SourcePath1 = "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; std::filesystem::path SourcePath2 = "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; const std::vector& SourceFiles1 = {SourcePath1}; const std::vector& SourceFiles2 = {SourcePath2}; # endif ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}, ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598}, ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030}, ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222}, ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600}, ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442}, ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950}, ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914}, ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556}, ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520}, ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478}, ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072}, ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880}, ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678}, ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994}, ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714}, ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636}, ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609}, ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756}, ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955}, ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792}, ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338}, ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568}, ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108}, ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297}, ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188}, ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372}, ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592}, ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805}, ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263}, ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534}, ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839}, ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360}, ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976}, ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493}, ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643}, ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504}, ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664}, ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048}, ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432}, ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520}, ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378}, ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421}, ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459}, ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502}, ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540}, ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423}, ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466}, ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834}, ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917}, ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751}, ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668}, ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547}, ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504}, ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371}, ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585}, ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406}, ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450}, ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615}, ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441}, ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417}, ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557}, ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528}, ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112}, ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644}, ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476}, ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408}, ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592}, ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483}, ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586}, ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496}, ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302}, ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516}, ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964}, ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448}, ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630}, ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010}, ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260}, ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600}, ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156}, ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570}, ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549}, ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510}, ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673}, ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560}, ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464}, ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540}, ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511}, ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057}, ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202}, ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347}, ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492}, ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389}, ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246}, ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103}, ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532}, ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150}, ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292}, ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434}, ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576}, ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336}, ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196}, ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476}, ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616}, ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862}, ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121}, ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603}, ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380}, ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908}, ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650}, ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166}, ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424}, ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696}, ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253}, ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208}, ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743}, ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548}, ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042}, ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789}, ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295}, ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508}, ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952}, ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998}, ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}}; static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams); std::vector Infos1(SourceFiles1.size()); std::vector Infos2(SourceFiles2.size()); WorkerThreadPool WorkerPool(32); for (size_t I = 0; I < ParamsCount; I++) { for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++) { Latch WorkLatch(1); ChunkedParams Param = Params[I]; Param.UseThreshold = UseThreshold == 1; Stopwatch Timer; for (size_t F = 0; F < SourceFiles1.size(); F++) { WorkLatch.AddCount(1); WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() { auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); BasicFile SourceData1; SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead); Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param); }); } for (size_t F = 0; F < SourceFiles2.size(); F++) { WorkLatch.AddCount(1); WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() { auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); BasicFile SourceData2; SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead); Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param); }); } WorkLatch.CountDown(); WorkLatch.Wait(); uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs(); uint64_t Raw1Size = 0; tsl::robin_set Chunks1; size_t ChunkedSize1 = 0; for (size_t F = 0; F < SourceFiles1.size(); F++) { const ChunkedInfoWithSource& Info = Infos1[F]; Raw1Size += Info.Info.RawSize; for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index) { const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index]; if (Chunks1.insert(ChunkHash).second) { ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size; } } } uint64_t Raw2Size = 0; tsl::robin_set Chunks2; size_t ChunkedSize2 = 0; size_t DiffSize = 0; for (size_t F = 0; F < SourceFiles2.size(); F++) { const ChunkedInfoWithSource& Info = Infos2[F]; Raw2Size += Info.Info.RawSize; for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index) { const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index]; if (Chunks2.insert(ChunkHash).second) { ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size; if (!Chunks1.contains(ChunkHash)) { DiffSize += Info.ChunkSources[Chunk2Index].Size; } } } } ZEN_INFO( "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, " "RawSize(2) = {}, " "Saved(1) = {}, Saved(2) = {} in {}", NiceBytes(DiffSize), Chunks1.size(), Chunks2.size(), Param.UseThreshold, Param.MinSize, Param.MaxSize, Param.AvgSize, NiceBytes(Raw1Size), NiceBytes(Raw2Size), NiceBytes(Raw1Size - ChunkedSize1), NiceBytes(Raw2Size - ChunkedSize2), NiceTimeSpanMs(ChunkTimeMS)); } } # if 0 for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512) { for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2) { // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle); // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6; // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3; size_t MinSize = (size_t)(MinSizeBase + Wiggle); //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64) size_t MaxSize = 8u * MinSizeBase; { for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32) // size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize; { WorkLatch.AddCount(1); WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]() { auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize }; BasicFile SourceData1; SourceData1.Open(SourcePath1, BasicFile::Mode::kRead); BasicFile SourceData2; SourceData2.Open(SourcePath2, BasicFile::Mode::kRead); ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params); ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params); tsl::robin_set Chunks1; Chunks1.reserve(Info1.Info.ChunkHashes.size()); Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end()); size_t ChunkedSize1 = 0; for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index) { ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size; } size_t DiffSavedSize = 0; size_t ChunkedSize2 = 0; for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index) { ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size; if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end()) { DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size; } } ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}", NiceBytes(DiffSavedSize), Info1.Info.ChunkHashes.size(), Info2.Info.ChunkHashes.size(), MinSize, MaxSize, AvgSize, NiceBytes(Info1.Info.RawSize - ChunkedSize1), NiceBytes(Info2.Info.RawSize - ChunkedSize2)); }); } } } } # endif // 0 // WorkLatch.CountDown(); // WorkLatch.Wait(); } # endif // 0 void chunkedfile_forcelink() { } } // namespace zen #endif