aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunkedfile.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-10-03 11:49:14 +0200
committerGitHub Enterprise <[email protected]>2025-10-03 11:49:14 +0200
commitfaf0b7c9b6a08b095f8dc895904f4f7d3f30dcde (patch)
tree2bcd09fe17af6f25108fd05578e7eda6a827d8ec /src/zenutil/chunkedfile.cpp
parentcache RPC replay fixes (minor) (#544) (diff)
downloadzen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.tar.xz
zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.zip
move chunking code to zenremotestore lib (#545)
Diffstat (limited to 'src/zenutil/chunkedfile.cpp')
-rw-r--r--src/zenutil/chunkedfile.cpp525
1 files changed, 0 insertions, 525 deletions
diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp
deleted file mode 100644
index a2c041ffd..000000000
--- a/src/zenutil/chunkedfile.cpp
+++ /dev/null
@@ -1,525 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkedfile.h>
-
-#include <zencore/basicfile.h>
-#include <zencore/trace.h>
-
-#include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-namespace {
-    // Fixed-size header stored at the very start of a serialized ChunkedInfo blob.
-    //
-    // SerializeChunkedInfo copies this struct byte-for-byte (sizeof(ChunkedHeader) bytes) to the
-    // front of the buffer and DeserializeChunkedInfo copies it straight back, so the member
-    // order and member types define the serialized layout.
-    struct ChunkedHeader
-    {
-        static constexpr uint32_t ExpectedMagic  = 0x646b6863;  // chkd
-        static constexpr uint32_t CurrentVersion = 1;
-
-        uint32_t Magic   = ExpectedMagic;
-        uint32_t Version = CurrentVersion;
-
-        // Counts and offsets default to zero. Magic/Version already default to *valid* values,
-        // so a header that is only partially overwritten (e.g. copied from a buffer shorter than
-        // the struct) would otherwise pass validation while carrying indeterminate sizes.
-        uint32_t ChunkSequenceLength = 0;  // number of uint32_t entries in the chunk sequence
-        uint32_t ChunkHashCount      = 0;  // number of IoHash entries in the chunk hash table
-        uint64_t ChunkSequenceOffset = 0;  // byte offset of the chunk sequence within the blob
-        uint64_t ChunkHashesOffset   = 0;  // byte offset of the chunk hash table within the blob
-        uint64_t RawSize             = 0;  // copied to/from ChunkedInfo::RawSize
-        IoHash   RawHash;                  // copied to/from ChunkedInfo::RawHash
-    };
-}  // namespace
-
-// Packs a ChunkedInfo into a single buffer laid out as
-//     [ChunkedHeader][chunk sequence: uint32_t each][chunk hashes: IoHash each]
-// where the size of every section is rounded up to a multiple of 16 bytes.
-// Throws (via gsl::narrow) if either element count does not fit in a uint32_t.
-IoBuffer
-SerializeChunkedInfo(const ChunkedInfo& Info)
-{
-    ZEN_TRACE_CPU("SerializeChunkedInfo");
-
-    const size_t PaddedHeaderBytes   = RoundUp(sizeof(ChunkedHeader), 16);
-    const size_t PaddedSequenceBytes = RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16);
-    const size_t PaddedHashBytes     = RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16);
-    IoBuffer     Serialized(PaddedHeaderBytes + PaddedSequenceBytes + PaddedHashBytes);
-
-    // Magic and Version keep their in-class defaults; everything else is filled in here.
-    ChunkedHeader Header;
-    Header.ChunkSequenceLength = gsl::narrow<uint32_t>(Info.ChunkSequence.size());
-    Header.ChunkHashCount      = gsl::narrow<uint32_t>(Info.ChunkHashes.size());
-    Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16);
-    Header.ChunkHashesOffset   = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16);
-    Header.RawSize             = Info.RawSize;
-    Header.RawHash             = Info.RawHash;
-
-    MutableMemoryView Target = Serialized.GetMutableView();
-
-    // Section 1: the header itself, at offset 0.
-    MutableMemoryView HeaderTarget = Target.Left(sizeof(Header));
-    HeaderTarget.CopyFrom(MemoryView(&Header, sizeof(Header)));
-
-    // Section 2: the chunk sequence (indices into the hash table).
-    MutableMemoryView SequenceTarget = Target.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength);
-    SequenceTarget.CopyFrom(MemoryView(Info.ChunkSequence.data(), SequenceTarget.GetSize()));
-
-    // Section 3: the table of unique chunk hashes.
-    MutableMemoryView HashTarget = Target.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount);
-    HashTarget.CopyFrom(MemoryView(Info.ChunkHashes.data(), HashTarget.GetSize()));
-
-    return Serialized;
-}
-
-// Rebuilds a ChunkedInfo from a buffer produced by SerializeChunkedInfo.
-//
-// Returns a default-constructed ChunkedInfo when the buffer cannot be trusted:
-//   - it is too small to contain a ChunkedHeader,
-//   - the magic or version does not match,
-//   - the chunk sequence or chunk hash section described by the header does not lie
-//     entirely inside the buffer,
-//   - a chunk sequence entry refers to a hash index that does not exist.
-ChunkedInfo
-DeserializeChunkedInfo(IoBuffer& Buffer)
-{
-    ZEN_TRACE_CPU("DeserializeChunkedInfo");
-    MemoryView     View     = Buffer.GetView();
-    const uint64_t ViewSize = View.GetSize();
-
-    // Reject short buffers before copying: Magic and Version default to valid values, so a
-    // partially filled header would otherwise pass the checks below with garbage sizes.
-    if (ViewSize < sizeof(ChunkedHeader))
-    {
-        return {};
-    }
-
-    ChunkedHeader Header;
-    {
-        MutableMemoryView HeaderWriteView(&Header, sizeof(Header));
-        HeaderWriteView.CopyFrom(View.Left(sizeof(Header)));
-    }
-    if (Header.Magic != ChunkedHeader::ExpectedMagic)
-    {
-        return {};
-    }
-    if (Header.Version != ChunkedHeader::CurrentVersion)
-    {
-        return {};
-    }
-
-    // The counts and offsets come from the buffer, so verify both sections are fully contained
-    // in it before allocating or copying. Written as subtraction so the check cannot overflow.
-    const uint64_t SequenceBytes = sizeof(uint32_t) * static_cast<uint64_t>(Header.ChunkSequenceLength);
-    const uint64_t HashBytes     = sizeof(IoHash) * static_cast<uint64_t>(Header.ChunkHashCount);
-    const auto     FitsInView    = [ViewSize](uint64_t SectionOffset, uint64_t SectionBytes) {
-        return SectionOffset <= ViewSize && SectionBytes <= ViewSize - SectionOffset;
-    };
-    if (!FitsInView(Header.ChunkSequenceOffset, SequenceBytes) || !FitsInView(Header.ChunkHashesOffset, HashBytes))
-    {
-        return {};
-    }
-
-    ChunkedInfo Info;
-    Info.RawSize = Header.RawSize;
-    Info.RawHash = Header.RawHash;
-    Info.ChunkSequence.resize(Header.ChunkSequenceLength);
-    Info.ChunkHashes.resize(Header.ChunkHashCount);
-    {
-        MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength);
-        ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize()));
-    }
-    {
-        MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount);
-        ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize()));
-    }
-
-    // Each sequence entry is used as an index into ChunkHashes (see Reconstruct), so an
-    // out-of-range entry means the data is corrupt.
-    for (uint32_t SequenceIndex : Info.ChunkSequence)
-    {
-        if (SequenceIndex >= Header.ChunkHashCount)
-        {
-            return {};
-        }
-    }
-
-    return Info;
-}
-
-// Rebuilds the original file at TargetPath (opened with kTruncate) by walking
-// Info.ChunkSequence in order, fetching each referenced chunk through GetChunk and
-// writing the payloads back to back starting at offset 0.
-void
-Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk)
-{
-    ZEN_TRACE_CPU("Reconstruct");
-
-    BasicFile OutputFile;
-    OutputFile.Open(TargetPath, BasicFile::Mode::kTruncate);
-    BasicFileWriter OutputWriter(OutputFile, 64 * 1024);
-
-    uint64_t WriteOffset = 0;
-    for (size_t Position = 0; Position < Info.ChunkSequence.size(); ++Position)
-    {
-        // The sequence stores indices into the table of unique chunk hashes.
-        const uint32_t HashIndex = Info.ChunkSequence[Position];
-        IoBuffer       Payload   = GetChunk(Info.ChunkHashes[HashIndex]);
-
-        OutputWriter.Write(Payload.GetData(), Payload.GetSize(), WriteOffset);
-        WriteOffset += Payload.GetSize();
-    }
-}
-
-// Splits the byte range [Offset, Offset + Size) of RawData into chunks at boundaries chosen
-// by ZenChunkHelper (configured from Params) and de-duplicates the chunks by their IoHash.
-//
-// Result.Info.ChunkHashes  - one entry per unique chunk, in order of first appearance
-// Result.Info.ChunkSequence - for every chunk in file order, its index into ChunkHashes
-// Result.ChunkSources       - offset and size of the first occurrence of each unique chunk
-// Result.Info.RawSize/RawHash - Size, and the hash of all bytes fed through the chunker
-//
-// BytesProcessed (optional) is incremented by the length of every chunk as it is emitted.
-// AbortFlag (optional) is polled once per loop iteration; when it reads true a
-// default-constructed result is returned.
-// An empty range (Size == 0) yields a result with no chunks and the hash of zero bytes.
-ChunkedInfoWithSource
-ChunkData(BasicFile&             RawData,
-          uint64_t               Offset,
-          uint64_t               Size,
-          ChunkedParams          Params,
-          std::atomic<uint64_t>* BytesProcessed,
-          std::atomic<bool>*     AbortFlag)
-{
-    ZEN_TRACE_CPU("ChunkData");
-
-    ChunkedInfoWithSource Result;
-
-    // Nothing to scan. Without this early-out the zero-byte view requested below would trip
-    // the non-empty assertion. The result matches what the scanning loop would fall through to.
-    if (Size == 0)
-    {
-        IoHashStream EmptyHashStream;
-        Result.Info.RawSize = 0;
-        Result.Info.RawHash = EmptyHashStream.GetHash();
-        return Result;
-    }
-
-    // Maps a chunk hash to its index in Result.Info.ChunkHashes.
-    tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks;
-
-    ZenChunkHelper Chunker;
-    Chunker.SetUseThreshold(Params.UseThreshold);
-    Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize);
-    size_t       End            = Offset + Size;
-    const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize);
-    BasicFileBuffer RawBuffer(RawData, ScanBufferSize);
-    MemoryView      SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
-    ZEN_ASSERT(!SliceView.IsEmpty());
-    size_t       SliceSize = SliceView.GetSize();
-    IoHashStream RawHashStream;
-    while (Offset < End)
-    {
-        if (AbortFlag != nullptr && AbortFlag->load())
-        {
-            return {};
-        }
-        size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize);
-        if (ScanLength == ZenChunkHelper::kNoBoundaryFound)
-        {
-            if (Offset + SliceSize == End)
-            {
-                // The slice reaches the end of the range: the remainder is the final chunk.
-                ScanLength = SliceSize;
-            }
-            else
-            {
-                // Ran out of buffered data before a boundary: fetch a fresh view starting at the
-                // current chunk start and rescan it from scratch.
-                SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
-                SliceSize = SliceView.GetSize();
-                Chunker.Reset();
-                continue;
-            }
-        }
-        uint32_t   ChunkLength = gsl::narrow<uint32_t>(ScanLength);  // +HashedLength);
-        MemoryView ChunkView   = SliceView.Left(ScanLength);
-        RawHashStream.Append(ChunkView);
-        IoHash ChunkHash = IoHash::HashBuffer(ChunkView);
-        SliceView.RightChopInline(ScanLength);
-        if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end())
-        {
-            // Already seen: only reference the existing entry.
-            Result.Info.ChunkSequence.push_back(It->second);
-        }
-        else
-        {
-            // First occurrence: register the hash and remember where its bytes live in RawData.
-            uint32_t ChunkIndex = gsl::narrow<uint32_t>(Result.Info.ChunkHashes.size());
-            FoundChunks.insert_or_assign(ChunkHash, ChunkIndex);
-            Result.Info.ChunkHashes.push_back(ChunkHash);
-            Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength});
-            Result.Info.ChunkSequence.push_back(ChunkIndex);
-        }
-
-        SliceSize = SliceView.GetSize();
-        Offset += ChunkLength;
-        if (BytesProcessed != nullptr)
-        {
-            BytesProcessed->fetch_add(ChunkLength);
-        }
-    }
-    Result.Info.RawSize = Size;
-    Result.Info.RawHash = RawHashStream.GetHash();
-    return Result;
-}
-
-} // namespace zen
-
-#if ZEN_WITH_TESTS
-# include <zencore/filesystem.h>
-# include <zencore/fmtutils.h>
-# include <zencore/iohash.h>
-# include <zencore/logging.h>
-# include <zencore/scopeguard.h>
-# include <zencore/timer.h>
-# include <zencore/testing.h>
-# include <zencore/testutils.h>
-# include <zencore/workthreadpool.h>
-
-# include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <tsl/robin_map.h>
-# include <tsl/robin_set.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-# if 0
-TEST_CASE("chunkedfile.findparams")
-{
-# if 1
- DirectoryContent SourceContent1;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1);
- const std::vector<std::filesystem::path>& SourceFiles1 = SourceContent1.Files;
- DirectoryContent SourceContent2;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2);
- const std::vector<std::filesystem::path>& SourceFiles2 = SourceContent2.Files;
-# else
- std::filesystem::path SourcePath1 =
- "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- std::filesystem::path SourcePath2 =
- "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- const std::vector<std::filesystem::path>& SourceFiles1 = {SourcePath1};
- const std::vector<std::filesystem::path>& SourceFiles2 = {SourcePath2};
-# endif
- ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598},
- ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222},
- ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442},
- ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478},
- ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880},
- ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678},
- ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994},
- ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714},
- ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636},
- ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609},
- ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955},
- ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792},
- ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338},
- ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568},
- ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297},
- ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372},
- ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805},
- ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263},
- ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360},
- ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976},
- ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493},
- ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504},
- ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664},
- ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432},
- ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520},
- ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378},
- ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421},
- ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459},
- ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502},
- ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540},
- ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423},
- ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668},
- ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547},
- ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}};
-
- static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams);
- std::vector<ChunkedInfoWithSource> Infos1(SourceFiles1.size());
- std::vector<ChunkedInfoWithSource> Infos2(SourceFiles2.size());
-
- WorkerThreadPool WorkerPool(32);
-
- for (size_t I = 0; I < ParamsCount; I++)
- {
- for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++)
- {
- Latch WorkLatch(1);
- ChunkedParams Param = Params[I];
- Param.UseThreshold = UseThreshold == 1;
- Stopwatch Timer;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData1;
- SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead);
- Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param);
- });
- }
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData2;
- SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead);
- Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param);
- });
- }
- WorkLatch.CountDown();
- WorkLatch.Wait();
- uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs();
-
- uint64_t Raw1Size = 0;
- tsl::robin_set<IoHash> Chunks1;
- size_t ChunkedSize1 = 0;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos1[F];
- Raw1Size += Info.Info.RawSize;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index];
- if (Chunks1.insert(ChunkHash).second)
- {
- ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size;
- }
- }
- }
-
- uint64_t Raw2Size = 0;
- tsl::robin_set<IoHash> Chunks2;
- size_t ChunkedSize2 = 0;
- size_t DiffSize = 0;
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos2[F];
- Raw2Size += Info.Info.RawSize;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index];
- if (Chunks2.insert(ChunkHash).second)
- {
- ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size;
- if (!Chunks1.contains(ChunkHash))
- {
- DiffSize += Info.ChunkSources[Chunk2Index].Size;
- }
- }
- }
- }
-
- ZEN_INFO(
- "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, "
- "RawSize(2) = {}, "
- "Saved(1) = {}, Saved(2) = {} in {}",
- NiceBytes(DiffSize),
- Chunks1.size(),
- Chunks2.size(),
- Param.UseThreshold,
- Param.MinSize,
- Param.MaxSize,
- Param.AvgSize,
- NiceBytes(Raw1Size),
- NiceBytes(Raw2Size),
- NiceBytes(Raw1Size - ChunkedSize1),
- NiceBytes(Raw2Size - ChunkedSize2),
- NiceTimeSpanMs(ChunkTimeMS));
- }
- }
-
-# if 0
- for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512)
- {
- for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2)
- {
- // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle);
- // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6;
- // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3;
- size_t MinSize = (size_t)(MinSizeBase + Wiggle);
- //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64)
- size_t MaxSize = 8u * MinSizeBase;
- {
- for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32)
-// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize;
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]()
- {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize };
- BasicFile SourceData1;
- SourceData1.Open(SourcePath1, BasicFile::Mode::kRead);
- BasicFile SourceData2;
- SourceData2.Open(SourcePath2, BasicFile::Mode::kRead);
- ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params);
- ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params);
-
- tsl::robin_set<IoHash> Chunks1;
- Chunks1.reserve(Info1.Info.ChunkHashes.size());
- Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end());
- size_t ChunkedSize1 = 0;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size;
- }
- size_t DiffSavedSize = 0;
- size_t ChunkedSize2 = 0;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size;
- if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end())
- {
- DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size;
- }
- }
- ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}",
- NiceBytes(DiffSavedSize),
- Info1.Info.ChunkHashes.size(),
- Info2.Info.ChunkHashes.size(),
- MinSize,
- MaxSize,
- AvgSize,
- NiceBytes(Info1.Info.RawSize - ChunkedSize1),
- NiceBytes(Info2.Info.RawSize - ChunkedSize2));
- });
- }
- }
- }
- }
-# endif // 0
-
- // WorkLatch.CountDown();
- // WorkLatch.Wait();
-}
-# endif // 0
-
-// Intentionally empty function, only compiled when ZEN_WITH_TESTS is set.
-// NOTE(review): presumably exists so other code can reference a symbol from this translation
-// unit and keep its test cases from being dropped by the linker - confirm against the caller.
-void
-chunkedfile_forcelink()
-{
-}
-
-} // namespace zen
-
-#endif