aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-10-03 11:49:14 +0200
committerGitHub Enterprise <[email protected]>2025-10-03 11:49:14 +0200
commitfaf0b7c9b6a08b095f8dc895904f4f7d3f30dcde (patch)
tree2bcd09fe17af6f25108fd05578e7eda6a827d8ec /src/zenutil
parentcache RPC replay fixes (minor) (#544) (diff)
downloadzen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.tar.xz
zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.zip
move chunking code to zenremotestore lib (#545)
Diffstat (limited to 'src/zenutil')
-rw-r--r--src/zenutil/chunkblock.cpp257
-rw-r--r--src/zenutil/chunkedcontent.cpp953
-rw-r--r--src/zenutil/chunkedfile.cpp525
-rw-r--r--src/zenutil/chunking.cpp383
-rw-r--r--src/zenutil/chunking.h56
-rw-r--r--src/zenutil/chunkingcontroller.cpp359
-rw-r--r--src/zenutil/include/zenutil/chunkblock.h40
-rw-r--r--src/zenutil/include/zenutil/chunkedcontent.h288
-rw-r--r--src/zenutil/include/zenutil/chunkedfile.h59
-rw-r--r--src/zenutil/include/zenutil/chunkingcontroller.h75
-rw-r--r--src/zenutil/zenutil.cpp2
11 files changed, 0 insertions, 2997 deletions
diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp
deleted file mode 100644
index abfc0fb63..000000000
--- a/src/zenutil/chunkblock.cpp
+++ /dev/null
@@ -1,257 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkblock.h>
-
-#include <zencore/compactbinarybuilder.h>
-#include <zencore/fmtutils.h>
-#include <zencore/logging.h>
-
-#include <vector>
-
-namespace zen {
-
-using namespace std::literals;
-
-ChunkBlockDescription
-ParseChunkBlockDescription(const CbObjectView& BlockObject)
-{
- ChunkBlockDescription Result;
- Result.BlockHash = BlockObject["rawHash"sv].AsHash();
- if (Result.BlockHash != IoHash::Zero)
- {
- Result.HeaderSize = BlockObject["headerSize"sv].AsUInt64();
- CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView();
- Result.ChunkRawHashes.reserve(ChunksArray.Num());
- for (CbFieldView ChunkView : ChunksArray)
- {
- Result.ChunkRawHashes.push_back(ChunkView.AsHash());
- }
-
- CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView();
- Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num());
- for (CbFieldView ChunkView : ChunkRawLengthsArray)
- {
- Result.ChunkRawLengths.push_back(ChunkView.AsUInt32());
- }
-
- CbArrayView ChunkCompressedLengthsArray = BlockObject["chunkCompressedLengths"sv].AsArrayView();
- Result.ChunkCompressedLengths.reserve(ChunkCompressedLengthsArray.Num());
- for (CbFieldView ChunkView : ChunkCompressedLengthsArray)
- {
- Result.ChunkCompressedLengths.push_back(ChunkView.AsUInt32());
- }
- }
- return Result;
-}
-
-std::vector<ChunkBlockDescription>
-ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject)
-{
- if (!BlocksObject)
- {
- return {};
- }
- std::vector<ChunkBlockDescription> Result;
- CbArrayView Blocks = BlocksObject["blocks"sv].AsArrayView();
- Result.reserve(Blocks.Num());
- for (CbFieldView BlockView : Blocks)
- {
- CbObjectView BlockObject = BlockView.AsObjectView();
- Result.emplace_back(ParseChunkBlockDescription(BlockObject));
- }
- return Result;
-}
-
-CbObject
-BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData)
-{
- ZEN_ASSERT(Block.BlockHash != IoHash::Zero);
- ZEN_ASSERT(Block.HeaderSize > 0);
- ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkRawHashes.size());
- ZEN_ASSERT(Block.ChunkCompressedLengths.size() == Block.ChunkRawHashes.size());
-
- CbObjectWriter Writer;
- Writer.AddHash("rawHash"sv, Block.BlockHash);
- Writer.AddInteger("headerSize"sv, Block.HeaderSize);
- Writer.BeginArray("rawHashes"sv);
- {
- for (const IoHash& ChunkHash : Block.ChunkRawHashes)
- {
- Writer.AddHash(ChunkHash);
- }
- }
- Writer.EndArray();
-
- Writer.BeginArray("chunkRawLengths");
- {
- for (uint32_t ChunkSize : Block.ChunkRawLengths)
- {
- Writer.AddInteger(ChunkSize);
- }
- }
- Writer.EndArray();
-
- Writer.BeginArray("chunkCompressedLengths");
- {
- for (uint32_t ChunkSize : Block.ChunkCompressedLengths)
- {
- Writer.AddInteger(ChunkSize);
- }
- }
- Writer.EndArray();
-
- Writer.AddObject("metadata", MetaData);
-
- return Writer.Save();
-}
-
-ChunkBlockDescription
-GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash)
-{
- ChunkBlockDescription BlockDescription = {{.BlockHash = IoHash::HashBuffer(BlockPayload)}};
- if (BlockDescription.BlockHash != RawHash)
- {
- throw std::runtime_error(fmt::format("Block {} content hash {} does not match block hash", RawHash, BlockDescription.BlockHash));
- }
- if (IterateChunkBlock(
- BlockPayload,
- [&BlockDescription, RawHash](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) {
- if (CompositeBuffer Decompressed = Chunk.DecompressToComposite(); Decompressed)
- {
- IoHash ChunkHash = IoHash::HashBuffer(Decompressed.Flatten());
- if (ChunkHash != AttachmentHash)
- {
- throw std::runtime_error(
- fmt::format("Chunk {} in block {} content hash {} does not match chunk", AttachmentHash, RawHash, ChunkHash));
- }
- BlockDescription.ChunkRawHashes.push_back(AttachmentHash);
- BlockDescription.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Decompressed.GetSize()));
- BlockDescription.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(Chunk.GetCompressedSize()));
- }
- else
- {
- throw std::runtime_error(fmt::format("Chunk {} in block {} is not a compressed buffer", AttachmentHash, RawHash));
- }
- },
- BlockDescription.HeaderSize))
- {
- return BlockDescription;
- }
- else
- {
- throw std::runtime_error(fmt::format("Block {} is malformed", RawHash));
- }
-}
-
-CompressedBuffer
-GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock)
-{
- const size_t ChunkCount = FetchChunks.size();
-
- std::vector<SharedBuffer> ChunkSegments;
- ChunkSegments.resize(1);
- ChunkSegments.reserve(1 + ChunkCount);
- OutBlock.ChunkRawHashes.reserve(ChunkCount);
- OutBlock.ChunkRawLengths.reserve(ChunkCount);
- OutBlock.ChunkCompressedLengths.reserve(ChunkCount);
- {
- IoBuffer TempBuffer(ChunkCount * 9);
- MutableMemoryView View = TempBuffer.GetMutableView();
- uint8_t* BufferStartPtr = reinterpret_cast<uint8_t*>(View.GetData());
- uint8_t* BufferEndPtr = BufferStartPtr;
- BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr);
- for (const auto& It : FetchChunks)
- {
- std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first);
- uint64_t ChunkSize = 0;
- std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments();
- for (const SharedBuffer& Segment : Segments)
- {
- ZEN_ASSERT(Segment.IsOwned());
- ChunkSize += Segment.GetSize();
- ChunkSegments.push_back(Segment);
- }
- BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr);
- OutBlock.ChunkRawHashes.push_back(It.first);
- OutBlock.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Chunk.first));
- OutBlock.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(ChunkSize));
- }
- ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd());
- ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr);
- ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow<size_t>(TempBufferLength)));
- OutBlock.HeaderSize = TempBufferLength;
- }
- CompressedBuffer CompressedBlock =
- CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None);
- OutBlock.BlockHash = CompressedBlock.DecodeRawHash();
- return CompressedBlock;
-}
-
-std::vector<uint32_t>
-ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize)
-{
- const uint8_t* ReadPtr = reinterpret_cast<const uint8_t*>(BlockView.GetData());
- uint32_t NumberSize;
- uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize);
- ReadPtr += NumberSize;
- std::vector<uint32_t> ChunkSizes;
- ChunkSizes.reserve(ChunkCount);
- while (ChunkCount--)
- {
- if (ReadPtr >= BlockView.GetDataEnd())
- {
- throw std::runtime_error("Invalid block header, block data ended unexpectedly");
- }
- uint64_t ChunkSize = ReadVarUInt(ReadPtr, NumberSize);
- if (ChunkSize > std::numeric_limits<uint32_t>::max())
- {
- throw std::runtime_error("Invalid block header, header data is corrupt");
- }
- if (ChunkSize < 1)
- {
- throw std::runtime_error("Invalid block header, header data is corrupt");
- }
- ChunkSizes.push_back(gsl::narrow<uint32_t>(ChunkSize));
- ReadPtr += NumberSize;
- }
- uint64_t Offset = std::distance((const uint8_t*)BlockView.GetData(), ReadPtr);
- OutHeaderSize = Offset;
- return ChunkSizes;
-}
-
-bool
-IterateChunkBlock(const SharedBuffer& BlockPayload,
- std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor,
- uint64_t& OutHeaderSize)
-{
- ZEN_ASSERT(BlockPayload);
- if (BlockPayload.GetSize() < 1)
- {
- return false;
- }
-
- MemoryView BlockView = BlockPayload.GetView();
-
- std::vector<uint32_t> ChunkSizes = ReadChunkBlockHeader(BlockView, OutHeaderSize);
- uint64_t Offset = OutHeaderSize;
- OutHeaderSize = Offset;
- for (uint64_t ChunkSize : ChunkSizes)
- {
- IoBuffer Chunk(BlockPayload.AsIoBuffer(), Offset, ChunkSize);
- IoHash AttachmentRawHash;
- uint64_t AttachmentRawSize;
- CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize);
- ZEN_ASSERT_SLOW(IoHash::HashBuffer(CompressedChunk.DecompressToComposite()) == AttachmentRawHash);
- if (!CompressedChunk)
- {
- ZEN_ERROR("Invalid chunk in block");
- return false;
- }
- Visitor(std::move(CompressedChunk), AttachmentRawHash);
- Offset += ChunkSize;
- ZEN_ASSERT(Offset <= BlockView.GetSize());
- }
- return true;
-};
-
-} // namespace zen
diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp
deleted file mode 100644
index 757bcfae5..000000000
--- a/src/zenutil/chunkedcontent.cpp
+++ /dev/null
@@ -1,953 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkedcontent.h>
-
-#include <zencore/filesystem.h>
-#include <zencore/fmtutils.h>
-#include <zencore/logging.h>
-#include <zencore/scopeguard.h>
-#include <zencore/timer.h>
-#include <zencore/trace.h>
-
-#include <zenutil/chunkedfile.h>
-#include <zenutil/chunkingcontroller.h>
-#include <zenutil/parallelwork.h>
-#include <zenutil/workerpools.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_set.h>
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-using namespace std::literals;
-
-namespace {
- void AddChunkSequence(ChunkingStatistics& Stats,
- ChunkedContentData& InOutChunkedContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- const IoHash& RawHash,
- std::span<const uint32_t> ChunkSequence,
- std::span<const IoHash> ChunkHashes,
- std::span<const uint64_t> ChunkRawSizes)
- {
- ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size());
- InOutChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(ChunkSequence.size()));
- InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size());
-
- for (uint32_t ChunkedSequenceIndex : ChunkSequence)
- {
- const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex];
- if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size());
- ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex);
- InOutChunkedContent.ChunkHashes.push_back(ChunkHash);
- InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- Stats.UniqueChunksFound++;
- Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex];
- }
- }
- InOutChunkedContent.SequenceRawHashes.push_back(RawHash);
- Stats.UniqueSequencesFound++;
- }
-
- void AddChunkSequence(ChunkingStatistics& Stats,
- ChunkedContentData& InOutChunkedContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- const IoHash& RawHash,
- const uint64_t RawSize)
- {
- InOutChunkedContent.ChunkCounts.push_back(1);
-
- if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end())
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size());
- ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex);
- InOutChunkedContent.ChunkHashes.push_back(RawHash);
- InOutChunkedContent.ChunkRawSizes.push_back(RawSize);
- InOutChunkedContent.ChunkOrders.push_back(ChunkIndex);
- Stats.UniqueChunksFound++;
- Stats.UniqueBytesFound += RawSize;
- }
- InOutChunkedContent.SequenceRawHashes.push_back(RawHash);
- Stats.UniqueSequencesFound++;
- }
-
- IoHash HashOneFile(ChunkingStatistics& Stats,
- const ChunkingController& InChunkingController,
- ChunkedFolderContent& OutChunkedContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex,
- RwLock& Lock,
- const std::filesystem::path& FolderPath,
- uint32_t PathIndex,
- std::atomic<bool>& AbortFlag)
- {
- ZEN_TRACE_CPU("ChunkFolderContent");
-
- const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex];
- const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex];
-
- if (RawSize == 0)
- {
- return IoHash::Zero;
- }
- else
- {
- ChunkedInfoWithSource Chunked;
- const bool DidChunking =
- InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag);
- if (DidChunking)
- {
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
- std::vector<uint64_t> ChunkSizes;
- ChunkSizes.reserve(Chunked.ChunkSources.size());
- for (const ChunkSource& Source : Chunked.ChunkSources)
- {
- ChunkSizes.push_back(Source.Size);
- }
- AddChunkSequence(Stats,
- OutChunkedContent.ChunkedContent,
- ChunkHashToChunkIndex,
- Chunked.Info.RawHash,
- Chunked.Info.ChunkSequence,
- Chunked.Info.ChunkHashes,
- ChunkSizes);
- Stats.UniqueSequencesFound++;
- }
- });
- Stats.FilesChunked++;
- return Chunked.Info.RawHash;
- }
- else
- {
- ZEN_TRACE_CPU("HashOnly");
-
- IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred());
- if (Buffer.GetSize() != RawSize)
- {
- throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path));
- }
- const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed);
-
- Lock.WithExclusiveLock([&]() {
- if (!RawHashToSequenceRawHashIndex.contains(Hash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())});
- AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize);
- Stats.UniqueSequencesFound++;
- }
- });
- return Hash;
- }
- }
- }
-
- std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); }
-
-} // namespace
-
-std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv};
-
-std::string_view
-ToString(SourcePlatform Platform)
-{
- return FolderContentSourcePlatformNames[(size_t)Platform];
-}
-
-SourcePlatform
-FromString(std::string_view Platform, SourcePlatform Default)
-{
- for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++)
- {
- if (Platform == FolderContentSourcePlatformNames[Index])
- {
- return (SourcePlatform)Index;
- }
- }
- return Default;
-}
-
-SourcePlatform
-GetSourceCurrentPlatform()
-{
-#if ZEN_PLATFORM_WINDOWS
- return SourcePlatform::Windows;
-#endif
-#if ZEN_PLATFORM_MAC
- return SourcePlatform::MacOS;
-#endif
-#if ZEN_PLATFORM_LINUX
- return SourcePlatform::Linux;
-#endif
-}
-
-bool
-FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs)
-{
-#if ZEN_PLATFORM_WINDOWS
- return (Lhs & 0xff) == (Rhs & 0xff);
-#endif
-#if ZEN_PLATFORM_MAC
- return Lhs == Rhs;
-#endif
-#if ZEN_PLATFORM_LINUX
- return Lhs == Rhs;
-#endif
-}
-
-bool
-FolderContent::operator==(const FolderContent& Rhs) const
-{
- if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) &&
- (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size()))
- {
- size_t PathCount = 0;
- for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
- {
- if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string())
- {
- return false;
- }
- }
- return true;
- }
- return false;
-}
-
-bool
-FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const
-{
- ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual");
- tsl::robin_map<std::string, size_t> RhsPathToIndex;
- const size_t RhsPathCount = Rhs.Paths.size();
- RhsPathToIndex.reserve(RhsPathCount);
- for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
- {
- RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex});
- }
- const size_t PathCount = Paths.size();
- for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
- {
- if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end())
- {
- const size_t RhsPathIndex = It->second;
- if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) ||
- (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) ||
- (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex]))
- {
- return false;
- }
- }
- else
- {
- return false;
- }
- }
- return true;
-}
-
-void
-FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutPathIndexesOufOfDate)
-{
- ZEN_TRACE_CPU("FolderContent::UpdateState");
- tsl::robin_map<std::string, uint32_t> RhsPathToIndex;
- const uint32_t RhsPathCount = gsl::narrow<uint32_t>(Rhs.Paths.size());
- RhsPathToIndex.reserve(RhsPathCount);
- for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++)
- {
- RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex});
- }
- uint32_t PathCount = gsl::narrow<uint32_t>(Paths.size());
- for (uint32_t PathIndex = 0; PathIndex < PathCount;)
- {
- if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end())
- {
- const uint32_t RhsPathIndex = It->second;
-
- if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) ||
- (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex]))
- {
- RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex];
- ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex];
- OutPathIndexesOufOfDate.push_back(PathIndex);
- }
- Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex];
- PathIndex++;
- }
- else
- {
- Paths.erase(Paths.begin() + PathIndex);
- RawSizes.erase(RawSizes.begin() + PathIndex);
- Attributes.erase(Attributes.begin() + PathIndex);
- ModificationTicks.erase(ModificationTicks.begin() + PathIndex);
- PathCount--;
- }
- }
-}
-
-FolderContent
-GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths)
-{
- ZEN_TRACE_CPU("FolderContent::GetUpdatedContent");
-
- const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size());
-
- FolderContent Result = {.Platform = Old.Platform};
- Result.Paths.reserve(NewPathCount);
- Result.RawSizes.reserve(NewPathCount);
- Result.Attributes.reserve(NewPathCount);
- Result.ModificationTicks.reserve(NewPathCount);
-
- tsl::robin_map<std::string, uint32_t> NewPathToIndex;
- NewPathToIndex.reserve(NewPathCount);
- for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++)
- {
- NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex});
- }
-
- uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size());
- for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++)
- {
- if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end())
- {
- const uint32_t NewPathIndex = It->second;
-
- if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) ||
- (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex]))
- {
- Result.Paths.push_back(New.Paths[NewPathIndex]);
- Result.RawSizes.push_back(New.RawSizes[NewPathIndex]);
- Result.Attributes.push_back(New.Attributes[NewPathIndex]);
- Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]);
- }
- }
- else
- {
- OutDeletedPaths.push_back(Old.Paths[OldPathIndex]);
- }
- }
- return Result;
-}
-
-void
-SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output)
-{
- ZEN_TRACE_CPU("SaveFolderContentToCompactBinary");
- Output.AddString("platform"sv, ToString(Content.Platform));
- compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output);
- compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output);
- compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output);
- compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output);
-}
-
-FolderContent
-LoadFolderContentToCompactBinary(CbObjectView Input)
-{
- ZEN_TRACE_CPU("LoadFolderContentToCompactBinary");
- FolderContent Content;
- Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
- compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths);
- compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes);
- compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes);
- compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks);
- return Content;
-}
-
-FolderContent
-GetFolderContent(GetFolderContentStatistics& Stats,
- const std::filesystem::path& RootPath,
- std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory,
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile,
- WorkerThreadPool& WorkerPool,
- int32_t UpdateIntervalMS,
- std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag)
-{
- ZEN_TRACE_CPU("GetFolderContent");
-
- Stopwatch Timer;
- auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- FolderContent Content;
- struct AsyncVisitor : public GetDirectoryContentVisitor
- {
- AsyncVisitor(GetFolderContentStatistics& Stats,
- std::atomic<bool>& AbortFlag,
- FolderContent& Content,
- std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory,
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile)
- : m_Stats(Stats)
- , m_AbortFlag(AbortFlag)
- , m_FoundContent(Content)
- , m_AcceptDirectory(std::move(AcceptDirectory))
- , m_AcceptFile(std::move(AcceptFile))
- {
- }
- virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override
- {
- if (!m_AbortFlag)
- {
- m_Stats.FoundFileCount += Content.FileNames.size();
- for (uint64_t FileSize : Content.FileSizes)
- {
- m_Stats.FoundFileByteCount += FileSize;
- }
- std::string RelativeDirectoryPath = RelativeRoot.generic_string();
- if (m_AcceptDirectory(RelativeDirectoryPath))
- {
- std::vector<std::filesystem::path> Paths;
- std::vector<uint64_t> RawSizes;
- std::vector<uint32_t> Attributes;
- std::vector<uint64_t> ModificatonTicks;
- Paths.reserve(Content.FileNames.size());
- RawSizes.reserve(Content.FileNames.size());
- Attributes.reserve(Content.FileNames.size());
- ModificatonTicks.reserve(Content.FileModificationTicks.size());
-
- for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++)
- {
- const std::filesystem::path& FileName = Content.FileNames[FileIndex];
- std::string RelativePath = (RelativeRoot / FileName).generic_string();
- std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/');
- if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex]))
- {
- Paths.emplace_back(std::move(RelativePath));
- RawSizes.emplace_back(Content.FileSizes[FileIndex]);
- Attributes.emplace_back(Content.FileAttributes[FileIndex]);
- ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]);
-
- m_Stats.AcceptedFileCount++;
- m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex];
- }
- }
- m_Lock.WithExclusiveLock([&]() {
- m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end());
- m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end());
- m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end());
- m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(),
- ModificatonTicks.begin(),
- ModificatonTicks.end());
- });
- }
- }
- }
-
- GetFolderContentStatistics& m_Stats;
- std::atomic<bool>& m_AbortFlag;
- RwLock m_Lock;
- FolderContent& m_FoundContent;
- std::function<bool(const std::string_view& RelativePath)> m_AcceptDirectory;
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)> m_AcceptFile;
- } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile));
-
- Latch PendingWork(1);
- GetDirectoryContent(RootPath,
- DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes |
- DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick,
- Visitor,
- WorkerPool,
- PendingWork);
- PendingWork.CountDown();
- while (!PendingWork.Wait(UpdateIntervalMS))
- {
- UpdateCallback(AbortFlag.load(), PendingWork.Remaining());
- }
- std::vector<size_t> Order;
- size_t PathCount = Content.Paths.size();
- Order.resize(Content.Paths.size());
- std::vector<std::string> Parents;
- Parents.reserve(PathCount);
- std::vector<std::string> Filenames;
- Filenames.reserve(PathCount);
- for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++)
- {
- Order[OrderIndex] = OrderIndex;
- Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string());
- Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string());
- }
- std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) {
- const std::string& LhsParent = Parents[Lhs];
- const std::string& RhsParent = Parents[Rhs];
- if (LhsParent < RhsParent)
- {
- return true;
- }
- else if (LhsParent > RhsParent)
- {
- return false;
- }
- return Filenames[Lhs] < Filenames[Rhs];
- });
- FolderContent OrderedContent;
- OrderedContent.Paths.reserve(PathCount);
- OrderedContent.RawSizes.reserve(PathCount);
- OrderedContent.Attributes.reserve(PathCount);
- OrderedContent.ModificationTicks.reserve(PathCount);
- for (size_t OrderIndex : Order)
- {
- OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex]));
- OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]);
- OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]);
- OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]);
- }
- return OrderedContent;
-}
-
-void
-SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output)
-{
- ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary");
- Output.AddString("platform"sv, ToString(Content.Platform));
- compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output);
- compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output);
- compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output);
- compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output);
-
- Output.BeginObject("chunkedContent");
- compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output);
- compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output);
- Output.EndObject(); // chunkedContent
-}
-
-ChunkedFolderContent
-LoadChunkedFolderContentToCompactBinary(CbObjectView Input)
-{
- ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary");
- ChunkedFolderContent Content;
- Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
- compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths);
- compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes);
- compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes);
- compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes);
-
- CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView();
- compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes);
- compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts);
- compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders);
- compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes);
- compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes);
- return Content;
-}
-
-ChunkedFolderContent
-MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays)
-{
- ZEN_TRACE_CPU("MergeChunkedFolderContents");
-
- ZEN_ASSERT(!Overlays.empty());
-
- ChunkedFolderContent Result;
- const size_t BasePathCount = Base.Paths.size();
- Result.Paths.reserve(BasePathCount);
- Result.RawSizes.reserve(BasePathCount);
- Result.Attributes.reserve(BasePathCount);
- Result.RawHashes.reserve(BasePathCount);
-
- const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size();
- Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size());
- Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size());
-
- tsl::robin_map<std::string, std::filesystem::path> GenericPathToActualPath;
- for (const std::filesystem::path& Path : Base.Paths)
- {
- GenericPathToActualPath.insert({PathCompareString(Path), Path});
- }
- for (const ChunkedFolderContent& Overlay : Overlays)
- {
- for (const std::filesystem::path& Path : Overlay.Paths)
- {
- GenericPathToActualPath.insert({PathCompareString(Path), Path});
- }
- }
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
-
- auto BuildOverlayPaths = [](std::span<const ChunkedFolderContent> Overlays) -> tsl::robin_set<std::string> {
- tsl::robin_set<std::string> Result;
- for (const ChunkedFolderContent& OverlayContent : Overlays)
- {
- for (const std::filesystem::path& Path : OverlayContent.Paths)
- {
- Result.insert(PathCompareString(Path));
- }
- }
- return Result;
- };
-
- auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result,
- const ChunkedFolderContent& OverlayContent,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex,
- const tsl::robin_map<std::string, std::filesystem::path>& GenericPathToActualPath,
- std::span<const ChunkedFolderContent> Overlays) {
- const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent);
- tsl::robin_set<std::string> BaseOverlayPaths = BuildOverlayPaths(Overlays);
- for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++)
- {
- std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]);
- if (!BaseOverlayPaths.contains(GenericPath))
- {
- // This asset will not be overridden by a later layer - add it
-
- const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath);
- Result.Paths.push_back(OriginalPath);
- const IoHash& RawHash = OverlayContent.RawHashes[PathIndex];
- Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]);
- Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]);
- Result.RawHashes.push_back(RawHash);
-
- if (OverlayContent.RawSizes[PathIndex] > 0)
- {
- if (!RawHashToSequenceRawHashIndex.contains(RawHash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- ChunkingStatistics Stats;
- std::span<const uint32_t> OriginalChunkOrder =
- std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
- AddChunkSequence(Stats,
- Result.ChunkedContent,
- ChunkHashToChunkIndex,
- RawHash,
- OriginalChunkOrder,
- OverlayContent.ChunkedContent.ChunkHashes,
- OverlayContent.ChunkedContent.ChunkRawSizes);
- Stats.UniqueSequencesFound++;
- }
- }
- }
- }
- };
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MergedChunkHashToChunkIndex;
- AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays);
- for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++)
- {
- AddContent(Result,
- Overlays[OverlayIndex],
- MergedChunkHashToChunkIndex,
- RawHashToSequenceRawHashIndex,
- GenericPathToActualPath,
- Overlays.subspan(OverlayIndex + 1));
- }
- return Result;
-}
-
-ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent,
- const ChunkedContentLookup& BaseContentLookup,
- std::span<const std::filesystem::path> DeletedPaths)
-{
- ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
-
- ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
- ChunkedFolderContent Result = {.Platform = BaseContent.Platform};
- if (DeletedPaths.size() < BaseContent.Paths.size())
- {
- tsl::robin_set<std::string> DeletedPathSet;
- DeletedPathSet.reserve(DeletedPaths.size());
- for (const std::filesystem::path& DeletedPath : DeletedPaths)
- {
- DeletedPathSet.insert(PathCompareString(DeletedPath));
- }
-
- const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size();
- std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1);
-
- const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size();
- Result.Paths.reserve(ExpectedPathCount);
- Result.RawSizes.reserve(ExpectedPathCount);
- Result.Attributes.reserve(ExpectedPathCount);
- Result.RawHashes.reserve(ExpectedPathCount);
-
- Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount);
- Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount);
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
- for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++)
- {
- const std::filesystem::path& Path = BaseContent.Paths[PathIndex];
- if (!DeletedPathSet.contains(PathCompareString(Path)))
- {
- const IoHash& RawHash = BaseContent.RawHashes[PathIndex];
- const uint64_t RawSize = BaseContent.RawSizes[PathIndex];
- Result.Paths.push_back(Path);
- Result.RawSizes.push_back(RawSize);
- Result.Attributes.push_back(BaseContent.Attributes[PathIndex]);
- Result.RawHashes.push_back(RawHash);
- if (RawSize > 0)
- {
- if (!RawHashToSequenceRawHashIndex.contains(RawHash))
- {
- RawHashToSequenceRawHashIndex.insert(
- {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())});
- const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash);
- const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex];
- const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
-
- std::span<const uint32_t> OriginalChunkOrder =
- std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount);
-
- Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size()));
-
- for (uint32_t OldChunkIndex : OriginalChunkOrder)
- {
- if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1)
- {
- Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex);
- }
- else
- {
- const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size());
- NewChunkIndexes[OldChunkIndex] = NewChunkIndex;
- const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex];
- const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex];
- Result.ChunkedContent.ChunkHashes.push_back(ChunkHash);
- Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize);
- Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex);
- }
- }
- Result.ChunkedContent.SequenceRawHashes.push_back(RawHash);
- }
- }
- }
- }
- }
- return Result;
-}
-
-ChunkedFolderContent
-DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths)
-{
- ZEN_TRACE_CPU("DeletePathsFromChunkedContent");
- ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size());
- if (DeletedPaths.size() == BaseContent.Paths.size())
- {
- return {};
- }
- const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent);
- return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths);
-}
-
-ChunkedFolderContent
-ChunkFolderContent(ChunkingStatistics& Stats,
- WorkerThreadPool& WorkerPool,
- const std::filesystem::path& RootPath,
- const FolderContent& Content,
- const ChunkingController& InChunkingController,
- int32_t UpdateIntervalMS,
- std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag)
-{
- ZEN_TRACE_CPU("ChunkFolderContent");
-
- Stopwatch Timer;
- auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); });
-
- ChunkedFolderContent Result = {.Platform = Content.Platform,
- .Paths = Content.Paths,
- .RawSizes = Content.RawSizes,
- .Attributes = Content.Attributes};
- const size_t ItemCount = Result.Paths.size();
- Result.RawHashes.resize(ItemCount, IoHash::Zero);
- Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less
- Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file
- Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more
- Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more
- Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToChunkSequenceIndex;
- RawHashToChunkSequenceIndex.reserve(ItemCount);
- ChunkHashToChunkIndex.reserve(ItemCount);
- {
- std::vector<uint32_t> Order;
- Order.resize(ItemCount);
- for (uint32_t I = 0; I < ItemCount; I++)
- {
- Order[I] = I;
- }
-
- // Handle the biggest files first so we don't end up with one straggling large file at the end
- // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs];
- //});
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex;
- RawHashToSequenceRawHashIndex.reserve(ItemCount);
-
- RwLock Lock;
-
- ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- for (uint32_t PathIndex : Order)
- {
- if (Work.IsAborted())
- {
- break;
- }
- Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool()
- [&, PathIndex](std::atomic<bool>& AbortFlag) {
- if (!AbortFlag)
- {
- IoHash RawHash = HashOneFile(Stats,
- InChunkingController,
- Result,
- ChunkHashToChunkIndex,
- RawHashToSequenceRawHashIndex,
- Lock,
- RootPath,
- PathIndex,
- AbortFlag);
- Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; });
- Stats.FilesProcessed++;
- }
- });
- }
-
- Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining());
- });
- }
- return Result;
-}
-
-ChunkedContentLookup
-BuildChunkedContentLookup(const ChunkedFolderContent& Content)
-{
- ZEN_TRACE_CPU("BuildChunkedContentLookup");
-
- struct ChunkLocationReference
- {
- uint32_t ChunkIndex = (uint32_t)-1;
- uint32_t SequenceIndex = (uint32_t)-1;
- uint64_t Offset = (uint64_t)-1;
- };
-
- ChunkedContentLookup Result;
- {
- const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size());
- Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount);
- Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount);
- uint32_t OrderOffset = 0;
- for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size();
- SequenceRawHashIndex++)
- {
- Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex});
- Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset);
- OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex];
- }
- }
-
- std::vector<ChunkLocationReference> Locations;
- Locations.reserve(Content.ChunkedContent.ChunkOrders.size());
- for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++)
- {
- const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex];
- const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex];
- uint64_t LocationOffset = 0;
- for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++)
- {
- uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
-
- Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset});
-
- LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
- }
- }
-
- std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) {
- if (Lhs.ChunkIndex < Rhs.ChunkIndex)
- {
- return true;
- }
- if (Lhs.ChunkIndex > Rhs.ChunkIndex)
- {
- return false;
- }
- if (Lhs.SequenceIndex < Rhs.SequenceIndex)
- {
- return true;
- }
- if (Lhs.SequenceIndex > Rhs.SequenceIndex)
- {
- return false;
- }
- return Lhs.Offset < Rhs.Offset;
- });
-
- Result.ChunkSequenceLocations.reserve(Locations.size());
- const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size());
- Result.ChunkHashToChunkIndex.reserve(ChunkCount);
- size_t RangeOffset = 0;
- for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++)
- {
- Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex});
- uint32_t Count = 0;
- while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex))
- {
- const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count];
- Result.ChunkSequenceLocations.push_back(
- ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex,
- .Offset = LocationReference.Offset});
- Count++;
- }
- Result.ChunkSequenceLocationOffset.push_back(RangeOffset);
- Result.ChunkSequenceLocationCounts.push_back(Count);
- RangeOffset += Count;
- }
-
- Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1);
- Result.PathExtensionHash.resize(Content.Paths.size());
- for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++)
- {
- std::string LowercaseExtension = Content.Paths[PathIndex].extension().string();
- std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), [](char c) {
- return (char)::tolower(c);
- });
- Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension);
- if (Content.RawSizes[PathIndex] > 0)
- {
- const IoHash& RawHash = Content.RawHashes[PathIndex];
- auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash);
- ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end());
- const uint32_t SequenceIndex = SequenceIndexIt->second;
- if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1)
- {
- Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex;
- }
- }
- }
-
- return Result;
-}
-
-} // namespace zen
diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp
deleted file mode 100644
index a2c041ffd..000000000
--- a/src/zenutil/chunkedfile.cpp
+++ /dev/null
@@ -1,525 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkedfile.h>
-
-#include <zencore/basicfile.h>
-#include <zencore/trace.h>
-
-#include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-namespace {
- struct ChunkedHeader
- {
- static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd
- static constexpr uint32_t CurrentVersion = 1;
-
- uint32_t Magic = ExpectedMagic;
- uint32_t Version = CurrentVersion;
- uint32_t ChunkSequenceLength;
- uint32_t ChunkHashCount;
- uint64_t ChunkSequenceOffset;
- uint64_t ChunkHashesOffset;
- uint64_t RawSize = 0;
- IoHash RawHash;
- };
-} // namespace
-
-IoBuffer
-SerializeChunkedInfo(const ChunkedInfo& Info)
-{
- ZEN_TRACE_CPU("SerializeChunkedInfo");
- size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) +
- RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16);
- IoBuffer HeaderData(HeaderSize);
-
- ChunkedHeader Header;
- Header.ChunkSequenceLength = gsl::narrow<uint32_t>(Info.ChunkSequence.size());
- Header.ChunkHashCount = gsl::narrow<uint32_t>(Info.ChunkHashes.size());
- Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16);
- Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16);
- Header.RawSize = Info.RawSize;
- Header.RawHash = Info.RawHash;
-
- MutableMemoryView WriteView = HeaderData.GetMutableView();
- {
- MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header));
- HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header)));
- }
- {
- MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength);
- ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize()));
- }
- {
- MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount);
- ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize()));
- }
-
- return HeaderData;
-}
-
-ChunkedInfo
-DeserializeChunkedInfo(IoBuffer& Buffer)
-{
- ZEN_TRACE_CPU("DeserializeChunkedInfo");
- MemoryView View = Buffer.GetView();
- ChunkedHeader Header;
- {
- MutableMemoryView HeaderWriteView(&Header, sizeof(Header));
- HeaderWriteView.CopyFrom(View.Left(sizeof(Header)));
- }
- if (Header.Magic != ChunkedHeader::ExpectedMagic)
- {
- return {};
- }
- if (Header.Version != ChunkedHeader::CurrentVersion)
- {
- return {};
- }
- ChunkedInfo Info;
- Info.RawSize = Header.RawSize;
- Info.RawHash = Header.RawHash;
- Info.ChunkSequence.resize(Header.ChunkSequenceLength);
- Info.ChunkHashes.resize(Header.ChunkHashCount);
- {
- MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength);
- ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize()));
- }
- {
- MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount);
- ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize()));
- }
-
- return Info;
-}
-
-void
-Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk)
-{
- ZEN_TRACE_CPU("Reconstruct");
- BasicFile Reconstructed;
- Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate);
- BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024);
- uint64_t Offset = 0;
- for (uint32_t SequenceIndex : Info.ChunkSequence)
- {
- IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]);
- ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset);
- Offset += Chunk.GetSize();
- }
-}
-
-ChunkedInfoWithSource
-ChunkData(BasicFile& RawData,
- uint64_t Offset,
- uint64_t Size,
- ChunkedParams Params,
- std::atomic<uint64_t>* BytesProcessed,
- std::atomic<bool>* AbortFlag)
-{
- ZEN_TRACE_CPU("ChunkData");
-
- ChunkedInfoWithSource Result;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks;
-
- ZenChunkHelper Chunker;
- Chunker.SetUseThreshold(Params.UseThreshold);
- Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize);
- size_t End = Offset + Size;
- const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize);
- BasicFileBuffer RawBuffer(RawData, ScanBufferSize);
- MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
- ZEN_ASSERT(!SliceView.IsEmpty());
- size_t SliceSize = SliceView.GetSize();
- IoHashStream RawHashStream;
- while (Offset < End)
- {
- if (AbortFlag != nullptr && AbortFlag->load())
- {
- return {};
- }
- size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize);
- if (ScanLength == ZenChunkHelper::kNoBoundaryFound)
- {
- if (Offset + SliceSize == End)
- {
- ScanLength = SliceSize;
- }
- else
- {
- SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
- SliceSize = SliceView.GetSize();
- Chunker.Reset();
- continue;
- }
- }
- uint32_t ChunkLength = gsl::narrow<uint32_t>(ScanLength); // +HashedLength);
- MemoryView ChunkView = SliceView.Left(ScanLength);
- RawHashStream.Append(ChunkView);
- IoHash ChunkHash = IoHash::HashBuffer(ChunkView);
- SliceView.RightChopInline(ScanLength);
- if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end())
- {
- Result.Info.ChunkSequence.push_back(It->second);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(Result.Info.ChunkHashes.size());
- FoundChunks.insert_or_assign(ChunkHash, ChunkIndex);
- Result.Info.ChunkHashes.push_back(ChunkHash);
- Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength});
- Result.Info.ChunkSequence.push_back(ChunkIndex);
- }
-
- SliceSize = SliceView.GetSize();
- Offset += ChunkLength;
- if (BytesProcessed != nullptr)
- {
- BytesProcessed->fetch_add(ChunkLength);
- }
- }
- Result.Info.RawSize = Size;
- Result.Info.RawHash = RawHashStream.GetHash();
- return Result;
-}
-
-} // namespace zen
-
-#if ZEN_WITH_TESTS
-# include <zencore/filesystem.h>
-# include <zencore/fmtutils.h>
-# include <zencore/iohash.h>
-# include <zencore/logging.h>
-# include <zencore/scopeguard.h>
-# include <zencore/timer.h>
-# include <zencore/testing.h>
-# include <zencore/testutils.h>
-# include <zencore/workthreadpool.h>
-
-# include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <tsl/robin_map.h>
-# include <tsl/robin_set.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-# if 0
-TEST_CASE("chunkedfile.findparams")
-{
-# if 1
- DirectoryContent SourceContent1;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1);
- const std::vector<std::filesystem::path>& SourceFiles1 = SourceContent1.Files;
- DirectoryContent SourceContent2;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2);
- const std::vector<std::filesystem::path>& SourceFiles2 = SourceContent2.Files;
-# else
- std::filesystem::path SourcePath1 =
- "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- std::filesystem::path SourcePath2 =
- "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- const std::vector<std::filesystem::path>& SourceFiles1 = {SourcePath1};
- const std::vector<std::filesystem::path>& SourceFiles2 = {SourcePath2};
-# endif
- ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598},
- ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222},
- ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442},
- ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478},
- ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880},
- ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678},
- ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994},
- ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714},
- ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636},
- ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609},
- ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955},
- ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792},
- ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338},
- ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568},
- ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297},
- ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372},
- ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805},
- ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263},
- ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360},
- ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976},
- ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493},
- ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504},
- ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664},
- ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432},
- ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520},
- ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378},
- ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421},
- ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459},
- ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502},
- ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540},
- ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423},
- ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668},
- ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547},
- ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}};
-
- static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams);
- std::vector<ChunkedInfoWithSource> Infos1(SourceFiles1.size());
- std::vector<ChunkedInfoWithSource> Infos2(SourceFiles2.size());
-
- WorkerThreadPool WorkerPool(32);
-
- for (size_t I = 0; I < ParamsCount; I++)
- {
- for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++)
- {
- Latch WorkLatch(1);
- ChunkedParams Param = Params[I];
- Param.UseThreshold = UseThreshold == 1;
- Stopwatch Timer;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData1;
- SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead);
- Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param);
- });
- }
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData2;
- SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead);
- Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param);
- });
- }
- WorkLatch.CountDown();
- WorkLatch.Wait();
- uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs();
-
- uint64_t Raw1Size = 0;
- tsl::robin_set<IoHash> Chunks1;
- size_t ChunkedSize1 = 0;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos1[F];
- Raw1Size += Info.Info.RawSize;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index];
- if (Chunks1.insert(ChunkHash).second)
- {
- ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size;
- }
- }
- }
-
- uint64_t Raw2Size = 0;
- tsl::robin_set<IoHash> Chunks2;
- size_t ChunkedSize2 = 0;
- size_t DiffSize = 0;
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos2[F];
- Raw2Size += Info.Info.RawSize;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index];
- if (Chunks2.insert(ChunkHash).second)
- {
- ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size;
- if (!Chunks1.contains(ChunkHash))
- {
- DiffSize += Info.ChunkSources[Chunk2Index].Size;
- }
- }
- }
- }
-
- ZEN_INFO(
- "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, "
- "RawSize(2) = {}, "
- "Saved(1) = {}, Saved(2) = {} in {}",
- NiceBytes(DiffSize),
- Chunks1.size(),
- Chunks2.size(),
- Param.UseThreshold,
- Param.MinSize,
- Param.MaxSize,
- Param.AvgSize,
- NiceBytes(Raw1Size),
- NiceBytes(Raw2Size),
- NiceBytes(Raw1Size - ChunkedSize1),
- NiceBytes(Raw2Size - ChunkedSize2),
- NiceTimeSpanMs(ChunkTimeMS));
- }
- }
-
-# if 0
- for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512)
- {
- for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2)
- {
- // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle);
- // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6;
- // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3;
- size_t MinSize = (size_t)(MinSizeBase + Wiggle);
- //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64)
- size_t MaxSize = 8u * MinSizeBase;
- {
- for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32)
-// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize;
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]()
- {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize };
- BasicFile SourceData1;
- SourceData1.Open(SourcePath1, BasicFile::Mode::kRead);
- BasicFile SourceData2;
- SourceData2.Open(SourcePath2, BasicFile::Mode::kRead);
- ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params);
- ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params);
-
- tsl::robin_set<IoHash> Chunks1;
- Chunks1.reserve(Info1.Info.ChunkHashes.size());
- Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end());
- size_t ChunkedSize1 = 0;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size;
- }
- size_t DiffSavedSize = 0;
- size_t ChunkedSize2 = 0;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size;
- if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end())
- {
- DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size;
- }
- }
- ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}",
- NiceBytes(DiffSavedSize),
- Info1.Info.ChunkHashes.size(),
- Info2.Info.ChunkHashes.size(),
- MinSize,
- MaxSize,
- AvgSize,
- NiceBytes(Info1.Info.RawSize - ChunkedSize1),
- NiceBytes(Info2.Info.RawSize - ChunkedSize2));
- });
- }
- }
- }
- }
-# endif // 0
-
- // WorkLatch.CountDown();
- // WorkLatch.Wait();
-}
-# endif // 0
-
-void
-chunkedfile_forcelink()
-{
-}
-
-} // namespace zen
-
-#endif
diff --git a/src/zenutil/chunking.cpp b/src/zenutil/chunking.cpp
deleted file mode 100644
index 71f0a06e4..000000000
--- a/src/zenutil/chunking.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include "chunking.h"
-
-#include <gsl/gsl-lite.hpp>
-
-#include <cmath>
-#include <cstring>
-
-namespace zen::detail {
-
-static const uint32_t BuzhashTable[] = {
- 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3,
- 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c,
- 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64,
- 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2,
- 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59,
- 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393,
- 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c,
- 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4,
- 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a,
- 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a,
- 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5,
- 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c,
- 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf,
- 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5,
- 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c,
- 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140,
- 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5,
- 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510,
- 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9,
- 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1,
- 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3,
- 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b,
- 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc,
- 0xf9c18d66, 0x593ade65, 0xd95ddf11,
-};
-
-// ROL operation (compiler turns this into a ROL when optimizing)
-ZEN_FORCEINLINE static uint32_t
-Rotate32(uint32_t Value, size_t RotateCount)
-{
- RotateCount &= 31;
-
- return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount));
-}
-
-} // namespace zen::detail
-
-namespace zen {
-
-void
-ZenChunkHelper::Reset()
-{
- InternalReset();
-
- m_BytesScanned = 0;
-}
-
-void
-ZenChunkHelper::InternalReset()
-{
- m_CurrentHash = 0;
- m_CurrentChunkSize = 0;
- m_WindowSize = 0;
-}
-
-void
-ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize)
-{
- if (m_WindowSize)
- return; // Already started
-
- static_assert(kChunkSizeLimitMin > kWindowSize);
-
- if (AvgSize)
- {
- // TODO: Validate AvgSize range
- }
- else
- {
- if (MinSize && MaxSize)
- {
- AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2));
- }
- else if (MinSize)
- {
- AvgSize = MinSize * 4;
- }
- else if (MaxSize)
- {
- AvgSize = MaxSize / 4;
- }
- else
- {
- AvgSize = kDefaultAverageChunkSize;
- }
- }
-
- if (MinSize)
- {
- // TODO: Validate MinSize range
- }
- else
- {
- MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin);
- }
-
- if (MaxSize)
- {
- // TODO: Validate MaxSize range
- }
- else
- {
- MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax);
- }
-
- m_Discriminator = gsl::narrow<uint32_t>(AvgSize - MinSize);
-
- if (m_Discriminator < MinSize)
- {
- m_Discriminator = gsl::narrow<uint32_t>(MinSize);
- }
-
- if (m_Discriminator > MaxSize)
- {
- m_Discriminator = gsl::narrow<uint32_t>(MaxSize);
- }
-
- m_Threshold = gsl::narrow<uint32_t>((uint64_t(std::numeric_limits<uint32_t>::max()) + 1) / m_Discriminator);
-
- m_ChunkSizeMin = MinSize;
- m_ChunkSizeMax = MaxSize;
- m_ChunkSizeAvg = AvgSize;
-}
-
-size_t
-ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount)
-{
- size_t Result = InternalScanChunk(DataBytesIn, ByteCount);
-
- if (Result == kNoBoundaryFound)
- {
- m_BytesScanned += ByteCount;
- }
- else
- {
- m_BytesScanned += Result;
- }
-
- return Result;
-}
-
-size_t
-ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount)
-{
- size_t CurrentOffset = 0;
- const uint8_t* CursorPtr = reinterpret_cast<const uint8_t*>(DataBytesIn);
-
- // There's no point in updating the hash if we know we're not
- // going to have a cut point, so just skip the data. This logic currently
- // provides roughly a 20% speedup on my machine
-
- const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize;
-
- if (m_CurrentChunkSize < NeedHashOffset)
- {
- const uint32_t SkipBytes = gsl::narrow<uint32_t>(std::min<uint64_t>(ByteCount, NeedHashOffset - m_CurrentChunkSize));
-
- ByteCount -= SkipBytes;
- m_CurrentChunkSize += SkipBytes;
- CurrentOffset += SkipBytes;
- CursorPtr += SkipBytes;
-
- m_WindowSize = 0;
-
- if (ByteCount == 0)
- {
- return kNoBoundaryFound;
- }
- }
-
- // Fill window first
-
- if (m_WindowSize < kWindowSize)
- {
- const uint32_t FillBytes = uint32_t(std::min<size_t>(ByteCount, kWindowSize - m_WindowSize));
-
- memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes);
-
- CursorPtr += FillBytes;
-
- m_WindowSize += FillBytes;
- m_CurrentChunkSize += FillBytes;
-
- CurrentOffset += FillBytes;
- ByteCount -= FillBytes;
-
- if (m_WindowSize < kWindowSize)
- {
- return kNoBoundaryFound;
- }
-
- // We have a full window, initialize hash
-
- uint32_t CurrentHash = 0;
-
- for (int i = 1; i < kWindowSize; ++i)
- {
- CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i);
- }
-
- m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]];
- }
-
- // Scan for boundaries (i.e points where the hash matches the value determined by
- // the discriminator)
-
- uint32_t CurrentHash = m_CurrentHash;
- uint32_t CurrentChunkSize = m_CurrentChunkSize;
-
- size_t Index = CurrentChunkSize % kWindowSize;
-
- if (m_Threshold && m_UseThreshold)
- {
- // This is roughly 4x faster than the general modulo approach on my
- // TR 3990X (~940MB/sec) and doesn't require any special parameters to
- // achieve max performance
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = CurrentHash <= m_Threshold;
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
- else if ((m_Discriminator & (m_Discriminator - 1)) == 0)
- {
- // This is quite a bit faster than the generic modulo path, but
- // requires a very specific average chunk size to be used. If you
- // pass in an even power-of-two divided by 0.75 as the average
- // chunk size you'll hit this path
-
- const uint32_t Mask = m_Discriminator - 1;
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = (CurrentHash & Mask) == Mask;
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
- else
- {
- // This is the slowest path, which caps out around 250MB/sec for large sizes
- // on my TR3900X
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1);
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
-
- m_CurrentChunkSize = CurrentChunkSize;
- m_CurrentHash = CurrentHash;
-
- return kNoBoundaryFound;
-}
-
-} // namespace zen
diff --git a/src/zenutil/chunking.h b/src/zenutil/chunking.h
deleted file mode 100644
index 09c56454f..000000000
--- a/src/zenutil/chunking.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-#include <zencore/zencore.h>
-
-namespace zen {
-
-/** Content-defined chunking helper
- */
-class ZenChunkHelper
-{
-public:
- void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize);
- size_t ScanChunk(const void* DataBytes, size_t ByteCount);
- void Reset();
-
- // This controls which chunking approach is used - threshold or
- // modulo based. Threshold is faster and generates similarly sized
- // chunks
- void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; }
-
- inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; }
- inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; }
- inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; }
- inline uint64_t BytesScanned() const { return m_BytesScanned; }
-
- static constexpr size_t kNoBoundaryFound = size_t(~0ull);
-
-private:
- size_t m_ChunkSizeMin = 0;
- size_t m_ChunkSizeMax = 0;
- size_t m_ChunkSizeAvg = 0;
-
- uint32_t m_Discriminator = 0; // Computed in SetChunkSize()
- uint32_t m_Threshold = 0; // Computed in SetChunkSize()
-
- bool m_UseThreshold = true;
-
- static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024;
- static constexpr size_t kChunkSizeLimitMin = 1024;
- static constexpr size_t kDefaultAverageChunkSize = 64 * 1024;
-
- static constexpr int kWindowSize = 48;
- uint8_t m_Window[kWindowSize];
- uint32_t m_WindowSize = 0;
-
- uint32_t m_CurrentHash = 0;
- uint32_t m_CurrentChunkSize = 0;
-
- uint64_t m_BytesScanned = 0;
-
- size_t InternalScanChunk(const void* DataBytes, size_t ByteCount);
- void InternalReset();
-};
-
-} // namespace zen
diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp
deleted file mode 100644
index 6fb4182c0..000000000
--- a/src/zenutil/chunkingcontroller.cpp
+++ /dev/null
@@ -1,359 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkingcontroller.h>
-
-#include <zencore/basicfile.h>
-#include <zencore/compactbinarybuilder.h>
-#include <zencore/filesystem.h>
-#include <zencore/trace.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-using namespace std::literals;
-
-namespace {
- std::vector<std::string> ReadStringArray(CbArrayView StringArray)
- {
- std::vector<std::string> Result;
- Result.reserve(StringArray.Num());
- for (CbFieldView FieldView : StringArray)
- {
- Result.emplace_back(FieldView.AsString());
- }
- return Result;
- }
-
- ChunkedParams ReadChunkParams(CbObjectView Params)
- {
- bool UseThreshold = Params["UseThreshold"sv].AsBool(true);
- size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize);
- size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize);
- size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize);
-
- return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize};
- }
-
- void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params)
- {
- Writer.BeginObject("ChunkingParams"sv);
- {
- Writer.AddBool("UseThreshold"sv, Params.UseThreshold);
-
- Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize);
- Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize);
- Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize);
- }
- Writer.EndObject(); // ChunkingParams
- }
-
- bool IsElfFile(BasicFile& Buffer)
- {
- if (Buffer.FileSize() > 4)
- {
- uint32_t ElfCheck = 0;
- Buffer.Read(&ElfCheck, 4, 0);
- if (ElfCheck == 0x464c457f)
- {
- return true;
- }
- }
- return false;
- }
-
- bool IsMachOFile(BasicFile& Buffer)
- {
- if (Buffer.FileSize() > 4)
- {
- uint32_t MachOCheck = 0;
- Buffer.Read(&MachOCheck, 4, 0);
- if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe))
- {
- return true;
- }
- }
- return false;
- }
-} // namespace
-
-class BasicChunkingController : public ChunkingController
-{
-public:
- BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {}
-
- BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
-
- virtual bool ProcessFile(const std::filesystem::path& InputPath,
- uint64_t RawSize,
- ChunkedInfoWithSource& OutChunked,
- std::atomic<uint64_t>& BytesProcessed,
- std::atomic<bool>& AbortFlag) const override
- {
- ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
- const bool ExcludeFromChunking =
- std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
- m_Settings.ExcludeExtensions.end();
-
- if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
- {
- return false;
- }
-
- BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
- if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
- {
- return false;
- }
- if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
- {
- return false;
- }
-
- OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
- return true;
- }
-
- virtual std::string_view GetName() const override { return Name; }
-
- virtual CbObject GetParameters() const override
- {
- CbObjectWriter Writer;
- Writer.BeginArray("ChunkExcludeExtensions"sv);
- {
- for (const std::string& Extension : m_Settings.ExcludeExtensions)
- {
- Writer.AddString(Extension);
- }
- }
- Writer.EndArray(); // ChunkExcludeExtensions
-
- Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
- Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
- Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
-
- WriteChunkParams(Writer, m_Settings.ChunkingParams);
-
- return Writer.Save();
- }
- static constexpr std::string_view Name = "BasicChunkingController"sv;
-
-private:
- static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters)
- {
- return BasicChunkingControllerSettings{
- .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
- .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
- .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
- .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
- .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())};
- }
-
- const BasicChunkingControllerSettings m_Settings;
-};
-
-class ChunkingControllerWithFixedChunking : public ChunkingController
-{
-public:
- ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {}
-
- ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
-
- virtual bool ProcessFile(const std::filesystem::path& InputPath,
- uint64_t RawSize,
- ChunkedInfoWithSource& OutChunked,
- std::atomic<uint64_t>& BytesProcessed,
- std::atomic<bool>& AbortFlag) const override
- {
- ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
- const bool ExcludeFromChunking =
- std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
- m_Settings.ExcludeExtensions.end();
-
- if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
- {
- return false;
- }
-
- const bool FixedChunkingExtension =
- std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) !=
- m_Settings.FixedChunkingExtensions.end();
-
- if (FixedChunkingExtension)
- {
- if (RawSize < m_Settings.MinSizeForFixedChunking)
- {
- return false;
- }
- ZEN_TRACE_CPU("FixedChunking");
- IoHashStream FullHasher;
- BasicFile Source(InputPath, BasicFile::Mode::kRead);
- uint64_t Offset = 0;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize);
- ChunkHashToChunkIndex.reserve(ExpectedChunkCount);
- OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount);
- OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount);
- OutChunked.ChunkSources.reserve(ExpectedChunkCount);
-
- static const uint64_t BufferingSize = 256u * 1024u;
-
- IoHashStream ChunkHasher;
-
- while (Offset < RawSize)
- {
- if (AbortFlag)
- {
- return false;
- }
-
- ChunkHasher.Reset();
-
- uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize);
- if (ChunkSize >= (BufferingSize + BufferingSize / 2))
- {
- ScanFile(Source.Handle(),
- Offset,
- ChunkSize,
- BufferingSize,
- [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) {
- FullHasher.Append(Data, Size);
- ChunkHasher.Append(Data, Size);
- BytesProcessed.fetch_add(Size);
- });
- }
- else
- {
- IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize);
- FullHasher.Append(ChunkData);
- ChunkHasher.Append(ChunkData);
- BytesProcessed.fetch_add(ChunkSize);
- }
-
- const IoHash ChunkHash = ChunkHasher.GetHash();
- if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
- {
- OutChunked.Info.ChunkSequence.push_back(It->second);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size());
- OutChunked.Info.ChunkHashes.push_back(ChunkHash);
- OutChunked.Info.ChunkSequence.push_back(ChunkIndex);
- OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)});
- }
- Offset += ChunkSize;
- }
- OutChunked.Info.RawSize = RawSize;
- OutChunked.Info.RawHash = FullHasher.GetHash();
- return true;
- }
- else
- {
- BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
- if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
- {
- return false;
- }
- if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
- {
- return false;
- }
-
- OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
- return true;
- }
- }
-
- virtual std::string_view GetName() const override { return Name; }
-
- virtual CbObject GetParameters() const override
- {
- CbObjectWriter Writer;
- Writer.BeginArray("FixedChunkingExtensions");
- {
- for (const std::string& Extension : m_Settings.FixedChunkingExtensions)
- {
- Writer.AddString(Extension);
- }
- }
- Writer.EndArray(); // ChunkExcludeExtensions
-
- Writer.BeginArray("ChunkExcludeExtensions"sv);
- {
- for (const std::string& Extension : m_Settings.ExcludeExtensions)
- {
- Writer.AddString(Extension);
- }
- }
- Writer.EndArray(); // ChunkExcludeExtensions
-
- Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
- Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
-
- Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
-
- WriteChunkParams(Writer, m_Settings.ChunkingParams);
-
- Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize);
- Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking);
- return Writer.Save();
- }
-
- static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv;
-
-private:
- static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters)
- {
- return ChunkingControllerWithFixedChunkingSettings{
- .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()),
- .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
- .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
- .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
- .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
- .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()),
- .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize),
- .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)};
- }
-
- const ChunkingControllerWithFixedChunkingSettings m_Settings;
-};
-
-std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings)
-{
- return std::make_unique<BasicChunkingController>(Settings);
-}
-std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(CbObjectView Parameters)
-{
- return std::make_unique<BasicChunkingController>(Parameters);
-}
-
-std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting)
-{
- return std::make_unique<ChunkingControllerWithFixedChunking>(Setting);
-}
-std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(CbObjectView Parameters)
-{
- return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters);
-}
-
-std::unique_ptr<ChunkingController>
-CreateChunkingController(std::string_view Name, CbObjectView Parameters)
-{
- if (Name == BasicChunkingController::Name)
- {
- return CreateBasicChunkingController(Parameters);
- }
- else if (Name == ChunkingControllerWithFixedChunking::Name)
- {
- return CreateChunkingControllerWithFixedChunking(Parameters);
- }
- return {};
-}
-
-} // namespace zen
diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h
deleted file mode 100644
index 277580c74..000000000
--- a/src/zenutil/include/zenutil/chunkblock.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/iohash.h>
-
-#include <zencore/compactbinary.h>
-#include <zencore/compress.h>
-
-#include <optional>
-#include <vector>
-
-namespace zen {
-
-struct ThinChunkBlockDescription
-{
- IoHash BlockHash;
- std::vector<IoHash> ChunkRawHashes;
-};
-
-struct ChunkBlockDescription : public ThinChunkBlockDescription
-{
- uint64_t HeaderSize;
- std::vector<uint32_t> ChunkRawLengths;
- std::vector<uint32_t> ChunkCompressedLengths;
-};
-
-std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject);
-ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject);
-CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData);
-ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash);
-typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc;
-
-CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock);
-bool IterateChunkBlock(const SharedBuffer& BlockPayload,
- std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor,
- uint64_t& OutHeaderSize);
-std::vector<uint32_t> ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize);
-
-} // namespace zen
diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h
deleted file mode 100644
index 306a5d990..000000000
--- a/src/zenutil/include/zenutil/chunkedcontent.h
+++ /dev/null
@@ -1,288 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/compactbinary.h>
-#include <zencore/compactbinarybuilder.h>
-#include <zencore/iohash.h>
-
-#include <filesystem>
-#include <vector>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-class CbWriter;
-class ChunkingController;
-class WorkerThreadPool;
-
-enum class SourcePlatform
-{
- Windows = 0,
- Linux = 1,
- MacOS = 2,
- _Count
-};
-
-std::string_view ToString(SourcePlatform Platform);
-SourcePlatform FromString(std::string_view Platform, SourcePlatform Default);
-SourcePlatform GetSourceCurrentPlatform();
-
-struct FolderContent
-{
- SourcePlatform Platform = GetSourceCurrentPlatform();
- std::vector<std::filesystem::path> Paths;
- std::vector<uint64_t> RawSizes;
- std::vector<uint32_t> Attributes;
- std::vector<uint64_t> ModificationTicks;
-
- bool operator==(const FolderContent& Rhs) const;
-
- bool AreKnownFilesEqual(const FolderContent& Rhs) const;
- void UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& PathIndexesOufOfDate);
- static bool AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs);
-};
-
-FolderContent GetUpdatedContent(const FolderContent& Old,
- const FolderContent& New,
- std::vector<std::filesystem::path>& OutDeletedPathIndexes);
-
-void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output);
-FolderContent LoadFolderContentToCompactBinary(CbObjectView Input);
-
-struct GetFolderContentStatistics
-{
- std::atomic<uint64_t> FoundFileCount = 0;
- std::atomic<uint64_t> FoundFileByteCount = 0;
- std::atomic<uint64_t> AcceptedFileCount = 0;
- std::atomic<uint64_t> AcceptedFileByteCount = 0;
- uint64_t ElapsedWallTimeUS = 0;
-};
-
-FolderContent GetFolderContent(GetFolderContentStatistics& Stats,
- const std::filesystem::path& RootPath,
- std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory,
- std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile,
- WorkerThreadPool& WorkerPool,
- int32_t UpdateIntervalMS,
- std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag);
-
-struct ChunkedContentData
-{
- // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes
- // ChunkCounts for that index will be the number of indexes in ChunkOrders that describe
- // the sequence of chunks required to reconstruct the asset.
- // Offset into ChunkOrders is based on how many entries in ChunkOrders the previous [n - 1] SequenceRawHashes uses
- std::vector<IoHash> SequenceRawHashes; // Raw hash for Chunk sequence
- std::vector<uint32_t> ChunkCounts; // Chunk count of ChunkOrder for SequenceRawHashes[n]
- std::vector<uint32_t> ChunkOrders; // Chunk sequence indexed into ChunkHashes, ChunkCounts[n] indexes per SequenceRawHashes[n]
- std::vector<IoHash> ChunkHashes; // Unique chunk hashes
- std::vector<uint64_t> ChunkRawSizes; // Unique chunk raw size for ChunkHash[n]
-};
-
-struct ChunkedFolderContent
-{
- SourcePlatform Platform = GetSourceCurrentPlatform();
- std::vector<std::filesystem::path> Paths;
- std::vector<uint64_t> RawSizes;
- std::vector<uint32_t> Attributes;
- std::vector<IoHash> RawHashes;
- ChunkedContentData ChunkedContent;
-};
-
-struct ChunkedContentLookup
-{
- struct ChunkSequenceLocation
- {
- uint32_t SequenceIndex = (uint32_t)-1;
- uint64_t Offset = (uint64_t)-1;
- };
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex;
- std::vector<uint32_t> SequenceIndexChunkOrderOffset;
- std::vector<ChunkSequenceLocation> ChunkSequenceLocations;
- std::vector<size_t>
- ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex
- std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex
- std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash
- std::vector<uint32_t> PathExtensionHash;
-};
-
-void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output);
-ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input);
-
-ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays);
-ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base,
- const ChunkedContentLookup& BaseContentLookup,
- std::span<const std::filesystem::path> DeletedPaths);
-ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths);
-
-struct ChunkingStatistics
-{
- std::atomic<uint64_t> FilesProcessed = 0;
- std::atomic<uint64_t> FilesChunked = 0;
- std::atomic<uint64_t> BytesHashed = 0;
- std::atomic<uint64_t> UniqueChunksFound = 0;
- std::atomic<uint64_t> UniqueSequencesFound = 0;
- std::atomic<uint64_t> UniqueBytesFound = 0;
- uint64_t ElapsedWallTimeUS = 0;
-};
-
-ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats,
- WorkerThreadPool& WorkerPool,
- const std::filesystem::path& RootPath,
- const FolderContent& Content,
- const ChunkingController& InChunkingController,
- int32_t UpdateIntervalMS,
- std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag);
-
-ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content);
-
-inline std::pair<size_t, uint32_t>
-GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex)
-{
- return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]);
-}
-
-inline std::span<const ChunkedContentLookup::ChunkSequenceLocation>
-GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex)
-{
- std::pair<size_t, uint32_t> Range = GetChunkSequenceLocationRange(Lookup, ChunkIndex);
- return std::span<const ChunkedContentLookup::ChunkSequenceLocation>(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second);
-}
-
-inline uint32_t
-GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
-{
- return Lookup.RawHashToSequenceIndex.at(RawHash);
-}
-
-inline uint32_t
-GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
-{
- return Lookup.RawHashToSequenceIndex.at(RawHash);
-}
-
-inline uint32_t
-GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex)
-{
- return Lookup.SequenceIndexFirstPathIndex[SequenceIndex];
-}
-
-inline uint32_t
-GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
-{
- const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash);
- return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex);
-}
-
-namespace compactbinary_helpers {
- template<typename Type>
- void WriteArray(std::span<const Type> Values, std::string_view ArrayName, CbWriter& Output)
- {
- Output.BeginArray(ArrayName);
- for (const Type Value : Values)
- {
- Output << Value;
- }
- Output.EndArray();
- }
-
- template<typename Type>
- void WriteArray(const std::vector<Type>& Values, std::string_view ArrayName, CbWriter& Output)
- {
- WriteArray(std::span<const Type>(Values), ArrayName, Output);
- }
-
- template<>
- inline void WriteArray(std::span<const std::filesystem::path> Values, std::string_view ArrayName, CbWriter& Output)
- {
- Output.BeginArray(ArrayName);
- for (const std::filesystem::path& Path : Values)
- {
- Output.AddString((const char*)Path.generic_u8string().c_str());
- }
- Output.EndArray();
- }
-
- template<>
- inline void WriteArray(const std::vector<std::filesystem::path>& Values, std::string_view ArrayName, CbWriter& Output)
- {
- WriteArray(std::span<const std::filesystem::path>(Values), ArrayName, Output);
- }
-
- inline void WriteBinaryAttachmentArray(std::span<const IoHash> Values, std::string_view ArrayName, CbWriter& Output)
- {
- Output.BeginArray(ArrayName);
- for (const IoHash& Hash : Values)
- {
- Output.AddBinaryAttachment(Hash);
- }
- Output.EndArray();
- }
-
- inline void WriteBinaryAttachmentArray(const std::vector<IoHash>& Values, std::string_view ArrayName, CbWriter& Output)
- {
- WriteArray(std::span<const IoHash>(Values), ArrayName, Output);
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint32_t>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsUInt32());
- }
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint64_t>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsUInt64());
- }
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<std::filesystem::path>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- std::u8string_view U8Path = ItemView.AsU8String();
- Result.push_back(std::filesystem::path(U8Path));
- }
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsHash());
- }
- }
-
- inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsBinaryAttachment());
- }
- }
-
-} // namespace compactbinary_helpers
-
-} // namespace zen
diff --git a/src/zenutil/include/zenutil/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h
deleted file mode 100644
index 4cec80fdb..000000000
--- a/src/zenutil/include/zenutil/chunkedfile.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/iobuffer.h>
-#include <zencore/iohash.h>
-#include <zencore/zencore.h>
-
-#include <functional>
-#include <vector>
-
-namespace zen {
-
-class BasicFile;
-
-struct ChunkedInfo
-{
- uint64_t RawSize = 0;
- IoHash RawHash;
- std::vector<uint32_t> ChunkSequence;
- std::vector<IoHash> ChunkHashes;
-};
-
-struct ChunkSource
-{
- uint64_t Offset; // 8
- uint32_t Size; // 4
-};
-
-struct ChunkedInfoWithSource
-{
- ChunkedInfo Info;
- std::vector<ChunkSource> ChunkSources;
-};
-
-struct ChunkedParams
-{
- bool UseThreshold = true;
- size_t MinSize = (2u * 1024u) - 128u;
- size_t MaxSize = (16u * 1024u);
- size_t AvgSize = (3u * 1024u);
-};
-
-static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340};
-
-ChunkedInfoWithSource ChunkData(BasicFile& RawData,
- uint64_t Offset,
- uint64_t Size,
- ChunkedParams Params = {},
- std::atomic<uint64_t>* BytesProcessed = nullptr,
- std::atomic<bool>* AbortFlag = nullptr);
-void Reconstruct(const ChunkedInfo& Info,
- const std::filesystem::path& TargetPath,
- std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk);
-IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info);
-ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer);
-
-void chunkedfile_forcelink();
-} // namespace zen
diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h
deleted file mode 100644
index 315502265..000000000
--- a/src/zenutil/include/zenutil/chunkingcontroller.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/compactbinary.h>
-
-#include <zenutil/chunkedfile.h>
-
-#include <atomic>
-#include <filesystem>
-
-namespace zen {
-
-const std::vector<std::string> DefaultChunkingExcludeExtensions =
- {".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip"};
-const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"};
-const bool DefaultChunkingExcludeElfFiles = true;
-const bool DefaultChunkingExcludeMachOFiles = true;
-
-const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u,
- .MaxSize = 128u * 1024u,
- .AvgSize = ((8u * 4u) * 1024u) + 128u};
-
-const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize;
-
-const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u;
-const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u;
-
-struct ChunkedInfoWithSource;
-
-class ChunkingController
-{
-public:
- virtual ~ChunkingController() {}
-
- // Return true if the input file was processed. If true is returned OutChunked will contain the chunked info
- virtual bool ProcessFile(const std::filesystem::path& InputPath,
- uint64_t RawSize,
- ChunkedInfoWithSource& OutChunked,
- std::atomic<uint64_t>& BytesProcessed,
- std::atomic<bool>& AbortFlag) const = 0;
- virtual std::string_view GetName() const = 0;
- virtual CbObject GetParameters() const = 0;
-};
-
-struct BasicChunkingControllerSettings
-{
- std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions;
- bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles;
- bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles;
- uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit;
- ChunkedParams ChunkingParams = DefaultChunkedParams;
-};
-
-std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings);
-std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters);
-
-struct ChunkingControllerWithFixedChunkingSettings
-{
- std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions;
- std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions;
- bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles;
- bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles;
- uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit;
- ChunkedParams ChunkingParams = DefaultChunkedParams;
- uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize;
- uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking;
-};
-
-std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting);
-std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters);
-
-std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters);
-
-} // namespace zen
diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp
index 37b229c49..88be8a244 100644
--- a/src/zenutil/zenutil.cpp
+++ b/src/zenutil/zenutil.cpp
@@ -6,7 +6,6 @@
# include <zenutil/cache/cacherequests.h>
# include <zenutil/cache/rpcrecording.h>
-# include <zenutil/chunkedfile.h>
# include <zenutil/commandlineoptions.h>
# include <zenutil/parallelwork.h>
# include <zenutil/wildcard.h>
@@ -19,7 +18,6 @@ zenutil_forcelinktests()
cachepolicy_forcelink();
cache::rpcrecord_forcelink();
cacherequests_forcelink();
- chunkedfile_forcelink();
commandlineoptions_forcelink();
parallellwork_forcelink();
wildcard_forcelink();