diff options
| author | Dan Engelbrecht <[email protected]> | 2025-10-03 11:49:14 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-10-03 11:49:14 +0200 |
| commit | faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde (patch) | |
| tree | 2bcd09fe17af6f25108fd05578e7eda6a827d8ec /src/zenutil | |
| parent | cache RPC replay fixes (minor) (#544) (diff) | |
| download | zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.tar.xz zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.zip | |
move chunking code to zenremotestore lib (#545)
Diffstat (limited to 'src/zenutil')
| -rw-r--r-- | src/zenutil/chunkblock.cpp | 257 | ||||
| -rw-r--r-- | src/zenutil/chunkedcontent.cpp | 953 | ||||
| -rw-r--r-- | src/zenutil/chunkedfile.cpp | 525 | ||||
| -rw-r--r-- | src/zenutil/chunking.cpp | 383 | ||||
| -rw-r--r-- | src/zenutil/chunking.h | 56 | ||||
| -rw-r--r-- | src/zenutil/chunkingcontroller.cpp | 359 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkblock.h | 40 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkedcontent.h | 288 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkedfile.h | 59 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkingcontroller.h | 75 | ||||
| -rw-r--r-- | src/zenutil/zenutil.cpp | 2 |
11 files changed, 0 insertions, 2997 deletions
diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp deleted file mode 100644 index abfc0fb63..000000000 --- a/src/zenutil/chunkblock.cpp +++ /dev/null @@ -1,257 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenutil/chunkblock.h> - -#include <zencore/compactbinarybuilder.h> -#include <zencore/fmtutils.h> -#include <zencore/logging.h> - -#include <vector> - -namespace zen { - -using namespace std::literals; - -ChunkBlockDescription -ParseChunkBlockDescription(const CbObjectView& BlockObject) -{ - ChunkBlockDescription Result; - Result.BlockHash = BlockObject["rawHash"sv].AsHash(); - if (Result.BlockHash != IoHash::Zero) - { - Result.HeaderSize = BlockObject["headerSize"sv].AsUInt64(); - CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); - Result.ChunkRawHashes.reserve(ChunksArray.Num()); - for (CbFieldView ChunkView : ChunksArray) - { - Result.ChunkRawHashes.push_back(ChunkView.AsHash()); - } - - CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); - Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); - for (CbFieldView ChunkView : ChunkRawLengthsArray) - { - Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); - } - - CbArrayView ChunkCompressedLengthsArray = BlockObject["chunkCompressedLengths"sv].AsArrayView(); - Result.ChunkCompressedLengths.reserve(ChunkCompressedLengthsArray.Num()); - for (CbFieldView ChunkView : ChunkCompressedLengthsArray) - { - Result.ChunkCompressedLengths.push_back(ChunkView.AsUInt32()); - } - } - return Result; -} - -std::vector<ChunkBlockDescription> -ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) -{ - if (!BlocksObject) - { - return {}; - } - std::vector<ChunkBlockDescription> Result; - CbArrayView Blocks = BlocksObject["blocks"sv].AsArrayView(); - Result.reserve(Blocks.Num()); - for (CbFieldView BlockView : Blocks) - { - CbObjectView BlockObject = BlockView.AsObjectView(); - Result.emplace_back(ParseChunkBlockDescription(BlockObject)); - } - return Result; -} - -CbObject -BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) -{ - ZEN_ASSERT(Block.BlockHash != IoHash::Zero); - ZEN_ASSERT(Block.HeaderSize > 0); - ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkRawHashes.size()); - ZEN_ASSERT(Block.ChunkCompressedLengths.size() == Block.ChunkRawHashes.size()); - - CbObjectWriter Writer; - Writer.AddHash("rawHash"sv, Block.BlockHash); - Writer.AddInteger("headerSize"sv, Block.HeaderSize); - Writer.BeginArray("rawHashes"sv); - { - for (const IoHash& ChunkHash : Block.ChunkRawHashes) - { - Writer.AddHash(ChunkHash); - } - } - Writer.EndArray(); - - Writer.BeginArray("chunkRawLengths"); - { - for (uint32_t ChunkSize : Block.ChunkRawLengths) - { - Writer.AddInteger(ChunkSize); - } - } - Writer.EndArray(); - - Writer.BeginArray("chunkCompressedLengths"); - { - for (uint32_t ChunkSize : Block.ChunkCompressedLengths) - { - Writer.AddInteger(ChunkSize); - } - } - Writer.EndArray(); - - Writer.AddObject("metadata", MetaData); - - return Writer.Save(); -} - -ChunkBlockDescription -GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash) -{ - ChunkBlockDescription BlockDescription = {{.BlockHash = IoHash::HashBuffer(BlockPayload)}}; - if (BlockDescription.BlockHash != RawHash) - { - throw std::runtime_error(fmt::format("Block {} content hash {} does not match block hash", RawHash, BlockDescription.BlockHash)); - } - if (IterateChunkBlock( - BlockPayload, - [&BlockDescription, RawHash](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { - if (CompositeBuffer Decompressed = Chunk.DecompressToComposite(); Decompressed) - { - IoHash ChunkHash = IoHash::HashBuffer(Decompressed.Flatten()); - if (ChunkHash != AttachmentHash) - { - throw std::runtime_error( - fmt::format("Chunk {} in block {} content hash {} does not match chunk", AttachmentHash, RawHash, ChunkHash)); - } - BlockDescription.ChunkRawHashes.push_back(AttachmentHash); - BlockDescription.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Decompressed.GetSize())); - BlockDescription.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(Chunk.GetCompressedSize())); - } - else - { - throw std::runtime_error(fmt::format("Chunk {} in block {} is not a compressed buffer", AttachmentHash, RawHash)); - } - }, - BlockDescription.HeaderSize)) - { - return BlockDescription; - } - else - { - throw std::runtime_error(fmt::format("Block {} is malformed", RawHash)); - } -} - -CompressedBuffer -GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock) -{ - const size_t ChunkCount = FetchChunks.size(); - - std::vector<SharedBuffer> ChunkSegments; - ChunkSegments.resize(1); - ChunkSegments.reserve(1 + ChunkCount); - OutBlock.ChunkRawHashes.reserve(ChunkCount); - OutBlock.ChunkRawLengths.reserve(ChunkCount); - OutBlock.ChunkCompressedLengths.reserve(ChunkCount); - { - IoBuffer TempBuffer(ChunkCount * 9); - MutableMemoryView View = TempBuffer.GetMutableView(); - uint8_t* BufferStartPtr = reinterpret_cast<uint8_t*>(View.GetData()); - uint8_t* BufferEndPtr = BufferStartPtr; - BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); - for (const auto& It : FetchChunks) - { - std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first); - uint64_t ChunkSize = 0; - std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments(); - for (const SharedBuffer& Segment : Segments) - { - ZEN_ASSERT(Segment.IsOwned()); - ChunkSize += Segment.GetSize(); - ChunkSegments.push_back(Segment); - } - BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); - OutBlock.ChunkRawHashes.push_back(It.first); - OutBlock.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Chunk.first)); - OutBlock.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(ChunkSize)); - } - ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); - ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); - ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow<size_t>(TempBufferLength))); - OutBlock.HeaderSize = TempBufferLength; - } - CompressedBuffer CompressedBlock = - CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); - OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); - return CompressedBlock; -} - -std::vector<uint32_t> -ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize) -{ - const uint8_t* ReadPtr = reinterpret_cast<const uint8_t*>(BlockView.GetData()); - uint32_t NumberSize; - uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); - ReadPtr += NumberSize; - std::vector<uint32_t> ChunkSizes; - ChunkSizes.reserve(ChunkCount); - while (ChunkCount--) - { - if (ReadPtr >= BlockView.GetDataEnd()) - { - throw std::runtime_error("Invalid block header, block data ended unexpectedly"); - } - uint64_t ChunkSize = ReadVarUInt(ReadPtr, NumberSize); - if (ChunkSize > std::numeric_limits<uint32_t>::max()) - { - throw std::runtime_error("Invalid block header, header data is corrupt"); - } - if (ChunkSize < 1) - { - throw std::runtime_error("Invalid block header, header data is corrupt"); - } - ChunkSizes.push_back(gsl::narrow<uint32_t>(ChunkSize)); - ReadPtr += NumberSize; - } - uint64_t Offset = std::distance((const uint8_t*)BlockView.GetData(), ReadPtr); - OutHeaderSize = Offset; - return ChunkSizes; -} - -bool -IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor, - uint64_t& OutHeaderSize) -{ - ZEN_ASSERT(BlockPayload); - if (BlockPayload.GetSize() < 1) - { - return false; - } - - MemoryView BlockView = BlockPayload.GetView(); - - std::vector<uint32_t> ChunkSizes = ReadChunkBlockHeader(BlockView, OutHeaderSize); - uint64_t Offset = OutHeaderSize; - OutHeaderSize = Offset; - for (uint64_t ChunkSize : ChunkSizes) - { - IoBuffer Chunk(BlockPayload.AsIoBuffer(), Offset, ChunkSize); - IoHash AttachmentRawHash; - uint64_t AttachmentRawSize; - CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); - ZEN_ASSERT_SLOW(IoHash::HashBuffer(CompressedChunk.DecompressToComposite()) == AttachmentRawHash); - if (!CompressedChunk) - { - ZEN_ERROR("Invalid chunk in block"); - return false; - } - Visitor(std::move(CompressedChunk), AttachmentRawHash); - Offset += ChunkSize; - ZEN_ASSERT(Offset <= BlockView.GetSize()); - } - return true; -}; - -} // namespace zen diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp deleted file mode 100644 index 757bcfae5..000000000 --- a/src/zenutil/chunkedcontent.cpp +++ /dev/null @@ -1,953 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenutil/chunkedcontent.h> - -#include <zencore/filesystem.h> -#include <zencore/fmtutils.h> -#include <zencore/logging.h> -#include <zencore/scopeguard.h> -#include <zencore/timer.h> -#include <zencore/trace.h> - -#include <zenutil/chunkedfile.h> -#include <zenutil/chunkingcontroller.h> -#include <zenutil/parallelwork.h> -#include <zenutil/workerpools.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_set.h> -#include <gsl/gsl-lite.hpp> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -using namespace std::literals; - -namespace { - void AddChunkSequence(ChunkingStatistics& Stats, - ChunkedContentData& InOutChunkedContent, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, - const IoHash& RawHash, - std::span<const uint32_t> ChunkSequence, - std::span<const IoHash> ChunkHashes, - std::span<const uint64_t> ChunkRawSizes) - { - ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); - InOutChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); - InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); - - for (uint32_t ChunkedSequenceIndex : ChunkSequence) - { - const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex]; - if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second); - InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); - } - else - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size()); - ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex); - InOutChunkedContent.ChunkHashes.push_back(ChunkHash); - InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]); - InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); - Stats.UniqueChunksFound++; - Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex]; - } - } - InOutChunkedContent.SequenceRawHashes.push_back(RawHash); - Stats.UniqueSequencesFound++; - } - - void AddChunkSequence(ChunkingStatistics& Stats, - ChunkedContentData& InOutChunkedContent, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, - const IoHash& RawHash, - const uint64_t RawSize) - { - InOutChunkedContent.ChunkCounts.push_back(1); - - if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end()) - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second); - InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); - } - else - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size()); - ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex); - InOutChunkedContent.ChunkHashes.push_back(RawHash); - InOutChunkedContent.ChunkRawSizes.push_back(RawSize); - InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); - Stats.UniqueChunksFound++; - Stats.UniqueBytesFound += RawSize; - } - InOutChunkedContent.SequenceRawHashes.push_back(RawHash); - Stats.UniqueSequencesFound++; - } - - IoHash HashOneFile(ChunkingStatistics& Stats, - const ChunkingController& InChunkingController, - ChunkedFolderContent& OutChunkedContent, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, - RwLock& Lock, - const std::filesystem::path& FolderPath, - uint32_t PathIndex, - std::atomic<bool>& AbortFlag) - { - ZEN_TRACE_CPU("ChunkFolderContent"); - - const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; - const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; - - if (RawSize == 0) - { - return IoHash::Zero; - } - else - { - ChunkedInfoWithSource Chunked; - const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); - if (DidChunking) - { - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) - { - RawHashToSequenceRawHashIndex.insert( - {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - std::vector<uint64_t> ChunkSizes; - ChunkSizes.reserve(Chunked.ChunkSources.size()); - for (const ChunkSource& Source : Chunked.ChunkSources) - { - ChunkSizes.push_back(Source.Size); - } - AddChunkSequence(Stats, - OutChunkedContent.ChunkedContent, - ChunkHashToChunkIndex, - Chunked.Info.RawHash, - Chunked.Info.ChunkSequence, - Chunked.Info.ChunkHashes, - ChunkSizes); - Stats.UniqueSequencesFound++; - } - }); - Stats.FilesChunked++; - return Chunked.Info.RawHash; - } - else - { - ZEN_TRACE_CPU("HashOnly"); - - IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); - if (Buffer.GetSize() != RawSize) - { - throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); - } - const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); - - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Hash)) - { - RawHashToSequenceRawHashIndex.insert( - {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); - Stats.UniqueSequencesFound++; - } - }); - return Hash; - } - } - } - - std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } - -} // namespace - -std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; - -std::string_view -ToString(SourcePlatform Platform) -{ - return FolderContentSourcePlatformNames[(size_t)Platform]; -} - -SourcePlatform -FromString(std::string_view Platform, SourcePlatform Default) -{ - for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++) - { - if (Platform == FolderContentSourcePlatformNames[Index]) - { - return (SourcePlatform)Index; - } - } - return Default; -} - -SourcePlatform -GetSourceCurrentPlatform() -{ -#if ZEN_PLATFORM_WINDOWS - return SourcePlatform::Windows; -#endif -#if ZEN_PLATFORM_MAC - return SourcePlatform::MacOS; -#endif -#if ZEN_PLATFORM_LINUX - return SourcePlatform::Linux; -#endif -} - -bool -FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs) -{ -#if ZEN_PLATFORM_WINDOWS - return (Lhs & 0xff) == (Rhs & 0xff); -#endif -#if ZEN_PLATFORM_MAC - return Lhs == Rhs; -#endif -#if ZEN_PLATFORM_LINUX - return Lhs == Rhs; -#endif -} - -bool -FolderContent::operator==(const FolderContent& Rhs) const -{ - if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && - (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) - { - size_t PathCount = 0; - for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) - { - if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) - { - return false; - } - } - return true; - } - return false; -} - -bool -FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const -{ - ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual"); - tsl::robin_map<std::string, size_t> RhsPathToIndex; - const size_t RhsPathCount = Rhs.Paths.size(); - RhsPathToIndex.reserve(RhsPathCount); - for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) - { - RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); - } - const size_t PathCount = Paths.size(); - for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) - { - if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) - { - const size_t RhsPathIndex = It->second; - if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || - (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) || - (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) - { - return false; - } - } - else - { - return false; - } - } - return true; -} - -void -FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutPathIndexesOufOfDate) -{ - ZEN_TRACE_CPU("FolderContent::UpdateState"); - tsl::robin_map<std::string, uint32_t> RhsPathToIndex; - const uint32_t RhsPathCount = gsl::narrow<uint32_t>(Rhs.Paths.size()); - RhsPathToIndex.reserve(RhsPathCount); - for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) - { - RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); - } - uint32_t PathCount = gsl::narrow<uint32_t>(Paths.size()); - for (uint32_t PathIndex = 0; PathIndex < PathCount;) - { - if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) - { - const uint32_t RhsPathIndex = It->second; - - if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || - (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) - { - RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex]; - ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex]; - OutPathIndexesOufOfDate.push_back(PathIndex); - } - Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex]; - PathIndex++; - } - else - { - Paths.erase(Paths.begin() + PathIndex); - RawSizes.erase(RawSizes.begin() + PathIndex); - Attributes.erase(Attributes.begin() + PathIndex); - ModificationTicks.erase(ModificationTicks.begin() + PathIndex); - PathCount--; - } - } -} - -FolderContent -GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths) -{ - ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); - - const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size()); - - FolderContent Result = {.Platform = Old.Platform}; - Result.Paths.reserve(NewPathCount); - Result.RawSizes.reserve(NewPathCount); - Result.Attributes.reserve(NewPathCount); - Result.ModificationTicks.reserve(NewPathCount); - - tsl::robin_map<std::string, uint32_t> NewPathToIndex; - NewPathToIndex.reserve(NewPathCount); - for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++) - { - NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex}); - } - - uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size()); - for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++) - { - if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end()) - { - const uint32_t NewPathIndex = It->second; - - if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) || - (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex])) - { - Result.Paths.push_back(New.Paths[NewPathIndex]); - Result.RawSizes.push_back(New.RawSizes[NewPathIndex]); - Result.Attributes.push_back(New.Attributes[NewPathIndex]); - Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]); - } - } - else - { - OutDeletedPaths.push_back(Old.Paths[OldPathIndex]); - } - } - return Result; -} - -void -SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) -{ - ZEN_TRACE_CPU("SaveFolderContentToCompactBinary"); - Output.AddString("platform"sv, ToString(Content.Platform)); - compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); - compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); - compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); - compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output); -} - -FolderContent -LoadFolderContentToCompactBinary(CbObjectView Input) -{ - ZEN_TRACE_CPU("LoadFolderContentToCompactBinary"); - FolderContent Content; - Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); - compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); - compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); - compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); - compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks); - return Content; -} - -FolderContent -GetFolderContent(GetFolderContentStatistics& Stats, - const std::filesystem::path& RootPath, - std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, - std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile, - WorkerThreadPool& WorkerPool, - int32_t UpdateIntervalMS, - std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, - std::atomic<bool>& AbortFlag) -{ - ZEN_TRACE_CPU("GetFolderContent"); - - Stopwatch Timer; - auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - - FolderContent Content; - struct AsyncVisitor : public GetDirectoryContentVisitor - { - AsyncVisitor(GetFolderContentStatistics& Stats, - std::atomic<bool>& AbortFlag, - FolderContent& Content, - std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, - std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile) - : m_Stats(Stats) - , m_AbortFlag(AbortFlag) - , m_FoundContent(Content) - , m_AcceptDirectory(std::move(AcceptDirectory)) - , m_AcceptFile(std::move(AcceptFile)) - { - } - virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override - { - if (!m_AbortFlag) - { - m_Stats.FoundFileCount += Content.FileNames.size(); - for (uint64_t FileSize : Content.FileSizes) - { - m_Stats.FoundFileByteCount += FileSize; - } - std::string RelativeDirectoryPath = RelativeRoot.generic_string(); - if (m_AcceptDirectory(RelativeDirectoryPath)) - { - std::vector<std::filesystem::path> Paths; - std::vector<uint64_t> RawSizes; - std::vector<uint32_t> Attributes; - std::vector<uint64_t> ModificatonTicks; - Paths.reserve(Content.FileNames.size()); - RawSizes.reserve(Content.FileNames.size()); - Attributes.reserve(Content.FileNames.size()); - ModificatonTicks.reserve(Content.FileModificationTicks.size()); - - for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) - { - const std::filesystem::path& FileName = Content.FileNames[FileIndex]; - std::string RelativePath = (RelativeRoot / FileName).generic_string(); - std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/'); - if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex])) - { - Paths.emplace_back(std::move(RelativePath)); - RawSizes.emplace_back(Content.FileSizes[FileIndex]); - Attributes.emplace_back(Content.FileAttributes[FileIndex]); - ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]); - - m_Stats.AcceptedFileCount++; - m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex]; - } - } - m_Lock.WithExclusiveLock([&]() { - m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end()); - m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end()); - m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end()); - m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(), - ModificatonTicks.begin(), - ModificatonTicks.end()); - }); - } - } - } - - GetFolderContentStatistics& m_Stats; - std::atomic<bool>& m_AbortFlag; - RwLock m_Lock; - FolderContent& m_FoundContent; - std::function<bool(const std::string_view& RelativePath)> m_AcceptDirectory; - std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)> m_AcceptFile; - } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile)); - - Latch PendingWork(1); - GetDirectoryContent(RootPath, - DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes | - DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick, - Visitor, - WorkerPool, - PendingWork); - PendingWork.CountDown(); - while (!PendingWork.Wait(UpdateIntervalMS)) - { - UpdateCallback(AbortFlag.load(), PendingWork.Remaining()); - } - std::vector<size_t> Order; - size_t PathCount = Content.Paths.size(); - Order.resize(Content.Paths.size()); - std::vector<std::string> Parents; - Parents.reserve(PathCount); - std::vector<std::string> Filenames; - Filenames.reserve(PathCount); - for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++) - { - Order[OrderIndex] = OrderIndex; - Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string()); - Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string()); - } - std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) { - const std::string& LhsParent = Parents[Lhs]; - const std::string& RhsParent = Parents[Rhs]; - if (LhsParent < RhsParent) - { - return true; - } - else if (LhsParent > RhsParent) - { - return false; - } - return Filenames[Lhs] < Filenames[Rhs]; - }); - FolderContent OrderedContent; - OrderedContent.Paths.reserve(PathCount); - OrderedContent.RawSizes.reserve(PathCount); - OrderedContent.Attributes.reserve(PathCount); - OrderedContent.ModificationTicks.reserve(PathCount); - for (size_t OrderIndex : Order) - { - OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex])); - OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]); - OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]); - OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]); - } - return OrderedContent; -} - -void -SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) -{ - ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary"); - Output.AddString("platform"sv, ToString(Content.Platform)); - compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); - compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); - compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); - compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output); - - Output.BeginObject("chunkedContent"); - compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output); - compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output); - compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output); - compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output); - compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output); - Output.EndObject(); // chunkedContent -} - -ChunkedFolderContent -LoadChunkedFolderContentToCompactBinary(CbObjectView Input) -{ - ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary"); - ChunkedFolderContent Content; - Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); - compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); - compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); - compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); - compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes); - - CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView(); - compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes); - compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts); - compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders); - compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes); - compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes); - return Content; -} - -ChunkedFolderContent -MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays) -{ - ZEN_TRACE_CPU("MergeChunkedFolderContents"); - - ZEN_ASSERT(!Overlays.empty()); - - ChunkedFolderContent Result; - const size_t BasePathCount = Base.Paths.size(); - Result.Paths.reserve(BasePathCount); - Result.RawSizes.reserve(BasePathCount); - Result.Attributes.reserve(BasePathCount); - Result.RawHashes.reserve(BasePathCount); - - const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size(); - Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size()); - Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount); - Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); - Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); - Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size()); - - tsl::robin_map<std::string, std::filesystem::path> GenericPathToActualPath; - for (const std::filesystem::path& Path : Base.Paths) - { - GenericPathToActualPath.insert({PathCompareString(Path), Path}); - } - for (const ChunkedFolderContent& Overlay : Overlays) - { - for (const std::filesystem::path& Path : Overlay.Paths) - { - GenericPathToActualPath.insert({PathCompareString(Path), Path}); - } - } - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; - - auto BuildOverlayPaths = [](std::span<const ChunkedFolderContent> Overlays) -> tsl::robin_set<std::string> { - tsl::robin_set<std::string> Result; - for (const ChunkedFolderContent& OverlayContent : Overlays) - { - for (const std::filesystem::path& Path : OverlayContent.Paths) - { - Result.insert(PathCompareString(Path)); - } - } - return Result; - }; - - auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result, - const ChunkedFolderContent& OverlayContent, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, - const tsl::robin_map<std::string, std::filesystem::path>& GenericPathToActualPath, - std::span<const ChunkedFolderContent> Overlays) { - const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); - tsl::robin_set<std::string> BaseOverlayPaths = BuildOverlayPaths(Overlays); - for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) - { - std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); - if (!BaseOverlayPaths.contains(GenericPath)) - { - // This asset will not be overridden by a later layer - add it - - const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath); - Result.Paths.push_back(OriginalPath); - const IoHash& RawHash = OverlayContent.RawHashes[PathIndex]; - Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]); - Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]); - Result.RawHashes.push_back(RawHash); - - if (OverlayContent.RawSizes[PathIndex] > 0) - { - if (!RawHashToSequenceRawHashIndex.contains(RawHash)) - { - RawHashToSequenceRawHashIndex.insert( - {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); - const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; - std::span<const uint32_t> OriginalChunkOrder = - std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddChunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - OverlayContent.ChunkedContent.ChunkHashes, - OverlayContent.ChunkedContent.ChunkRawSizes); - Stats.UniqueSequencesFound++; - } - } - } - } - }; - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MergedChunkHashToChunkIndex; - AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays); - for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++) - { - AddContent(Result, - Overlays[OverlayIndex], - MergedChunkHashToChunkIndex, - RawHashToSequenceRawHashIndex, - GenericPathToActualPath, - Overlays.subspan(OverlayIndex + 1)); - } - return Result; -} - -ChunkedFolderContent -DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, - const ChunkedContentLookup& BaseContentLookup, - std::span<const std::filesystem::path> DeletedPaths) -{ - ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); - - ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); - ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; - if (DeletedPaths.size() < BaseContent.Paths.size()) - { - tsl::robin_set<std::string> DeletedPathSet; - DeletedPathSet.reserve(DeletedPaths.size()); - for (const std::filesystem::path& DeletedPath : DeletedPaths) - { - DeletedPathSet.insert(PathCompareString(DeletedPath)); - } - - const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size(); - std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1); - - const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size(); - Result.Paths.reserve(ExpectedPathCount); - Result.RawSizes.reserve(ExpectedPathCount); - Result.Attributes.reserve(ExpectedPathCount); - Result.RawHashes.reserve(ExpectedPathCount); - - Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); - Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; - for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) - { - const std::filesystem::path& Path = BaseContent.Paths[PathIndex]; - if (!DeletedPathSet.contains(PathCompareString(Path))) - { - const IoHash& RawHash = BaseContent.RawHashes[PathIndex]; - const uint64_t RawSize = BaseContent.RawSizes[PathIndex]; - Result.Paths.push_back(Path); - Result.RawSizes.push_back(RawSize); - Result.Attributes.push_back(BaseContent.Attributes[PathIndex]); - Result.RawHashes.push_back(RawHash); - if (RawSize > 0) - { - if (!RawHashToSequenceRawHashIndex.contains(RawHash)) - { - RawHashToSequenceRawHashIndex.insert( - {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - - std::span<const uint32_t> OriginalChunkOrder = - std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - - Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size())); - - for (uint32_t OldChunkIndex : OriginalChunkOrder) - { - if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1) - { - Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex); - } - else - { - const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size()); - NewChunkIndexes[OldChunkIndex] = NewChunkIndex; - const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex]; - const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex]; - Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); - Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize); - Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex); - } - } - Result.ChunkedContent.SequenceRawHashes.push_back(RawHash); - } - } - } - } - } - return Result; -} - -ChunkedFolderContent -DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) -{ - ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); - ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); - if (DeletedPaths.size() == BaseContent.Paths.size()) - { - return {}; - } - const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); - return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths); -} - -ChunkedFolderContent -ChunkFolderContent(ChunkingStatistics& Stats, - WorkerThreadPool& WorkerPool, - const std::filesystem::path& RootPath, - const FolderContent& Content, - const ChunkingController& InChunkingController, - int32_t UpdateIntervalMS, - std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag) -{ - ZEN_TRACE_CPU("ChunkFolderContent"); - - Stopwatch Timer; - auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); - - ChunkedFolderContent Result = {.Platform = Content.Platform, - .Paths = Content.Paths, - .RawSizes = Content.RawSizes, - .Attributes = Content.Attributes}; - const size_t ItemCount = Result.Paths.size(); - Result.RawHashes.resize(ItemCount, IoHash::Zero); - Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less - Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file - Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more - Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more - Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToChunkSequenceIndex; - RawHashToChunkSequenceIndex.reserve(ItemCount); - ChunkHashToChunkIndex.reserve(ItemCount); - { - std::vector<uint32_t> Order; - Order.resize(ItemCount); - for (uint32_t I = 0; I < ItemCount; I++) - { - Order[I] = I; - } - - // Handle the biggest files first so we don't end up with one straggling large file at the end - // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs]; - //}); - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; - RawHashToSequenceRawHashIndex.reserve(ItemCount); - - RwLock Lock; - - ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - for (uint32_t PathIndex : Order) - { - if (Work.IsAborted()) - { - break; - } - Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() - [&, PathIndex](std::atomic<bool>& AbortFlag) { - if (!AbortFlag) - { - IoHash RawHash = HashOneFile(Stats, - InChunkingController, - Result, - ChunkHashToChunkIndex, - RawHashToSequenceRawHashIndex, - Lock, - RootPath, - PathIndex, - AbortFlag); - Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); - Stats.FilesProcessed++; - } - }); - } - - Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining()); - }); - } - return Result; -} - -ChunkedContentLookup -BuildChunkedContentLookup(const ChunkedFolderContent& Content) -{ - ZEN_TRACE_CPU("BuildChunkedContentLookup"); - - struct ChunkLocationReference - { - uint32_t ChunkIndex = (uint32_t)-1; - uint32_t SequenceIndex = (uint32_t)-1; - uint64_t Offset = (uint64_t)-1; - }; - - ChunkedContentLookup Result; - { - const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size()); - Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); - Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); - uint32_t OrderOffset = 0; - for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); - SequenceRawHashIndex++) - { - Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); - Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); - OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - } - } - - std::vector<ChunkLocationReference> Locations; - Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); - for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) - { - const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex]; - const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; - uint64_t LocationOffset = 0; - for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) - { - uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; - - Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset}); - - LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - } - - std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) { - if (Lhs.ChunkIndex < Rhs.ChunkIndex) - { - return true; - } - if (Lhs.ChunkIndex > Rhs.ChunkIndex) - { - return false; - } - if (Lhs.SequenceIndex < Rhs.SequenceIndex) - { - return true; - } - if (Lhs.SequenceIndex > Rhs.SequenceIndex) - { - return false; - } - return Lhs.Offset < Rhs.Offset; - }); - - Result.ChunkSequenceLocations.reserve(Locations.size()); - const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size()); - Result.ChunkHashToChunkIndex.reserve(ChunkCount); - size_t RangeOffset = 0; - for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) - { - Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); - uint32_t Count = 0; - while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) - { - const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count]; - Result.ChunkSequenceLocations.push_back( - ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex, - .Offset = LocationReference.Offset}); - Count++; - } - Result.ChunkSequenceLocationOffset.push_back(RangeOffset); - Result.ChunkSequenceLocationCounts.push_back(Count); - RangeOffset += Count; - } - - Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); - Result.PathExtensionHash.resize(Content.Paths.size()); - for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) - { - std::string LowercaseExtension = Content.Paths[PathIndex].extension().string(); - std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), [](char c) { - return (char)::tolower(c); - }); - Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension); - if (Content.RawSizes[PathIndex] > 0) - { - const IoHash& RawHash = Content.RawHashes[PathIndex]; - auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); - ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); - const uint32_t SequenceIndex = SequenceIndexIt->second; - if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) - { - Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; - } - } - } - - return Result; -} - -} // namespace zen diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp deleted file mode 100644 index a2c041ffd..000000000 --- a/src/zenutil/chunkedfile.cpp +++ /dev/null @@ -1,525 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenutil/chunkedfile.h> - -#include <zencore/basicfile.h> -#include <zencore/trace.h> - -#include "chunking.h" - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -#include <gsl/gsl-lite.hpp> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -namespace { - struct ChunkedHeader - { - static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd - static constexpr uint32_t CurrentVersion = 1; - - uint32_t Magic = ExpectedMagic; - uint32_t Version = CurrentVersion; - uint32_t ChunkSequenceLength; - uint32_t ChunkHashCount; - uint64_t ChunkSequenceOffset; - uint64_t ChunkHashesOffset; - uint64_t RawSize = 0; - IoHash RawHash; - }; -} // namespace - -IoBuffer -SerializeChunkedInfo(const ChunkedInfo& Info) -{ - ZEN_TRACE_CPU("SerializeChunkedInfo"); - size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + - RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); - IoBuffer HeaderData(HeaderSize); - - ChunkedHeader Header; - Header.ChunkSequenceLength = gsl::narrow<uint32_t>(Info.ChunkSequence.size()); - Header.ChunkHashCount = gsl::narrow<uint32_t>(Info.ChunkHashes.size()); - Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16); - Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16); - Header.RawSize = Info.RawSize; - Header.RawHash = Info.RawHash; - - MutableMemoryView WriteView = HeaderData.GetMutableView(); - { - MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header)); - HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header))); - } - { - MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength); - ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize())); - } - { - MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount); - ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize())); - } - - return HeaderData; -} - -ChunkedInfo -DeserializeChunkedInfo(IoBuffer& Buffer) -{ - ZEN_TRACE_CPU("DeserializeChunkedInfo"); - MemoryView View = Buffer.GetView(); - ChunkedHeader Header; - { - MutableMemoryView HeaderWriteView(&Header, sizeof(Header)); - HeaderWriteView.CopyFrom(View.Left(sizeof(Header))); - } - if (Header.Magic != ChunkedHeader::ExpectedMagic) - { - return {}; - } - if (Header.Version != ChunkedHeader::CurrentVersion) - { - return {}; - } - ChunkedInfo Info; - Info.RawSize = Header.RawSize; - Info.RawHash = Header.RawHash; - Info.ChunkSequence.resize(Header.ChunkSequenceLength); - Info.ChunkHashes.resize(Header.ChunkHashCount); - { - MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength); - ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize())); - } - { - MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount); - ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize())); - } - - return Info; -} - -void -Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk) -{ - ZEN_TRACE_CPU("Reconstruct"); - BasicFile Reconstructed; - Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); - BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); - uint64_t Offset = 0; - for (uint32_t SequenceIndex : Info.ChunkSequence) - { - IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]); - ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset); - Offset += Chunk.GetSize(); - } -} - -ChunkedInfoWithSource -ChunkData(BasicFile& RawData, - uint64_t Offset, - uint64_t Size, - ChunkedParams Params, - std::atomic<uint64_t>* BytesProcessed, - std::atomic<bool>* AbortFlag) -{ - ZEN_TRACE_CPU("ChunkData"); - - ChunkedInfoWithSource Result; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks; - - ZenChunkHelper Chunker; - Chunker.SetUseThreshold(Params.UseThreshold); - Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); - size_t End = Offset + Size; - const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize); - BasicFileBuffer RawBuffer(RawData, ScanBufferSize); - MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); - ZEN_ASSERT(!SliceView.IsEmpty()); - size_t SliceSize = SliceView.GetSize(); - IoHashStream RawHashStream; - while (Offset < End) - { - if (AbortFlag != nullptr && AbortFlag->load()) - { - return {}; - } - size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); - if (ScanLength == ZenChunkHelper::kNoBoundaryFound) - { - if (Offset + SliceSize == End) - { - ScanLength = SliceSize; - } - else - { - SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); - SliceSize = SliceView.GetSize(); - Chunker.Reset(); - continue; - } - } - uint32_t ChunkLength = gsl::narrow<uint32_t>(ScanLength); // +HashedLength); - MemoryView ChunkView = SliceView.Left(ScanLength); - RawHashStream.Append(ChunkView); - IoHash ChunkHash = IoHash::HashBuffer(ChunkView); - SliceView.RightChopInline(ScanLength); - if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end()) - { - Result.Info.ChunkSequence.push_back(It->second); - } - else - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(Result.Info.ChunkHashes.size()); - FoundChunks.insert_or_assign(ChunkHash, ChunkIndex); - Result.Info.ChunkHashes.push_back(ChunkHash); - Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength}); - Result.Info.ChunkSequence.push_back(ChunkIndex); - } - - SliceSize = SliceView.GetSize(); - Offset += ChunkLength; - if (BytesProcessed != nullptr) - { - BytesProcessed->fetch_add(ChunkLength); - } - } - Result.Info.RawSize = Size; - Result.Info.RawHash = RawHashStream.GetHash(); - return Result; -} - -} // namespace zen - -#if ZEN_WITH_TESTS -# include <zencore/filesystem.h> -# include <zencore/fmtutils.h> -# include <zencore/iohash.h> -# include <zencore/logging.h> -# include <zencore/scopeguard.h> -# include <zencore/timer.h> -# include <zencore/testing.h> -# include <zencore/testutils.h> -# include <zencore/workthreadpool.h> - -# include "chunking.h" - -ZEN_THIRD_PARTY_INCLUDES_START -# include <tsl/robin_map.h> -# include <tsl/robin_set.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { -# if 0 -TEST_CASE("chunkedfile.findparams") -{ -# if 1 - DirectoryContent SourceContent1; - GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1); - const std::vector<std::filesystem::path>& SourceFiles1 = SourceContent1.Files; - DirectoryContent SourceContent2; - GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2); - const std::vector<std::filesystem::path>& SourceFiles2 = SourceContent2.Files; -# else - std::filesystem::path SourcePath1 = - "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; - std::filesystem::path SourcePath2 = - "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; - const std::vector<std::filesystem::path>& SourceFiles1 = {SourcePath1}; - const std::vector<std::filesystem::path>& SourceFiles2 = {SourcePath2}; -# endif - ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}, - ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598}, - ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030}, - ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222}, - ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600}, - ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442}, - ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950}, - ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914}, - ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556}, - ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520}, - ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478}, - ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072}, - ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880}, - ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678}, - ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994}, - ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714}, - ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636}, - ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609}, - ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756}, - ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955}, - ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792}, - ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338}, - ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568}, - ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108}, - ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297}, - ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188}, - ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372}, - ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592}, - ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805}, - ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263}, - ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534}, - ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839}, - ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360}, - ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976}, - ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493}, - ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643}, - ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504}, - ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664}, - ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048}, - ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432}, - ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520}, - ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378}, - ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421}, - ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459}, - ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502}, - ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540}, - ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423}, - ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668}, - ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547}, - ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}}; - - static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams); - std::vector<ChunkedInfoWithSource> Infos1(SourceFiles1.size()); - std::vector<ChunkedInfoWithSource> Infos2(SourceFiles2.size()); - - WorkerThreadPool WorkerPool(32); - - for (size_t I = 0; I < ParamsCount; I++) - { - for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++) - { - Latch WorkLatch(1); - ChunkedParams Param = Params[I]; - Param.UseThreshold = UseThreshold == 1; - Stopwatch Timer; - for (size_t F = 0; F < SourceFiles1.size(); F++) - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - BasicFile SourceData1; - SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead); - Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param); - }); - } - for (size_t F = 0; F < SourceFiles2.size(); F++) - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - BasicFile SourceData2; - SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead); - Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param); - }); - } - WorkLatch.CountDown(); - WorkLatch.Wait(); - uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs(); - - uint64_t Raw1Size = 0; - tsl::robin_set<IoHash> Chunks1; - size_t ChunkedSize1 = 0; - for (size_t F = 0; F < SourceFiles1.size(); F++) - { - const ChunkedInfoWithSource& Info = Infos1[F]; - Raw1Size += Info.Info.RawSize; - for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index) - { - const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index]; - if (Chunks1.insert(ChunkHash).second) - { - ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size; - } - } - } - - uint64_t Raw2Size = 0; - tsl::robin_set<IoHash> Chunks2; - size_t ChunkedSize2 = 0; - size_t DiffSize = 0; - for (size_t F = 0; F < SourceFiles2.size(); F++) - { - const ChunkedInfoWithSource& Info = Infos2[F]; - Raw2Size += Info.Info.RawSize; - for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index) - { - const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index]; - if (Chunks2.insert(ChunkHash).second) - { - ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size; - if (!Chunks1.contains(ChunkHash)) - { - DiffSize += Info.ChunkSources[Chunk2Index].Size; - } - } - } - } - - ZEN_INFO( - "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, " - "RawSize(2) = {}, " - "Saved(1) = {}, Saved(2) = {} in {}", - NiceBytes(DiffSize), - Chunks1.size(), - Chunks2.size(), - Param.UseThreshold, - Param.MinSize, - Param.MaxSize, - Param.AvgSize, - NiceBytes(Raw1Size), - NiceBytes(Raw2Size), - NiceBytes(Raw1Size - ChunkedSize1), - NiceBytes(Raw2Size - ChunkedSize2), - NiceTimeSpanMs(ChunkTimeMS)); - } - } - -# if 0 - for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512) - { - for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2) - { - // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle); - // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6; - // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3; - size_t MinSize = (size_t)(MinSizeBase + Wiggle); - //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64) - size_t MaxSize = 8u * MinSizeBase; - { - for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32) -// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize; - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]() - { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize }; - BasicFile SourceData1; - SourceData1.Open(SourcePath1, BasicFile::Mode::kRead); - BasicFile SourceData2; - SourceData2.Open(SourcePath2, BasicFile::Mode::kRead); - ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params); - ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params); - - tsl::robin_set<IoHash> Chunks1; - Chunks1.reserve(Info1.Info.ChunkHashes.size()); - Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end()); - size_t ChunkedSize1 = 0; - for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index) - { - ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size; - } - size_t DiffSavedSize = 0; - size_t ChunkedSize2 = 0; - for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index) - { - ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size; - if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end()) - { - DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size; - } - } - ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}", - NiceBytes(DiffSavedSize), - Info1.Info.ChunkHashes.size(), - Info2.Info.ChunkHashes.size(), - MinSize, - MaxSize, - AvgSize, - NiceBytes(Info1.Info.RawSize - ChunkedSize1), - NiceBytes(Info2.Info.RawSize - ChunkedSize2)); - }); - } - } - } - } -# endif // 0 - - // WorkLatch.CountDown(); - // WorkLatch.Wait(); -} -# endif // 0 - -void -chunkedfile_forcelink() -{ -} - -} // namespace zen - -#endif diff --git a/src/zenutil/chunking.cpp b/src/zenutil/chunking.cpp deleted file mode 100644 index 71f0a06e4..000000000 --- a/src/zenutil/chunking.cpp +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include "chunking.h" - -#include <gsl/gsl-lite.hpp> - -#include <cmath> -#include <cstring> - -namespace zen::detail { - -static const uint32_t BuzhashTable[] = { - 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3, - 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c, - 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64, - 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, - 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59, - 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393, - 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c, - 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, - 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a, - 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a, - 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5, - 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, - 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, - 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5, - 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c, - 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, - 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5, - 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510, - 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9, - 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, - 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, - 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b, - 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc, - 0xf9c18d66, 0x593ade65, 0xd95ddf11, -}; - -// ROL operation (compiler turns this into a ROL when optimizing) -ZEN_FORCEINLINE static uint32_t -Rotate32(uint32_t Value, size_t RotateCount) -{ - RotateCount &= 31; - - return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount)); -} - -} // namespace zen::detail - -namespace zen { - -void -ZenChunkHelper::Reset() -{ - InternalReset(); - - m_BytesScanned = 0; -} - -void -ZenChunkHelper::InternalReset() -{ - m_CurrentHash = 0; - m_CurrentChunkSize = 0; - m_WindowSize = 0; -} - -void -ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize) -{ - if (m_WindowSize) - return; // Already started - - static_assert(kChunkSizeLimitMin > kWindowSize); - - if (AvgSize) - { - // TODO: Validate AvgSize range - } - else - { - if (MinSize && MaxSize) - { - AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2)); - } - else if (MinSize) - { - AvgSize = MinSize * 4; - } - else if (MaxSize) - { - AvgSize = MaxSize / 4; - } - else - { - AvgSize = kDefaultAverageChunkSize; - } - } - - if (MinSize) - { - // TODO: Validate MinSize range - } - else - { - MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin); - } - - if (MaxSize) - { - // TODO: Validate MaxSize range - } - else - { - MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax); - } - - m_Discriminator = gsl::narrow<uint32_t>(AvgSize - MinSize); - - if (m_Discriminator < MinSize) - { - m_Discriminator = gsl::narrow<uint32_t>(MinSize); - } - - if (m_Discriminator > MaxSize) - { - m_Discriminator = gsl::narrow<uint32_t>(MaxSize); - } - - m_Threshold = gsl::narrow<uint32_t>((uint64_t(std::numeric_limits<uint32_t>::max()) + 1) / m_Discriminator); - - m_ChunkSizeMin = MinSize; - m_ChunkSizeMax = MaxSize; - m_ChunkSizeAvg = AvgSize; -} - -size_t -ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount) -{ - size_t Result = InternalScanChunk(DataBytesIn, ByteCount); - - if (Result == kNoBoundaryFound) - { - m_BytesScanned += ByteCount; - } - else - { - m_BytesScanned += Result; - } - - return Result; -} - -size_t -ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount) -{ - size_t CurrentOffset = 0; - const uint8_t* CursorPtr = reinterpret_cast<const uint8_t*>(DataBytesIn); - - // There's no point in updating the hash if we know we're not - // going to have a cut point, so just skip the data. This logic currently - // provides roughly a 20% speedup on my machine - - const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize; - - if (m_CurrentChunkSize < NeedHashOffset) - { - const uint32_t SkipBytes = gsl::narrow<uint32_t>(std::min<uint64_t>(ByteCount, NeedHashOffset - m_CurrentChunkSize)); - - ByteCount -= SkipBytes; - m_CurrentChunkSize += SkipBytes; - CurrentOffset += SkipBytes; - CursorPtr += SkipBytes; - - m_WindowSize = 0; - - if (ByteCount == 0) - { - return kNoBoundaryFound; - } - } - - // Fill window first - - if (m_WindowSize < kWindowSize) - { - const uint32_t FillBytes = uint32_t(std::min<size_t>(ByteCount, kWindowSize - m_WindowSize)); - - memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes); - - CursorPtr += FillBytes; - - m_WindowSize += FillBytes; - m_CurrentChunkSize += FillBytes; - - CurrentOffset += FillBytes; - ByteCount -= FillBytes; - - if (m_WindowSize < kWindowSize) - { - return kNoBoundaryFound; - } - - // We have a full window, initialize hash - - uint32_t CurrentHash = 0; - - for (int i = 1; i < kWindowSize; ++i) - { - CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i); - } - - m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]]; - } - - // Scan for boundaries (i.e points where the hash matches the value determined by - // the discriminator) - - uint32_t CurrentHash = m_CurrentHash; - uint32_t CurrentChunkSize = m_CurrentChunkSize; - - size_t Index = CurrentChunkSize % kWindowSize; - - if (m_Threshold && m_UseThreshold) - { - // This is roughly 4x faster than the general modulo approach on my - // TR 3990X (~940MB/sec) and doesn't require any special parameters to - // achieve max performance - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = CurrentHash <= m_Threshold; - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - else if ((m_Discriminator & (m_Discriminator - 1)) == 0) - { - // This is quite a bit faster than the generic modulo path, but - // requires a very specific average chunk size to be used. If you - // pass in an even power-of-two divided by 0.75 as the average - // chunk size you'll hit this path - - const uint32_t Mask = m_Discriminator - 1; - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = (CurrentHash & Mask) == Mask; - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - else - { - // This is the slowest path, which caps out around 250MB/sec for large sizes - // on my TR3900X - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1); - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - - m_CurrentChunkSize = CurrentChunkSize; - m_CurrentHash = CurrentHash; - - return kNoBoundaryFound; -} - -} // namespace zen diff --git a/src/zenutil/chunking.h b/src/zenutil/chunking.h deleted file mode 100644 index 09c56454f..000000000 --- a/src/zenutil/chunking.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once -#include <zencore/zencore.h> - -namespace zen { - -/** Content-defined chunking helper - */ -class ZenChunkHelper -{ -public: - void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize); - size_t ScanChunk(const void* DataBytes, size_t ByteCount); - void Reset(); - - // This controls which chunking approach is used - threshold or - // modulo based. Threshold is faster and generates similarly sized - // chunks - void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; } - - inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; } - inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; } - inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; } - inline uint64_t BytesScanned() const { return m_BytesScanned; } - - static constexpr size_t kNoBoundaryFound = size_t(~0ull); - -private: - size_t m_ChunkSizeMin = 0; - size_t m_ChunkSizeMax = 0; - size_t m_ChunkSizeAvg = 0; - - uint32_t m_Discriminator = 0; // Computed in SetChunkSize() - uint32_t m_Threshold = 0; // Computed in SetChunkSize() - - bool m_UseThreshold = true; - - static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024; - static constexpr size_t kChunkSizeLimitMin = 1024; - static constexpr size_t kDefaultAverageChunkSize = 64 * 1024; - - static constexpr int kWindowSize = 48; - uint8_t m_Window[kWindowSize]; - uint32_t m_WindowSize = 0; - - uint32_t m_CurrentHash = 0; - uint32_t m_CurrentChunkSize = 0; - - uint64_t m_BytesScanned = 0; - - size_t InternalScanChunk(const void* DataBytes, size_t ByteCount); - void InternalReset(); -}; - -} // namespace zen diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp deleted file mode 100644 index 6fb4182c0..000000000 --- a/src/zenutil/chunkingcontroller.cpp +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenutil/chunkingcontroller.h> - -#include <zencore/basicfile.h> -#include <zencore/compactbinarybuilder.h> -#include <zencore/filesystem.h> -#include <zencore/trace.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { -using namespace std::literals; - -namespace { - std::vector<std::string> ReadStringArray(CbArrayView StringArray) - { - std::vector<std::string> Result; - Result.reserve(StringArray.Num()); - for (CbFieldView FieldView : StringArray) - { - Result.emplace_back(FieldView.AsString()); - } - return Result; - } - - ChunkedParams ReadChunkParams(CbObjectView Params) - { - bool UseThreshold = Params["UseThreshold"sv].AsBool(true); - size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); - size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); - size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); - - return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; - } - - void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params) - { - Writer.BeginObject("ChunkingParams"sv); - { - Writer.AddBool("UseThreshold"sv, Params.UseThreshold); - - Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize); - Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize); - Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize); - } - Writer.EndObject(); // ChunkingParams - } - - bool IsElfFile(BasicFile& Buffer) - { - if (Buffer.FileSize() > 4) - { - uint32_t ElfCheck = 0; - Buffer.Read(&ElfCheck, 4, 0); - if (ElfCheck == 0x464c457f) - { - return true; - } - } - return false; - } - - bool IsMachOFile(BasicFile& Buffer) - { - if (Buffer.FileSize() > 4) - { - uint32_t MachOCheck = 0; - Buffer.Read(&MachOCheck, 4, 0); - if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) - { - return true; - } - } - return false; - } -} // namespace - -class BasicChunkingController : public ChunkingController -{ -public: - BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {} - - BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} - - virtual bool ProcessFile(const std::filesystem::path& InputPath, - uint64_t RawSize, - ChunkedInfoWithSource& OutChunked, - std::atomic<uint64_t>& BytesProcessed, - std::atomic<bool>& AbortFlag) const override - { - ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); - const bool ExcludeFromChunking = - std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != - m_Settings.ExcludeExtensions.end(); - - if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) - { - return false; - } - - BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) - { - return false; - } - if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) - { - return false; - } - - OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); - return true; - } - - virtual std::string_view GetName() const override { return Name; } - - virtual CbObject GetParameters() const override - { - CbObjectWriter Writer; - Writer.BeginArray("ChunkExcludeExtensions"sv); - { - for (const std::string& Extension : m_Settings.ExcludeExtensions) - { - Writer.AddString(Extension); - } - } - Writer.EndArray(); // ChunkExcludeExtensions - - Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); - Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); - Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); - - WriteChunkParams(Writer, m_Settings.ChunkingParams); - - return Writer.Save(); - } - static constexpr std::string_view Name = "BasicChunkingController"sv; - -private: - static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters) - { - return BasicChunkingControllerSettings{ - .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), - .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), - .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), - .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), - .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; - } - - const BasicChunkingControllerSettings m_Settings; -}; - -class ChunkingControllerWithFixedChunking : public ChunkingController -{ -public: - ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {} - - ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} - - virtual bool ProcessFile(const std::filesystem::path& InputPath, - uint64_t RawSize, - ChunkedInfoWithSource& OutChunked, - std::atomic<uint64_t>& BytesProcessed, - std::atomic<bool>& AbortFlag) const override - { - ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); - const bool ExcludeFromChunking = - std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != - m_Settings.ExcludeExtensions.end(); - - if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) - { - return false; - } - - const bool FixedChunkingExtension = - std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) != - m_Settings.FixedChunkingExtensions.end(); - - if (FixedChunkingExtension) - { - if (RawSize < m_Settings.MinSizeForFixedChunking) - { - return false; - } - ZEN_TRACE_CPU("FixedChunking"); - IoHashStream FullHasher; - BasicFile Source(InputPath, BasicFile::Mode::kRead); - uint64_t Offset = 0; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize); - ChunkHashToChunkIndex.reserve(ExpectedChunkCount); - OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); - OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); - OutChunked.ChunkSources.reserve(ExpectedChunkCount); - - static const uint64_t BufferingSize = 256u * 1024u; - - IoHashStream ChunkHasher; - - while (Offset < RawSize) - { - if (AbortFlag) - { - return false; - } - - ChunkHasher.Reset(); - - uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize); - if (ChunkSize >= (BufferingSize + BufferingSize / 2)) - { - ScanFile(Source.Handle(), - Offset, - ChunkSize, - BufferingSize, - [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) { - FullHasher.Append(Data, Size); - ChunkHasher.Append(Data, Size); - BytesProcessed.fetch_add(Size); - }); - } - else - { - IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize); - FullHasher.Append(ChunkData); - ChunkHasher.Append(ChunkData); - BytesProcessed.fetch_add(ChunkSize); - } - - const IoHash ChunkHash = ChunkHasher.GetHash(); - if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) - { - OutChunked.Info.ChunkSequence.push_back(It->second); - } - else - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size()); - OutChunked.Info.ChunkHashes.push_back(ChunkHash); - OutChunked.Info.ChunkSequence.push_back(ChunkIndex); - OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)}); - } - Offset += ChunkSize; - } - OutChunked.Info.RawSize = RawSize; - OutChunked.Info.RawHash = FullHasher.GetHash(); - return true; - } - else - { - BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) - { - return false; - } - if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) - { - return false; - } - - OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); - return true; - } - } - - virtual std::string_view GetName() const override { return Name; } - - virtual CbObject GetParameters() const override - { - CbObjectWriter Writer; - Writer.BeginArray("FixedChunkingExtensions"); - { - for (const std::string& Extension : m_Settings.FixedChunkingExtensions) - { - Writer.AddString(Extension); - } - } - Writer.EndArray(); // ChunkExcludeExtensions - - Writer.BeginArray("ChunkExcludeExtensions"sv); - { - for (const std::string& Extension : m_Settings.ExcludeExtensions) - { - Writer.AddString(Extension); - } - } - Writer.EndArray(); // ChunkExcludeExtensions - - Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); - Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); - - Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); - - WriteChunkParams(Writer, m_Settings.ChunkingParams); - - Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize); - Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking); - return Writer.Save(); - } - - static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; - -private: - static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters) - { - return ChunkingControllerWithFixedChunkingSettings{ - .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()), - .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), - .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), - .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), - .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), - .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()), - .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize), - .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)}; - } - - const ChunkingControllerWithFixedChunkingSettings m_Settings; -}; - -std::unique_ptr<ChunkingController> -CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings) -{ - return std::make_unique<BasicChunkingController>(Settings); -} -std::unique_ptr<ChunkingController> -CreateBasicChunkingController(CbObjectView Parameters) -{ - return std::make_unique<BasicChunkingController>(Parameters); -} - -std::unique_ptr<ChunkingController> -CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting) -{ - return std::make_unique<ChunkingControllerWithFixedChunking>(Setting); -} -std::unique_ptr<ChunkingController> -CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) -{ - return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters); -} - -std::unique_ptr<ChunkingController> -CreateChunkingController(std::string_view Name, CbObjectView Parameters) -{ - if (Name == BasicChunkingController::Name) - { - return CreateBasicChunkingController(Parameters); - } - else if (Name == ChunkingControllerWithFixedChunking::Name) - { - return CreateChunkingControllerWithFixedChunking(Parameters); - } - return {}; -} - -} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h deleted file mode 100644 index 277580c74..000000000 --- a/src/zenutil/include/zenutil/chunkblock.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/iohash.h> - -#include <zencore/compactbinary.h> -#include <zencore/compress.h> - -#include <optional> -#include <vector> - -namespace zen { - -struct ThinChunkBlockDescription -{ - IoHash BlockHash; - std::vector<IoHash> ChunkRawHashes; -}; - -struct ChunkBlockDescription : public ThinChunkBlockDescription -{ - uint64_t HeaderSize; - std::vector<uint32_t> ChunkRawLengths; - std::vector<uint32_t> ChunkCompressedLengths; -}; - -std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); -ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); -CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); -ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); -typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc; - -CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock); -bool IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor, - uint64_t& OutHeaderSize); -std::vector<uint32_t> ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize); - -} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h deleted file mode 100644 index 306a5d990..000000000 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/compactbinary.h> -#include <zencore/compactbinarybuilder.h> -#include <zencore/iohash.h> - -#include <filesystem> -#include <vector> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -class CbWriter; -class ChunkingController; -class WorkerThreadPool; - -enum class SourcePlatform -{ - Windows = 0, - Linux = 1, - MacOS = 2, - _Count -}; - -std::string_view ToString(SourcePlatform Platform); -SourcePlatform FromString(std::string_view Platform, SourcePlatform Default); -SourcePlatform GetSourceCurrentPlatform(); - -struct FolderContent -{ - SourcePlatform Platform = GetSourceCurrentPlatform(); - std::vector<std::filesystem::path> Paths; - std::vector<uint64_t> RawSizes; - std::vector<uint32_t> Attributes; - std::vector<uint64_t> ModificationTicks; - - bool operator==(const FolderContent& Rhs) const; - - bool AreKnownFilesEqual(const FolderContent& Rhs) const; - void UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& PathIndexesOufOfDate); - static bool AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs); -}; - -FolderContent GetUpdatedContent(const FolderContent& Old, - const FolderContent& New, - std::vector<std::filesystem::path>& OutDeletedPathIndexes); - -void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output); -FolderContent LoadFolderContentToCompactBinary(CbObjectView Input); - -struct GetFolderContentStatistics -{ - std::atomic<uint64_t> FoundFileCount = 0; - std::atomic<uint64_t> FoundFileByteCount = 0; - std::atomic<uint64_t> AcceptedFileCount = 0; - std::atomic<uint64_t> AcceptedFileByteCount = 0; - uint64_t ElapsedWallTimeUS = 0; -}; - -FolderContent GetFolderContent(GetFolderContentStatistics& Stats, - const std::filesystem::path& RootPath, - std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, - std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile, - WorkerThreadPool& WorkerPool, - int32_t UpdateIntervalMS, - std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, - std::atomic<bool>& AbortFlag); - -struct ChunkedContentData -{ - // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes - // ChunkCounts for that index will be the number of indexes in ChunkOrders that describe - // the sequence of chunks required to reconstruct the asset. - // Offset into ChunkOrders is based on how many entries in ChunkOrders the previous [n - 1] SequenceRawHashes uses - std::vector<IoHash> SequenceRawHashes; // Raw hash for Chunk sequence - std::vector<uint32_t> ChunkCounts; // Chunk count of ChunkOrder for SequenceRawHashes[n] - std::vector<uint32_t> ChunkOrders; // Chunk sequence indexed into ChunkHashes, ChunkCounts[n] indexes per SequenceRawHashes[n] - std::vector<IoHash> ChunkHashes; // Unique chunk hashes - std::vector<uint64_t> ChunkRawSizes; // Unique chunk raw size for ChunkHash[n] -}; - -struct ChunkedFolderContent -{ - SourcePlatform Platform = GetSourceCurrentPlatform(); - std::vector<std::filesystem::path> Paths; - std::vector<uint64_t> RawSizes; - std::vector<uint32_t> Attributes; - std::vector<IoHash> RawHashes; - ChunkedContentData ChunkedContent; -}; - -struct ChunkedContentLookup -{ - struct ChunkSequenceLocation - { - uint32_t SequenceIndex = (uint32_t)-1; - uint64_t Offset = (uint64_t)-1; - }; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex; - std::vector<uint32_t> SequenceIndexChunkOrderOffset; - std::vector<ChunkSequenceLocation> ChunkSequenceLocations; - std::vector<size_t> - ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex - std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex - std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash - std::vector<uint32_t> PathExtensionHash; -}; - -void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output); -ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input); - -ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays); -ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, - const ChunkedContentLookup& BaseContentLookup, - std::span<const std::filesystem::path> DeletedPaths); -ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths); - -struct ChunkingStatistics -{ - std::atomic<uint64_t> FilesProcessed = 0; - std::atomic<uint64_t> FilesChunked = 0; - std::atomic<uint64_t> BytesHashed = 0; - std::atomic<uint64_t> UniqueChunksFound = 0; - std::atomic<uint64_t> UniqueSequencesFound = 0; - std::atomic<uint64_t> UniqueBytesFound = 0; - uint64_t ElapsedWallTimeUS = 0; -}; - -ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, - WorkerThreadPool& WorkerPool, - const std::filesystem::path& RootPath, - const FolderContent& Content, - const ChunkingController& InChunkingController, - int32_t UpdateIntervalMS, - std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag); - -ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); - -inline std::pair<size_t, uint32_t> -GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) -{ - return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]); -} - -inline std::span<const ChunkedContentLookup::ChunkSequenceLocation> -GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) -{ - std::pair<size_t, uint32_t> Range = GetChunkSequenceLocationRange(Lookup, ChunkIndex); - return std::span<const ChunkedContentLookup::ChunkSequenceLocation>(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second); -} - -inline uint32_t -GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) -{ - return Lookup.RawHashToSequenceIndex.at(RawHash); -} - -inline uint32_t -GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) -{ - return Lookup.RawHashToSequenceIndex.at(RawHash); -} - -inline uint32_t -GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex) -{ - return Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; -} - -inline uint32_t -GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) -{ - const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash); - return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex); -} - -namespace compactbinary_helpers { - template<typename Type> - void WriteArray(std::span<const Type> Values, std::string_view ArrayName, CbWriter& Output) - { - Output.BeginArray(ArrayName); - for (const Type Value : Values) - { - Output << Value; - } - Output.EndArray(); - } - - template<typename Type> - void WriteArray(const std::vector<Type>& Values, std::string_view ArrayName, CbWriter& Output) - { - WriteArray(std::span<const Type>(Values), ArrayName, Output); - } - - template<> - inline void WriteArray(std::span<const std::filesystem::path> Values, std::string_view ArrayName, CbWriter& Output) - { - Output.BeginArray(ArrayName); - for (const std::filesystem::path& Path : Values) - { - Output.AddString((const char*)Path.generic_u8string().c_str()); - } - Output.EndArray(); - } - - template<> - inline void WriteArray(const std::vector<std::filesystem::path>& Values, std::string_view ArrayName, CbWriter& Output) - { - WriteArray(std::span<const std::filesystem::path>(Values), ArrayName, Output); - } - - inline void WriteBinaryAttachmentArray(std::span<const IoHash> Values, std::string_view ArrayName, CbWriter& Output) - { - Output.BeginArray(ArrayName); - for (const IoHash& Hash : Values) - { - Output.AddBinaryAttachment(Hash); - } - Output.EndArray(); - } - - inline void WriteBinaryAttachmentArray(const std::vector<IoHash>& Values, std::string_view ArrayName, CbWriter& Output) - { - WriteArray(std::span<const IoHash>(Values), ArrayName, Output); - } - - inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint32_t>& Result) - { - CbArrayView Array = Input[ArrayName].AsArrayView(); - Result.reserve(Array.Num()); - for (CbFieldView ItemView : Array) - { - Result.push_back(ItemView.AsUInt32()); - } - } - - inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint64_t>& Result) - { - CbArrayView Array = Input[ArrayName].AsArrayView(); - Result.reserve(Array.Num()); - for (CbFieldView ItemView : Array) - { - Result.push_back(ItemView.AsUInt64()); - } - } - - inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<std::filesystem::path>& Result) - { - CbArrayView Array = Input[ArrayName].AsArrayView(); - Result.reserve(Array.Num()); - for (CbFieldView ItemView : Array) - { - std::u8string_view U8Path = ItemView.AsU8String(); - Result.push_back(std::filesystem::path(U8Path)); - } - } - - inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result) - { - CbArrayView Array = Input[ArrayName].AsArrayView(); - Result.reserve(Array.Num()); - for (CbFieldView ItemView : Array) - { - Result.push_back(ItemView.AsHash()); - } - } - - inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result) - { - CbArrayView Array = Input[ArrayName].AsArrayView(); - Result.reserve(Array.Num()); - for (CbFieldView ItemView : Array) - { - Result.push_back(ItemView.AsBinaryAttachment()); - } - } - -} // namespace compactbinary_helpers - -} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h deleted file mode 100644 index 4cec80fdb..000000000 --- a/src/zenutil/include/zenutil/chunkedfile.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/iobuffer.h> -#include <zencore/iohash.h> -#include <zencore/zencore.h> - -#include <functional> -#include <vector> - -namespace zen { - -class BasicFile; - -struct ChunkedInfo -{ - uint64_t RawSize = 0; - IoHash RawHash; - std::vector<uint32_t> ChunkSequence; - std::vector<IoHash> ChunkHashes; -}; - -struct ChunkSource -{ - uint64_t Offset; // 8 - uint32_t Size; // 4 -}; - -struct ChunkedInfoWithSource -{ - ChunkedInfo Info; - std::vector<ChunkSource> ChunkSources; -}; - -struct ChunkedParams -{ - bool UseThreshold = true; - size_t MinSize = (2u * 1024u) - 128u; - size_t MaxSize = (16u * 1024u); - size_t AvgSize = (3u * 1024u); -}; - -static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; - -ChunkedInfoWithSource ChunkData(BasicFile& RawData, - uint64_t Offset, - uint64_t Size, - ChunkedParams Params = {}, - std::atomic<uint64_t>* BytesProcessed = nullptr, - std::atomic<bool>* AbortFlag = nullptr); -void Reconstruct(const ChunkedInfo& Info, - const std::filesystem::path& TargetPath, - std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk); -IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info); -ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer); - -void chunkedfile_forcelink(); -} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h deleted file mode 100644 index 315502265..000000000 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/compactbinary.h> - -#include <zenutil/chunkedfile.h> - -#include <atomic> -#include <filesystem> - -namespace zen { - -const std::vector<std::string> DefaultChunkingExcludeExtensions = - {".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip"}; -const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"}; -const bool DefaultChunkingExcludeElfFiles = true; -const bool DefaultChunkingExcludeMachOFiles = true; - -const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, - .MaxSize = 128u * 1024u, - .AvgSize = ((8u * 4u) * 1024u) + 128u}; - -const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; - -const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u; -const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u; - -struct ChunkedInfoWithSource; - -class ChunkingController -{ -public: - virtual ~ChunkingController() {} - - // Return true if the input file was processed. If true is returned OutChunked will contain the chunked info - virtual bool ProcessFile(const std::filesystem::path& InputPath, - uint64_t RawSize, - ChunkedInfoWithSource& OutChunked, - std::atomic<uint64_t>& BytesProcessed, - std::atomic<bool>& AbortFlag) const = 0; - virtual std::string_view GetName() const = 0; - virtual CbObject GetParameters() const = 0; -}; - -struct BasicChunkingControllerSettings -{ - std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; - bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; - bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; - uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; - ChunkedParams ChunkingParams = DefaultChunkedParams; -}; - -std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings); -std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters); - -struct ChunkingControllerWithFixedChunkingSettings -{ - std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions; - std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; - bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; - bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; - uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; - ChunkedParams ChunkingParams = DefaultChunkedParams; - uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize; - uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking; -}; - -std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting); -std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); - -std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters); - -} // namespace zen diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index 37b229c49..88be8a244 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -6,7 +6,6 @@ # include <zenutil/cache/cacherequests.h> # include <zenutil/cache/rpcrecording.h> -# include <zenutil/chunkedfile.h> # include <zenutil/commandlineoptions.h> # include <zenutil/parallelwork.h> # include <zenutil/wildcard.h> @@ -19,7 +18,6 @@ zenutil_forcelinktests() cachepolicy_forcelink(); cache::rpcrecord_forcelink(); cacherequests_forcelink(); - chunkedfile_forcelink(); commandlineoptions_forcelink(); parallellwork_forcelink(); wildcard_forcelink(); |