// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include #include ZEN_THIRD_PARTY_INCLUDES_START #include #include ZEN_THIRD_PARTY_INCLUDES_END namespace zen { using namespace std::literals; namespace { ChunkedParams ReadChunkParams(CbObjectView Params) { bool UseThreshold = Params["UseThreshold"sv].AsBool(true); size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultDynamicChunkingParams.MinSize); size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultDynamicChunkingParams.MaxSize); size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultDynamicChunkingParams.AvgSize); return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; } void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params) { Writer.BeginObject("ChunkingParams"sv); { Writer.AddBool("UseThreshold"sv, Params.UseThreshold); Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize); Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize); Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize); } Writer.EndObject(); // ChunkingParams } } // namespace namespace legacy { const std::vector DefaultChunkingExcludeExtensions = { ".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip", ".sym", ".psym", ".txt", ".ini", ".json", ".verse", ".versemodule", ".jpg", ".c", ".h", ".cpp", ".cxx", ".c++", ".cc", ".hpp", ".hxx", ".h++", ".py", ".ogg", ".plist"}; const std::vector DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"}; const bool DefaultChunkingExcludeElfFiles = true; const bool DefaultChunkingExcludeMachOFiles = true; const size_t DefaultChunkingFileSizeLimit = DefaultDynamicChunkingParams.MaxSize; const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u; const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u; struct BasicChunkingControllerSettings { std::vector ExcludeExtensions = DefaultChunkingExcludeExtensions; bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; ChunkedParams ChunkingParams = DefaultDynamicChunkingParams; }; struct ChunkingControllerWithFixedChunkingSettings { std::vector FixedChunkingExtensions = DefaultFixedChunkingExtensions; std::vector ExcludeExtensions = DefaultChunkingExcludeExtensions; bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; ChunkedParams ChunkingParams = DefaultDynamicChunkingParams; uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize; uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking; }; bool IsElfFile(BasicFile& Buffer) { if (Buffer.FileSize() > 4) { uint32_t ElfCheck = 0; Buffer.Read(&ElfCheck, 4, 0); if (ElfCheck == 0x464c457f) { return true; } } return false; } bool IsMachOFile(BasicFile& Buffer) { if (Buffer.FileSize() > 4) { uint32_t MachOCheck = 0; Buffer.Read(&MachOCheck, 4, 0); if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) { return true; } } return false; } //////////// BasicChunkingController class BasicChunkingController : public ChunkingController { public: BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) { m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size()); for (const std::string& Extension : Settings.ExcludeExtensions) { m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); } } BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); if (RawSize < m_Settings.ChunkFileSizeLimit) { return false; } const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string()); const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash); if (ExcludeFromChunking) { return false; } BasicFile Buffer(InputPath, BasicFile::Mode::kRead); if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) { return false; } if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) { return false; } OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); return true; } virtual std::string_view GetName() const override { return Name; } virtual CbObject GetParameters() const override { CbObjectWriter Writer; compactbinary_helpers::WriteArray(m_Settings.ExcludeExtensions, "ChunkExcludeExtensions"sv, Writer); Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); WriteChunkParams(Writer, m_Settings.ChunkingParams); return Writer.Save(); } static constexpr std::string_view Name = "BasicChunkingController"sv; private: static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters) { return BasicChunkingControllerSettings{ .ExcludeExtensions = compactbinary_helpers::ReadArray("ChunkExcludeExtensions"sv, Parameters), .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; } const BasicChunkingControllerSettings m_Settings; tsl::robin_set m_ExcludeExtensionHashes; }; std::unique_ptr CreateBasicChunkingController(CbObjectView Parameters) { return std::make_unique(Parameters); } //////////// ChunkingControllerWithFixedChunking class ChunkingControllerWithFixedChunking : public ChunkingController { public: ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) { m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size()); for (const std::string& Extension : Settings.ExcludeExtensions) { m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); } m_FixedChunkingExtensionHashes.reserve(Settings.FixedChunkingExtensions.size()); for (const std::string& Extension : Settings.FixedChunkingExtensions) { m_FixedChunkingExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); } } ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); if (RawSize < m_Settings.ChunkFileSizeLimit) { return false; } const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string()); const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash); if (ExcludeFromChunking) { return false; } const bool FixedChunkingExtension = m_FixedChunkingExtensionHashes.contains(ExtensionHash); if (FixedChunkingExtension) { if (RawSize < m_Settings.MinSizeForFixedChunking) { return false; } ZEN_TRACE_CPU("FixedChunking"); IoHashStream FullHasher; BasicFile Source(InputPath, BasicFile::Mode::kRead); uint64_t Offset = 0; tsl::robin_map ChunkHashToChunkIndex; const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize); ChunkHashToChunkIndex.reserve(ExpectedChunkCount); OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); OutChunked.ChunkSources.reserve(ExpectedChunkCount); static const uint64_t BufferingSize = 256u * 1024u; IoHashStream ChunkHasher; while (Offset < RawSize) { if (AbortFlag) { return false; } ChunkHasher.Reset(); uint64_t ChunkSize = std::min(RawSize - Offset, m_Settings.FixedChunkingChunkSize); if (ChunkSize >= (BufferingSize + BufferingSize / 2)) { ScanFile(Source.Handle(), Offset, ChunkSize, BufferingSize, [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) { FullHasher.Append(Data, Size); ChunkHasher.Append(Data, Size); BytesProcessed.fetch_add(Size); }); } else { IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize); FullHasher.Append(ChunkData); ChunkHasher.Append(ChunkData); BytesProcessed.fetch_add(ChunkSize); } const IoHash ChunkHash = ChunkHasher.GetHash(); if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) { OutChunked.Info.ChunkSequence.push_back(It->second); } else { uint32_t ChunkIndex = gsl::narrow(OutChunked.Info.ChunkHashes.size()); OutChunked.Info.ChunkHashes.push_back(ChunkHash); OutChunked.Info.ChunkSequence.push_back(ChunkIndex); OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow(ChunkSize)}); } Offset += ChunkSize; } OutChunked.Info.RawSize = RawSize; OutChunked.Info.RawHash = FullHasher.GetHash(); return true; } else { BasicFile Buffer(InputPath, BasicFile::Mode::kRead); if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) { return false; } if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) { return false; } OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); return true; } } virtual std::string_view GetName() const override { return Name; } virtual CbObject GetParameters() const override { CbObjectWriter Writer; compactbinary_helpers::WriteArray(m_Settings.FixedChunkingExtensions, "FixedChunkingExtensions", Writer); compactbinary_helpers::WriteArray(m_Settings.ExcludeExtensions, "ChunkExcludeExtensions", Writer); Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); WriteChunkParams(Writer, m_Settings.ChunkingParams); Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize); Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking); return Writer.Save(); } static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; private: static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters) { return ChunkingControllerWithFixedChunkingSettings{ .FixedChunkingExtensions = compactbinary_helpers::ReadArray("FixedChunkingExtensions"sv, Parameters), .ExcludeExtensions = compactbinary_helpers::ReadArray("ChunkExcludeExtensions"sv, Parameters), .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()), .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize), .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)}; } const ChunkingControllerWithFixedChunkingSettings m_Settings; tsl::robin_set m_FixedChunkingExtensionHashes; tsl::robin_set m_ExcludeExtensionHashes; }; std::unique_ptr CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) { return std::make_unique(Parameters); } } // namespace legacy //////////// StandardChunkingController class StandardChunkingController : public ChunkingController { public: StandardChunkingController(const StandardChunkingControllerSettings& Settings) : m_Settings(Settings) { m_SplitOnlyExtensionHashes.reserve(Settings.SplitOnlyExtensions.size()); for (const std::string& Extension : Settings.SplitOnlyExtensions) { m_SplitOnlyExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); } m_SplitAndCompressExtensionHashes.reserve(Settings.SplitAndCompressExtensions.size()); for (const std::string& Extension : Settings.SplitAndCompressExtensions) { m_SplitAndCompressExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); } } StandardChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} bool FixedChunking(BasicFile& Source, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, std::atomic& BytesProcessed, const uint64_t FixedChunkSize, std::atomic& AbortFlag) const { ZEN_TRACE_CPU("FixedChunking"); IoHashStream FullHasher; uint64_t Offset = 0; tsl::robin_map ChunkHashToChunkIndex; const uint64_t ExpectedChunkCount = 1 + (RawSize / FixedChunkSize); ChunkHashToChunkIndex.reserve(ExpectedChunkCount); OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); OutChunked.ChunkSources.reserve(ExpectedChunkCount); static const uint64_t BufferingSize = 256u * 1024u; static const uint64_t MinimumLastChunkSize = Min(128u * 1024u, FixedChunkSize / 32); IoHashStream ChunkHasher; BasicFileBuffer SourceBuffer(Source, Min(BufferingSize, RawSize)); while (Offset < RawSize) { if (AbortFlag) { return false; } ChunkHasher.Reset(); const uint64_t ChunkStartOffset = Offset; const uint64_t BytesLeft = RawSize - Offset; uint64_t ChunkSize = std::min(BytesLeft, FixedChunkSize); if ((BytesLeft - ChunkSize) < MinimumLastChunkSize) { // Avoid small chunks from the end of the file ChunkSize = BytesLeft; } const uint64_t End = ChunkStartOffset + ChunkSize; while (Offset < End) { const uint64_t BufferSize = std::min(RawSize - Offset, BufferingSize); MemoryView ChunkData = SourceBuffer.MakeView(BufferSize, Offset); if (ChunkData.IsEmpty()) { throw std::runtime_error(fmt::format("Invalid format. Expected to read {} bytes at {}", BufferSize, Offset)); } FullHasher.Append(ChunkData); ChunkHasher.Append(ChunkData); BytesProcessed.fetch_add(BufferSize); Offset += BufferSize; } const IoHash ChunkHash = ChunkHasher.GetHash(); if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) { OutChunked.Info.ChunkSequence.push_back(It->second); } else { uint32_t ChunkIndex = gsl::narrow(OutChunked.Info.ChunkHashes.size()); OutChunked.Info.ChunkHashes.push_back(ChunkHash); OutChunked.Info.ChunkSequence.push_back(ChunkIndex); OutChunked.ChunkSources.push_back({.Offset = ChunkStartOffset, .Size = gsl::narrow(ChunkSize)}); } } OutChunked.Info.RawSize = RawSize; OutChunked.Info.RawHash = FullHasher.GetHash(); return true; } virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { ZEN_TRACE_CPU("StandardChunkingController::ProcessFile"); const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string()); if (m_SplitOnlyExtensionHashes.contains(ExtensionHash)) { if (RawSize < m_Settings.SplitOnlyMinSize) { return false; } BasicFile Source(InputPath, BasicFile::Mode::kRead); return FixedChunking(Source, RawSize, OutChunked, BytesProcessed, m_Settings.SplitOnlyChunkSize, AbortFlag); } if (m_SplitAndCompressExtensionHashes.contains(ExtensionHash)) { if (RawSize < m_Settings.SplitAndCompressMinSize) { return false; } BasicFile Source(InputPath, BasicFile::Mode::kRead); return FixedChunking(Source, RawSize, OutChunked, BytesProcessed, m_Settings.SplitAndCompressChunkSize, AbortFlag); } if (RawSize > sizeof(uint32_t) && !m_Settings.SplitAndCompressFileLeadingBytes.empty()) { BasicFile Source(InputPath, BasicFile::Mode::kRead); uint32_t LeadingBytes = 0; Source.Read(&LeadingBytes, 4, 0); if (std::find(m_Settings.SplitAndCompressFileLeadingBytes.begin(), m_Settings.SplitAndCompressFileLeadingBytes.end(), LeadingBytes) != m_Settings.SplitAndCompressFileLeadingBytes.end()) { if (RawSize < m_Settings.SplitAndCompressMinSize) { return false; } else { return FixedChunking(Source, RawSize, OutChunked, BytesProcessed, m_Settings.SplitAndCompressChunkSize, AbortFlag); } } } if (RawSize < m_Settings.DynamicChunkingParams.MaxSize) { return false; } else { BasicFile Source(InputPath, BasicFile::Mode::kRead); OutChunked = ChunkData(Source, 0, RawSize, m_Settings.DynamicChunkingParams, &BytesProcessed, &AbortFlag); } return true; } virtual std::string_view GetName() const override { return Name; } virtual CbObject GetParameters() const override { CbObjectWriter Writer; compactbinary_helpers::WriteArray(m_Settings.SplitOnlyExtensions, "SplitOnlyExtensions"sv, Writer); Writer.AddInteger("SplitOnlyChunkSize"sv, m_Settings.SplitOnlyChunkSize); Writer.AddInteger("SplitOnlyMinSize"sv, m_Settings.SplitOnlyMinSize); compactbinary_helpers::WriteArray(m_Settings.SplitAndCompressExtensions, "SplitAndCompressExtensions"sv, Writer); compactbinary_helpers::WriteArray(m_Settings.SplitAndCompressFileLeadingBytes, "SplitAndCompressFileLeadingBytes"sv, Writer); Writer.AddInteger("SplitAndCompressChunkSize"sv, m_Settings.SplitAndCompressChunkSize); Writer.AddInteger("SplitAndCompressMinSize"sv, m_Settings.SplitAndCompressMinSize); WriteChunkParams(Writer, m_Settings.DynamicChunkingParams); return Writer.Save(); } static constexpr std::string_view Name = "StandardChunkingController"sv; private: static StandardChunkingControllerSettings ReadSettings(CbObjectView Parameters) { return StandardChunkingControllerSettings{ .SplitOnlyExtensions = compactbinary_helpers::ReadArray("SplitOnlyExtensions"sv, Parameters), .SplitOnlyChunkSize = Parameters["SplitOnlyChunkSize"sv].AsUInt64(DefaultSplitOnlyChunkSize), .SplitOnlyMinSize = Parameters["SplitOnlyMinSize"sv].AsUInt64(DefaultSplitOnlyMinSize), .SplitAndCompressExtensions = compactbinary_helpers::ReadArray("SplitAndCompressExtensions"sv, Parameters), .SplitAndCompressFileLeadingBytes = compactbinary_helpers::ReadArray("SplitAndCompressFileLeadingBytes"sv, Parameters), .SplitAndCompressChunkSize = Parameters["SplitAndCompressChunkSize"sv].AsUInt64(DefaultSplitAndCompressChunkSize), .SplitAndCompressMinSize = Parameters["SplitAndCompressMinSize"sv].AsUInt64(DefaultSplitAndCompressMinSize), .DynamicChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; } const StandardChunkingControllerSettings m_Settings; tsl::robin_set m_SplitOnlyExtensionHashes; tsl::robin_set m_SplitAndCompressExtensionHashes; }; std::unique_ptr CreateStandardChunkingController(const StandardChunkingControllerSettings& Setting) { return std::make_unique(Setting); } std::unique_ptr CreateStandardChunkingController(CbObjectView Parameters) { return std::make_unique(Parameters); } std::unique_ptr CreateChunkingController(std::string_view Name, CbObjectView Parameters) { if (Name == legacy::BasicChunkingController::Name) { return legacy::CreateBasicChunkingController(Parameters); } else if (Name == legacy::ChunkingControllerWithFixedChunking::Name) { return legacy::CreateChunkingControllerWithFixedChunking(Parameters); } else if (Name == StandardChunkingController::Name) { return CreateStandardChunkingController(Parameters); } return {}; } } // namespace zen