// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END namespace zen { using namespace std::literals; namespace { std::vector ReadStringArray(CbArrayView StringArray) { std::vector Result; Result.reserve(StringArray.Num()); for (CbFieldView FieldView : StringArray) { Result.emplace_back(FieldView.AsString()); } return Result; } ChunkedParams ReadChunkParams(CbObjectView Params) { bool UseThreshold = Params["UseThreshold"sv].AsBool(true); size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; } void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params) { Writer.BeginObject("ChunkingParams"sv); { Writer.AddBool("UseThreshold"sv, Params.UseThreshold); Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize); Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize); Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize); } Writer.EndObject(); // ChunkingParams } bool IsElfFile(BasicFile& Buffer) { if (Buffer.FileSize() > 4) { uint32_t ElfCheck = 0; Buffer.Read(&ElfCheck, 4, 0); if (ElfCheck == 0x464c457f) { return true; } } return false; } bool IsMachOFile(BasicFile& Buffer) { if (Buffer.FileSize() > 4) { uint32_t MachOCheck = 0; Buffer.Read(&MachOCheck, 4, 0); if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) { return true; } } return false; } } // namespace class BasicChunkingController : public ChunkingController { public: BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {} BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); const bool ExcludeFromChunking = std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != m_Settings.ExcludeExtensions.end(); if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) { return false; } BasicFile Buffer(InputPath, BasicFile::Mode::kRead); if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) { return false; } if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) { return false; } OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); return true; } virtual std::string_view GetName() const override { return Name; } virtual CbObject GetParameters() const override { CbObjectWriter Writer; Writer.BeginArray("ChunkExcludeExtensions"sv); { for (const std::string& Extension : m_Settings.ExcludeExtensions) { Writer.AddString(Extension); } } Writer.EndArray(); // ChunkExcludeExtensions Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); WriteChunkParams(Writer, m_Settings.ChunkingParams); return Writer.Save(); } static constexpr std::string_view Name = "BasicChunkingController"sv; private: static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters) { return BasicChunkingControllerSettings{ .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; } const BasicChunkingControllerSettings m_Settings; }; class ChunkingControllerWithFixedChunking : public ChunkingController { public: ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {} ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); const bool ExcludeFromChunking = std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != m_Settings.ExcludeExtensions.end(); if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) { return false; } const bool FixedChunkingExtension = std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) != m_Settings.FixedChunkingExtensions.end(); if (FixedChunkingExtension) { if (RawSize < m_Settings.MinSizeForFixedChunking) { return false; } ZEN_TRACE_CPU("FixedChunking"); IoHashStream FullHasher; BasicFile Source(InputPath, BasicFile::Mode::kRead); uint64_t Offset = 0; tsl::robin_map ChunkHashToChunkIndex; const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize); ChunkHashToChunkIndex.reserve(ExpectedChunkCount); OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); OutChunked.ChunkSources.reserve(ExpectedChunkCount); static const uint64_t BufferingSize = 256u * 1024u; IoHashStream ChunkHasher; while (Offset < RawSize) { if (AbortFlag) { return false; } ChunkHasher.Reset(); uint64_t ChunkSize = std::min(RawSize - Offset, m_Settings.FixedChunkingChunkSize); if (ChunkSize >= (BufferingSize + BufferingSize / 2)) { ScanFile(Source.Handle(), Offset, ChunkSize, BufferingSize, [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) { FullHasher.Append(Data, Size); ChunkHasher.Append(Data, Size); BytesProcessed.fetch_add(Size); }); } else { IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize); FullHasher.Append(ChunkData); ChunkHasher.Append(ChunkData); BytesProcessed.fetch_add(ChunkSize); } const IoHash ChunkHash = ChunkHasher.GetHash(); if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) { OutChunked.Info.ChunkSequence.push_back(It->second); } else { uint32_t ChunkIndex = gsl::narrow(OutChunked.Info.ChunkHashes.size()); OutChunked.Info.ChunkHashes.push_back(ChunkHash); OutChunked.Info.ChunkSequence.push_back(ChunkIndex); OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow(ChunkSize)}); } Offset += ChunkSize; } OutChunked.Info.RawSize = RawSize; OutChunked.Info.RawHash = FullHasher.GetHash(); return true; } else { BasicFile Buffer(InputPath, BasicFile::Mode::kRead); if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) { return false; } if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) { return false; } OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); return true; } } virtual std::string_view GetName() const override { return Name; } virtual CbObject GetParameters() const override { CbObjectWriter Writer; Writer.BeginArray("FixedChunkingExtensions"); { for (const std::string& Extension : m_Settings.FixedChunkingExtensions) { Writer.AddString(Extension); } } Writer.EndArray(); // ChunkExcludeExtensions Writer.BeginArray("ChunkExcludeExtensions"sv); { for (const std::string& Extension : m_Settings.ExcludeExtensions) { Writer.AddString(Extension); } } Writer.EndArray(); // ChunkExcludeExtensions Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); WriteChunkParams(Writer, m_Settings.ChunkingParams); Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize); Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking); return Writer.Save(); } static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; private: static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters) { return ChunkingControllerWithFixedChunkingSettings{ .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()), .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()), .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize), .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)}; } const ChunkingControllerWithFixedChunkingSettings m_Settings; }; std::unique_ptr CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings) { return std::make_unique(Settings); } std::unique_ptr CreateBasicChunkingController(CbObjectView Parameters) { return std::make_unique(Parameters); } std::unique_ptr CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting) { return std::make_unique(Setting); } std::unique_ptr CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) { return std::make_unique(Parameters); } std::unique_ptr CreateChunkingController(std::string_view Name, CbObjectView Parameters) { if (Name == BasicChunkingController::Name) { return CreateBasicChunkingController(Parameters); } else if (Name == ChunkingControllerWithFixedChunking::Name) { return CreateChunkingControllerWithFixedChunking(Parameters); } return {}; } } // namespace zen