diff options
| author | Dan Engelbrecht <[email protected]> | 2025-10-03 11:49:14 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-10-03 11:49:14 +0200 |
| commit | faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde (patch) | |
| tree | 2bcd09fe17af6f25108fd05578e7eda6a827d8ec /src/zenutil/chunkingcontroller.cpp | |
| parent | cache RPC replay fixes (minor) (#544) (diff) | |
| download | zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.tar.xz zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.zip | |
move chunking code to zenremotestore lib (#545)
Diffstat (limited to 'src/zenutil/chunkingcontroller.cpp')
| -rw-r--r-- | src/zenutil/chunkingcontroller.cpp | 359 |
1 files changed, 0 insertions, 359 deletions
diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp deleted file mode 100644 index 6fb4182c0..000000000 --- a/src/zenutil/chunkingcontroller.cpp +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenutil/chunkingcontroller.h> - -#include <zencore/basicfile.h> -#include <zencore/compactbinarybuilder.h> -#include <zencore/filesystem.h> -#include <zencore/trace.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { -using namespace std::literals; - -namespace { - std::vector<std::string> ReadStringArray(CbArrayView StringArray) - { - std::vector<std::string> Result; - Result.reserve(StringArray.Num()); - for (CbFieldView FieldView : StringArray) - { - Result.emplace_back(FieldView.AsString()); - } - return Result; - } - - ChunkedParams ReadChunkParams(CbObjectView Params) - { - bool UseThreshold = Params["UseThreshold"sv].AsBool(true); - size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); - size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); - size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); - - return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; - } - - void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params) - { - Writer.BeginObject("ChunkingParams"sv); - { - Writer.AddBool("UseThreshold"sv, Params.UseThreshold); - - Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize); - Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize); - Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize); - } - Writer.EndObject(); // ChunkingParams - } - - bool IsElfFile(BasicFile& Buffer) - { - if (Buffer.FileSize() > 4) - { - uint32_t ElfCheck = 0; - Buffer.Read(&ElfCheck, 4, 0); - if (ElfCheck == 0x464c457f) - { - return true; - } - } - return false; - } - - bool IsMachOFile(BasicFile& Buffer) - { - if (Buffer.FileSize() > 4) - { - uint32_t MachOCheck = 0; - Buffer.Read(&MachOCheck, 4, 0); - if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) - { - return true; - } - } - return false; - } -} // namespace - -class BasicChunkingController : public ChunkingController -{ -public: - BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {} - - BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} - - virtual bool ProcessFile(const std::filesystem::path& InputPath, - uint64_t RawSize, - ChunkedInfoWithSource& OutChunked, - std::atomic<uint64_t>& BytesProcessed, - std::atomic<bool>& AbortFlag) const override - { - ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); - const bool ExcludeFromChunking = - std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != - m_Settings.ExcludeExtensions.end(); - - if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) - { - return false; - } - - BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) - { - return false; - } - if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) - { - return false; - } - - OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); - return true; - } - - virtual std::string_view GetName() const override { return Name; } - - virtual CbObject GetParameters() const override - { - CbObjectWriter Writer; - Writer.BeginArray("ChunkExcludeExtensions"sv); - { - for (const std::string& Extension : m_Settings.ExcludeExtensions) - { - Writer.AddString(Extension); - } - } - Writer.EndArray(); // ChunkExcludeExtensions - - Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); - Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); - Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); - - WriteChunkParams(Writer, m_Settings.ChunkingParams); - - return Writer.Save(); - } - static constexpr std::string_view Name = "BasicChunkingController"sv; - -private: - static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters) - { - return BasicChunkingControllerSettings{ - .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), - .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), - .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), - .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), - .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; - } - - const BasicChunkingControllerSettings m_Settings; -}; - -class ChunkingControllerWithFixedChunking : public ChunkingController -{ -public: - ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {} - - ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} - - virtual bool ProcessFile(const std::filesystem::path& InputPath, - uint64_t RawSize, - ChunkedInfoWithSource& OutChunked, - std::atomic<uint64_t>& BytesProcessed, - std::atomic<bool>& AbortFlag) const override - { - ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); - const bool ExcludeFromChunking = - std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != - m_Settings.ExcludeExtensions.end(); - - if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) - { - return false; - } - - const bool FixedChunkingExtension = - std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) != - m_Settings.FixedChunkingExtensions.end(); - - if (FixedChunkingExtension) - { - if (RawSize < m_Settings.MinSizeForFixedChunking) - { - return false; - } - ZEN_TRACE_CPU("FixedChunking"); - IoHashStream FullHasher; - BasicFile Source(InputPath, BasicFile::Mode::kRead); - uint64_t Offset = 0; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize); - ChunkHashToChunkIndex.reserve(ExpectedChunkCount); - OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); - OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); - OutChunked.ChunkSources.reserve(ExpectedChunkCount); - - static const uint64_t BufferingSize = 256u * 1024u; - - IoHashStream ChunkHasher; - - while (Offset < RawSize) - { - if (AbortFlag) - { - return false; - } - - ChunkHasher.Reset(); - - uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize); - if (ChunkSize >= (BufferingSize + BufferingSize / 2)) - { - ScanFile(Source.Handle(), - Offset, - ChunkSize, - BufferingSize, - [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) { - FullHasher.Append(Data, Size); - ChunkHasher.Append(Data, Size); - BytesProcessed.fetch_add(Size); - }); - } - else - { - IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize); - FullHasher.Append(ChunkData); - ChunkHasher.Append(ChunkData); - BytesProcessed.fetch_add(ChunkSize); - } - - const IoHash ChunkHash = ChunkHasher.GetHash(); - if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) - { - OutChunked.Info.ChunkSequence.push_back(It->second); - } - else - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size()); - OutChunked.Info.ChunkHashes.push_back(ChunkHash); - OutChunked.Info.ChunkSequence.push_back(ChunkIndex); - OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)}); - } - Offset += ChunkSize; - } - OutChunked.Info.RawSize = RawSize; - OutChunked.Info.RawHash = FullHasher.GetHash(); - return true; - } - else - { - BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) - { - return false; - } - if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) - { - return false; - } - - OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); - return true; - } - } - - virtual std::string_view GetName() const override { return Name; } - - virtual CbObject GetParameters() const override - { - CbObjectWriter Writer; - Writer.BeginArray("FixedChunkingExtensions"); - { - for (const std::string& Extension : m_Settings.FixedChunkingExtensions) - { - Writer.AddString(Extension); - } - } - Writer.EndArray(); // ChunkExcludeExtensions - - Writer.BeginArray("ChunkExcludeExtensions"sv); - { - for (const std::string& Extension : m_Settings.ExcludeExtensions) - { - Writer.AddString(Extension); - } - } - Writer.EndArray(); // ChunkExcludeExtensions - - Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); - Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); - - Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); - - WriteChunkParams(Writer, m_Settings.ChunkingParams); - - Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize); - Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking); - return Writer.Save(); - } - - static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; - -private: - static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters) - { - return ChunkingControllerWithFixedChunkingSettings{ - .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()), - .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), - .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), - .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), - .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), - .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()), - .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize), - .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)}; - } - - const ChunkingControllerWithFixedChunkingSettings m_Settings; -}; - -std::unique_ptr<ChunkingController> -CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings) -{ - return std::make_unique<BasicChunkingController>(Settings); -} -std::unique_ptr<ChunkingController> -CreateBasicChunkingController(CbObjectView Parameters) -{ - return std::make_unique<BasicChunkingController>(Parameters); -} - -std::unique_ptr<ChunkingController> -CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting) -{ - return std::make_unique<ChunkingControllerWithFixedChunking>(Setting); -} -std::unique_ptr<ChunkingController> -CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) -{ - return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters); -} - -std::unique_ptr<ChunkingController> -CreateChunkingController(std::string_view Name, CbObjectView Parameters) -{ - if (Name == BasicChunkingController::Name) - { - return CreateBasicChunkingController(Parameters); - } - else if (Name == ChunkingControllerWithFixedChunking::Name) - { - return CreateChunkingControllerWithFixedChunking(Parameters); - } - return {}; -} - -} // namespace zen |