aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunkingcontroller.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2025-10-03 11:49:14 +0200
committer GitHub Enterprise <[email protected]> 2025-10-03 11:49:14 +0200
commit faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde (patch)
tree 2bcd09fe17af6f25108fd05578e7eda6a827d8ec /src/zenutil/chunkingcontroller.cpp
parent cache RPC replay fixes (minor) (#544) (diff)
download zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.tar.xz
zen-faf0b7c9b6a08b095f8dc895904f4f7d3f30dcde.zip
move chunking code to zenremotestore lib (#545)
Diffstat (limited to 'src/zenutil/chunkingcontroller.cpp')
-rw-r--r-- src/zenutil/chunkingcontroller.cpp 359
1 files changed, 0 insertions, 359 deletions
diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp
deleted file mode 100644
index 6fb4182c0..000000000
--- a/src/zenutil/chunkingcontroller.cpp
+++ /dev/null
@@ -1,359 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenutil/chunkingcontroller.h>
-
-#include <zencore/basicfile.h>
-#include <zencore/compactbinarybuilder.h>
-#include <zencore/filesystem.h>
-#include <zencore/trace.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-using namespace std::literals;
-
-namespace {
-    // Copies every field of StringArray into a vector of strings, converting each
-    // field with AsString() and keeping the order in which the array yields them.
-    std::vector<std::string> ReadStringArray(CbArrayView StringArray)
-    {
-        std::vector<std::string> Strings;
-        Strings.reserve(StringArray.Num());
-
-        for (CbFieldView Entry : StringArray)
-        {
-            Strings.push_back(std::string(Entry.AsString()));
-        }
-
-        return Strings;
-    }
-
-    // Reads a ChunkedParams value out of a compact-binary object.
-    // A missing "UseThreshold" falls back to true; missing size fields fall back
-    // to the corresponding member of DefaultChunkedParams.
-    ChunkedParams ReadChunkParams(CbObjectView Params)
-    {
-        const size_t AvgChunkSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize);
-        const size_t MaxChunkSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize);
-        const size_t MinChunkSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize);
-        const bool   Threshold    = Params["UseThreshold"sv].AsBool(true);
-
-        ChunkedParams Result{.UseThreshold = Threshold, .MinSize = MinChunkSize, .MaxSize = MaxChunkSize, .AvgSize = AvgChunkSize};
-        return Result;
-    }
-
-    // Appends Params to Writer as a nested object named "ChunkingParams" holding
-    // the fields "UseThreshold", "MinSize", "MaxSize" and "AvgSize", in that order.
-    void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params)
-    {
-        const uint64_t MinChunkSize = static_cast<uint64_t>(Params.MinSize);
-        const uint64_t MaxChunkSize = static_cast<uint64_t>(Params.MaxSize);
-        const uint64_t AvgChunkSize = static_cast<uint64_t>(Params.AvgSize);
-
-        Writer.BeginObject("ChunkingParams"sv);
-        Writer.AddBool("UseThreshold"sv, Params.UseThreshold);
-        Writer.AddInteger("MinSize"sv, MinChunkSize);
-        Writer.AddInteger("MaxSize"sv, MaxChunkSize);
-        Writer.AddInteger("AvgSize"sv, AvgChunkSize);
-        Writer.EndObject();  // ChunkingParams
-    }
-
-    // Returns true when the first four bytes of the file, loaded into a uint32_t,
-    // equal 0x464c457f (the bytes 0x7F 'E' 'L' 'F' as a little-endian value).
-    // Files of four bytes or fewer are never reported as ELF.
-    bool IsElfFile(BasicFile& Buffer)
-    {
-        if (Buffer.FileSize() <= 4)
-        {
-            return false;
-        }
-
-        constexpr uint32_t ElfMagic = 0x464c457f;
-
-        uint32_t Header = 0;
-        Buffer.Read(&Header, 4, 0);
-        return Header == ElfMagic;
-    }
-
-    // Returns true when the first four bytes of the file, loaded into a uint32_t,
-    // match one of the thin Mach-O header magics. Both the 32-bit and the 64-bit
-    // magic are accepted, each in native and byte-swapped form, so 64-bit images
-    // are detected as well. Files of four bytes or fewer are never reported as Mach-O.
-    // NOTE(review): universal ("fat") binaries use a different magic and are not
-    // matched here - confirm whether they should be excluded too.
-    bool IsMachOFile(BasicFile& Buffer)
-    {
-        if (Buffer.FileSize() > 4)
-        {
-            constexpr uint32_t MachOMagic32        = 0xfeedface;
-            constexpr uint32_t MachOMagic32Swapped = 0xcefaedfe;
-            constexpr uint32_t MachOMagic64        = 0xfeedfacf;
-            constexpr uint32_t MachOMagic64Swapped = 0xcffaedfe;
-
-            uint32_t MachOCheck = 0;
-            Buffer.Read(&MachOCheck, 4, 0);
-            if ((MachOCheck == MachOMagic32) || (MachOCheck == MachOMagic32Swapped) || (MachOCheck == MachOMagic64) ||
-                (MachOCheck == MachOMagic64Swapped))
-            {
-                return true;
-            }
-        }
-        return false;
-    }
-} // namespace
-
-// Chunking controller that either declines a file (ProcessFile returns false) or
-// hands the whole file to ChunkData() using the configured ChunkingParams.
-class BasicChunkingController : public ChunkingController
-{
-public:
-    // Identifier returned by GetName() and matched by CreateChunkingController().
-    static constexpr std::string_view Name = "BasicChunkingController"sv;
-
-    BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {}
-
-    // Builds the settings from a compact-binary object; see ReadSettings() for the keys read.
-    BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
-
-    // Decides whether InputPath should be chunked and, if so, chunks it.
-    // Returns false without touching OutChunked when the extension is listed in
-    // ExcludeExtensions, when RawSize is below ChunkFileSizeLimit, or when the
-    // file is recognised as ELF / Mach-O and the matching exclusion is enabled.
-    // Otherwise assigns the result of ChunkData() to OutChunked and returns true.
-    virtual bool ProcessFile(const std::filesystem::path& InputPath,
-                             uint64_t                     RawSize,
-                             ChunkedInfoWithSource&       OutChunked,
-                             std::atomic<uint64_t>&       BytesProcessed,
-                             std::atomic<bool>&           AbortFlag) const override
-    {
-        ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
-
-        const std::filesystem::path Extension = InputPath.extension();
-        const auto&                 Excluded  = m_Settings.ExcludeExtensions;
-        const bool                  IsExcludedExtension =
-            std::any_of(Excluded.begin(), Excluded.end(), [&Extension](const auto& Candidate) { return Candidate == Extension; });
-        if (IsExcludedExtension)
-        {
-            return false;
-        }
-        if (RawSize < m_Settings.ChunkFileSizeLimit)
-        {
-            return false;
-        }
-
-        BasicFile Source(InputPath, BasicFile::Mode::kRead);
-
-        // The ELF check runs before the Mach-O check; each only runs when its setting is enabled.
-        const bool IsExcludedExecutable =
-            (m_Settings.ExcludeElfFiles && IsElfFile(Source)) || (m_Settings.ExcludeMachOFiles && IsMachOFile(Source));
-        if (IsExcludedExecutable)
-        {
-            return false;
-        }
-
-        OutChunked = ChunkData(Source, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
-        return true;
-    }
-
-    virtual std::string_view GetName() const override { return Name; }
-
-    // Serialises the settings using the same keys that ReadSettings() reads back.
-    virtual CbObject GetParameters() const override
-    {
-        CbObjectWriter Out;
-
-        Out.BeginArray("ChunkExcludeExtensions"sv);
-        for (const std::string& ExcludedExtension : m_Settings.ExcludeExtensions)
-        {
-            Out.AddString(ExcludedExtension);
-        }
-        Out.EndArray();  // ChunkExcludeExtensions
-
-        Out.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
-        Out.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
-        Out.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
-
-        WriteChunkParams(Out, m_Settings.ChunkingParams);
-
-        return Out.Save();
-    }
-
-private:
-    // Reads every setting from Parameters; scalar fields that are absent fall
-    // back to the DefaultChunking* constants passed to the accessors below.
-    static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters)
-    {
-        BasicChunkingControllerSettings Settings{
-            .ExcludeExtensions  = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
-            .ExcludeElfFiles    = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
-            .ExcludeMachOFiles  = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
-            .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
-            .ChunkingParams     = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())};
-        return Settings;
-    }
-
-    const BasicChunkingControllerSettings m_Settings;
-};
-
-// Chunking controller that splits files whose extension is listed in
-// FixedChunkingExtensions into fixed-size chunks, and hands every other eligible
-// file to ChunkData() using the configured ChunkingParams.
-class ChunkingControllerWithFixedChunking : public ChunkingController
-{
-public:
-    ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {}
-
-    // Builds the settings from a compact-binary object; see ReadSettings() for the keys read.
-    ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
-
-    // Decides whether InputPath should be chunked and, if so, chunks it.
-    //
-    // Returns false when:
-    //  - the extension is listed in ExcludeExtensions or RawSize is below ChunkFileSizeLimit,
-    //  - the extension selects fixed chunking but RawSize is below MinSizeForFixedChunking,
-    //  - the extension selects fixed chunking but FixedChunkingChunkSize is zero,
-    //  - AbortFlag is observed set between fixed-size chunks (OutChunked may then
-    //    already hold the chunks processed so far),
-    //  - the file takes the ChunkData() path and is recognised as ELF / Mach-O while
-    //    the matching exclusion is enabled.
-    // Returns true once OutChunked has been filled in.
-    virtual bool ProcessFile(const std::filesystem::path& InputPath,
-                             uint64_t                     RawSize,
-                             ChunkedInfoWithSource&       OutChunked,
-                             std::atomic<uint64_t>&       BytesProcessed,
-                             std::atomic<bool>&           AbortFlag) const override
-    {
-        ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
-        const bool ExcludeFromChunking =
-            std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
-            m_Settings.ExcludeExtensions.end();
-
-        if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
-        {
-            return false;
-        }
-
-        const bool FixedChunkingExtension =
-            std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) !=
-            m_Settings.FixedChunkingExtensions.end();
-
-        if (FixedChunkingExtension)
-        {
-            if (RawSize < m_Settings.MinSizeForFixedChunking)
-            {
-                return false;
-            }
-            // A zero chunk size (possible when the settings come from deserialized
-            // parameters) would divide by zero below and the loop would never
-            // advance Offset, so such a file is declined instead.
-            if (m_Settings.FixedChunkingChunkSize == 0)
-            {
-                return false;
-            }
-            ZEN_TRACE_CPU("FixedChunking");
-            IoHashStream                                     FullHasher;
-            BasicFile                                        Source(InputPath, BasicFile::Mode::kRead);
-            uint64_t                                         Offset = 0;
-            tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
-            // Upper bound on the number of chunks; used only to pre-size the containers.
-            const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize);
-            ChunkHashToChunkIndex.reserve(ExpectedChunkCount);
-            OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount);
-            OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount);
-            OutChunked.ChunkSources.reserve(ExpectedChunkCount);
-
-            static constexpr uint64_t BufferingSize = 256u * 1024u;
-
-            IoHashStream ChunkHasher;
-
-            while (Offset < RawSize)
-            {
-                if (AbortFlag)
-                {
-                    return false;
-                }
-
-                ChunkHasher.Reset();
-
-                // The last chunk of the file may be shorter than the configured chunk size.
-                const uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize);
-                if (ChunkSize >= (BufferingSize + BufferingSize / 2))
-                {
-                    // Chunks of at least 1.5x BufferingSize go through ScanFile, which feeds the
-                    // data to the callback piecewise; both hashers see every piece.
-                    ScanFile(Source.Handle(),
-                             Offset,
-                             ChunkSize,
-                             BufferingSize,
-                             [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) {
-                                 FullHasher.Append(Data, Size);
-                                 ChunkHasher.Append(Data, Size);
-                                 BytesProcessed.fetch_add(Size);
-                             });
-                }
-                else
-                {
-                    // Smaller chunks are read with a single ReadRange call.
-                    IoBuffer ChunkBuffer = Source.ReadRange(Offset, ChunkSize);
-                    FullHasher.Append(ChunkBuffer);
-                    ChunkHasher.Append(ChunkBuffer);
-                    BytesProcessed.fetch_add(ChunkSize);
-                }
-
-                const IoHash ChunkHash = ChunkHasher.GetHash();
-                if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
-                {
-                    OutChunked.Info.ChunkSequence.push_back(It->second);
-                }
-                else
-                {
-                    // Hash not found in the lookup map: record the hash and its source
-                    // range, and reference the new index from the sequence.
-                    const uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size());
-                    OutChunked.Info.ChunkHashes.push_back(ChunkHash);
-                    OutChunked.Info.ChunkSequence.push_back(ChunkIndex);
-                    OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)});
-                }
-                Offset += ChunkSize;
-            }
-            OutChunked.Info.RawSize = RawSize;
-            OutChunked.Info.RawHash = FullHasher.GetHash();
-            return true;
-        }
-        else
-        {
-            BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
-            if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
-            {
-                return false;
-            }
-            if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
-            {
-                return false;
-            }
-
-            OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
-            return true;
-        }
-    }
-
-    virtual std::string_view GetName() const override { return Name; }
-
-    // Serialises the settings using the same keys that ReadSettings() reads back.
-    virtual CbObject GetParameters() const override
-    {
-        CbObjectWriter Writer;
-        Writer.BeginArray("FixedChunkingExtensions"sv);
-        {
-            for (const std::string& Extension : m_Settings.FixedChunkingExtensions)
-            {
-                Writer.AddString(Extension);
-            }
-        }
-        Writer.EndArray();  // FixedChunkingExtensions
-
-        Writer.BeginArray("ChunkExcludeExtensions"sv);
-        {
-            for (const std::string& Extension : m_Settings.ExcludeExtensions)
-            {
-                Writer.AddString(Extension);
-            }
-        }
-        Writer.EndArray();  // ChunkExcludeExtensions
-
-        Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
-        Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
-
-        Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
-
-        WriteChunkParams(Writer, m_Settings.ChunkingParams);
-
-        Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize);
-        Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking);
-        return Writer.Save();
-    }
-
-    // Identifier returned by GetName() and matched by CreateChunkingController().
-    static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv;
-
-private:
-    // Reads every setting from Parameters; scalar fields that are absent fall
-    // back to the Default* constants passed to the accessors below.
-    static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters)
-    {
-        // NOTE(review): MinSizeForFixedChunking falls back to DefaultFixedChunkingChunkSize,
-        // the same constant used for FixedChunkingChunkSize - confirm this is intended
-        // rather than a dedicated minimum-size default.
-        return ChunkingControllerWithFixedChunkingSettings{
-            .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()),
-            .ExcludeExtensions       = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
-            .ExcludeElfFiles         = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
-            .ExcludeMachOFiles       = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
-            .ChunkFileSizeLimit      = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
-            .ChunkingParams          = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()),
-            .FixedChunkingChunkSize  = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize),
-            .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)};
-    }
-
-    const ChunkingControllerWithFixedChunkingSettings m_Settings;
-};
-
-// Creates a BasicChunkingController from an explicit settings struct and returns
-// it through the ChunkingController interface.
-std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings)
-{
-    std::unique_ptr<ChunkingController> Controller = std::make_unique<BasicChunkingController>(Settings);
-    return Controller;
-}
-// Creates a BasicChunkingController whose settings are read from a compact-binary
-// object and returns it through the ChunkingController interface.
-std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(CbObjectView Parameters)
-{
-    std::unique_ptr<ChunkingController> Controller = std::make_unique<BasicChunkingController>(Parameters);
-    return Controller;
-}
-
-// Creates a ChunkingControllerWithFixedChunking from an explicit settings struct
-// and returns it through the ChunkingController interface.
-std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting)
-{
-    std::unique_ptr<ChunkingController> Controller = std::make_unique<ChunkingControllerWithFixedChunking>(Setting);
-    return Controller;
-}
-// Creates a ChunkingControllerWithFixedChunking whose settings are read from a
-// compact-binary object and returns it through the ChunkingController interface.
-std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(CbObjectView Parameters)
-{
-    std::unique_ptr<ChunkingController> Controller = std::make_unique<ChunkingControllerWithFixedChunking>(Parameters);
-    return Controller;
-}
-
-// Creates the controller whose Name constant equals the given Name, configured
-// from Parameters. Returns an empty pointer when Name matches no known controller.
-std::unique_ptr<ChunkingController>
-CreateChunkingController(std::string_view Name, CbObjectView Parameters)
-{
-    const bool WantsBasic = (Name == BasicChunkingController::Name);
-    if (WantsBasic)
-    {
-        return CreateBasicChunkingController(Parameters);
-    }
-
-    const bool WantsFixed = (Name == ChunkingControllerWithFixedChunking::Name);
-    if (WantsFixed)
-    {
-        return CreateChunkingControllerWithFixedChunking(Parameters);
-    }
-
-    return nullptr;
-}
-
-} // namespace zen