aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2025-10-20 12:09:46 +0200
committer GitHub Enterprise <[email protected]> 2025-10-20 12:09:46 +0200
commit c1af02eeb2badfbd2c01125730c6b85bbed8be9e (patch)
tree d5a21612f886940166f905b6abc408959220834d /src
parent 5.7.7-pre0 (diff)
download zen-c1af02eeb2badfbd2c01125730c6b85bbed8be9e.tar.xz
zen-c1af02eeb2badfbd2c01125730c6b85bbed8be9e.zip
updated chunking strategy (#589)
- Improvement: `zen builds` now splits large files that are compress-only into 64 MB chunks to avoid very large files in Cloud Storage
- Improvement: `zen builds` now treats `.msixvc` files as non-compressable
- Moved and cleaned up compactbinary_helpers functions
- Tweaked fixed chunking implementation for better performance
- Refactored so we have one list of "non-compressable" extensions
- Implemented new `StandardChunkingStrategy` and moved the two existing strategies into a hidden legacy namespace
- Added `FilteredDownloadedBytesPerSecond.Start();` call that got lost during a previous refactoring
Diffstat (limited to 'src')
-rw-r--r-- src/zen/cmds/builds_cmd.cpp 23
-rw-r--r-- src/zencore/include/zencore/compactbinaryutil.h 132
-rw-r--r-- src/zenremotestore/builds/buildsavedstate.cpp 30
-rw-r--r-- src/zenremotestore/builds/buildstorageoperations.cpp 44
-rw-r--r-- src/zenremotestore/chunking/chunkedcontent.cpp 35
-rw-r--r-- src/zenremotestore/chunking/chunkingcontroller.cpp 642
-rw-r--r-- src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h 4
-rw-r--r-- src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h 103
-rw-r--r-- src/zenremotestore/include/zenremotestore/chunking/chunkingcontroller.h 74
9 files changed, 666 insertions, 421 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index 27f050a44..9c6fd17ab 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -550,7 +550,8 @@ namespace {
.TempDir = TempDir,
.ExcludeFolders = DefaultExcludeFolders,
.ExcludeExtensions = DefaultExcludeExtensions,
- .ZenExcludeManifestName = ZenExcludeManifestName});
+ .ZenExcludeManifestName = ZenExcludeManifestName,
+ .NonCompressableExtensions = DefaultSplitOnlyExtensions});
UploadOp.Execute();
if (AbortFlag)
{
@@ -1282,6 +1283,11 @@ namespace {
// TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them
{
+ if (!IsQuiet)
+ {
+ ZEN_CONSOLE("Fetching metadata for {} blocks", BlockRawHashes.size());
+ }
+
Stopwatch GetBlockMetadataTimer;
std::vector<ChunkBlockDescription> UnorderedList;
@@ -2045,7 +2051,7 @@ namespace {
if (!ChunkController && !IsQuiet)
{
ZEN_CONSOLE_WARN("Unspecified chunking algorith, using default");
- ChunkController = CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{});
+ ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
}
LocalContent = GetLocalContent(LocalFolderScanStats,
@@ -2348,14 +2354,17 @@ namespace {
ChunkedFolderContent CompareFolderContent;
{
- std::unique_ptr<ChunkingController> ChunkController =
- CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{});
- std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions;
+ StandardChunkingControllerSettings ChunkingSettings;
+ std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings);
+ std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions;
if (OnlyChunked)
{
ExcludeExtensions.insert(ExcludeExtensions.end(),
- DefaultChunkingExcludeExtensions.begin(),
- DefaultChunkingExcludeExtensions.end());
+ ChunkingSettings.SplitOnlyExtensions.begin(),
+ ChunkingSettings.SplitOnlyExtensions.end());
+ ExcludeExtensions.insert(ExcludeExtensions.end(),
+ ChunkingSettings.SplitAndCompressExtensions.begin(),
+ ChunkingSettings.SplitAndCompressExtensions.end());
}
auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool {
diff --git a/src/zencore/include/zencore/compactbinaryutil.h b/src/zencore/include/zencore/compactbinaryutil.h
index d750c6492..eecc3344b 100644
--- a/src/zencore/include/zencore/compactbinaryutil.h
+++ b/src/zencore/include/zencore/compactbinaryutil.h
@@ -52,4 +52,136 @@ ValidateAndReadCompactBinaryObject(const IoBuffer&& Payload, CbValidateError& Ou
}
CbObject ValidateAndReadCompactBinaryObject(const CompressedBuffer&& Payload, CbValidateError& OutError);
+namespace compactbinary_helpers {
+ template<typename Type>
+ inline void WriteArray(std::span<const Type> Values, std::string_view ArrayName, CbWriter& Output)
+ {
+ Output.BeginArray(ArrayName);
+ for (const Type Value : Values)
+ {
+ Output << Value;
+ }
+ Output.EndArray();
+ }
+
+ template<typename Type>
+ inline void WriteArray(const std::vector<Type>& Values, std::string_view ArrayName, CbWriter& Output)
+ {
+ WriteArray(std::span<const Type>(Values), ArrayName, Output);
+ }
+
+ template<>
+ inline void WriteArray(std::span<const std::filesystem::path> Values, std::string_view ArrayName, CbWriter& Output)
+ {
+ Output.BeginArray(ArrayName);
+ for (const std::filesystem::path& Path : Values)
+ {
+ Output.AddString((const char*)Path.generic_u8string().c_str());
+ }
+ Output.EndArray();
+ }
+
+ template<>
+ inline void WriteArray(const std::vector<std::filesystem::path>& Values, std::string_view ArrayName, CbWriter& Output)
+ {
+ WriteArray(std::span<const std::filesystem::path>(Values), ArrayName, Output);
+ }
+
+ inline void WriteBinaryAttachmentArray(std::span<const IoHash> Values, std::string_view ArrayName, CbWriter& Output)
+ {
+ Output.BeginArray(ArrayName);
+ for (const IoHash& Hash : Values)
+ {
+ Output.AddBinaryAttachment(Hash);
+ }
+ Output.EndArray();
+ }
+
+ inline void WriteBinaryAttachmentArray(const std::vector<IoHash>& Values, std::string_view ArrayName, CbWriter& Output)
+ {  // Convenience overload: writes `Values` under `ArrayName` by forwarding to the std::span overload.
+ WriteBinaryAttachmentArray(std::span<const IoHash>(Values), ArrayName, Output);  // must not call WriteArray: that streams each hash with operator<< instead of AddBinaryAttachment, while ReadBinaryAttachmentArray reads entries with AsBinaryAttachment()
+ }
+
+ template<typename Type>
+ std::vector<Type> ReadArray(std::string_view ArrayName, CbObjectView Input);
+
+ template<>
+ inline std::vector<uint32_t> ReadArray(std::string_view ArrayName, CbObjectView Input)
+ {
+ std::vector<uint32_t> Result;
+ CbArrayView Array = Input[ArrayName].AsArrayView();
+ Result.reserve(Array.Num());
+ for (CbFieldView ItemView : Array)
+ {
+ Result.push_back(ItemView.AsUInt32());
+ }
+ return Result;
+ }
+
+ template<>
+ inline std::vector<uint64_t> ReadArray(std::string_view ArrayName, CbObjectView Input)
+ {
+ std::vector<uint64_t> Result;
+ CbArrayView Array = Input[ArrayName].AsArrayView();
+ Result.reserve(Array.Num());
+ for (CbFieldView ItemView : Array)
+ {
+ Result.push_back(ItemView.AsUInt64());
+ }
+ return Result;
+ }
+
+ template<>
+ inline std::vector<std::filesystem::path> ReadArray(std::string_view ArrayName, CbObjectView Input)
+ {
+ std::vector<std::filesystem::path> Result;
+ CbArrayView Array = Input[ArrayName].AsArrayView();
+ Result.reserve(Array.Num());
+ for (CbFieldView ItemView : Array)
+ {
+ Result.push_back(std::filesystem::path(ItemView.AsU8String()));
+ }
+ return Result;
+ }
+
+ template<>
+ inline std::vector<std::string> ReadArray(std::string_view ArrayName, CbObjectView Input)
+ {
+ std::vector<std::string> Result;
+ CbArrayView Array = Input[ArrayName].AsArrayView();
+ Result.reserve(Array.Num());
+ for (CbFieldView ItemView : Array)
+ {
+ Result.push_back(std::string(ItemView.AsString()));
+ }
+ return Result;
+ }
+
+ template<>
+ inline std::vector<IoHash> ReadArray(std::string_view ArrayName, CbObjectView Input)
+ {
+ std::vector<IoHash> Result;
+ CbArrayView Array = Input[ArrayName].AsArrayView();
+ Result.reserve(Array.Num());
+ for (CbFieldView ItemView : Array)
+ {
+ Result.push_back(ItemView.AsHash());
+ }
+ return Result;
+ }
+
+ inline std::vector<IoHash> ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input)
+ {
+ std::vector<IoHash> Result;
+ CbArrayView Array = Input[ArrayName].AsArrayView();
+ Result.reserve(Array.Num());
+ for (CbFieldView ItemView : Array)
+ {
+ Result.push_back(ItemView.AsBinaryAttachment());
+ }
+ return Result;
+ }
+
+} // namespace compactbinary_helpers
+
} // namespace zen
diff --git a/src/zenremotestore/builds/buildsavedstate.cpp b/src/zenremotestore/builds/buildsavedstate.cpp
index 933616856..cf46668f9 100644
--- a/src/zenremotestore/builds/buildsavedstate.cpp
+++ b/src/zenremotestore/builds/buildsavedstate.cpp
@@ -89,9 +89,9 @@ ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest,
CbObjectView FilesObject = BuildPartManifest["files"sv].AsObjectView();
- compactbinary_helpers::ReadArray("paths"sv, FilesObject, OutPaths);
- compactbinary_helpers::ReadArray("rawhashes"sv, FilesObject, OutRawHashes);
- compactbinary_helpers::ReadArray("rawsizes"sv, FilesObject, OutRawSizes);
+ OutPaths = compactbinary_helpers::ReadArray<std::filesystem::path>("paths"sv, FilesObject);
+ OutRawHashes = compactbinary_helpers::ReadArray<IoHash>("rawhashes"sv, FilesObject);
+ OutRawSizes = compactbinary_helpers::ReadArray<uint64_t>("rawsizes"sv, FilesObject);
uint64_t PathCount = OutPaths.size();
if (OutRawHashes.size() != PathCount)
@@ -103,15 +103,13 @@ ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest,
throw std::runtime_error(fmt::format("Number of raw sizes entries does not match number of paths"));
}
- std::vector<uint32_t> ModeArray;
- compactbinary_helpers::ReadArray("mode"sv, FilesObject, ModeArray);
+ std::vector<uint32_t> ModeArray = compactbinary_helpers::ReadArray<uint32_t>("mode"sv, FilesObject);
if (ModeArray.size() != PathCount && ModeArray.size() != 0)
{
throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths"));
}
- std::vector<uint32_t> AttributeArray;
- compactbinary_helpers::ReadArray("attributes"sv, FilesObject, ModeArray);
+ std::vector<uint32_t> AttributeArray = compactbinary_helpers::ReadArray<uint32_t>("attributes"sv, FilesObject);
if (AttributeArray.size() != PathCount && AttributeArray.size() != 0)
{
throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths"));
@@ -143,26 +141,24 @@ ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest,
if (CbObjectView ChunkContentView = BuildPartManifest["chunkedContent"sv].AsObjectView(); ChunkContentView)
{
- compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkContentView, OutSequenceRawHashes);
- compactbinary_helpers::ReadArray("chunkcounts"sv, ChunkContentView, OutChunkCounts);
+ OutSequenceRawHashes = compactbinary_helpers::ReadArray<IoHash>("sequenceRawHashes"sv, ChunkContentView);
+ OutChunkCounts = compactbinary_helpers::ReadArray<uint32_t>("chunkcounts"sv, ChunkContentView);
if (OutChunkCounts.size() != OutSequenceRawHashes.size())
{
throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths"));
}
- compactbinary_helpers::ReadArray("chunkorders"sv, ChunkContentView, OutAbsoluteChunkOrders);
+ OutAbsoluteChunkOrders = compactbinary_helpers::ReadArray<uint32_t>("chunkorders"sv, ChunkContentView);
}
else if (FilesObject["chunkcounts"sv])
{
// Legacy zen style
- std::vector<uint32_t> LegacyChunkCounts;
- compactbinary_helpers::ReadArray("chunkcounts"sv, FilesObject, LegacyChunkCounts);
+ std::vector<uint32_t> LegacyChunkCounts = compactbinary_helpers::ReadArray<uint32_t>("chunkcounts"sv, FilesObject);
if (LegacyChunkCounts.size() != PathCount)
{
throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths"));
}
- std::vector<uint32_t> LegacyAbsoluteChunkOrders;
- compactbinary_helpers::ReadArray("chunkorders"sv, FilesObject, LegacyAbsoluteChunkOrders);
+ std::vector<uint32_t> LegacyAbsoluteChunkOrders = compactbinary_helpers::ReadArray<uint32_t>("chunkorders"sv, FilesObject);
CbArrayView ChunkOrdersArray = BuildPartManifest["chunkorders"sv].AsArrayView();
const uint64_t ChunkOrdersCount = ChunkOrdersArray.Num();
@@ -217,8 +213,8 @@ ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest,
CbObjectView ChunkAttachmentsView = BuildPartManifest["chunkAttachments"sv].AsObjectView();
{
- compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView, OutLooseChunkHashes);
- compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkAttachmentsView, OutLooseChunkRawSizes);
+ OutLooseChunkHashes = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView);
+ OutLooseChunkRawSizes = compactbinary_helpers::ReadArray<uint64_t>("chunkRawSizes"sv, ChunkAttachmentsView);
if (OutLooseChunkHashes.size() != OutLooseChunkRawSizes.size())
{
throw std::runtime_error(fmt::format("Number of attachment chunk hashes does not match number of attachemnt chunk raw sizes"));
@@ -227,7 +223,7 @@ ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest,
CbObjectView BlocksView = BuildPartManifest["blockAttachments"sv].AsObjectView();
{
- compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlocksView, OutBlockRawHashes);
+ OutBlockRawHashes = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlocksView);
}
}
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp
index 39f7e8edf..ebb876ed9 100644
--- a/src/zenremotestore/builds/buildstorageoperations.cpp
+++ b/src/zenremotestore/builds/buildstorageoperations.cpp
@@ -11,6 +11,7 @@
#include <zencore/basicfile.h>
#include <zencore/compactbinary.h>
#include <zencore/compactbinaryfile.h>
+#include <zencore/compactbinaryutil.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/parallelwork.h>
@@ -143,24 +144,15 @@ namespace {
return Result.FailedRemovePaths.empty();
}
- const std::vector<uint32_t> NonCompressableExtensions({HashStringDjb2(".mp4"sv),
- HashStringDjb2(".zip"sv),
- HashStringDjb2(".7z"sv),
- HashStringDjb2(".bzip"sv),
- HashStringDjb2(".rar"sv),
- HashStringDjb2(".gzip"sv),
- HashStringDjb2(".apk"sv),
- HashStringDjb2(".nsp"sv),
- HashStringDjb2(".xvc"sv),
- HashStringDjb2(".pkg"sv),
- HashStringDjb2(".dmg"sv),
- HashStringDjb2(".ipa"sv)});
-
- const tsl::robin_set<uint32_t> NonCompressableExtensionSet(NonCompressableExtensions.begin(), NonCompressableExtensions.end());
-
- bool IsExtensionHashCompressable(const uint32_t PathHash) { return !NonCompressableExtensionSet.contains(PathHash); }
+ bool IsExtensionHashCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, const uint32_t PathHash)
+ {
+ return !NonCompressableExtensionHashes.contains(PathHash);
+ }
- bool IsChunkCompressable(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, uint32_t ChunkIndex)
+ bool IsChunkCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes,
+ const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ uint32_t ChunkIndex)
{
ZEN_UNUSED(Content);
const uint32_t ChunkLocationCount = Lookup.ChunkSequenceLocationCounts[ChunkIndex];
@@ -173,7 +165,7 @@ namespace {
const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex];
const uint32_t ExtensionHash = Lookup.PathExtensionHash[PathIndex];
- const bool IsCompressable = IsExtensionHashCompressable(ExtensionHash);
+ const bool IsCompressable = IsExtensionHashCompressable(NonCompressableExtensionHashes, ExtensionHash);
return IsCompressable;
}
@@ -1701,6 +1693,8 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState)
const BlockRangeDescriptor& BlockRange = BlockRangeWorks[BlockRangeIndex];
+ FilteredDownloadedBytesPerSecond.Start();
+
DownloadPartialBlock(
BlockRange,
ExistsResult,
@@ -4603,6 +4597,11 @@ BuildsOperationUploadFolder::BuildsOperationUploadFolder(BuildOpLogOutput& L
, m_MetaData(MetaData)
, m_Options(Options)
{
+ m_NonCompressableExtensionHashes.reserve(Options.NonCompressableExtensions.size());
+ for (const std::string& Extension : Options.NonCompressableExtensions)
+ {
+ m_NonCompressableExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ }
}
void
@@ -4783,8 +4782,7 @@ BuildsOperationUploadFolder::Execute()
m_LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs();
}
- std::unique_ptr<ChunkingController> ChunkController =
- CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{});
+ std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
{
CbObjectWriter ChunkParametersWriter;
ChunkParametersWriter.AddString("name"sv, ChunkController->GetName());
@@ -6145,7 +6143,7 @@ BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content,
uint64_t RawSize = Chunk.GetSize();
const bool ShouldCompressChunk = Lookup.RawHashToSequenceIndex.contains(ChunkHash) &&
(RawSize >= m_Options.MinimumSizeForCompressInBlock) &&
- IsChunkCompressable(Content, Lookup, ChunkIndex);
+ IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex);
const OodleCompressionLevel CompressionLevel =
ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel)};
@@ -6187,7 +6185,7 @@ BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content,
const uint64_t RawSize = Chunk.GetSize();
const bool ShouldCompressChunk = Lookup.RawHashToSequenceIndex.contains(ChunkHash) &&
(RawSize >= m_Options.MinimumSizeForCompressInBlock) &&
- IsChunkCompressable(Content, Lookup, ChunkIndex);
+ IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex);
const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
CompositeBuffer CompressedChunk =
@@ -6682,7 +6680,7 @@ BuildsOperationUploadFolder::CompressChunk(const ChunkedFolderContent& Content,
throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash));
}
- const bool ShouldCompressChunk = IsChunkCompressable(Content, Lookup, ChunkIndex);
+ const bool ShouldCompressChunk = IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex);
const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
if (ShouldCompressChunk)
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index eb0e8bdc9..ea67e3d94 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -2,6 +2,7 @@
#include <zenremotestore/chunking/chunkedcontent.h>
+#include <zencore/compactbinaryutil.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
@@ -362,11 +363,11 @@ LoadFolderContentToCompactBinary(CbObjectView Input)
{
ZEN_TRACE_CPU("LoadFolderContentToCompactBinary");
FolderContent Content;
- Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
- compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths);
- compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes);
- compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes);
- compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks);
+ Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
+ Content.Paths = compactbinary_helpers::ReadArray<std::filesystem::path>("paths"sv, Input);
+ Content.RawSizes = compactbinary_helpers::ReadArray<uint64_t>("rawSizes"sv, Input);
+ Content.Attributes = compactbinary_helpers::ReadArray<uint32_t>("attributes"sv, Input);
+ Content.ModificationTicks = compactbinary_helpers::ReadArray<uint64_t>("modificationTimes"sv, Input);
return Content;
}
@@ -534,18 +535,18 @@ LoadChunkedFolderContentToCompactBinary(CbObjectView Input)
{
ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary");
ChunkedFolderContent Content;
- Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
- compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths);
- compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes);
- compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes);
- compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes);
-
- CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView();
- compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes);
- compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts);
- compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders);
- compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes);
- compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes);
+ Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform());
+ Content.Paths = compactbinary_helpers::ReadArray<std::filesystem::path>("paths"sv, Input);
+ Content.RawSizes = compactbinary_helpers::ReadArray<uint64_t>("rawSizes"sv, Input);
+ Content.Attributes = compactbinary_helpers::ReadArray<uint32_t>("attributes"sv, Input);
+ Content.RawHashes = compactbinary_helpers::ReadArray<IoHash>("rawHashes"sv, Input);
+
+ CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView();
+ Content.ChunkedContent.SequenceRawHashes = compactbinary_helpers::ReadArray<IoHash>("sequenceRawHashes"sv, ChunkedContentView);
+ Content.ChunkedContent.ChunkCounts = compactbinary_helpers::ReadArray<uint32_t>("chunkCounts"sv, ChunkedContentView);
+ Content.ChunkedContent.ChunkOrders = compactbinary_helpers::ReadArray<uint32_t>("chunkOrders"sv, ChunkedContentView);
+ Content.ChunkedContent.ChunkHashes = compactbinary_helpers::ReadArray<IoHash>("chunkHashes"sv, ChunkedContentView);
+ Content.ChunkedContent.ChunkRawSizes = compactbinary_helpers::ReadArray<uint64_t>("chunkRawSizes"sv, ChunkedContentView);
return Content;
}
diff --git a/src/zenremotestore/chunking/chunkingcontroller.cpp b/src/zenremotestore/chunking/chunkingcontroller.cpp
index cc20446ea..91ca18d10 100644
--- a/src/zenremotestore/chunking/chunkingcontroller.cpp
+++ b/src/zenremotestore/chunking/chunkingcontroller.cpp
@@ -3,7 +3,7 @@
#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zencore/basicfile.h>
-#include <zencore/compactbinarybuilder.h>
+#include <zencore/compactbinaryutil.h>
#include <zencore/filesystem.h>
#include <zencore/trace.h>
@@ -16,23 +16,12 @@ namespace zen {
using namespace std::literals;
namespace {
- std::vector<std::string> ReadStringArray(CbArrayView StringArray)
- {
- std::vector<std::string> Result;
- Result.reserve(StringArray.Num());
- for (CbFieldView FieldView : StringArray)
- {
- Result.emplace_back(FieldView.AsString());
- }
- return Result;
- }
-
ChunkedParams ReadChunkParams(CbObjectView Params)
{
bool UseThreshold = Params["UseThreshold"sv].AsBool(true);
- size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize);
- size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize);
- size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize);
+ size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultDynamicChunkingParams.MinSize);
+ size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultDynamicChunkingParams.MaxSize);
+ size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultDynamicChunkingParams.AvgSize);
return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize};
}
@@ -50,6 +39,44 @@ namespace {
Writer.EndObject(); // ChunkingParams
}
+} // namespace
+
+namespace legacy {
+ const std::vector<std::string> DefaultChunkingExcludeExtensions = {
+ ".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip",
+ ".sym", ".psym", ".txt", ".ini", ".json", ".verse", ".versemodule", ".jpg", ".c", ".h",
+ ".cpp", ".cxx", ".c++", ".cc", ".hpp", ".hxx", ".h++", ".py", ".ogg", ".plist"};
+
+ const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"};
+ const bool DefaultChunkingExcludeElfFiles = true;
+ const bool DefaultChunkingExcludeMachOFiles = true;
+
+ const size_t DefaultChunkingFileSizeLimit = DefaultDynamicChunkingParams.MaxSize;
+
+ const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u;
+ const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u;
+
+ struct BasicChunkingControllerSettings
+ {
+ std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions;
+ bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles;
+ bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles;
+ uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit;
+ ChunkedParams ChunkingParams = DefaultDynamicChunkingParams;
+ };
+
+ struct ChunkingControllerWithFixedChunkingSettings
+ {
+ std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions;
+ std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions;
+ bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles;
+ bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles;
+ uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit;
+ ChunkedParams ChunkingParams = DefaultDynamicChunkingParams;
+ uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize;
+ uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking;
+ };
+
bool IsElfFile(BasicFile& Buffer)
{
if (Buffer.FileSize() > 4)
@@ -77,114 +104,363 @@ namespace {
}
return false;
}
-} // namespace
-class BasicChunkingController : public ChunkingController
-{
-public:
- BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings)
+ //////////// BasicChunkingController
+
+ class BasicChunkingController : public ChunkingController
{
- m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
- for (const std::string& Extension : Settings.ExcludeExtensions)
+ public:
+ BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings)
{
- m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
+ for (const std::string& Extension : Settings.ExcludeExtensions)
+ {
+ m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ }
}
- }
- BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
+ BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
- virtual bool ProcessFile(const std::filesystem::path& InputPath,
- uint64_t RawSize,
- ChunkedInfoWithSource& OutChunked,
- std::atomic<uint64_t>& BytesProcessed,
- std::atomic<bool>& AbortFlag) const override
- {
- ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
- if (RawSize < m_Settings.ChunkFileSizeLimit)
+ virtual bool ProcessFile(const std::filesystem::path& InputPath,
+ uint64_t RawSize,
+ ChunkedInfoWithSource& OutChunked,
+ std::atomic<uint64_t>& BytesProcessed,
+ std::atomic<bool>& AbortFlag) const override
{
- return false;
- }
+ ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
+ if (RawSize < m_Settings.ChunkFileSizeLimit)
+ {
+ return false;
+ }
- const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
- const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
+ const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
+ const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
- if (ExcludeFromChunking)
- {
- return false;
+ if (ExcludeFromChunking)
+ {
+ return false;
+ }
+
+ BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
+ if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
+ {
+ return false;
+ }
+ if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
+ {
+ return false;
+ }
+
+ OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
+ return true;
}
- BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
- if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
+ virtual std::string_view GetName() const override { return Name; }
+
+ virtual CbObject GetParameters() const override
{
- return false;
+ CbObjectWriter Writer;
+ compactbinary_helpers::WriteArray(m_Settings.ExcludeExtensions, "ChunkExcludeExtensions"sv, Writer);
+
+ Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
+ Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
+ Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
+
+ WriteChunkParams(Writer, m_Settings.ChunkingParams);
+
+ return Writer.Save();
}
- if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
+ static constexpr std::string_view Name = "BasicChunkingController"sv;
+
+ private:
+ static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters)
{
- return false;
+ return BasicChunkingControllerSettings{
+ .ExcludeExtensions = compactbinary_helpers::ReadArray<std::string>("ChunkExcludeExtensions"sv, Parameters),
+ .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
+ .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
+ .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
+ .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())};
}
- OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
- return true;
+ const BasicChunkingControllerSettings m_Settings;
+ tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
+ };
+
+ std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters)
+ {
+ return std::make_unique<BasicChunkingController>(Parameters);
}
- virtual std::string_view GetName() const override { return Name; }
+ //////////// ChunkingControllerWithFixedChunking
- virtual CbObject GetParameters() const override
+ class ChunkingControllerWithFixedChunking : public ChunkingController
{
- CbObjectWriter Writer;
- Writer.BeginArray("ChunkExcludeExtensions"sv);
+ public:
+ ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings)
{
- for (const std::string& Extension : m_Settings.ExcludeExtensions)
+ m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
+ for (const std::string& Extension : Settings.ExcludeExtensions)
+ {
+ m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ }
+
+ m_FixedChunkingExtensionHashes.reserve(Settings.FixedChunkingExtensions.size());
+ for (const std::string& Extension : Settings.FixedChunkingExtensions)
{
- Writer.AddString(Extension);
+ m_FixedChunkingExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
}
}
- Writer.EndArray(); // ChunkExcludeExtensions
- Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
- Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
- Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
+ ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
- WriteChunkParams(Writer, m_Settings.ChunkingParams);
+ virtual bool ProcessFile(const std::filesystem::path& InputPath,
+ uint64_t RawSize,
+ ChunkedInfoWithSource& OutChunked,
+ std::atomic<uint64_t>& BytesProcessed,
+ std::atomic<bool>& AbortFlag) const override
+ {
+ ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
+ if (RawSize < m_Settings.ChunkFileSizeLimit)
+ {
+ return false;
+ }
- return Writer.Save();
- }
- static constexpr std::string_view Name = "BasicChunkingController"sv;
+ const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
+ const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
-private:
- static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters)
+ if (ExcludeFromChunking)
+ {
+ return false;
+ }
+
+ const bool FixedChunkingExtension = m_FixedChunkingExtensionHashes.contains(ExtensionHash);
+
+ if (FixedChunkingExtension)
+ {
+ if (RawSize < m_Settings.MinSizeForFixedChunking)
+ {
+ return false;
+ }
+ ZEN_TRACE_CPU("FixedChunking");
+ IoHashStream FullHasher;
+ BasicFile Source(InputPath, BasicFile::Mode::kRead);
+ uint64_t Offset = 0;
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
+ const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize);
+ ChunkHashToChunkIndex.reserve(ExpectedChunkCount);
+ OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount);
+ OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount);
+ OutChunked.ChunkSources.reserve(ExpectedChunkCount);
+
+ static const uint64_t BufferingSize = 256u * 1024u;
+
+ IoHashStream ChunkHasher;
+
+ while (Offset < RawSize)
+ {
+ if (AbortFlag)
+ {
+ return false;
+ }
+
+ ChunkHasher.Reset();
+
+ uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize);
+ if (ChunkSize >= (BufferingSize + BufferingSize / 2))
+ {
+ ScanFile(Source.Handle(),
+ Offset,
+ ChunkSize,
+ BufferingSize,
+ [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) {
+ FullHasher.Append(Data, Size);
+ ChunkHasher.Append(Data, Size);
+ BytesProcessed.fetch_add(Size);
+ });
+ }
+ else
+ {
+ IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize);
+ FullHasher.Append(ChunkData);
+ ChunkHasher.Append(ChunkData);
+ BytesProcessed.fetch_add(ChunkSize);
+ }
+
+ const IoHash ChunkHash = ChunkHasher.GetHash();
+ if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
+ {
+ OutChunked.Info.ChunkSequence.push_back(It->second);
+ }
+ else
+ {
+ uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size());
+ OutChunked.Info.ChunkHashes.push_back(ChunkHash);
+ OutChunked.Info.ChunkSequence.push_back(ChunkIndex);
+ OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)});
+ }
+ Offset += ChunkSize;
+ }
+ OutChunked.Info.RawSize = RawSize;
+ OutChunked.Info.RawHash = FullHasher.GetHash();
+ return true;
+ }
+ else
+ {
+ BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
+ if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
+ {
+ return false;
+ }
+ if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
+ {
+ return false;
+ }
+
+ OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
+ return true;
+ }
+ }
+
+ virtual std::string_view GetName() const override { return Name; }
+
+ virtual CbObject GetParameters() const override
+ {
+ CbObjectWriter Writer;
+ compactbinary_helpers::WriteArray(m_Settings.FixedChunkingExtensions, "FixedChunkingExtensions", Writer);
+ compactbinary_helpers::WriteArray(m_Settings.ExcludeExtensions, "ChunkExcludeExtensions", Writer);
+
+ Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
+ Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
+
+ Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
+
+ WriteChunkParams(Writer, m_Settings.ChunkingParams);
+
+ Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize);
+ Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking);
+ return Writer.Save();
+ }
+
+ static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv;
+
+ private:
+ // Rebuilds a ChunkingControllerWithFixedChunkingSettings from a parameter object.
+ // Field names match the ones written by GetParameters(); each scalar accessor is given a fallback value.
+ static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters)
+ {
+ return ChunkingControllerWithFixedChunkingSettings{
+ .FixedChunkingExtensions = compactbinary_helpers::ReadArray<std::string>("FixedChunkingExtensions"sv, Parameters),
+ .ExcludeExtensions = compactbinary_helpers::ReadArray<std::string>("ChunkExcludeExtensions"sv, Parameters),
+ .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
+ .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
+ .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
+ .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()),
+ .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize),
+ // NOTE(review): the fallback here is the chunk-size default, not a minimum-size default - confirm this is intended.
+ .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)};
+ }
+
+ const ChunkingControllerWithFixedChunkingSettings m_Settings;
+ tsl::robin_set<uint32_t> m_FixedChunkingExtensionHashes;
+ tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
+ };
+
+ std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters)
{
- return BasicChunkingControllerSettings{
- .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
- .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
- .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
- .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
- .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())};
+ return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters);
}
+} // namespace legacy
- const BasicChunkingControllerSettings m_Settings;
- tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
-};
+//////////// StandardChunkingController
-class ChunkingControllerWithFixedChunking : public ChunkingController
+class StandardChunkingController : public ChunkingController
{
public:
- ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings)
+ StandardChunkingController(const StandardChunkingControllerSettings& Settings) : m_Settings(Settings)
{
- m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
- for (const std::string& Extension : Settings.ExcludeExtensions)
+ m_SplitOnlyExtensionHashes.reserve(Settings.SplitOnlyExtensions.size());
+ for (const std::string& Extension : Settings.SplitOnlyExtensions)
{
- m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ m_SplitOnlyExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
}
- m_FixedChunkingExtensionHashes.reserve(Settings.FixedChunkingExtensions.size());
- for (const std::string& Extension : Settings.FixedChunkingExtensions)
+ m_SplitAndCompressExtensionHashes.reserve(Settings.SplitAndCompressExtensions.size());
+ for (const std::string& Extension : Settings.SplitAndCompressExtensions)
{
- m_FixedChunkingExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ m_SplitAndCompressExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
}
}
- ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
+    // Builds the controller from a serialized parameter object.
+    // Delegates to the settings constructor so that m_SplitOnlyExtensionHashes and
+    // m_SplitAndCompressExtensionHashes are populated; initializing m_Settings alone leaves both
+    // sets empty and ProcessFile would then never take the extension-based split paths.
+    StandardChunkingController(CbObjectView Parameters) : StandardChunkingController(ReadSettings(Parameters)) {}
+
+    // Splits the first RawSize bytes of Source into fixed-size chunks and records them in OutChunked.
+    //
+    //   Source          file to read from; data is pulled through a BasicFileBuffer
+    //   RawSize         number of bytes to process, starting at offset 0
+    //   OutChunked      receives the unique chunk hashes, the chunk sequence, the source range of each
+    //                   unique chunk, RawSize and the hash of the whole range
+    //   BytesProcessed  incremented by the number of bytes hashed
+    //   FixedChunkSize  nominal chunk size; the final chunk absorbs a short tail (see MinimumLastChunkSize)
+    //   AbortFlag       checked once per chunk
+    //
+    // Returns true when the whole range was chunked. Returns false if AbortFlag was set (OutChunked is
+    // then only partially filled) or if FixedChunkSize is zero.
+    bool FixedChunking(BasicFile&             Source,
+                       uint64_t               RawSize,
+                       ChunkedInfoWithSource& OutChunked,
+                       std::atomic<uint64_t>& BytesProcessed,
+                       const uint64_t         FixedChunkSize,
+                       std::atomic<bool>&     AbortFlag) const
+    {
+        ZEN_TRACE_CPU("FixedChunking");
+
+        if (FixedChunkSize == 0)
+        {
+            // A zero chunk size (e.g. read from malformed parameters) would divide by zero below
+            return false;
+        }
+
+        IoHashStream                                     FullHasher;
+        uint64_t                                         Offset = 0;
+        tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
+        const uint64_t                                   ExpectedChunkCount = 1 + (RawSize / FixedChunkSize);
+        ChunkHashToChunkIndex.reserve(ExpectedChunkCount);
+        OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount);
+        OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount);
+        OutChunked.ChunkSources.reserve(ExpectedChunkCount);
+
+        static const uint64_t BufferingSize = 256u * 1024u;
+        // Derived from the FixedChunkSize argument, so it has to be computed on every call; a function-local
+        // static would keep the value from whichever call happened to run first.
+        const uint64_t MinimumLastChunkSize = std::min<uint64_t>(128u * 1024u, FixedChunkSize / 32);
+
+        IoHashStream ChunkHasher;
+
+        BasicFileBuffer SourceBuffer(Source, Min(BufferingSize, RawSize));
+        while (Offset < RawSize)
+        {
+            if (AbortFlag)
+            {
+                return false;
+            }
+
+            ChunkHasher.Reset();
+
+            const uint64_t ChunkStartOffset = Offset;
+            const uint64_t BytesLeft        = RawSize - Offset;
+            uint64_t       ChunkSize        = std::min<uint64_t>(BytesLeft, FixedChunkSize);
+            if ((BytesLeft - ChunkSize) < MinimumLastChunkSize)
+            {
+                // Avoid small chunks from the end of the file
+                ChunkSize = BytesLeft;
+            }
+            const uint64_t End = ChunkStartOffset + ChunkSize;
+            while (Offset < End)
+            {
+                // Clamp to the end of this chunk rather than the end of the file, otherwise a chunk size that
+                // is not a multiple of BufferingSize pulls bytes of the next chunk into this chunk's hash
+                const uint64_t BufferSize = std::min<uint64_t>(End - Offset, BufferingSize);
+                MemoryView     ChunkData  = SourceBuffer.MakeView(BufferSize, Offset);
+                FullHasher.Append(ChunkData);
+                ChunkHasher.Append(ChunkData);
+                BytesProcessed.fetch_add(BufferSize);
+                Offset += BufferSize;
+            }
+
+            const IoHash ChunkHash = ChunkHasher.GetHash();
+            if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
+            {
+                // Same content as an earlier chunk of this file - reference it instead of adding a duplicate
+                OutChunked.Info.ChunkSequence.push_back(It->second);
+            }
+            else
+            {
+                const uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size());
+                // Remember the index so later identical chunks are found by the lookup above
+                ChunkHashToChunkIndex.emplace(ChunkHash, ChunkIndex);
+                OutChunked.Info.ChunkHashes.push_back(ChunkHash);
+                OutChunked.Info.ChunkSequence.push_back(ChunkIndex);
+                OutChunked.ChunkSources.push_back({.Offset = ChunkStartOffset, .Size = gsl::narrow<uint32_t>(ChunkSize)});
+            }
+        }
+
+        OutChunked.Info.RawSize = RawSize;
+        OutChunked.Info.RawHash = FullHasher.GetHash();
+        return true;
+    }
virtual bool ProcessFile(const std::filesystem::path& InputPath,
uint64_t RawSize,
@@ -192,106 +468,60 @@ public:
std::atomic<uint64_t>& BytesProcessed,
std::atomic<bool>& AbortFlag) const override
{
- ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
- if (RawSize < m_Settings.ChunkFileSizeLimit)
- {
- return false;
- }
+ ZEN_TRACE_CPU("StandardChunkingController::ProcessFile");
- const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
- const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
+ const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
- if (ExcludeFromChunking)
+ if (m_SplitOnlyExtensionHashes.contains(ExtensionHash))
{
- return false;
+ if (RawSize < m_Settings.SplitOnlyMinSize)
+ {
+ return false;
+ }
+ BasicFile Source(InputPath, BasicFile::Mode::kRead);
+ return FixedChunking(Source, RawSize, OutChunked, BytesProcessed, m_Settings.SplitOnlyChunkSize, AbortFlag);
}
- const bool FixedChunkingExtension = m_FixedChunkingExtensionHashes.contains(ExtensionHash);
-
- if (FixedChunkingExtension)
+ if (m_SplitAndCompressExtensionHashes.contains(ExtensionHash))
{
- if (RawSize < m_Settings.MinSizeForFixedChunking)
+ if (RawSize < m_Settings.SplitAndCompressMinSize)
{
return false;
}
- ZEN_TRACE_CPU("FixedChunking");
- IoHashStream FullHasher;
- BasicFile Source(InputPath, BasicFile::Mode::kRead);
- uint64_t Offset = 0;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex;
- const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize);
- ChunkHashToChunkIndex.reserve(ExpectedChunkCount);
- OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount);
- OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount);
- OutChunked.ChunkSources.reserve(ExpectedChunkCount);
-
- static const uint64_t BufferingSize = 256u * 1024u;
-
- IoHashStream ChunkHasher;
-
- while (Offset < RawSize)
+ BasicFile Source(InputPath, BasicFile::Mode::kRead);
+ return FixedChunking(Source, RawSize, OutChunked, BytesProcessed, m_Settings.SplitAndCompressChunkSize, AbortFlag);
+ }
+
+ if (RawSize > sizeof(uint32_t) && !m_Settings.SplitAndCompressFileLeadingBytes.empty())
+ {
+ BasicFile Source(InputPath, BasicFile::Mode::kRead);
+ uint32_t LeadingBytes = 0;
+ Source.Read(&LeadingBytes, 4, 0);
+ if (std::find(m_Settings.SplitAndCompressFileLeadingBytes.begin(),
+ m_Settings.SplitAndCompressFileLeadingBytes.end(),
+ LeadingBytes) != m_Settings.SplitAndCompressFileLeadingBytes.end())
{
- if (AbortFlag)
+ if (RawSize < m_Settings.SplitAndCompressMinSize)
{
return false;
}
-
- ChunkHasher.Reset();
-
- uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize);
- if (ChunkSize >= (BufferingSize + BufferingSize / 2))
- {
- ScanFile(Source.Handle(),
- Offset,
- ChunkSize,
- BufferingSize,
- [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) {
- FullHasher.Append(Data, Size);
- ChunkHasher.Append(Data, Size);
- BytesProcessed.fetch_add(Size);
- });
- }
- else
- {
- IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize);
- FullHasher.Append(ChunkData);
- ChunkHasher.Append(ChunkData);
- BytesProcessed.fetch_add(ChunkSize);
- }
-
- const IoHash ChunkHash = ChunkHasher.GetHash();
- if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
- {
- OutChunked.Info.ChunkSequence.push_back(It->second);
- }
else
{
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size());
- OutChunked.Info.ChunkHashes.push_back(ChunkHash);
- OutChunked.Info.ChunkSequence.push_back(ChunkIndex);
- OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)});
+ return FixedChunking(Source, RawSize, OutChunked, BytesProcessed, m_Settings.SplitAndCompressChunkSize, AbortFlag);
}
- Offset += ChunkSize;
}
- OutChunked.Info.RawSize = RawSize;
- OutChunked.Info.RawHash = FullHasher.GetHash();
- return true;
+ }
+
+ if (RawSize < m_Settings.DynamicChunkingParams.MaxSize)
+ {
+ return false;
}
else
{
- BasicFile Buffer(InputPath, BasicFile::Mode::kRead);
- if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer))
- {
- return false;
- }
- if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer))
- {
- return false;
- }
-
- OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag);
- return true;
+ BasicFile Source(InputPath, BasicFile::Mode::kRead);
+ OutChunked = ChunkData(Source, 0, RawSize, m_Settings.DynamicChunkingParams, &BytesProcessed, &AbortFlag);
}
+ return true;
}
virtual std::string_view GetName() const override { return Name; }
@@ -299,89 +529,71 @@ public:
virtual CbObject GetParameters() const override
{
CbObjectWriter Writer;
- Writer.BeginArray("FixedChunkingExtensions");
- {
- for (const std::string& Extension : m_Settings.FixedChunkingExtensions)
- {
- Writer.AddString(Extension);
- }
- }
- Writer.EndArray(); // ChunkExcludeExtensions
-
- Writer.BeginArray("ChunkExcludeExtensions"sv);
- {
- for (const std::string& Extension : m_Settings.ExcludeExtensions)
- {
- Writer.AddString(Extension);
- }
- }
- Writer.EndArray(); // ChunkExcludeExtensions
- Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles);
- Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles);
+ compactbinary_helpers::WriteArray(m_Settings.SplitOnlyExtensions, "SplitOnlyExtensions"sv, Writer);
+ Writer.AddInteger("SplitOnlyChunkSize"sv, m_Settings.SplitOnlyChunkSize);
+ Writer.AddInteger("SplitOnlyMinSize"sv, m_Settings.SplitOnlyMinSize);
- Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit);
+ compactbinary_helpers::WriteArray(m_Settings.SplitAndCompressExtensions, "SplitAndCompressExtensions"sv, Writer);
+ compactbinary_helpers::WriteArray(m_Settings.SplitAndCompressFileLeadingBytes, "SplitAndCompressFileLeadingBytes"sv, Writer);
+ Writer.AddInteger("SplitAndCompressChunkSize"sv, m_Settings.SplitAndCompressChunkSize);
+ Writer.AddInteger("SplitAndCompressMinSize"sv, m_Settings.SplitAndCompressMinSize);
- WriteChunkParams(Writer, m_Settings.ChunkingParams);
+ WriteChunkParams(Writer, m_Settings.DynamicChunkingParams);
- Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize);
- Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking);
return Writer.Save();
}
- static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv;
+ static constexpr std::string_view Name = "StandardChunkingController"sv;
private:
- static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters)
+ static StandardChunkingControllerSettings ReadSettings(CbObjectView Parameters)
{
- return ChunkingControllerWithFixedChunkingSettings{
- .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()),
- .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()),
- .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles),
- .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles),
- .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit),
- .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()),
- .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize),
- .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)};
+ return StandardChunkingControllerSettings{
+ .SplitOnlyExtensions = compactbinary_helpers::ReadArray<std::string>("SplitOnlyExtensions"sv, Parameters),
+ .SplitOnlyChunkSize = Parameters["SplitOnlyChunkSize"sv].AsUInt64(DefaultSplitOnlyChunkSize),
+ .SplitOnlyMinSize = Parameters["SplitOnlyMinSize"sv].AsUInt64(DefaultSplitOnlyMinSize),
+
+ .SplitAndCompressExtensions = compactbinary_helpers::ReadArray<std::string>("SplitAndCompressExtensions"sv, Parameters),
+ .SplitAndCompressFileLeadingBytes =
+ compactbinary_helpers::ReadArray<uint32_t>("SplitAndCompressFileLeadingBytes"sv, Parameters),
+ .SplitAndCompressChunkSize = Parameters["SplitAndCompressChunkSize"sv].AsUInt64(DefaultSplitAndCompressChunkSize),
+ .SplitAndCompressMinSize = Parameters["SplitAndCompressMinSize"sv].AsUInt64(DefaultSplitAndCompressMinSize),
+
+ .DynamicChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())};
}
- const ChunkingControllerWithFixedChunkingSettings m_Settings;
- tsl::robin_set<uint32_t> m_FixedChunkingExtensionHashes;
- tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
+ const StandardChunkingControllerSettings m_Settings;
+ tsl::robin_set<uint32_t> m_SplitOnlyExtensionHashes;
+ tsl::robin_set<uint32_t> m_SplitAndCompressExtensionHashes;
};
std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings)
-{
- return std::make_unique<BasicChunkingController>(Settings);
-}
-std::unique_ptr<ChunkingController>
-CreateBasicChunkingController(CbObjectView Parameters)
+CreateStandardChunkingController(const StandardChunkingControllerSettings& Setting)
{
- return std::make_unique<BasicChunkingController>(Parameters);
+ return std::make_unique<StandardChunkingController>(Setting);
}
std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting)
-{
- return std::make_unique<ChunkingControllerWithFixedChunking>(Setting);
-}
-std::unique_ptr<ChunkingController>
-CreateChunkingControllerWithFixedChunking(CbObjectView Parameters)
+CreateStandardChunkingController(CbObjectView Parameters)
{
- return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters);
+ return std::make_unique<StandardChunkingController>(Parameters);
}
std::unique_ptr<ChunkingController>
CreateChunkingController(std::string_view Name, CbObjectView Parameters)
{
- if (Name == BasicChunkingController::Name)
+ if (Name == legacy::BasicChunkingController::Name)
+ {
+ return legacy::CreateBasicChunkingController(Parameters);
+ }
+ else if (Name == legacy::ChunkingControllerWithFixedChunking::Name)
{
- return CreateBasicChunkingController(Parameters);
+ return legacy::CreateChunkingControllerWithFixedChunking(Parameters);
}
- else if (Name == ChunkingControllerWithFixedChunking::Name)
+ else if (Name == StandardChunkingController::Name)
{
- return CreateChunkingControllerWithFixedChunking(Parameters);
+ return CreateStandardChunkingController(Parameters);
}
return {};
}
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
index f200a342c..7e3903892 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
@@ -565,6 +565,8 @@ public:
std::vector<std::string> ExcludeFolders;
std::vector<std::string> ExcludeExtensions;
std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt";
+
+ std::vector<std::string> NonCompressableExtensions;
};
BuildsOperationUploadFolder(BuildOpLogOutput& LogOutput,
StorageInstance& Storage,
@@ -693,6 +695,8 @@ private:
const bool m_CreateBuild; // ?? Member?
const CbObject m_MetaData; // ?? Member
const Options m_Options;
+
+ tsl::robin_set<uint32_t> m_NonCompressableExtensionHashes;
};
struct ValidateStatistics
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
index 2a56d14d3..a4f22f4df 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
@@ -184,107 +184,4 @@ GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& Ra
return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex);
}
-namespace compactbinary_helpers {
- template<typename Type>
- void WriteArray(std::span<const Type> Values, std::string_view ArrayName, CbWriter& Output)
- {
- Output.BeginArray(ArrayName);
- for (const Type Value : Values)
- {
- Output << Value;
- }
- Output.EndArray();
- }
-
- template<typename Type>
- void WriteArray(const std::vector<Type>& Values, std::string_view ArrayName, CbWriter& Output)
- {
- WriteArray(std::span<const Type>(Values), ArrayName, Output);
- }
-
- template<>
- inline void WriteArray(std::span<const std::filesystem::path> Values, std::string_view ArrayName, CbWriter& Output)
- {
- Output.BeginArray(ArrayName);
- for (const std::filesystem::path& Path : Values)
- {
- Output.AddString((const char*)Path.generic_u8string().c_str());
- }
- Output.EndArray();
- }
-
- template<>
- inline void WriteArray(const std::vector<std::filesystem::path>& Values, std::string_view ArrayName, CbWriter& Output)
- {
- WriteArray(std::span<const std::filesystem::path>(Values), ArrayName, Output);
- }
-
- inline void WriteBinaryAttachmentArray(std::span<const IoHash> Values, std::string_view ArrayName, CbWriter& Output)
- {
- Output.BeginArray(ArrayName);
- for (const IoHash& Hash : Values)
- {
- Output.AddBinaryAttachment(Hash);
- }
- Output.EndArray();
- }
-
- inline void WriteBinaryAttachmentArray(const std::vector<IoHash>& Values, std::string_view ArrayName, CbWriter& Output)
- {
- WriteArray(std::span<const IoHash>(Values), ArrayName, Output);
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint32_t>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsUInt32());
- }
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint64_t>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsUInt64());
- }
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<std::filesystem::path>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- std::u8string_view U8Path = ItemView.AsU8String();
- Result.push_back(std::filesystem::path(U8Path));
- }
- }
-
- inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsHash());
- }
- }
-
- inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result)
- {
- CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- for (CbFieldView ItemView : Array)
- {
- Result.push_back(ItemView.AsBinaryAttachment());
- }
- }
-
-} // namespace compactbinary_helpers
-
} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkingcontroller.h b/src/zenremotestore/include/zenremotestore/chunking/chunkingcontroller.h
index b49d76a09..809e7c475 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkingcontroller.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkingcontroller.h
@@ -11,23 +11,6 @@
namespace zen {
-const std::vector<std::string> DefaultChunkingExcludeExtensions = {
- ".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip", ".sym", ".psym", ".txt", ".ini", ".json",
- ".verse", ".versemodule", ".jpg", ".c", ".h", ".cpp", ".cxx", ".c++", ".cc", ".hpp", ".hxx", ".h++", ".py", ".ogg", ".plist"};
-
-const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"};
-const bool DefaultChunkingExcludeElfFiles = true;
-const bool DefaultChunkingExcludeMachOFiles = true;
-
-const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u,
- .MaxSize = 128u * 1024u,
- .AvgSize = ((8u * 4u) * 1024u) + 128u};
-
-const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize;
-
-const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u;
-const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u;
-
struct ChunkedInfoWithSource;
class ChunkingController
@@ -45,33 +28,46 @@ public:
virtual CbObject GetParameters() const = 0;
};
-struct BasicChunkingControllerSettings
-{
- std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions;
- bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles;
- bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles;
- uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit;
- ChunkedParams ChunkingParams = DefaultChunkedParams;
-};
+//////////// Standard chunking controller
-std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings);
-std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters);
+const std::vector<std::string> DefaultSplitOnlyExtensions(
+ {".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip", ".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa", ".jpg", ".ogg", ".msixvc"});
+const uint64_t DefaultSplitOnlyChunkSize = 32u * 1024u * 1024u;
+const uint64_t DefaultSplitOnlyMinSize = DefaultSplitOnlyChunkSize * 4u;
-struct ChunkingControllerWithFixedChunkingSettings
+const std::vector<std::string> DefaultSplitAndCompressExtensions({".exe", ".dll", ".pdb", ".self", ".sym", ".psym",
+ ".dSYM", ".txt", ".ini", ".json", ".verse", ".versemodule",
+ ".c", ".h", ".cpp", ".cxx", ".c++", ".cc",
+ ".hpp", ".hxx", ".h++", ".py", ".plist"});
+
+// Default magic numbers for StandardChunkingControllerSettings::SplitAndCompressFileLeadingBytes.
+// StandardChunkingController::ProcessFile reads the first four bytes of a file into a uint32_t and
+// looks that value up in this list.
+// NOTE(review): only the 32-bit Mach-O magics are listed; the 64-bit variants (0xfeedfacf / 0xcffaedfe)
+// are absent - confirm whether 64-bit Mach-O files are meant to match.
+const std::vector<uint32_t> DefaultSplitAndCompressFileLeadingBytes({
+ 0x464c457f, // Elf
+ 0xfeedface, // MachO Big Endian
+ 0xcefaedfe, // MachO Little Endian
+});
+
+const uint64_t DefaultSplitAndCompressChunkSize = 64u * 1024u * 1024u;
+const uint64_t DefaultSplitAndCompressMinSize = DefaultSplitAndCompressChunkSize * 4u;
+
+const ChunkedParams DefaultDynamicChunkingParams = {.MinSize = ((8u * 1u) * 1024u) - 128u,
+ .MaxSize = 128u * 1024u,
+ .AvgSize = ((8u * 4u) * 1024u) + 128u};
+
+struct StandardChunkingControllerSettings
{
- std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions;
- std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions;
- bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles;
- bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles;
- uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit;
- ChunkedParams ChunkingParams = DefaultChunkedParams;
- uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize;
- uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking;
-};
+ std::vector<std::string> SplitOnlyExtensions = DefaultSplitOnlyExtensions;
+ uint64_t SplitOnlyChunkSize = DefaultSplitOnlyChunkSize;
+ uint64_t SplitOnlyMinSize = DefaultSplitOnlyMinSize;
+
+ std::vector<std::string> SplitAndCompressExtensions = DefaultSplitAndCompressExtensions;
+ std::vector<uint32_t> SplitAndCompressFileLeadingBytes = DefaultSplitAndCompressFileLeadingBytes;
+ uint64_t SplitAndCompressChunkSize = DefaultSplitAndCompressChunkSize;
+ uint64_t SplitAndCompressMinSize = DefaultSplitAndCompressMinSize;
-std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting);
-std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters);
+ ChunkedParams DynamicChunkingParams = DefaultDynamicChunkingParams;
+};
+std::unique_ptr<ChunkingController> CreateStandardChunkingController(const StandardChunkingControllerSettings& Setting);
std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters);
} // namespace zen