aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2025-03-11 23:17:48 +0100
committer: Stefan Boberg <[email protected]> 2025-03-11 23:17:48 +0100
commit: 6361eaa38691e102521d959c650df25624927e7d (patch)
tree: 3fd78fb93b659191b9fe04a226741d9599299cfc /src
parent: added support for delta-encoding of main manifest (diff)
download: zen-6361eaa38691e102521d959c650df25624927e7d.tar.xz
download: zen-6361eaa38691e102521d959c650df25624927e7d.zip
Further tweaks to build part manifest encoding
Reduces the size of the build part manifest from the original 5.3 MB to 144 KB (the previous delta encoding had only brought it down to 1.8 MB).
Diffstat (limited to 'src')
-rw-r--r-- src/zen/cmds/builds_cmd.cpp 13
-rw-r--r-- src/zenutil/include/zenutil/chunkedcontent.h 119
2 files changed, 119 insertions, 13 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp
index b7191d682..430a8937c 100644
--- a/src/zen/cmds/builds_cmd.cpp
+++ b/src/zen/cmds/builds_cmd.cpp
@@ -666,6 +666,8 @@ namespace {
#endif // EXTRA_VERIFY
}
+ bool g_UseDeltaEncoding = true;
+
void WriteBuildContentToCompactBinary(CbObjectWriter& PartManifestWriter,
const SourcePlatform Platform,
std::span<const std::filesystem::path> Paths,
@@ -676,7 +678,7 @@ namespace {
std::span<const uint32_t> ChunkCounts,
std::span<const IoHash> LocalChunkHashes,
std::span<const uint64_t> LocalChunkRawSizes,
- std::vector<uint32_t> AbsoluteChunkOrders,
+ const std::vector<uint32_t>& AbsoluteChunkOrders,
const std::span<const uint32_t> LooseLocalChunkIndexes,
const std::span<IoHash> BlockHashes)
{
@@ -711,8 +713,7 @@ namespace {
compactbinary_helpers::WriteArray(SequenceRawHashes, "sequenceRawHashes"sv, PartManifestWriter);
compactbinary_helpers::WriteArray(ChunkCounts, "chunkcounts"sv, PartManifestWriter);
- bool UseDeltaCoding = false;
- if (UseDeltaCoding)
+ if (g_UseDeltaEncoding)
{
compactbinary_helpers::WriteDeltaArray(AbsoluteChunkOrders, "chunkorders_delta"sv, PartManifestWriter);
}
@@ -6735,6 +6736,12 @@ BuildsCommand::BuildsCommand()
"<manifestpath>");
m_UploadOptions
.add_option("", "", "verify", "Enable post upload verify of all uploaded data", cxxopts::value(m_PostUploadVerify), "<verify>");
+ m_UploadOptions.add_option("",
+ "",
+ "allow-deltaencoding",
+ "Allow efficient encoding of build manifest. Defaults to true.",
+ cxxopts::value(g_UseDeltaEncoding),
+ "<allowdeltaencoding>");
m_UploadOptions.parse_positional({"local-path", "build-id"});
m_UploadOptions.positional_help("local-path build-id");
diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h
index 533ec431f..e21be27b0 100644
--- a/src/zenutil/include/zenutil/chunkedcontent.h
+++ b/src/zenutil/include/zenutil/chunkedcontent.h
@@ -198,14 +198,50 @@ namespace compactbinary_helpers {
inline void WriteDeltaArray(std::span<const uint32_t> Values, std::string_view ArrayName, CbWriter& Output)
{
Output.BeginArray(ArrayName);
- uint32_t PreviousValue = 0;
+ uint32_t PreviousValue = 0;
+ uint64_t PreviousDeltaZigZag = 1;
+ uint32_t RunLength = 0;
+
+ auto EmitEndRun = [&] {
+ switch (RunLength)
+ {
+ case 0:
+ return;
+ case 1:
+ Output.AddInteger(PreviousDeltaZigZag);
+ break;
+ case 2:
+ Output.AddInteger(PreviousDeltaZigZag);
+ Output.AddInteger(PreviousDeltaZigZag);
+ break;
+ default: // >= 3
+ Output.AddInteger(1);
+ Output.AddInteger(RunLength - 3);
+ break;
+ }
+
+ RunLength = 0;
+ };
+
for (const uint32_t Value : Values)
{
- int64_t Delta = int64_t(Value) - PreviousValue;
- uint64_t DeltaZigZag = (Delta >= 0) ? (Delta << 1) : (((-Delta - 1) << 1) | 1);
- Output.AddInteger(DeltaZigZag);
+ int64_t Delta = int64_t(Value) - PreviousValue;
+ const uint64_t DeltaZigZag = (Delta >= 0) ? (Delta << 1) : (((-Delta) << 1) | 1);
+
+ if (DeltaZigZag == PreviousDeltaZigZag)
+ {
+ ++RunLength;
+ }
+ else
+ {
+ EmitEndRun();
+ Output.AddInteger(DeltaZigZag);
+
+ PreviousDeltaZigZag = DeltaZigZag;
+ }
PreviousValue = Value;
}
+ EmitEndRun();
Output.EndArray();
}
@@ -259,14 +295,77 @@ namespace compactbinary_helpers {
inline void ReadDeltaArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint32_t>& Result)
{
CbArrayView Array = Input[ArrayName].AsArrayView();
- Result.reserve(Array.Num());
- uint32_t PreviousValue = 0;
- for (CbFieldView ItemView : Array)
+
{
- uint64_t DeltaZigZag = ItemView.AsUInt64();
- const int64_t Delta = (DeltaZigZag & 1) ? -int64_t((DeltaZigZag >> 1) + 1) : int64_t(DeltaZigZag >> 1);
- PreviousValue = uint32_t(PreviousValue + Delta);
+ // Count entries for reserve
+
+ uint64_t EntryCount = 0;
+ uint64_t EncodedEntryCount = 0;
+
+ bool InRun = false;
+
+ for (CbFieldView ItemView : Array)
+ {
+ ++EncodedEntryCount;
+
+ const uint64_t DeltaZigZag = ItemView.AsUInt64();
+ if (InRun)
+ {
+ uint64_t RunLength = DeltaZigZag + 3;
+
+ EntryCount += RunLength;
+
+ InRun = false;
+ }
+ else if (DeltaZigZag == 1)
+ {
+ // Encoded run, next value is the repeat count
+ InRun = true;
+ }
+ else
+ {
+ ++EntryCount;
+ }
+ }
+
+ Result.reserve(EntryCount);
+ }
+
+ uint32_t PreviousValue = 0;
+ uint64_t PreviousDeltaZigZag = 1;
+
+ auto EmitEntry = [&](uint64_t DeltaZigZag) {
+ PreviousDeltaZigZag = DeltaZigZag;
+ const int64_t Delta = (DeltaZigZag & 1) ? -int64_t((DeltaZigZag >> 1)) : int64_t(DeltaZigZag >> 1);
+ PreviousValue = uint32_t(PreviousValue + Delta);
Result.push_back(PreviousValue);
+ };
+
+ bool InRun = false;
+
+ for (CbFieldView ItemView : Array)
+ {
+ const uint64_t DeltaZigZag = ItemView.AsUInt64();
+ if (InRun)
+ {
+ uint64_t RunLength = DeltaZigZag + 3;
+
+ while (RunLength--)
+ {
+ EmitEntry(PreviousDeltaZigZag);
+ }
+
+ InRun = false;
+ }
+ else if (DeltaZigZag == 1)
+ {
+ // Encoded run, next value is the repeat count
+ InRun = true;
+ }
+ else
+ {
+ EmitEntry(DeltaZigZag);
+ }
}
}