aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunking.cpp
diff options
context:
space:
mode:
authorzousar <[email protected]>2025-06-24 16:26:29 -0600
committerzousar <[email protected]>2025-06-24 16:26:29 -0600
commitbb298631ba35a323827dda0b8cd6158e276b5f61 (patch)
tree7ba8db91c44ce83f2c518f80f80ab14910eefa6f /src/zenutil/chunking.cpp
parentChange to PutResult structure (diff)
parent5.6.14 (diff)
downloadzen-bb298631ba35a323827dda0b8cd6158e276b5f61.tar.xz
zen-bb298631ba35a323827dda0b8cd6158e276b5f61.zip
Merge branch 'main' into zs/put-overwrite-policy
Diffstat (limited to 'src/zenutil/chunking.cpp')
-rw-r--r--src/zenutil/chunking.cpp382
1 files changed, 382 insertions, 0 deletions
diff --git a/src/zenutil/chunking.cpp b/src/zenutil/chunking.cpp
new file mode 100644
index 000000000..30edd322a
--- /dev/null
+++ b/src/zenutil/chunking.cpp
@@ -0,0 +1,382 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "chunking.h"
+
+#include <gsl/gsl-lite.hpp>
+
+#include <cmath>
+
+namespace zen::detail {
+
+static const uint32_t BuzhashTable[] = {
+ 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3,
+ 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c,
+ 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64,
+ 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2,
+ 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59,
+ 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393,
+ 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c,
+ 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4,
+ 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a,
+ 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a,
+ 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5,
+ 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c,
+ 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf,
+ 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5,
+ 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c,
+ 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140,
+ 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5,
+ 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510,
+ 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9,
+ 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1,
+ 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3,
+ 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b,
+ 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc,
+ 0xf9c18d66, 0x593ade65, 0xd95ddf11,
+};
+
+// ROL operation (compiler turns this into a ROL when optimizing)
+ZEN_FORCEINLINE static uint32_t
+Rotate32(uint32_t Value, size_t RotateCount)
+{
+ RotateCount &= 31;
+
+ return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount));
+}
+
+} // namespace zen::detail
+
+namespace zen {
+
+void
+ZenChunkHelper::Reset()
+{
+ InternalReset();
+
+ m_BytesScanned = 0;
+}
+
+void
+ZenChunkHelper::InternalReset()
+{
+ m_CurrentHash = 0;
+ m_CurrentChunkSize = 0;
+ m_WindowSize = 0;
+}
+
+void
+ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize)
+{
+ if (m_WindowSize)
+ return; // Already started
+
+ static_assert(kChunkSizeLimitMin > kWindowSize);
+
+ if (AvgSize)
+ {
+ // TODO: Validate AvgSize range
+ }
+ else
+ {
+ if (MinSize && MaxSize)
+ {
+ AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2));
+ }
+ else if (MinSize)
+ {
+ AvgSize = MinSize * 4;
+ }
+ else if (MaxSize)
+ {
+ AvgSize = MaxSize / 4;
+ }
+ else
+ {
+ AvgSize = kDefaultAverageChunkSize;
+ }
+ }
+
+ if (MinSize)
+ {
+ // TODO: Validate MinSize range
+ }
+ else
+ {
+ MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin);
+ }
+
+ if (MaxSize)
+ {
+ // TODO: Validate MaxSize range
+ }
+ else
+ {
+ MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax);
+ }
+
+ m_Discriminator = gsl::narrow<uint32_t>(AvgSize - MinSize);
+
+ if (m_Discriminator < MinSize)
+ {
+ m_Discriminator = gsl::narrow<uint32_t>(MinSize);
+ }
+
+ if (m_Discriminator > MaxSize)
+ {
+ m_Discriminator = gsl::narrow<uint32_t>(MaxSize);
+ }
+
+ m_Threshold = gsl::narrow<uint32_t>((uint64_t(std::numeric_limits<uint32_t>::max()) + 1) / m_Discriminator);
+
+ m_ChunkSizeMin = MinSize;
+ m_ChunkSizeMax = MaxSize;
+ m_ChunkSizeAvg = AvgSize;
+}
+
+size_t
+ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount)
+{
+ size_t Result = InternalScanChunk(DataBytesIn, ByteCount);
+
+ if (Result == kNoBoundaryFound)
+ {
+ m_BytesScanned += ByteCount;
+ }
+ else
+ {
+ m_BytesScanned += Result;
+ }
+
+ return Result;
+}
+
+size_t
+ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount)
+{
+ size_t CurrentOffset = 0;
+ const uint8_t* CursorPtr = reinterpret_cast<const uint8_t*>(DataBytesIn);
+
+ // There's no point in updating the hash if we know we're not
+ // going to have a cut point, so just skip the data. This logic currently
+ // provides roughly a 20% speedup on my machine
+
+ const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize;
+
+ if (m_CurrentChunkSize < NeedHashOffset)
+ {
+ const uint32_t SkipBytes = gsl::narrow<uint32_t>(std::min<uint64_t>(ByteCount, NeedHashOffset - m_CurrentChunkSize));
+
+ ByteCount -= SkipBytes;
+ m_CurrentChunkSize += SkipBytes;
+ CurrentOffset += SkipBytes;
+ CursorPtr += SkipBytes;
+
+ m_WindowSize = 0;
+
+ if (ByteCount == 0)
+ {
+ return kNoBoundaryFound;
+ }
+ }
+
+ // Fill window first
+
+ if (m_WindowSize < kWindowSize)
+ {
+ const uint32_t FillBytes = uint32_t(std::min<size_t>(ByteCount, kWindowSize - m_WindowSize));
+
+ memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes);
+
+ CursorPtr += FillBytes;
+
+ m_WindowSize += FillBytes;
+ m_CurrentChunkSize += FillBytes;
+
+ CurrentOffset += FillBytes;
+ ByteCount -= FillBytes;
+
+ if (m_WindowSize < kWindowSize)
+ {
+ return kNoBoundaryFound;
+ }
+
+ // We have a full window, initialize hash
+
+ uint32_t CurrentHash = 0;
+
+ for (int i = 1; i < kWindowSize; ++i)
+ {
+ CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i);
+ }
+
+ m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]];
+ }
+
+ // Scan for boundaries (i.e points where the hash matches the value determined by
+ // the discriminator)
+
+ uint32_t CurrentHash = m_CurrentHash;
+ uint32_t CurrentChunkSize = m_CurrentChunkSize;
+
+ size_t Index = CurrentChunkSize % kWindowSize;
+
+ if (m_Threshold && m_UseThreshold)
+ {
+ // This is roughly 4x faster than the general modulo approach on my
+ // TR 3990X (~940MB/sec) and doesn't require any special parameters to
+ // achieve max performance
+
+ while (ByteCount)
+ {
+ const uint8_t NewByte = *CursorPtr;
+ const uint8_t OldByte = m_Window[Index];
+
+ CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
+ detail::BuzhashTable[NewByte];
+
+ CurrentChunkSize++;
+ CurrentOffset++;
+
+ if (CurrentChunkSize >= m_ChunkSizeMin)
+ {
+ bool FoundBoundary;
+
+ if (CurrentChunkSize >= m_ChunkSizeMax)
+ {
+ FoundBoundary = true;
+ }
+ else
+ {
+ FoundBoundary = CurrentHash <= m_Threshold;
+ }
+
+ if (FoundBoundary)
+ {
+ // Boundary found!
+ InternalReset();
+
+ return CurrentOffset;
+ }
+ }
+
+ m_Window[Index++] = *CursorPtr;
+
+ if (Index == kWindowSize)
+ {
+ Index = 0;
+ }
+
+ ++CursorPtr;
+ --ByteCount;
+ }
+ }
+ else if ((m_Discriminator & (m_Discriminator - 1)) == 0)
+ {
+ // This is quite a bit faster than the generic modulo path, but
+ // requires a very specific average chunk size to be used. If you
+ // pass in an even power-of-two divided by 0.75 as the average
+ // chunk size you'll hit this path
+
+ const uint32_t Mask = m_Discriminator - 1;
+
+ while (ByteCount)
+ {
+ const uint8_t NewByte = *CursorPtr;
+ const uint8_t OldByte = m_Window[Index];
+
+ CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
+ detail::BuzhashTable[NewByte];
+
+ CurrentChunkSize++;
+ CurrentOffset++;
+
+ if (CurrentChunkSize >= m_ChunkSizeMin)
+ {
+ bool FoundBoundary;
+
+ if (CurrentChunkSize >= m_ChunkSizeMax)
+ {
+ FoundBoundary = true;
+ }
+ else
+ {
+ FoundBoundary = (CurrentHash & Mask) == Mask;
+ }
+
+ if (FoundBoundary)
+ {
+ // Boundary found!
+ InternalReset();
+
+ return CurrentOffset;
+ }
+ }
+
+ m_Window[Index++] = *CursorPtr;
+
+ if (Index == kWindowSize)
+ {
+ Index = 0;
+ }
+
+ ++CursorPtr;
+ --ByteCount;
+ }
+ }
+ else
+ {
+ // This is the slowest path, which caps out around 250MB/sec for large sizes
+ // on my TR3900X
+
+ while (ByteCount)
+ {
+ const uint8_t NewByte = *CursorPtr;
+ const uint8_t OldByte = m_Window[Index];
+
+ CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
+ detail::BuzhashTable[NewByte];
+
+ CurrentChunkSize++;
+ CurrentOffset++;
+
+ if (CurrentChunkSize >= m_ChunkSizeMin)
+ {
+ bool FoundBoundary;
+
+ if (CurrentChunkSize >= m_ChunkSizeMax)
+ {
+ FoundBoundary = true;
+ }
+ else
+ {
+ FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1);
+ }
+
+ if (FoundBoundary)
+ {
+ // Boundary found!
+ InternalReset();
+
+ return CurrentOffset;
+ }
+ }
+
+ m_Window[Index++] = *CursorPtr;
+
+ if (Index == kWindowSize)
+ {
+ Index = 0;
+ }
+
+ ++CursorPtr;
+ --ByteCount;
+ }
+ }
+
+ m_CurrentChunkSize = CurrentChunkSize;
+ m_CurrentHash = CurrentHash;
+
+ return kNoBoundaryFound;
+}
+
+} // namespace zen