aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-03-14 09:50:00 +0100
committerDan Engelbrecht <[email protected]>2025-03-14 09:50:00 +0100
commit55c67aec301cfc99178ab54c6366cbc88f35d46a (patch)
tree84b4c73220f7dd041763b6d1919eedc8d0b90844 /src/zenstore
parentMerge remote-tracking branch 'origin/de/zen-service-command' into de/zen-serv... (diff)
parentfix quoted command lines arguments (#306) (diff)
downloadzen-55c67aec301cfc99178ab54c6366cbc88f35d46a.tar.xz
zen-55c67aec301cfc99178ab54c6366cbc88f35d46a.zip
Merge remote-tracking branch 'origin/main' into de/zen-service-command
Diffstat (limited to 'src/zenstore')
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp121
-rw-r--r--src/zenstore/cache/cacherpc.cpp73
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp8
-rw-r--r--src/zenstore/chunkedfile.cpp505
-rw-r--r--src/zenstore/chunking.cpp383
-rw-r--r--src/zenstore/chunking.h56
-rw-r--r--src/zenstore/filecas.cpp4
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h38
-rw-r--r--src/zenstore/include/zenstore/cache/cacheshared.h4
-rw-r--r--src/zenstore/include/zenstore/cache/structuredcachestore.h6
-rw-r--r--src/zenstore/include/zenstore/chunkedfile.h54
-rw-r--r--src/zenstore/xmake.lua1
12 files changed, 145 insertions, 1108 deletions
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index 25f68330a..61552fafc 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -708,11 +708,11 @@ namespace zen {
ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc,
std::atomic_uint64_t& OuterCacheMemoryUsage,
- std::string BucketName,
+ std::string_view BucketName,
const BucketConfiguration& Config)
: m_Gc(Gc)
, m_OuterCacheMemoryUsage(OuterCacheMemoryUsage)
-, m_BucketName(std::move(BucketName))
+, m_BucketName(BucketName)
, m_Configuration(Config)
, m_BucketId(Oid::Zero)
{
@@ -1329,7 +1329,7 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept
struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle
{
- GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults)
+ GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults)
{
Keys.reserve(OutResults.capacity());
ResultIndexes.reserve(OutResults.capacity());
@@ -1340,11 +1340,11 @@ struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle
std::vector<IoHash> Keys;
std::vector<size_t> ResultIndexes;
- std::vector<ZenCacheValue>& OutResults;
+ ZenCacheValueVec_t& OutResults;
};
ZenCacheDiskLayer::CacheBucket::GetBatchHandle*
-ZenCacheDiskLayer::CacheBucket::BeginGetBatch(std::vector<ZenCacheValue>& OutResult)
+ZenCacheDiskLayer::CacheBucket::BeginGetBatch(ZenCacheValueVec_t& OutResult)
{
ZEN_TRACE_CPU("Z$::Bucket::BeginGetBatch");
return new GetBatchHandle(OutResult);
@@ -1364,13 +1364,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
if (!Batch->ResultIndexes.empty())
{
- std::vector<DiskLocation> StandaloneDiskLocations;
- std::vector<size_t> StandaloneKeyIndexes;
- std::vector<size_t> MemCachedKeyIndexes;
- std::vector<DiskLocation> InlineDiskLocations;
- std::vector<BlockStoreLocation> InlineBlockLocations;
- std::vector<size_t> InlineKeyIndexes;
- std::vector<bool> FillRawHashAndRawSize(Batch->Keys.size(), false);
+ eastl::fixed_vector<DiskLocation, 16> StandaloneDiskLocations;
+ eastl::fixed_vector<size_t, 16> StandaloneKeyIndexes;
+ eastl::fixed_vector<size_t, 16> MemCachedKeyIndexes;
+ eastl::fixed_vector<DiskLocation, 16> InlineDiskLocations;
+ eastl::fixed_vector<BlockStoreLocation, 16> InlineBlockLocations;
+ eastl::fixed_vector<size_t, 16> InlineKeyIndexes;
+ eastl::fixed_vector<bool, 16> FillRawHashAndRawSize(Batch->Keys.size(), false);
{
RwLock::SharedLockScope IndexLock(m_IndexLock);
for (size_t KeyIndex = 0; KeyIndex < Batch->Keys.size(); KeyIndex++)
@@ -1526,33 +1526,35 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
if (!InlineDiskLocations.empty())
{
ZEN_TRACE_CPU("Z$::Bucket::EndGetBatch::ReadInline");
- m_BlockStore.IterateChunks(InlineBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool {
- // Only read into memory the IoBuffers we could potentially add to memcache
- const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u);
- m_BlockStore.IterateBlock(
- InlineBlockLocations,
- ChunkIndexes,
- [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
- const void* Data,
- uint64_t Size) -> bool {
- if (Data != nullptr)
- {
- FillOne(InlineDiskLocations[ChunkIndex],
- InlineKeyIndexes[ChunkIndex],
- IoBufferBuilder::MakeCloneFromMemory(Data, Size));
- }
- return true;
- },
- [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
- BlockStoreFile& File,
- uint64_t Offset,
- uint64_t Size) -> bool {
- FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size));
- return true;
- },
- LargeChunkSizeLimit);
- return true;
- });
+ m_BlockStore.IterateChunks(
+ std::span{begin(InlineBlockLocations), end(InlineBlockLocations)},
+ [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool {
+ // Only read into memory the IoBuffers we could potentially add to memcache
+ const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u);
+ m_BlockStore.IterateBlock(
+ std::span{begin(InlineBlockLocations), end(InlineBlockLocations)},
+ ChunkIndexes,
+ [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
+ const void* Data,
+ uint64_t Size) -> bool {
+ if (Data != nullptr)
+ {
+ FillOne(InlineDiskLocations[ChunkIndex],
+ InlineKeyIndexes[ChunkIndex],
+ IoBufferBuilder::MakeCloneFromMemory(Data, Size));
+ }
+ return true;
+ },
+ [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
+ BlockStoreFile& File,
+ uint64_t Offset,
+ uint64_t Size) -> bool {
+ FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size));
+ return true;
+ },
+ LargeChunkSizeLimit);
+ return true;
+ });
}
if (!StandaloneDiskLocations.empty())
@@ -3581,15 +3583,29 @@ ZenCacheDiskLayer::~ZenCacheDiskLayer()
}
}
+template<typename T, typename U>
+struct equal_to_2 : public eastl::binary_function<T, U, bool>
+{
+ constexpr bool operator()(const T& a, const U& b) const { return a == b; }
+
+ template<typename T_ = T,
+ typename U_ = U,
+ typename = eastl::enable_if_t<!eastl::is_same_v<eastl::remove_const_t<T_>, eastl::remove_const_t<U_>>>>
+ constexpr bool operator()(const U& b, const T& a) const
+ {
+ return b == a;
+ }
+};
+
ZenCacheDiskLayer::CacheBucket*
ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket)
{
ZEN_TRACE_CPU("Z$::GetOrCreateBucket");
- const auto BucketName = std::string(InBucket);
{
RwLock::SharedLockScope SharedLock(m_Lock);
- if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end())
+ if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), equal_to_2<std::string, std::string_view>());
+ It != m_Buckets.end())
{
return It->second.get();
}
@@ -3597,31 +3613,32 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket)
// We create the bucket without holding a lock since contructor calls GcManager::AddGcReferencer which takes an exclusive lock.
// This can cause a deadlock, if GC is running we would block while holding ZenCacheDiskLayer::m_Lock
- std::unique_ptr<CacheBucket> Bucket(
- std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig));
+ std::unique_ptr<CacheBucket> Bucket(std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, InBucket, m_Configuration.BucketConfig));
RwLock::ExclusiveLockScope Lock(m_Lock);
- if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end())
+ if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), equal_to_2<std::string, std::string_view>());
+ It != m_Buckets.end())
{
return It->second.get();
}
std::filesystem::path BucketPath = m_RootDir;
- BucketPath /= BucketName;
+ BucketPath /= InBucket;
try
{
if (!Bucket->OpenOrCreate(BucketPath))
{
- ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir);
+ ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", InBucket, m_RootDir);
return nullptr;
}
}
catch (const std::exception& Err)
{
- ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what());
+ ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", InBucket, BucketPath, Err.what());
throw;
}
+ std::string BucketName{InBucket};
CacheBucket* Result = Bucket.get();
m_Buckets.emplace(BucketName, std::move(Bucket));
if (m_CapturedBuckets)
@@ -3720,7 +3737,7 @@ ZenCacheDiskLayer::EndPutBatch(PutBatchHandle* Batch) noexcept
struct ZenCacheDiskLayer::GetBatchHandle
{
- GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) {}
+ GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) {}
struct BucketHandle
{
CacheBucket* Bucket;
@@ -3780,13 +3797,13 @@ struct ZenCacheDiskLayer::GetBatchHandle
return NewBucketHandle;
}
- RwLock Lock;
- std::vector<BucketHandle> BucketHandles;
- std::vector<ZenCacheValue>& OutResults;
+ RwLock Lock;
+ eastl::fixed_vector<BucketHandle, 4> BucketHandles;
+ ZenCacheValueVec_t& OutResults;
};
ZenCacheDiskLayer::GetBatchHandle*
-ZenCacheDiskLayer::BeginGetBatch(std::vector<ZenCacheValue>& OutResults)
+ZenCacheDiskLayer::BeginGetBatch(ZenCacheValueVec_t& OutResults)
{
return new GetBatchHandle(OutResults);
}
diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp
index cca51e63e..97e26a38d 100644
--- a/src/zenstore/cache/cacherpc.cpp
+++ b/src/zenstore/cache/cacherpc.cpp
@@ -20,6 +20,8 @@
#include <zencore/memory/llm.h>
+#include <EASTL/fixed_vector.h>
+
//////////////////////////////////////////////////////////////////////////
namespace zen {
@@ -89,7 +91,7 @@ GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key)
return false;
}
IoHash Hash = HashField.AsHash();
- Key = CacheKey::Create(*Bucket, Hash);
+ Key = CacheKey::CreateValidated(std::move(*Bucket), Hash);
return true;
}
@@ -305,7 +307,7 @@ CacheRpcHandler::HandleRpcPutCacheRecords(const CacheRequestContext& Context, co
}
DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default;
- std::vector<bool> Results;
+ eastl::fixed_vector<bool, 32> Results;
CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
for (CbFieldView RequestField : RequestsArray)
@@ -481,16 +483,15 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
bool Exists = false;
bool ReadFromUpstream = false;
};
- struct RecordRequestData
+ struct RecordRequestData : public CacheKeyRequest
{
- CacheKeyRequest Upstream;
- CbObjectView RecordObject;
- IoBuffer RecordCacheValue;
- CacheRecordPolicy DownstreamPolicy;
- std::vector<ValueRequestData> Values;
- bool Complete = false;
- const UpstreamEndpointInfo* Source = nullptr;
- uint64_t ElapsedTimeUs;
+ CbObjectView RecordObject;
+ IoBuffer RecordCacheValue;
+ CacheRecordPolicy DownstreamPolicy;
+ eastl::fixed_vector<ValueRequestData, 4> Values;
+ bool Complete = false;
+ const UpstreamEndpointInfo* Source = nullptr;
+ uint64_t ElapsedTimeUs;
};
std::string_view PolicyText = Params["DefaultPolicy"sv].AsString();
@@ -503,8 +504,8 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
const bool HasUpstream = m_UpstreamCache.IsActive();
- std::vector<RecordRequestData> Requests;
- std::vector<size_t> UpstreamIndexes;
+ eastl::fixed_vector<RecordRequestData, 16> Requests;
+ eastl::fixed_vector<size_t, 16> UpstreamIndexes;
auto ParseValues = [](RecordRequestData& Request) {
CbArrayView ValuesArray = Request.RecordObject["Values"sv].AsArrayView();
@@ -535,7 +536,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
CbObjectView RequestObject = RequestField.AsObjectView();
CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView();
- CacheKey& Key = Request.Upstream.Key;
+ CacheKey& Key = Request.Key;
if (!GetRpcRequestCacheKey(KeyObject, Key))
{
return CbPackage{};
@@ -707,7 +708,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
for (size_t Index : UpstreamIndexes)
{
RecordRequestData& Request = Requests[Index];
- UpstreamRequests.push_back(&Request.Upstream);
+ UpstreamRequests.push_back(&Request);
if (Request.Values.size())
{
@@ -721,13 +722,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
UpstreamPolicy |= !Value.ReadFromUpstream ? CachePolicy::SkipData : CachePolicy::None;
Builder.AddValuePolicy(Value.ValueId, UpstreamPolicy);
}
- Request.Upstream.Policy = Builder.Build();
+ Request.Policy = Builder.Build();
}
else
{
// We don't know which Values exist in the Record; ask the upstrem for all values that the client wants,
// and convert the CacheRecordPolicy to an upstream policy
- Request.Upstream.Policy = Request.DownstreamPolicy.ConvertToUpstream();
+ Request.Policy = Request.DownstreamPolicy.ConvertToUpstream();
}
}
@@ -737,10 +738,9 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
return;
}
- RecordRequestData& Request =
- *reinterpret_cast<RecordRequestData*>(reinterpret_cast<char*>(&Params.Request) - offsetof(RecordRequestData, Upstream));
+ RecordRequestData& Request = *static_cast<RecordRequestData*>(&Params.Request);
Request.ElapsedTimeUs += static_cast<uint64_t>(Params.ElapsedSeconds * 1000000.0);
- const CacheKey& Key = Request.Upstream.Key;
+ const CacheKey& Key = Request.Key;
Stopwatch Timer;
auto TimeGuard = MakeGuard([&Timer, &Request]() { Request.ElapsedTimeUs += Timer.GetElapsedTimeUs(); });
if (!Request.RecordObject)
@@ -832,10 +832,12 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
CbPackage ResponsePackage;
CbObjectWriter ResponseObject{2048};
+ ResponsePackage.ReserveAttachments(Requests.size());
+
ResponseObject.BeginArray("Result"sv);
for (RecordRequestData& Request : Requests)
{
- const CacheKey& Key = Request.Upstream.Key;
+ const CacheKey& Key = Request.Key;
if (Request.Complete ||
(Request.RecordObject && EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::PartialRecord)))
{
@@ -910,11 +912,12 @@ CacheRpcHandler::HandleRpcPutCacheValues(const CacheRequestContext& Context, con
const bool HasUpstream = m_UpstreamCache.IsActive();
CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
- std::vector<bool> BatchResults;
- std::vector<size_t> BatchResultIndexes;
- std::vector<bool> Results;
- std::vector<CacheKey> UpstreamCacheKeys;
- uint64_t RequestCount = RequestsArray.Num();
+ std::vector<bool> BatchResults;
+ eastl::fixed_vector<size_t, 32> BatchResultIndexes;
+ eastl::fixed_vector<bool, 32> Results;
+ eastl::fixed_vector<CacheKey, 32> UpstreamCacheKeys;
+
+ uint64_t RequestCount = RequestsArray.Num();
{
Results.reserve(RequestCount);
std::unique_ptr<ZenCacheStore::PutBatch> Batch;
@@ -1099,15 +1102,15 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO
uint64_t RawSize = 0;
CompressedBuffer Result;
};
- std::vector<RequestData> Requests;
+ eastl::fixed_vector<RequestData, 16> Requests;
- std::vector<size_t> RemoteRequestIndexes;
+ eastl::fixed_vector<size_t, 16> RemoteRequestIndexes;
const bool HasUpstream = m_UpstreamCache.IsActive();
- CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
- std::vector<ZenCacheValue> CacheValues;
- const uint64_t RequestCount = RequestsArray.Num();
+ CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
+ ZenCacheValueVec_t CacheValues;
+ const uint64_t RequestCount = RequestsArray.Num();
CacheValues.reserve(RequestCount);
{
std::unique_ptr<ZenCacheStore::GetBatch> Batch;
@@ -1136,7 +1139,6 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO
CacheKey& Key = Request.Key;
CachePolicy Policy = Request.Policy;
- ZenCacheValue CacheValue;
if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal))
{
if (Batch)
@@ -1276,6 +1278,9 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO
ZEN_TRACE_CPU("Z$::RpcGetCacheValues::Response");
CbPackage RpcResponse;
CbObjectWriter ResponseObject{1024};
+
+ RpcResponse.ReserveAttachments(Requests.size());
+
ResponseObject.BeginArray("Result"sv);
for (const RequestData& Request : Requests)
{
@@ -1642,7 +1647,7 @@ CacheRpcHandler::GetLocalCacheValues(const CacheRequestContext& Context,
using namespace cache::detail;
const bool HasUpstream = m_UpstreamCache.IsActive();
- std::vector<ZenCacheValue> Chunks;
+ ZenCacheValueVec_t Chunks;
Chunks.reserve(ValueRequests.size());
{
std::unique_ptr<ZenCacheStore::GetBatch> Batch;
@@ -1796,6 +1801,8 @@ CacheRpcHandler::WriteGetCacheChunksResponse([[maybe_unused]] const CacheRequest
CbPackage RpcResponse;
CbObjectWriter Writer{1024};
+ RpcResponse.ReserveAttachments(Requests.size());
+
Writer.BeginArray("Result"sv);
for (ChunkRequest& Request : Requests)
{
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index 133cb42d7..7d277329e 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -178,13 +178,13 @@ ZenCacheNamespace::EndPutBatch(PutBatchHandle* Batch) noexcept
struct ZenCacheNamespace::GetBatchHandle
{
- GetBatchHandle(std::vector<ZenCacheValue>& OutResult) : Results(OutResult) {}
- std::vector<ZenCacheValue>& Results;
+ GetBatchHandle(ZenCacheValueVec_t& OutResult) : Results(OutResult) {}
+ ZenCacheValueVec_t& Results;
ZenCacheDiskLayer::GetBatchHandle* DiskLayerHandle = nullptr;
};
ZenCacheNamespace::GetBatchHandle*
-ZenCacheNamespace::BeginGetBatch(std::vector<ZenCacheValue>& OutResult)
+ZenCacheNamespace::BeginGetBatch(ZenCacheValueVec_t& OutResult)
{
ZenCacheNamespace::GetBatchHandle* Handle = new ZenCacheNamespace::GetBatchHandle(OutResult);
Handle->DiskLayerHandle = m_DiskLayer.BeginGetBatch(OutResult);
@@ -580,7 +580,7 @@ ZenCacheStore::PutBatch::~PutBatch()
}
}
-ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, std::vector<ZenCacheValue>& OutResult)
+ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, ZenCacheValueVec_t& OutResult)
: m_CacheStore(CacheStore)
, Results(OutResult)
{
diff --git a/src/zenstore/chunkedfile.cpp b/src/zenstore/chunkedfile.cpp
deleted file mode 100644
index f200bc1ec..000000000
--- a/src/zenstore/chunkedfile.cpp
+++ /dev/null
@@ -1,505 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zencore/basicfile.h>
-#include <zenstore/chunkedfile.h>
-
-#include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-namespace {
- struct ChunkedHeader
- {
- static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd
- static constexpr uint32_t CurrentVersion = 1;
-
- uint32_t Magic = ExpectedMagic;
- uint32_t Version = CurrentVersion;
- uint32_t ChunkSequenceLength;
- uint32_t ChunkHashCount;
- uint64_t ChunkSequenceOffset;
- uint64_t ChunkHashesOffset;
- uint64_t RawSize = 0;
- IoHash RawHash;
- };
-} // namespace
-
-IoBuffer
-SerializeChunkedInfo(const ChunkedInfo& Info)
-{
- size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) +
- RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16);
- IoBuffer HeaderData(HeaderSize);
-
- ChunkedHeader Header;
- Header.ChunkSequenceLength = gsl::narrow<uint32_t>(Info.ChunkSequence.size());
- Header.ChunkHashCount = gsl::narrow<uint32_t>(Info.ChunkHashes.size());
- Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16);
- Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16);
- Header.RawSize = Info.RawSize;
- Header.RawHash = Info.RawHash;
-
- MutableMemoryView WriteView = HeaderData.GetMutableView();
- {
- MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header));
- HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header)));
- }
- {
- MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength);
- ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize()));
- }
- {
- MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount);
- ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize()));
- }
-
- return HeaderData;
-}
-
-ChunkedInfo
-DeserializeChunkedInfo(IoBuffer& Buffer)
-{
- MemoryView View = Buffer.GetView();
- ChunkedHeader Header;
- {
- MutableMemoryView HeaderWriteView(&Header, sizeof(Header));
- HeaderWriteView.CopyFrom(View.Left(sizeof(Header)));
- }
- if (Header.Magic != ChunkedHeader::ExpectedMagic)
- {
- return {};
- }
- if (Header.Version != ChunkedHeader::CurrentVersion)
- {
- return {};
- }
- ChunkedInfo Info;
- Info.RawSize = Header.RawSize;
- Info.RawHash = Header.RawHash;
- Info.ChunkSequence.resize(Header.ChunkSequenceLength);
- Info.ChunkHashes.resize(Header.ChunkHashCount);
- {
- MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength);
- ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize()));
- }
- {
- MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount);
- ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize()));
- }
-
- return Info;
-}
-
-void
-Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk)
-{
- BasicFile Reconstructed;
- Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate);
- BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024);
- uint64_t Offset = 0;
- for (uint32_t SequenceIndex : Info.ChunkSequence)
- {
- IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]);
- ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset);
- Offset += Chunk.GetSize();
- }
-}
-
-ChunkedInfoWithSource
-ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params)
-{
- ChunkedInfoWithSource Result;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks;
-
- ZenChunkHelper Chunker;
- Chunker.SetUseThreshold(Params.UseThreshold);
- Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize);
- size_t End = Offset + Size;
- const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024;
- BasicFileBuffer RawBuffer(RawData, ScanBufferSize);
- MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
- ZEN_ASSERT(!SliceView.IsEmpty());
- size_t SliceSize = SliceView.GetSize();
- IoHashStream RawHashStream;
- while (Offset < End)
- {
- size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize);
- if (ScanLength == ZenChunkHelper::kNoBoundaryFound)
- {
- if (Offset + SliceSize == End)
- {
- ScanLength = SliceSize;
- }
- else
- {
- SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
- SliceSize = SliceView.GetSize();
- Chunker.Reset();
- continue;
- }
- }
- uint32_t ChunkLength = gsl::narrow<uint32_t>(ScanLength); // +HashedLength);
- MemoryView ChunkView = SliceView.Left(ScanLength);
- RawHashStream.Append(ChunkView);
- IoHash ChunkHash = IoHash::HashBuffer(ChunkView);
- SliceView.RightChopInline(ScanLength);
- if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end())
- {
- Result.Info.ChunkSequence.push_back(It->second);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(Result.Info.ChunkHashes.size());
- FoundChunks.insert_or_assign(ChunkHash, ChunkIndex);
- Result.Info.ChunkHashes.push_back(ChunkHash);
- Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength});
- Result.Info.ChunkSequence.push_back(ChunkIndex);
- }
-
- SliceSize = SliceView.GetSize();
- Offset += ChunkLength;
- }
- Result.Info.RawSize = Size;
- Result.Info.RawHash = RawHashStream.GetHash();
- return Result;
-}
-
-} // namespace zen
-
-#if ZEN_WITH_TESTS
-# include <zencore/filesystem.h>
-# include <zencore/fmtutils.h>
-# include <zencore/iohash.h>
-# include <zencore/logging.h>
-# include <zencore/scopeguard.h>
-# include <zencore/timer.h>
-# include <zencore/testing.h>
-# include <zencore/testutils.h>
-# include <zencore/workthreadpool.h>
-
-# include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <tsl/robin_map.h>
-# include <tsl/robin_set.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-# if 0
-TEST_CASE("chunkedfile.findparams")
-{
-# if 1
- DirectoryContent SourceContent1;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1);
- const std::vector<std::filesystem::path>& SourceFiles1 = SourceContent1.Files;
- DirectoryContent SourceContent2;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2);
- const std::vector<std::filesystem::path>& SourceFiles2 = SourceContent2.Files;
-# else
- std::filesystem::path SourcePath1 =
- "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- std::filesystem::path SourcePath2 =
- "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- const std::vector<std::filesystem::path>& SourceFiles1 = {SourcePath1};
- const std::vector<std::filesystem::path>& SourceFiles2 = {SourcePath2};
-# endif
- ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598},
- ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222},
- ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442},
- ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478},
- ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880},
- ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678},
- ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994},
- ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714},
- ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636},
- ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609},
- ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955},
- ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792},
- ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338},
- ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568},
- ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297},
- ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372},
- ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805},
- ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263},
- ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360},
- ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976},
- ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493},
- ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504},
- ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664},
- ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432},
- ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520},
- ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378},
- ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421},
- ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459},
- ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502},
- ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540},
- ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423},
- ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668},
- ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547},
- ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}};
-
- static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams);
- std::vector<ChunkedInfoWithSource> Infos1(SourceFiles1.size());
- std::vector<ChunkedInfoWithSource> Infos2(SourceFiles2.size());
-
- WorkerThreadPool WorkerPool(32);
-
- for (size_t I = 0; I < ParamsCount; I++)
- {
- for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++)
- {
- Latch WorkLatch(1);
- ChunkedParams Param = Params[I];
- Param.UseThreshold = UseThreshold == 1;
- Stopwatch Timer;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData1;
- SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead);
- Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param);
- });
- }
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData2;
- SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead);
- Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param);
- });
- }
- WorkLatch.CountDown();
- WorkLatch.Wait();
- uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs();
-
- uint64_t Raw1Size = 0;
- tsl::robin_set<IoHash> Chunks1;
- size_t ChunkedSize1 = 0;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos1[F];
- Raw1Size += Info.Info.RawSize;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index];
- if (Chunks1.insert(ChunkHash).second)
- {
- ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size;
- }
- }
- }
-
- uint64_t Raw2Size = 0;
- tsl::robin_set<IoHash> Chunks2;
- size_t ChunkedSize2 = 0;
- size_t DiffSize = 0;
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos2[F];
- Raw2Size += Info.Info.RawSize;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index];
- if (Chunks2.insert(ChunkHash).second)
- {
- ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size;
- if (!Chunks1.contains(ChunkHash))
- {
- DiffSize += Info.ChunkSources[Chunk2Index].Size;
- }
- }
- }
- }
-
- ZEN_INFO(
- "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, "
- "RawSize(2) = {}, "
- "Saved(1) = {}, Saved(2) = {} in {}",
- NiceBytes(DiffSize),
- Chunks1.size(),
- Chunks2.size(),
- Param.UseThreshold,
- Param.MinSize,
- Param.MaxSize,
- Param.AvgSize,
- NiceBytes(Raw1Size),
- NiceBytes(Raw2Size),
- NiceBytes(Raw1Size - ChunkedSize1),
- NiceBytes(Raw2Size - ChunkedSize2),
- NiceTimeSpanMs(ChunkTimeMS));
- }
- }
-
-# if 0
- for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512)
- {
- for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2)
- {
- // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle);
- // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6;
- // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3;
- size_t MinSize = (size_t)(MinSizeBase + Wiggle);
- //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64)
- size_t MaxSize = 8u * MinSizeBase;
- {
- for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32)
-// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize;
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]()
- {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize };
- BasicFile SourceData1;
- SourceData1.Open(SourcePath1, BasicFile::Mode::kRead);
- BasicFile SourceData2;
- SourceData2.Open(SourcePath2, BasicFile::Mode::kRead);
- ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params);
- ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params);
-
- tsl::robin_set<IoHash> Chunks1;
- Chunks1.reserve(Info1.Info.ChunkHashes.size());
- Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end());
- size_t ChunkedSize1 = 0;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size;
- }
- size_t DiffSavedSize = 0;
- size_t ChunkedSize2 = 0;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size;
- if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end())
- {
- DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size;
- }
- }
- ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}",
- NiceBytes(DiffSavedSize),
- Info1.Info.ChunkHashes.size(),
- Info2.Info.ChunkHashes.size(),
- MinSize,
- MaxSize,
- AvgSize,
- NiceBytes(Info1.Info.RawSize - ChunkedSize1),
- NiceBytes(Info2.Info.RawSize - ChunkedSize2));
- });
- }
- }
- }
- }
-# endif // 0
-
- // WorkLatch.CountDown();
- // WorkLatch.Wait();
-}
-# endif // 0
-
-void
-chunkedfile_forcelink()
-{
-}
-
-} // namespace zen
-
-#endif
diff --git a/src/zenstore/chunking.cpp b/src/zenstore/chunking.cpp
deleted file mode 100644
index 71f0a06e4..000000000
--- a/src/zenstore/chunking.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include "chunking.h"
-
-#include <gsl/gsl-lite.hpp>
-
-#include <cmath>
-#include <cstring>
-
-namespace zen::detail {
-
-static const uint32_t BuzhashTable[] = {
- 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3,
- 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c,
- 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64,
- 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2,
- 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59,
- 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393,
- 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c,
- 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4,
- 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a,
- 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a,
- 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5,
- 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c,
- 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf,
- 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5,
- 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c,
- 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140,
- 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5,
- 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510,
- 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9,
- 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1,
- 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3,
- 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b,
- 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc,
- 0xf9c18d66, 0x593ade65, 0xd95ddf11,
-};
-
-// ROL operation (compiler turns this into a ROL when optimizing)
-ZEN_FORCEINLINE static uint32_t
-Rotate32(uint32_t Value, size_t RotateCount)
-{
- RotateCount &= 31;
-
- return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount));
-}
-
-} // namespace zen::detail
-
-namespace zen {
-
-void
-ZenChunkHelper::Reset()
-{
- InternalReset();
-
- m_BytesScanned = 0;
-}
-
-void
-ZenChunkHelper::InternalReset()
-{
- m_CurrentHash = 0;
- m_CurrentChunkSize = 0;
- m_WindowSize = 0;
-}
-
-void
-ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize)
-{
- if (m_WindowSize)
- return; // Already started
-
- static_assert(kChunkSizeLimitMin > kWindowSize);
-
- if (AvgSize)
- {
- // TODO: Validate AvgSize range
- }
- else
- {
- if (MinSize && MaxSize)
- {
- AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2));
- }
- else if (MinSize)
- {
- AvgSize = MinSize * 4;
- }
- else if (MaxSize)
- {
- AvgSize = MaxSize / 4;
- }
- else
- {
- AvgSize = kDefaultAverageChunkSize;
- }
- }
-
- if (MinSize)
- {
- // TODO: Validate MinSize range
- }
- else
- {
- MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin);
- }
-
- if (MaxSize)
- {
- // TODO: Validate MaxSize range
- }
- else
- {
- MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax);
- }
-
- m_Discriminator = gsl::narrow<uint32_t>(AvgSize - MinSize);
-
- if (m_Discriminator < MinSize)
- {
- m_Discriminator = gsl::narrow<uint32_t>(MinSize);
- }
-
- if (m_Discriminator > MaxSize)
- {
- m_Discriminator = gsl::narrow<uint32_t>(MaxSize);
- }
-
- m_Threshold = gsl::narrow<uint32_t>((uint64_t(std::numeric_limits<uint32_t>::max()) + 1) / m_Discriminator);
-
- m_ChunkSizeMin = MinSize;
- m_ChunkSizeMax = MaxSize;
- m_ChunkSizeAvg = AvgSize;
-}
-
-size_t
-ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount)
-{
- size_t Result = InternalScanChunk(DataBytesIn, ByteCount);
-
- if (Result == kNoBoundaryFound)
- {
- m_BytesScanned += ByteCount;
- }
- else
- {
- m_BytesScanned += Result;
- }
-
- return Result;
-}
-
-size_t
-ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount)
-{
- size_t CurrentOffset = 0;
- const uint8_t* CursorPtr = reinterpret_cast<const uint8_t*>(DataBytesIn);
-
- // There's no point in updating the hash if we know we're not
- // going to have a cut point, so just skip the data. This logic currently
- // provides roughly a 20% speedup on my machine
-
- const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize;
-
- if (m_CurrentChunkSize < NeedHashOffset)
- {
- const uint32_t SkipBytes = gsl::narrow<uint32_t>(std::min<uint64_t>(ByteCount, NeedHashOffset - m_CurrentChunkSize));
-
- ByteCount -= SkipBytes;
- m_CurrentChunkSize += SkipBytes;
- CurrentOffset += SkipBytes;
- CursorPtr += SkipBytes;
-
- m_WindowSize = 0;
-
- if (ByteCount == 0)
- {
- return kNoBoundaryFound;
- }
- }
-
- // Fill window first
-
- if (m_WindowSize < kWindowSize)
- {
- const uint32_t FillBytes = uint32_t(std::min<size_t>(ByteCount, kWindowSize - m_WindowSize));
-
- memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes);
-
- CursorPtr += FillBytes;
-
- m_WindowSize += FillBytes;
- m_CurrentChunkSize += FillBytes;
-
- CurrentOffset += FillBytes;
- ByteCount -= FillBytes;
-
- if (m_WindowSize < kWindowSize)
- {
- return kNoBoundaryFound;
- }
-
- // We have a full window, initialize hash
-
- uint32_t CurrentHash = 0;
-
- for (int i = 1; i < kWindowSize; ++i)
- {
- CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i);
- }
-
- m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]];
- }
-
- // Scan for boundaries (i.e points where the hash matches the value determined by
- // the discriminator)
-
- uint32_t CurrentHash = m_CurrentHash;
- uint32_t CurrentChunkSize = m_CurrentChunkSize;
-
- size_t Index = CurrentChunkSize % kWindowSize;
-
- if (m_Threshold && m_UseThreshold)
- {
- // This is roughly 4x faster than the general modulo approach on my
- // TR 3990X (~940MB/sec) and doesn't require any special parameters to
- // achieve max performance
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = CurrentHash <= m_Threshold;
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
- else if ((m_Discriminator & (m_Discriminator - 1)) == 0)
- {
- // This is quite a bit faster than the generic modulo path, but
- // requires a very specific average chunk size to be used. If you
- // pass in an even power-of-two divided by 0.75 as the average
- // chunk size you'll hit this path
-
- const uint32_t Mask = m_Discriminator - 1;
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = (CurrentHash & Mask) == Mask;
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
- else
- {
- // This is the slowest path, which caps out around 250MB/sec for large sizes
- // on my TR3900X
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1);
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
-
- m_CurrentChunkSize = CurrentChunkSize;
- m_CurrentHash = CurrentHash;
-
- return kNoBoundaryFound;
-}
-
-} // namespace zen
diff --git a/src/zenstore/chunking.h b/src/zenstore/chunking.h
deleted file mode 100644
index 09c56454f..000000000
--- a/src/zenstore/chunking.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-#include <zencore/zencore.h>
-
-namespace zen {
-
-/** Content-defined chunking helper
- */
-class ZenChunkHelper
-{
-public:
- void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize);
- size_t ScanChunk(const void* DataBytes, size_t ByteCount);
- void Reset();
-
- // This controls which chunking approach is used - threshold or
- // modulo based. Threshold is faster and generates similarly sized
- // chunks
- void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; }
-
- inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; }
- inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; }
- inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; }
- inline uint64_t BytesScanned() const { return m_BytesScanned; }
-
- static constexpr size_t kNoBoundaryFound = size_t(~0ull);
-
-private:
- size_t m_ChunkSizeMin = 0;
- size_t m_ChunkSizeMax = 0;
- size_t m_ChunkSizeAvg = 0;
-
- uint32_t m_Discriminator = 0; // Computed in SetChunkSize()
- uint32_t m_Threshold = 0; // Computed in SetChunkSize()
-
- bool m_UseThreshold = true;
-
- static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024;
- static constexpr size_t kChunkSizeLimitMin = 1024;
- static constexpr size_t kDefaultAverageChunkSize = 64 * 1024;
-
- static constexpr int kWindowSize = 48;
- uint8_t m_Window[kWindowSize];
- uint32_t m_WindowSize = 0;
-
- uint32_t m_CurrentHash = 0;
- uint32_t m_CurrentChunkSize = 0;
-
- uint64_t m_BytesScanned = 0;
-
- size_t InternalScanChunk(const void* DataBytes, size_t ByteCount);
- void InternalReset();
-};
-
-} // namespace zen
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 14123528c..34db51aa9 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -185,7 +185,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN
// in this folder as well
struct Visitor : public FileSystemTraversal::TreeVisitor
{
- virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t) override
+ virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t, uint64_t) override
{
// We don't care about files
}
@@ -1174,7 +1174,7 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir)
struct Visitor : public FileSystemTraversal::TreeVisitor
{
Visitor(const std::filesystem::path& RootDir, std::vector<FileCasIndexEntry>& Entries) : RootDirectory(RootDir), Entries(Entries) {}
- virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override
+ virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override
{
std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory);
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index b0b4f22cb..05400c784 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -12,8 +12,9 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
ZEN_THIRD_PARTY_INCLUDES_END
+#include <EASTL/string.h>
+#include <EASTL/unordered_map.h>
#include <filesystem>
-#include <unordered_map>
namespace zen {
@@ -169,7 +170,7 @@ public:
~ZenCacheDiskLayer();
struct GetBatchHandle;
- GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult);
+ GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult);
void EndGetBatch(GetBatchHandle* Batch) noexcept;
bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
void Get(std::string_view Bucket, const IoHash& HashKey, GetBatchHandle& BatchHandle);
@@ -216,13 +217,16 @@ public:
*/
struct CacheBucket : public GcReferencer
{
- CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, std::string BucketName, const BucketConfiguration& Config);
+ CacheBucket(GcManager& Gc,
+ std::atomic_uint64_t& OuterCacheMemoryUsage,
+ std::string_view BucketName,
+ const BucketConfiguration& Config);
~CacheBucket();
bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true);
struct GetBatchHandle;
- GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult);
+ GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult);
void EndGetBatch(GetBatchHandle* Batch) noexcept;
bool Get(const IoHash& HashKey, ZenCacheValue& OutValue);
void Get(const IoHash& HashKey, GetBatchHandle& BatchHandle);
@@ -486,18 +490,20 @@ private:
bool StartAsyncMemCacheTrim();
void MemCacheTrim();
- GcManager& m_Gc;
- JobQueue& m_JobQueue;
- std::filesystem::path m_RootDir;
- Configuration m_Configuration;
- std::atomic_uint64_t m_TotalMemCachedSize{};
- std::atomic_bool m_IsMemCacheTrimming = false;
- std::atomic<GcClock::Tick> m_NextAllowedTrimTick;
- mutable RwLock m_Lock;
- std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets;
- std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets;
- uint32_t m_UpdateCaptureRefCounter = 0;
- std::unique_ptr<std::vector<std::string>> m_CapturedBuckets;
+ typedef eastl::unordered_map<std::string, std::unique_ptr<CacheBucket>, std::hash<std::string>, std::equal_to<std::string>> BucketMap_t;
+
+ GcManager& m_Gc;
+ JobQueue& m_JobQueue;
+ std::filesystem::path m_RootDir;
+ Configuration m_Configuration;
+ std::atomic_uint64_t m_TotalMemCachedSize{};
+ std::atomic_bool m_IsMemCacheTrimming = false;
+ std::atomic<GcClock::Tick> m_NextAllowedTrimTick;
+ mutable RwLock m_Lock;
+ BucketMap_t m_Buckets;
+ std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets;
+ uint32_t m_UpdateCaptureRefCounter = 0;
+ std::unique_ptr<std::vector<std::string>> m_CapturedBuckets;
ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete;
ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete;
diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h
index 9b45c7b21..521c78bb1 100644
--- a/src/zenstore/include/zenstore/cache/cacheshared.h
+++ b/src/zenstore/include/zenstore/cache/cacheshared.h
@@ -6,6 +6,8 @@
#include <zencore/iohash.h>
#include <zenstore/gc.h>
+#include <EASTL/fixed_vector.h>
+
#include <gsl/gsl-lite.hpp>
#include <unordered_map>
@@ -32,6 +34,8 @@ struct ZenCacheValue
IoHash RawHash = IoHash::Zero;
};
+typedef eastl::fixed_vector<ZenCacheValue, 16> ZenCacheValueVec_t;
+
struct CacheValueDetails
{
struct ValueDetails
diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h
index 82fec9b0e..5e056cf2d 100644
--- a/src/zenstore/include/zenstore/cache/structuredcachestore.h
+++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h
@@ -86,7 +86,7 @@ public:
void EndPutBatch(PutBatchHandle* Batch) noexcept;
struct GetBatchHandle;
- GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResults);
+ GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResults);
void EndGetBatch(GetBatchHandle* Batch) noexcept;
bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
@@ -220,14 +220,14 @@ public:
class GetBatch
{
public:
- GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, std::vector<ZenCacheValue>& OutResult);
+ GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, ZenCacheValueVec_t& OutResult);
~GetBatch();
private:
ZenCacheStore& m_CacheStore;
ZenCacheNamespace* m_Store = nullptr;
ZenCacheNamespace::GetBatchHandle* m_NamespaceBatchHandle = nullptr;
- std::vector<ZenCacheValue>& Results;
+ ZenCacheValueVec_t& Results;
friend class ZenCacheStore;
};
diff --git a/src/zenstore/include/zenstore/chunkedfile.h b/src/zenstore/include/zenstore/chunkedfile.h
deleted file mode 100644
index c6330bdbd..000000000
--- a/src/zenstore/include/zenstore/chunkedfile.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/iobuffer.h>
-#include <zencore/iohash.h>
-#include <zencore/zencore.h>
-
-#include <functional>
-#include <vector>
-
-namespace zen {
-
-class BasicFile;
-
-struct ChunkedInfo
-{
- uint64_t RawSize = 0;
- IoHash RawHash;
- std::vector<uint32_t> ChunkSequence;
- std::vector<IoHash> ChunkHashes;
-};
-
-struct ChunkSource
-{
- uint64_t Offset; // 8
- uint32_t Size; // 4
-};
-
-struct ChunkedInfoWithSource
-{
- ChunkedInfo Info;
- std::vector<ChunkSource> ChunkSources;
-};
-
-struct ChunkedParams
-{
- bool UseThreshold = true;
- size_t MinSize = (2u * 1024u) - 128u;
- size_t MaxSize = (16u * 1024u);
- size_t AvgSize = (3u * 1024u);
-};
-
-static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340};
-
-ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {});
-void Reconstruct(const ChunkedInfo& Info,
- const std::filesystem::path& TargetPath,
- std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk);
-IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info);
-ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer);
-
-void chunkedfile_forcelink();
-} // namespace zen
diff --git a/src/zenstore/xmake.lua b/src/zenstore/xmake.lua
index f0bd64d2e..031a66829 100644
--- a/src/zenstore/xmake.lua
+++ b/src/zenstore/xmake.lua
@@ -8,3 +8,4 @@ target('zenstore')
add_includedirs("include", {public=true})
add_deps("zencore", "zenutil")
add_packages("vcpkg::robin-map")
+ add_packages("vcpkg::eastl", {public=true});