diff options
| author | Dan Engelbrecht <[email protected]> | 2025-03-14 09:50:00 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2025-03-14 09:50:00 +0100 |
| commit | 55c67aec301cfc99178ab54c6366cbc88f35d46a (patch) | |
| tree | 84b4c73220f7dd041763b6d1919eedc8d0b90844 /src/zenstore | |
| parent | Merge remote-tracking branch 'origin/de/zen-service-command' into de/zen-serv... (diff) | |
| parent | fix quoted command lines arguments (#306) (diff) | |
| download | zen-55c67aec301cfc99178ab54c6366cbc88f35d46a.tar.xz zen-55c67aec301cfc99178ab54c6366cbc88f35d46a.zip | |
Merge remote-tracking branch 'origin/main' into de/zen-service-command
Diffstat (limited to 'src/zenstore')
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 121 | ||||
| -rw-r--r-- | src/zenstore/cache/cacherpc.cpp | 73 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 8 | ||||
| -rw-r--r-- | src/zenstore/chunkedfile.cpp | 505 | ||||
| -rw-r--r-- | src/zenstore/chunking.cpp | 383 | ||||
| -rw-r--r-- | src/zenstore/chunking.h | 56 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 4 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 38 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cacheshared.h | 4 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/structuredcachestore.h | 6 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/chunkedfile.h | 54 | ||||
| -rw-r--r-- | src/zenstore/xmake.lua | 1 |
12 files changed, 145 insertions, 1108 deletions
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 25f68330a..61552fafc 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -708,11 +708,11 @@ namespace zen { ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, - std::string BucketName, + std::string_view BucketName, const BucketConfiguration& Config) : m_Gc(Gc) , m_OuterCacheMemoryUsage(OuterCacheMemoryUsage) -, m_BucketName(std::move(BucketName)) +, m_BucketName(BucketName) , m_Configuration(Config) , m_BucketId(Oid::Zero) { @@ -1329,7 +1329,7 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) { Keys.reserve(OutResults.capacity()); ResultIndexes.reserve(OutResults.capacity()); @@ -1340,11 +1340,11 @@ struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle std::vector<IoHash> Keys; std::vector<size_t> ResultIndexes; - std::vector<ZenCacheValue>& OutResults; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::CacheBucket::GetBatchHandle* -ZenCacheDiskLayer::CacheBucket::BeginGetBatch(std::vector<ZenCacheValue>& OutResult) +ZenCacheDiskLayer::CacheBucket::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZEN_TRACE_CPU("Z$::Bucket::BeginGetBatch"); return new GetBatchHandle(OutResult); @@ -1364,13 +1364,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!Batch->ResultIndexes.empty()) { - std::vector<DiskLocation> StandaloneDiskLocations; - std::vector<size_t> StandaloneKeyIndexes; - std::vector<size_t> MemCachedKeyIndexes; - std::vector<DiskLocation> InlineDiskLocations; - std::vector<BlockStoreLocation> InlineBlockLocations; - std::vector<size_t> InlineKeyIndexes; - std::vector<bool> FillRawHashAndRawSize(Batch->Keys.size(), false); + eastl::fixed_vector<DiskLocation, 16> StandaloneDiskLocations; + eastl::fixed_vector<size_t, 16> StandaloneKeyIndexes; + eastl::fixed_vector<size_t, 16> MemCachedKeyIndexes; + eastl::fixed_vector<DiskLocation, 16> InlineDiskLocations; + eastl::fixed_vector<BlockStoreLocation, 16> InlineBlockLocations; + eastl::fixed_vector<size_t, 16> InlineKeyIndexes; + eastl::fixed_vector<bool, 16> FillRawHashAndRawSize(Batch->Keys.size(), false); { RwLock::SharedLockScope IndexLock(m_IndexLock); for (size_t KeyIndex = 0; KeyIndex < Batch->Keys.size(); KeyIndex++) @@ -1526,33 +1526,35 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!InlineDiskLocations.empty()) { ZEN_TRACE_CPU("Z$::Bucket::EndGetBatch::ReadInline"); - m_BlockStore.IterateChunks(InlineBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool { - // Only read into memory the IoBuffers we could potentially add to memcache - const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); - m_BlockStore.IterateBlock( - InlineBlockLocations, - ChunkIndexes, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - const void* Data, - uint64_t Size) -> bool { - if (Data != nullptr) - { - FillOne(InlineDiskLocations[ChunkIndex], - InlineKeyIndexes[ChunkIndex], - IoBufferBuilder::MakeCloneFromMemory(Data, Size)); - } - return true; - }, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - BlockStoreFile& File, - uint64_t Offset, - uint64_t Size) -> bool { - FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); - return true; - }, - LargeChunkSizeLimit); - return true; - }); + m_BlockStore.IterateChunks( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool { + // Only read into memory the IoBuffers we could potentially add to memcache + const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); + m_BlockStore.IterateBlock( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + ChunkIndexes, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + const void* Data, + uint64_t Size) -> bool { + if (Data != nullptr) + { + FillOne(InlineDiskLocations[ChunkIndex], + InlineKeyIndexes[ChunkIndex], + IoBufferBuilder::MakeCloneFromMemory(Data, Size)); + } + return true; + }, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + BlockStoreFile& File, + uint64_t Offset, + uint64_t Size) -> bool { + FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); + return true; + }, + LargeChunkSizeLimit); + return true; + }); } if (!StandaloneDiskLocations.empty()) @@ -3581,15 +3583,29 @@ ZenCacheDiskLayer::~ZenCacheDiskLayer() } } +template<typename T, typename U> +struct equal_to_2 : public eastl::binary_function<T, U, bool> +{ + constexpr bool operator()(const T& a, const U& b) const { return a == b; } + + template<typename T_ = T, + typename U_ = U, + typename = eastl::enable_if_t<!eastl::is_same_v<eastl::remove_const_t<T_>, eastl::remove_const_t<U_>>>> + constexpr bool operator()(const U& b, const T& a) const + { + return b == a; + } +}; + ZenCacheDiskLayer::CacheBucket* ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) { ZEN_TRACE_CPU("Z$::GetOrCreateBucket"); - const auto BucketName = std::string(InBucket); { RwLock::SharedLockScope SharedLock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), equal_to_2<std::string, std::string_view>()); + It != m_Buckets.end()) { return It->second.get(); } @@ -3597,31 +3613,32 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) // We create the bucket without holding a lock since contructor calls GcManager::AddGcReferencer which takes an exclusive lock. // This can cause a deadlock, if GC is running we would block while holding ZenCacheDiskLayer::m_Lock - std::unique_ptr<CacheBucket> Bucket( - std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig)); + std::unique_ptr<CacheBucket> Bucket(std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, InBucket, m_Configuration.BucketConfig)); RwLock::ExclusiveLockScope Lock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), equal_to_2<std::string, std::string_view>()); + It != m_Buckets.end()) { return It->second.get(); } std::filesystem::path BucketPath = m_RootDir; - BucketPath /= BucketName; + BucketPath /= InBucket; try { if (!Bucket->OpenOrCreate(BucketPath)) { - ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", InBucket, m_RootDir); return nullptr; } } catch (const std::exception& Err) { - ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", InBucket, BucketPath, Err.what()); throw; } + std::string BucketName{InBucket}; CacheBucket* Result = Bucket.get(); m_Buckets.emplace(BucketName, std::move(Bucket)); if (m_CapturedBuckets) @@ -3720,7 +3737,7 @@ ZenCacheDiskLayer::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) {} + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) {} struct BucketHandle { CacheBucket* Bucket; @@ -3780,13 +3797,13 @@ struct ZenCacheDiskLayer::GetBatchHandle return NewBucketHandle; } - RwLock Lock; - std::vector<BucketHandle> BucketHandles; - std::vector<ZenCacheValue>& OutResults; + RwLock Lock; + eastl::fixed_vector<BucketHandle, 4> BucketHandles; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::GetBatchHandle* -ZenCacheDiskLayer::BeginGetBatch(std::vector<ZenCacheValue>& OutResults) +ZenCacheDiskLayer::BeginGetBatch(ZenCacheValueVec_t& OutResults) { return new GetBatchHandle(OutResults); } diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp index cca51e63e..97e26a38d 100644 --- a/src/zenstore/cache/cacherpc.cpp +++ b/src/zenstore/cache/cacherpc.cpp @@ -20,6 +20,8 @@ #include <zencore/memory/llm.h> +#include <EASTL/fixed_vector.h> + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -89,7 +91,7 @@ GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) return false; } IoHash Hash = HashField.AsHash(); - Key = CacheKey::Create(*Bucket, Hash); + Key = CacheKey::CreateValidated(std::move(*Bucket), Hash); return true; } @@ -305,7 +307,7 @@ CacheRpcHandler::HandleRpcPutCacheRecords(const CacheRequestContext& Context, co } DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; - std::vector<bool> Results; + eastl::fixed_vector<bool, 32> Results; CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); for (CbFieldView RequestField : RequestsArray) @@ -481,16 +483,15 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb bool Exists = false; bool ReadFromUpstream = false; }; - struct RecordRequestData + struct RecordRequestData : public CacheKeyRequest { - CacheKeyRequest Upstream; - CbObjectView RecordObject; - IoBuffer RecordCacheValue; - CacheRecordPolicy DownstreamPolicy; - std::vector<ValueRequestData> Values; - bool Complete = false; - const UpstreamEndpointInfo* Source = nullptr; - uint64_t ElapsedTimeUs; + CbObjectView RecordObject; + IoBuffer RecordCacheValue; + CacheRecordPolicy DownstreamPolicy; + eastl::fixed_vector<ValueRequestData, 4> Values; + bool Complete = false; + const UpstreamEndpointInfo* Source = nullptr; + uint64_t ElapsedTimeUs; }; std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); @@ -503,8 +504,8 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector<RecordRequestData> Requests; - std::vector<size_t> UpstreamIndexes; + eastl::fixed_vector<RecordRequestData, 16> Requests; + eastl::fixed_vector<size_t, 16> UpstreamIndexes; auto ParseValues = [](RecordRequestData& Request) { CbArrayView ValuesArray = Request.RecordObject["Values"sv].AsArrayView(); @@ -535,7 +536,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbObjectView RequestObject = RequestField.AsObjectView(); CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CacheKey& Key = Request.Upstream.Key; + CacheKey& Key = Request.Key; if (!GetRpcRequestCacheKey(KeyObject, Key)) { return CbPackage{}; @@ -707,7 +708,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb for (size_t Index : UpstreamIndexes) { RecordRequestData& Request = Requests[Index]; - UpstreamRequests.push_back(&Request.Upstream); + UpstreamRequests.push_back(&Request); if (Request.Values.size()) { @@ -721,13 +722,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb UpstreamPolicy |= !Value.ReadFromUpstream ? CachePolicy::SkipData : CachePolicy::None; Builder.AddValuePolicy(Value.ValueId, UpstreamPolicy); } - Request.Upstream.Policy = Builder.Build(); + Request.Policy = Builder.Build(); } else { // We don't know which Values exist in the Record; ask the upstrem for all values that the client wants, // and convert the CacheRecordPolicy to an upstream policy - Request.Upstream.Policy = Request.DownstreamPolicy.ConvertToUpstream(); + Request.Policy = Request.DownstreamPolicy.ConvertToUpstream(); } } @@ -737,10 +738,9 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb return; } - RecordRequestData& Request = - *reinterpret_cast<RecordRequestData*>(reinterpret_cast<char*>(&Params.Request) - offsetof(RecordRequestData, Upstream)); + RecordRequestData& Request = *static_cast<RecordRequestData*>(&Params.Request); Request.ElapsedTimeUs += static_cast<uint64_t>(Params.ElapsedSeconds * 1000000.0); - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; Stopwatch Timer; auto TimeGuard = MakeGuard([&Timer, &Request]() { Request.ElapsedTimeUs += Timer.GetElapsedTimeUs(); }); if (!Request.RecordObject) @@ -832,10 +832,12 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbPackage ResponsePackage; CbObjectWriter ResponseObject{2048}; + ResponsePackage.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (RecordRequestData& Request : Requests) { - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; if (Request.Complete || (Request.RecordObject && EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::PartialRecord))) { @@ -910,11 +912,12 @@ CacheRpcHandler::HandleRpcPutCacheValues(const CacheRequestContext& Context, con const bool HasUpstream = m_UpstreamCache.IsActive(); CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector<bool> BatchResults; - std::vector<size_t> BatchResultIndexes; - std::vector<bool> Results; - std::vector<CacheKey> UpstreamCacheKeys; - uint64_t RequestCount = RequestsArray.Num(); + std::vector<bool> BatchResults; + eastl::fixed_vector<size_t, 32> BatchResultIndexes; + eastl::fixed_vector<bool, 32> Results; + eastl::fixed_vector<CacheKey, 32> UpstreamCacheKeys; + + uint64_t RequestCount = RequestsArray.Num(); { Results.reserve(RequestCount); std::unique_ptr<ZenCacheStore::PutBatch> Batch; @@ -1099,15 +1102,15 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO uint64_t RawSize = 0; CompressedBuffer Result; }; - std::vector<RequestData> Requests; + eastl::fixed_vector<RequestData, 16> Requests; - std::vector<size_t> RemoteRequestIndexes; + eastl::fixed_vector<size_t, 16> RemoteRequestIndexes; const bool HasUpstream = m_UpstreamCache.IsActive(); - CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector<ZenCacheValue> CacheValues; - const uint64_t RequestCount = RequestsArray.Num(); + CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); + ZenCacheValueVec_t CacheValues; + const uint64_t RequestCount = RequestsArray.Num(); CacheValues.reserve(RequestCount); { std::unique_ptr<ZenCacheStore::GetBatch> Batch; @@ -1136,7 +1139,6 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO CacheKey& Key = Request.Key; CachePolicy Policy = Request.Policy; - ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { if (Batch) @@ -1276,6 +1278,9 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO ZEN_TRACE_CPU("Z$::RpcGetCacheValues::Response"); CbPackage RpcResponse; CbObjectWriter ResponseObject{1024}; + + RpcResponse.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (const RequestData& Request : Requests) { @@ -1642,7 +1647,7 @@ CacheRpcHandler::GetLocalCacheValues(const CacheRequestContext& Context, using namespace cache::detail; const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector<ZenCacheValue> Chunks; + ZenCacheValueVec_t Chunks; Chunks.reserve(ValueRequests.size()); { std::unique_ptr<ZenCacheStore::GetBatch> Batch; @@ -1796,6 +1801,8 @@ CacheRpcHandler::WriteGetCacheChunksResponse([[maybe_unused]] const CacheRequest CbPackage RpcResponse; CbObjectWriter Writer{1024}; + RpcResponse.ReserveAttachments(Requests.size()); + Writer.BeginArray("Result"sv); for (ChunkRequest& Request : Requests) { diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index 133cb42d7..7d277329e 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -178,13 +178,13 @@ ZenCacheNamespace::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheNamespace::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResult) : Results(OutResult) {} - std::vector<ZenCacheValue>& Results; + GetBatchHandle(ZenCacheValueVec_t& OutResult) : Results(OutResult) {} + ZenCacheValueVec_t& Results; ZenCacheDiskLayer::GetBatchHandle* DiskLayerHandle = nullptr; }; ZenCacheNamespace::GetBatchHandle* -ZenCacheNamespace::BeginGetBatch(std::vector<ZenCacheValue>& OutResult) +ZenCacheNamespace::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZenCacheNamespace::GetBatchHandle* Handle = new ZenCacheNamespace::GetBatchHandle(OutResult); Handle->DiskLayerHandle = m_DiskLayer.BeginGetBatch(OutResult); @@ -580,7 +580,7 @@ ZenCacheStore::PutBatch::~PutBatch() } } -ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, std::vector<ZenCacheValue>& OutResult) +ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, ZenCacheValueVec_t& OutResult) : m_CacheStore(CacheStore) , Results(OutResult) { diff --git a/src/zenstore/chunkedfile.cpp b/src/zenstore/chunkedfile.cpp deleted file mode 100644 index f200bc1ec..000000000 --- a/src/zenstore/chunkedfile.cpp +++ /dev/null @@ -1,505 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zencore/basicfile.h> -#include <zenstore/chunkedfile.h> - -#include "chunking.h" - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -#include <gsl/gsl-lite.hpp> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -namespace { - struct ChunkedHeader - { - static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd - static constexpr uint32_t CurrentVersion = 1; - - uint32_t Magic = ExpectedMagic; - uint32_t Version = CurrentVersion; - uint32_t ChunkSequenceLength; - uint32_t ChunkHashCount; - uint64_t ChunkSequenceOffset; - uint64_t ChunkHashesOffset; - uint64_t RawSize = 0; - IoHash RawHash; - }; -} // namespace - -IoBuffer -SerializeChunkedInfo(const ChunkedInfo& Info) -{ - size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + - RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); - IoBuffer HeaderData(HeaderSize); - - ChunkedHeader Header; - Header.ChunkSequenceLength = gsl::narrow<uint32_t>(Info.ChunkSequence.size()); - Header.ChunkHashCount = gsl::narrow<uint32_t>(Info.ChunkHashes.size()); - Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16); - Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16); - Header.RawSize = Info.RawSize; - Header.RawHash = Info.RawHash; - - MutableMemoryView WriteView = HeaderData.GetMutableView(); - { - MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header)); - HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header))); - } - { - MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength); - ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize())); - } - { - MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount); - ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize())); - } - - return HeaderData; -} - -ChunkedInfo -DeserializeChunkedInfo(IoBuffer& Buffer) -{ - MemoryView View = Buffer.GetView(); - ChunkedHeader Header; - { - MutableMemoryView HeaderWriteView(&Header, sizeof(Header)); - HeaderWriteView.CopyFrom(View.Left(sizeof(Header))); - } - if (Header.Magic != ChunkedHeader::ExpectedMagic) - { - return {}; - } - if (Header.Version != ChunkedHeader::CurrentVersion) - { - return {}; - } - ChunkedInfo Info; - Info.RawSize = Header.RawSize; - Info.RawHash = Header.RawHash; - Info.ChunkSequence.resize(Header.ChunkSequenceLength); - Info.ChunkHashes.resize(Header.ChunkHashCount); - { - MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength); - ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize())); - } - { - MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount); - ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize())); - } - - return Info; -} - -void -Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk) -{ - BasicFile Reconstructed; - Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); - BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); - uint64_t Offset = 0; - for (uint32_t SequenceIndex : Info.ChunkSequence) - { - IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]); - ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset); - Offset += Chunk.GetSize(); - } -} - -ChunkedInfoWithSource -ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params) -{ - ChunkedInfoWithSource Result; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks; - - ZenChunkHelper Chunker; - Chunker.SetUseThreshold(Params.UseThreshold); - Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); - size_t End = Offset + Size; - const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; - BasicFileBuffer RawBuffer(RawData, ScanBufferSize); - MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); - ZEN_ASSERT(!SliceView.IsEmpty()); - size_t SliceSize = SliceView.GetSize(); - IoHashStream RawHashStream; - while (Offset < End) - { - size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); - if (ScanLength == ZenChunkHelper::kNoBoundaryFound) - { - if (Offset + SliceSize == End) - { - ScanLength = SliceSize; - } - else - { - SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); - SliceSize = SliceView.GetSize(); - Chunker.Reset(); - continue; - } - } - uint32_t ChunkLength = gsl::narrow<uint32_t>(ScanLength); // +HashedLength); - MemoryView ChunkView = SliceView.Left(ScanLength); - RawHashStream.Append(ChunkView); - IoHash ChunkHash = IoHash::HashBuffer(ChunkView); - SliceView.RightChopInline(ScanLength); - if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end()) - { - Result.Info.ChunkSequence.push_back(It->second); - } - else - { - uint32_t ChunkIndex = gsl::narrow<uint32_t>(Result.Info.ChunkHashes.size()); - FoundChunks.insert_or_assign(ChunkHash, ChunkIndex); - Result.Info.ChunkHashes.push_back(ChunkHash); - Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength}); - Result.Info.ChunkSequence.push_back(ChunkIndex); - } - - SliceSize = SliceView.GetSize(); - Offset += ChunkLength; - } - Result.Info.RawSize = Size; - Result.Info.RawHash = RawHashStream.GetHash(); - return Result; -} - -} // namespace zen - -#if ZEN_WITH_TESTS -# include <zencore/filesystem.h> -# include <zencore/fmtutils.h> -# include <zencore/iohash.h> -# include <zencore/logging.h> -# include <zencore/scopeguard.h> -# include <zencore/timer.h> -# include <zencore/testing.h> -# include <zencore/testutils.h> -# include <zencore/workthreadpool.h> - -# include "chunking.h" - -ZEN_THIRD_PARTY_INCLUDES_START -# include <tsl/robin_map.h> -# include <tsl/robin_set.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { -# if 0 -TEST_CASE("chunkedfile.findparams") -{ -# if 1 - DirectoryContent SourceContent1; - GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1); - const std::vector<std::filesystem::path>& SourceFiles1 = SourceContent1.Files; - DirectoryContent SourceContent2; - GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2); - const std::vector<std::filesystem::path>& SourceFiles2 = SourceContent2.Files; -# else - std::filesystem::path SourcePath1 = - "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; - std::filesystem::path SourcePath2 = - "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; - const std::vector<std::filesystem::path>& SourceFiles1 = {SourcePath1}; - const std::vector<std::filesystem::path>& SourceFiles2 = {SourcePath2}; -# endif - ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}, - ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598}, - ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030}, - ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222}, - ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600}, - ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442}, - ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950}, - ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914}, - ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556}, - ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520}, - ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478}, - ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072}, - ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880}, - ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678}, - ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994}, - ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714}, - ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636}, - ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609}, - ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756}, - ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955}, - ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792}, - ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338}, - ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568}, - ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108}, - ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297}, - ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188}, - ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372}, - ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592}, - ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805}, - ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263}, - ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534}, - ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839}, - ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360}, - ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976}, - ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493}, - ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643}, - ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504}, - ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664}, - ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048}, - ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432}, - ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520}, - ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378}, - ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421}, - ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459}, - ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502}, - ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540}, - ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423}, - ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668}, - ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547}, - ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}}; - - static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams); - std::vector<ChunkedInfoWithSource> Infos1(SourceFiles1.size()); - std::vector<ChunkedInfoWithSource> Infos2(SourceFiles2.size()); - - WorkerThreadPool WorkerPool(32); - - for (size_t I = 0; I < ParamsCount; I++) - { - for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++) - { - Latch WorkLatch(1); - ChunkedParams Param = Params[I]; - Param.UseThreshold = UseThreshold == 1; - Stopwatch Timer; - for (size_t F = 0; F < SourceFiles1.size(); F++) - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - BasicFile SourceData1; - SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead); - Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param); - }); - } - for (size_t F = 0; F < SourceFiles2.size(); F++) - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - BasicFile SourceData2; - SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead); - Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param); - }); - } - WorkLatch.CountDown(); - WorkLatch.Wait(); - uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs(); - - uint64_t Raw1Size = 0; - tsl::robin_set<IoHash> Chunks1; - size_t ChunkedSize1 = 0; - for (size_t F = 0; F < SourceFiles1.size(); F++) - { - const ChunkedInfoWithSource& Info = Infos1[F]; - Raw1Size += Info.Info.RawSize; - for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index) - { - const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index]; - if (Chunks1.insert(ChunkHash).second) - { - ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size; - } - } - } - - uint64_t Raw2Size = 0; - tsl::robin_set<IoHash> Chunks2; - size_t ChunkedSize2 = 0; - size_t DiffSize = 0; - for (size_t F = 0; F < SourceFiles2.size(); F++) - { - const ChunkedInfoWithSource& Info = Infos2[F]; - Raw2Size += Info.Info.RawSize; - for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index) - { - const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index]; - if (Chunks2.insert(ChunkHash).second) - { - ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size; - if (!Chunks1.contains(ChunkHash)) - { - DiffSize += Info.ChunkSources[Chunk2Index].Size; - } - } - } - } - - ZEN_INFO( - "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, " - "RawSize(2) = {}, " - "Saved(1) = {}, Saved(2) = {} in {}", - NiceBytes(DiffSize), - Chunks1.size(), - Chunks2.size(), - Param.UseThreshold, - Param.MinSize, - Param.MaxSize, - Param.AvgSize, - NiceBytes(Raw1Size), - NiceBytes(Raw2Size), - NiceBytes(Raw1Size - ChunkedSize1), - NiceBytes(Raw2Size - ChunkedSize2), - NiceTimeSpanMs(ChunkTimeMS)); - } - } - -# if 0 - for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512) - { - for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2) - { - // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle); - // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6; - // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3; - size_t MinSize = (size_t)(MinSizeBase + Wiggle); - //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64) - size_t MaxSize = 8u * MinSizeBase; - { - for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32) -// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize; - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]() - { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize }; - BasicFile SourceData1; - SourceData1.Open(SourcePath1, BasicFile::Mode::kRead); - BasicFile SourceData2; - SourceData2.Open(SourcePath2, BasicFile::Mode::kRead); - ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params); - ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params); - - tsl::robin_set<IoHash> Chunks1; - Chunks1.reserve(Info1.Info.ChunkHashes.size()); - Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end()); - size_t ChunkedSize1 = 0; - for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index) - { - ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size; - } - size_t DiffSavedSize = 0; - size_t ChunkedSize2 = 0; - for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index) - { - ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size; - if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end()) - { - DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size; - } - } - ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}", - NiceBytes(DiffSavedSize), - Info1.Info.ChunkHashes.size(), - Info2.Info.ChunkHashes.size(), - MinSize, - MaxSize, - AvgSize, - NiceBytes(Info1.Info.RawSize - ChunkedSize1), - NiceBytes(Info2.Info.RawSize - ChunkedSize2)); - }); - } - } - } - } -# endif // 0 - - // WorkLatch.CountDown(); - // WorkLatch.Wait(); -} -# endif // 0 - -void -chunkedfile_forcelink() -{ -} - -} // namespace zen - -#endif diff --git a/src/zenstore/chunking.cpp b/src/zenstore/chunking.cpp deleted file mode 100644 index 71f0a06e4..000000000 --- a/src/zenstore/chunking.cpp +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include "chunking.h" - -#include <gsl/gsl-lite.hpp> - -#include <cmath> -#include <cstring> - -namespace zen::detail { - -static const uint32_t BuzhashTable[] = { - 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3, - 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c, - 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64, - 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, - 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59, - 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393, - 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c, - 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, - 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a, - 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a, - 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5, - 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, - 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, - 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5, - 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c, - 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, - 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5, - 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510, - 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9, - 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, - 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, - 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b, - 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc, - 0xf9c18d66, 0x593ade65, 0xd95ddf11, -}; - -// ROL operation (compiler turns this into a ROL when optimizing) -ZEN_FORCEINLINE static uint32_t -Rotate32(uint32_t Value, size_t RotateCount) -{ - RotateCount &= 31; - - return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount)); -} - -} // namespace zen::detail - -namespace zen { - -void -ZenChunkHelper::Reset() -{ - InternalReset(); - - m_BytesScanned = 0; -} - -void -ZenChunkHelper::InternalReset() -{ - m_CurrentHash = 0; - m_CurrentChunkSize = 0; - m_WindowSize = 0; -} - -void -ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize) -{ - if (m_WindowSize) - return; // Already started - - static_assert(kChunkSizeLimitMin > kWindowSize); - - if (AvgSize) - { - // TODO: Validate AvgSize range - } - else - { - if (MinSize && MaxSize) - { - AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2)); - } - else if (MinSize) - { - AvgSize = MinSize * 4; - } - else if (MaxSize) - { - AvgSize = MaxSize / 4; - } - else - { - AvgSize = kDefaultAverageChunkSize; - } - } - - if (MinSize) - { - // TODO: Validate MinSize range - } - else - { - MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin); - } - - if (MaxSize) - { - // TODO: Validate MaxSize range - } - else - { - MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax); - } - - m_Discriminator = gsl::narrow<uint32_t>(AvgSize - MinSize); - - if (m_Discriminator < MinSize) - { - m_Discriminator = gsl::narrow<uint32_t>(MinSize); - } - - if (m_Discriminator > MaxSize) - { - m_Discriminator = gsl::narrow<uint32_t>(MaxSize); - } - - m_Threshold = gsl::narrow<uint32_t>((uint64_t(std::numeric_limits<uint32_t>::max()) + 1) / m_Discriminator); - - m_ChunkSizeMin = MinSize; - m_ChunkSizeMax = MaxSize; - m_ChunkSizeAvg = AvgSize; -} - -size_t -ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount) -{ - size_t Result = InternalScanChunk(DataBytesIn, ByteCount); - - if (Result == kNoBoundaryFound) - { - m_BytesScanned += ByteCount; - } - else - { - m_BytesScanned += Result; - } - - return Result; -} - -size_t -ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount) -{ - size_t CurrentOffset = 0; - const uint8_t* CursorPtr = reinterpret_cast<const uint8_t*>(DataBytesIn); - - // There's no point in updating the hash if we know we're not - // going to have a cut point, so just skip the data. This logic currently - // provides roughly a 20% speedup on my machine - - const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize; - - if (m_CurrentChunkSize < NeedHashOffset) - { - const uint32_t SkipBytes = gsl::narrow<uint32_t>(std::min<uint64_t>(ByteCount, NeedHashOffset - m_CurrentChunkSize)); - - ByteCount -= SkipBytes; - m_CurrentChunkSize += SkipBytes; - CurrentOffset += SkipBytes; - CursorPtr += SkipBytes; - - m_WindowSize = 0; - - if (ByteCount == 0) - { - return kNoBoundaryFound; - } - } - - // Fill window first - - if (m_WindowSize < kWindowSize) - { - const uint32_t FillBytes = uint32_t(std::min<size_t>(ByteCount, kWindowSize - m_WindowSize)); - - memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes); - - CursorPtr += FillBytes; - - m_WindowSize += FillBytes; - m_CurrentChunkSize += FillBytes; - - CurrentOffset += FillBytes; - ByteCount -= FillBytes; - - if (m_WindowSize < kWindowSize) - { - return kNoBoundaryFound; - } - - // We have a full window, initialize hash - - uint32_t CurrentHash = 0; - - for (int i = 1; i < kWindowSize; ++i) - { - CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i); - } - - m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]]; - } - - // Scan for boundaries (i.e points where the hash matches the value determined by - // the discriminator) - - uint32_t CurrentHash = m_CurrentHash; - uint32_t CurrentChunkSize = m_CurrentChunkSize; - - size_t Index = CurrentChunkSize % kWindowSize; - - if (m_Threshold && m_UseThreshold) - { - // This is roughly 4x faster than the general modulo approach on my - // TR 3990X (~940MB/sec) and doesn't require any special parameters to - // achieve max performance - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = CurrentHash <= m_Threshold; - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - else if ((m_Discriminator & (m_Discriminator - 1)) == 0) - { - // This is quite a bit faster than the generic modulo path, but - // requires a very specific average chunk size to be used. If you - // pass in an even power-of-two divided by 0.75 as the average - // chunk size you'll hit this path - - const uint32_t Mask = m_Discriminator - 1; - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = (CurrentHash & Mask) == Mask; - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - else - { - // This is the slowest path, which caps out around 250MB/sec for large sizes - // on my TR3900X - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1); - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - - m_CurrentChunkSize = CurrentChunkSize; - m_CurrentHash = CurrentHash; - - return kNoBoundaryFound; -} - -} // namespace zen diff --git a/src/zenstore/chunking.h b/src/zenstore/chunking.h deleted file mode 100644 index 09c56454f..000000000 --- a/src/zenstore/chunking.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once -#include <zencore/zencore.h> - -namespace zen { - -/** Content-defined chunking helper - */ -class ZenChunkHelper -{ -public: - void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize); - size_t ScanChunk(const void* DataBytes, size_t ByteCount); - void Reset(); - - // This controls which chunking approach is used - threshold or - // modulo based. Threshold is faster and generates similarly sized - // chunks - void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; } - - inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; } - inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; } - inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; } - inline uint64_t BytesScanned() const { return m_BytesScanned; } - - static constexpr size_t kNoBoundaryFound = size_t(~0ull); - -private: - size_t m_ChunkSizeMin = 0; - size_t m_ChunkSizeMax = 0; - size_t m_ChunkSizeAvg = 0; - - uint32_t m_Discriminator = 0; // Computed in SetChunkSize() - uint32_t m_Threshold = 0; // Computed in SetChunkSize() - - bool m_UseThreshold = true; - - static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024; - static constexpr size_t kChunkSizeLimitMin = 1024; - static constexpr size_t kDefaultAverageChunkSize = 64 * 1024; - - static constexpr int kWindowSize = 48; - uint8_t m_Window[kWindowSize]; - uint32_t m_WindowSize = 0; - - uint32_t m_CurrentHash = 0; - uint32_t m_CurrentChunkSize = 0; - - uint64_t m_BytesScanned = 0; - - size_t InternalScanChunk(const void* DataBytes, size_t ByteCount); - void InternalReset(); -}; - -} // namespace zen diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 14123528c..34db51aa9 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -185,7 +185,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN // in this folder as well struct Visitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t, uint64_t) override { // We don't care about files } @@ -1174,7 +1174,7 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) struct Visitor : public FileSystemTraversal::TreeVisitor { Visitor(const std::filesystem::path& RootDir, std::vector<FileCasIndexEntry>& Entries) : RootDirectory(RootDir), Entries(Entries) {} - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory); diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index b0b4f22cb..05400c784 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -12,8 +12,9 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END +#include <EASTL/string.h> +#include <EASTL/unordered_map.h> #include <filesystem> -#include <unordered_map> namespace zen { @@ -169,7 +170,7 @@ public: ~ZenCacheDiskLayer(); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); void Get(std::string_view Bucket, const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -216,13 +217,16 @@ public: */ struct CacheBucket : public GcReferencer { - CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, std::string BucketName, const BucketConfiguration& Config); + CacheBucket(GcManager& Gc, + std::atomic_uint64_t& OuterCacheMemoryUsage, + std::string_view BucketName, + const BucketConfiguration& Config); ~CacheBucket(); bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); void Get(const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -486,18 +490,20 @@ private: bool StartAsyncMemCacheTrim(); void MemCacheTrim(); - GcManager& m_Gc; - JobQueue& m_JobQueue; - std::filesystem::path m_RootDir; - Configuration m_Configuration; - std::atomic_uint64_t m_TotalMemCachedSize{}; - std::atomic_bool m_IsMemCacheTrimming = false; - std::atomic<GcClock::Tick> m_NextAllowedTrimTick; - mutable RwLock m_Lock; - std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets; - std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; - uint32_t m_UpdateCaptureRefCounter = 0; - std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; + typedef eastl::unordered_map<std::string, std::unique_ptr<CacheBucket>, std::hash<std::string>, std::equal_to<std::string>> BucketMap_t; + + GcManager& m_Gc; + JobQueue& m_JobQueue; + std::filesystem::path m_RootDir; + Configuration m_Configuration; + std::atomic_uint64_t m_TotalMemCachedSize{}; + std::atomic_bool m_IsMemCacheTrimming = false; + std::atomic<GcClock::Tick> m_NextAllowedTrimTick; + mutable RwLock m_Lock; + BucketMap_t m_Buckets; + std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; + uint32_t m_UpdateCaptureRefCounter = 0; + std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete; ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index 9b45c7b21..521c78bb1 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -6,6 +6,8 @@ #include <zencore/iohash.h> #include <zenstore/gc.h> +#include <EASTL/fixed_vector.h> + #include <gsl/gsl-lite.hpp> #include <unordered_map> @@ -32,6 +34,8 @@ struct ZenCacheValue IoHash RawHash = IoHash::Zero; }; +typedef eastl::fixed_vector<ZenCacheValue, 16> ZenCacheValueVec_t; + struct CacheValueDetails { struct ValueDetails diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 82fec9b0e..5e056cf2d 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -86,7 +86,7 @@ public: void EndPutBatch(PutBatchHandle* Batch) noexcept; struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResults); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResults); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); @@ -220,14 +220,14 @@ public: class GetBatch { public: - GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, std::vector<ZenCacheValue>& OutResult); + GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, ZenCacheValueVec_t& OutResult); ~GetBatch(); private: ZenCacheStore& m_CacheStore; ZenCacheNamespace* m_Store = nullptr; ZenCacheNamespace::GetBatchHandle* m_NamespaceBatchHandle = nullptr; - std::vector<ZenCacheValue>& Results; + ZenCacheValueVec_t& Results; friend class ZenCacheStore; }; diff --git a/src/zenstore/include/zenstore/chunkedfile.h b/src/zenstore/include/zenstore/chunkedfile.h deleted file mode 100644 index c6330bdbd..000000000 --- a/src/zenstore/include/zenstore/chunkedfile.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/iobuffer.h> -#include <zencore/iohash.h> -#include <zencore/zencore.h> - -#include <functional> -#include <vector> - -namespace zen { - -class BasicFile; - -struct ChunkedInfo -{ - uint64_t RawSize = 0; - IoHash RawHash; - std::vector<uint32_t> ChunkSequence; - std::vector<IoHash> ChunkHashes; -}; - -struct ChunkSource -{ - uint64_t Offset; // 8 - uint32_t Size; // 4 -}; - -struct ChunkedInfoWithSource -{ - ChunkedInfo Info; - std::vector<ChunkSource> ChunkSources; -}; - -struct ChunkedParams -{ - bool UseThreshold = true; - size_t MinSize = (2u * 1024u) - 128u; - size_t MaxSize = (16u * 1024u); - size_t AvgSize = (3u * 1024u); -}; - -static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; - -ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {}); -void Reconstruct(const ChunkedInfo& Info, - const std::filesystem::path& TargetPath, - std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk); -IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info); -ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer); - -void chunkedfile_forcelink(); -} // namespace zen diff --git a/src/zenstore/xmake.lua b/src/zenstore/xmake.lua index f0bd64d2e..031a66829 100644 --- a/src/zenstore/xmake.lua +++ b/src/zenstore/xmake.lua @@ -8,3 +8,4 @@ target('zenstore') add_includedirs("include", {public=true}) add_deps("zencore", "zenutil") add_packages("vcpkg::robin-map") + add_packages("vcpkg::eastl", {public=true}); |