diff options
| author | Dan Engelbrecht <[email protected]> | 2025-06-03 16:21:01 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-06-03 16:21:01 +0200 |
| commit | a0b10b046095d57ffbdb46c83084601a832f4562 (patch) | |
| tree | fe015645ea07d83c2784e3e28d0e976a37054859 /src/zenutil | |
| parent | minor: fix unused variable warning on some compilers (diff) | |
| download | zen-a0b10b046095d57ffbdb46c83084601a832f4562.tar.xz zen-a0b10b046095d57ffbdb46c83084601a832f4562.zip | |
fixed size chunking for encrypted files (#410)
- Improvement: Use fixed size block chunking for know encrypted/compressed file types
- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed
- Improvement: Add global open file cache for written files increasing throughput during download by reducing overhead of open/close of file by 80%
Diffstat (limited to 'src/zenutil')
| -rw-r--r-- | src/zenutil/bufferedwritefilecache.cpp | 177 | ||||
| -rw-r--r-- | src/zenutil/chunkedcontent.cpp | 4 | ||||
| -rw-r--r-- | src/zenutil/chunkingcontroller.cpp | 289 | ||||
| -rw-r--r-- | src/zenutil/filebuildstorage.cpp | 19 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/bufferedwritefilecache.h | 106 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkedcontent.h | 1 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkingcontroller.h | 45 |
7 files changed, 501 insertions, 140 deletions
diff --git a/src/zenutil/bufferedwritefilecache.cpp b/src/zenutil/bufferedwritefilecache.cpp new file mode 100644 index 000000000..a52850314 --- /dev/null +++ b/src/zenutil/bufferedwritefilecache.cpp @@ -0,0 +1,177 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/bufferedwritefilecache.h> + +#include <zencore/logging.h> +#include <zencore/trace.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +BufferedWriteFileCache::BufferedWriteFileCache() : m_CacheHitCount(0), m_CacheMissCount(0), m_OpenHandleCount(0), m_DroppedHandleCount(0) +{ +} + +BufferedWriteFileCache::~BufferedWriteFileCache() +{ + ZEN_TRACE_CPU("~BufferedWriteFileCache()"); + + try + { + for (TOpenHandles& OpenHandles : m_OpenFiles) + { + while (BasicFile* File = OpenHandles.Pop()) + { + std::unique_ptr<BasicFile> FileToClose(File); + m_OpenHandleCount--; + } + } + m_OpenFiles.clear(); + m_ChunkWriters.clear(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~BufferedWriteFileCache() threw exeption: {}", Ex.what()); + } +} + +std::unique_ptr<BasicFile> +BufferedWriteFileCache::Get(uint32_t FileIndex) +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::Get"); + + RwLock::ExclusiveLockScope _(m_WriterLock); + if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end()) + { + const uint32_t HandleIndex = It->second; + TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex]; + if (BasicFile* File = OpenHandles.Pop(); File != nullptr) + { + m_OpenHandleCount--; + m_CacheHitCount++; + return std::unique_ptr<BasicFile>(File); + } + } + m_CacheMissCount++; + return nullptr; +} + +void +BufferedWriteFileCache::Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer) +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::Put"); + + if (m_OpenHandleCount.load() >= MaxBufferedCount) + { + m_DroppedHandleCount++; + return; + } + RwLock::ExclusiveLockScope _(m_WriterLock); + if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end()) + { + const uint32_t HandleIndex = It->second; + TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex]; + if (OpenHandles.Push(Writer.get())) + { + Writer.release(); + m_OpenHandleCount++; + } + else + { + m_DroppedHandleCount++; + } + } + else + { + const uint32_t HandleIndex = gsl::narrow<uint32_t>(m_OpenFiles.size()); + m_OpenFiles.push_back(TOpenHandles{}); + m_OpenFiles.back().Push(Writer.release()); + m_ChunkWriters.insert_or_assign(FileIndex, HandleIndex); + m_OpenHandleCount++; + } +} + +void +BufferedWriteFileCache::Close(std::span<uint32_t> FileIndexes) +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::Close"); + + std::vector<std::unique_ptr<BasicFile>> FilesToClose; + FilesToClose.reserve(FileIndexes.size()); + { + RwLock::ExclusiveLockScope _(m_WriterLock); + for (uint32_t FileIndex : FileIndexes) + { + if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end()) + { + const uint32_t HandleIndex = It->second; + TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex]; + while (BasicFile* File = OpenHandles.Pop()) + { + FilesToClose.emplace_back(std::unique_ptr<BasicFile>(File)); + m_OpenHandleCount--; + } + m_ChunkWriters.erase(It); + } + } + } + FilesToClose.clear(); +} + +BufferedWriteFileCache::Local::Local(BufferedWriteFileCache& Cache) : m_Cache(Cache) +{ +} + +BufferedWriteFileCache::Local::Writer* +BufferedWriteFileCache::Local::GetWriter(uint32_t FileIndex) +{ + if (auto It = m_FileIndexToWriterIndex.find(FileIndex); It != m_FileIndexToWriterIndex.end()) + { + return m_ChunkWriters[It->second].get(); + } + std::unique_ptr<BasicFile> File = m_Cache.Get(FileIndex); + if (File) + { + const uint32_t WriterIndex = gsl::narrow<uint32_t>(m_ChunkWriters.size()); + m_FileIndexToWriterIndex.insert_or_assign(FileIndex, WriterIndex); + m_ChunkWriters.emplace_back(std::make_unique<Writer>(Writer{.File = std::move(File)})); + return m_ChunkWriters.back().get(); + } + return nullptr; +} + +BufferedWriteFileCache::Local::Writer* +BufferedWriteFileCache::Local::PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer) +{ + ZEN_ASSERT(!m_FileIndexToWriterIndex.contains(FileIndex)); + const uint32_t WriterIndex = gsl::narrow<uint32_t>(m_ChunkWriters.size()); + m_FileIndexToWriterIndex.insert_or_assign(FileIndex, WriterIndex); + m_ChunkWriters.emplace_back(std::move(Writer)); + return m_ChunkWriters.back().get(); +} + +BufferedWriteFileCache::Local::~Local() +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::~Local()"); + try + { + for (auto& It : m_FileIndexToWriterIndex) + { + const uint32_t FileIndex = It.first; + const uint32_t WriterIndex = It.second; + m_ChunkWriters[WriterIndex]->Writer.reset(); + std::unique_ptr<BasicFile> File; + File.swap(m_ChunkWriters[WriterIndex]->File); + m_Cache.Put(FileIndex, std::move(File)); + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("BufferedWriteFileCache::~Local() threw exeption: {}", Ex.what()); + } +} + +} // namespace zen diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index ae129324e..4bec4901a 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -891,8 +891,12 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) } Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + Result.PathExtensionHash.resize(Content.Paths.size()); for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) { + std::string LowercaseExtension = Content.Paths[PathIndex].extension().string(); + std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), ::tolower); + Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension); if (Content.RawSizes[PathIndex] > 0) { const IoHash& RawHash = Content.RawHashes[PathIndex]; diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index a5ebce193..6fb4182c0 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -4,6 +4,7 @@ #include <zencore/basicfile.h> #include <zencore/compactbinarybuilder.h> +#include <zencore/filesystem.h> #include <zencore/trace.h> ZEN_THIRD_PARTY_INCLUDES_START @@ -35,32 +36,54 @@ namespace { return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; } -} // namespace + void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params) + { + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, Params.UseThreshold); -class BasicChunkingController : public ChunkingController -{ -public: - BasicChunkingController(std::span<const std::string_view> ExcludeExtensions, - bool ExcludeElfFiles, - bool ExcludeMachOFiles, - uint64_t ChunkFileSizeLimit, - const ChunkedParams& ChunkingParams) - : m_ChunkExcludeExtensions(ExcludeExtensions.begin(), ExcludeExtensions.end()) - , m_ExcludeElfFiles(ExcludeElfFiles) - , m_ExcludeMachOFiles(ExcludeMachOFiles) - , m_ChunkFileSizeLimit(ChunkFileSizeLimit) - , m_ChunkingParams(ChunkingParams) + Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize); + } + Writer.EndObject(); // ChunkingParams + } + + bool IsElfFile(BasicFile& Buffer) { + if (Buffer.FileSize() > 4) + { + uint32_t ElfCheck = 0; + Buffer.Read(&ElfCheck, 4, 0); + if (ElfCheck == 0x464c457f) + { + return true; + } + } + return false; } - BasicChunkingController(CbObjectView Parameters) - : m_ChunkExcludeExtensions(ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView())) - , m_ExcludeElfFiles(Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles)) - , m_ExcludeMachOFiles(Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles)) - , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) - , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + bool IsMachOFile(BasicFile& Buffer) { + if (Buffer.FileSize() > 4) + { + uint32_t MachOCheck = 0; + Buffer.Read(&MachOCheck, 4, 0); + if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) + { + return true; + } + } + return false; } +} // namespace + +class BasicChunkingController : public ChunkingController +{ +public: + BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {} + + BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, @@ -70,35 +93,25 @@ public: { ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); const bool ExcludeFromChunking = - std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != - m_ChunkExcludeExtensions.end(); + std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != + m_Settings.ExcludeExtensions.end(); - if (ExcludeFromChunking || (RawSize < m_ChunkFileSizeLimit)) + if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) { return false; } BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - if (m_ExcludeElfFiles && Buffer.FileSize() > 4) + if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) { - uint32_t ElfCheck = 0; - Buffer.Read(&ElfCheck, 4, 0); - if (ElfCheck == 0x464c457f) - { - return false; - } + return false; } - if (m_ExcludeMachOFiles && Buffer.FileSize() > 4) + if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) { - uint32_t MachOCheck = 0; - Buffer.Read(&MachOCheck, 4, 0); - if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) - { - return false; - } + return false; } - OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed, &AbortFlag); + OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); return true; } @@ -109,59 +122,43 @@ public: CbObjectWriter Writer; Writer.BeginArray("ChunkExcludeExtensions"sv); { - for (const std::string& Extension : m_ChunkExcludeExtensions) + for (const std::string& Extension : m_Settings.ExcludeExtensions) { Writer.AddString(Extension); } } Writer.EndArray(); // ChunkExcludeExtensions - Writer.AddBool("ExcludeElfFiles"sv, m_ExcludeElfFiles); - Writer.AddBool("ExcludeMachOFiles"sv, m_ExcludeMachOFiles); + Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); + Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); + Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); - Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); - Writer.BeginObject("ChunkingParams"sv); - { - Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + WriteChunkParams(Writer, m_Settings.ChunkingParams); - Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); - Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); - Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); - } - Writer.EndObject(); // ChunkingParams return Writer.Save(); } static constexpr std::string_view Name = "BasicChunkingController"sv; -protected: - const std::vector<std::string> m_ChunkExcludeExtensions; - const bool m_ExcludeElfFiles = false; - const bool m_ExcludeMachOFiles = false; - const uint64_t m_ChunkFileSizeLimit; - const ChunkedParams m_ChunkingParams; +private: + static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters) + { + return BasicChunkingControllerSettings{ + .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), + .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), + .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), + .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), + .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; + } + + const BasicChunkingControllerSettings m_Settings; }; class ChunkingControllerWithFixedChunking : public ChunkingController { public: - ChunkingControllerWithFixedChunking(std::span<const std::string_view> FixedChunkingExtensions, - uint64_t ChunkFileSizeLimit, - const ChunkedParams& ChunkingParams, - uint32_t FixedChunkingChunkSize) - : m_FixedChunkingExtensions(FixedChunkingExtensions.begin(), FixedChunkingExtensions.end()) - , m_ChunkFileSizeLimit(ChunkFileSizeLimit) - , m_ChunkingParams(ChunkingParams) - , m_FixedChunkingChunkSize(FixedChunkingChunkSize) - { - } + ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {} - ChunkingControllerWithFixedChunking(CbObjectView Parameters) - : m_FixedChunkingExtensions(ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView())) - , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) - , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) - , m_FixedChunkingChunkSize(Parameters["FixedChunkingChunkSize"sv].AsUInt32(16u * 1024u * 1024u)) - { - } + ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, @@ -170,33 +167,71 @@ public: std::atomic<bool>& AbortFlag) const override { ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); - if (RawSize < m_ChunkFileSizeLimit) + const bool ExcludeFromChunking = + std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != + m_Settings.ExcludeExtensions.end(); + + if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) { return false; } - const bool FixedChunking = std::find(m_FixedChunkingExtensions.begin(), m_FixedChunkingExtensions.end(), InputPath.extension()) != - m_FixedChunkingExtensions.end(); - if (FixedChunking) + const bool FixedChunkingExtension = + std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) != + m_Settings.FixedChunkingExtensions.end(); + + if (FixedChunkingExtension) { + if (RawSize < m_Settings.MinSizeForFixedChunking) + { + return false; + } ZEN_TRACE_CPU("FixedChunking"); - IoHashStream FullHash; - IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); + IoHashStream FullHasher; + BasicFile Source(InputPath, BasicFile::Mode::kRead); uint64_t Offset = 0; tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; - ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); + const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize); + ChunkHashToChunkIndex.reserve(ExpectedChunkCount); + OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); + OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); + OutChunked.ChunkSources.reserve(ExpectedChunkCount); + + static const uint64_t BufferingSize = 256u * 1024u; + + IoHashStream ChunkHasher; + while (Offset < RawSize) { if (AbortFlag) { return false; } - uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_FixedChunkingChunkSize); - IoBuffer Chunk(Source, Offset, ChunkSize); - MemoryView ChunkData = Chunk.GetView(); - FullHash.Append(ChunkData); - IoHash ChunkHash = IoHash::HashBuffer(ChunkData); + ChunkHasher.Reset(); + + uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize); + if (ChunkSize >= (BufferingSize + BufferingSize / 2)) + { + ScanFile(Source.Handle(), + Offset, + ChunkSize, + BufferingSize, + [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) { + FullHasher.Append(Data, Size); + ChunkHasher.Append(Data, Size); + BytesProcessed.fetch_add(Size); + }); + } + else + { + IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize); + FullHasher.Append(ChunkData); + ChunkHasher.Append(ChunkData); + BytesProcessed.fetch_add(ChunkSize); + } + + const IoHash ChunkHash = ChunkHasher.GetHash(); if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) { OutChunked.Info.ChunkSequence.push_back(It->second); @@ -209,16 +244,24 @@ public: OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)}); } Offset += ChunkSize; - BytesProcessed.fetch_add(ChunkSize); } OutChunked.Info.RawSize = RawSize; - OutChunked.Info.RawHash = FullHash.GetHash(); + OutChunked.Info.RawHash = FullHasher.GetHash(); return true; } else { BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) + { + return false; + } + if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) + { + return false; + } + + OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); return true; } } @@ -230,47 +273,57 @@ public: CbObjectWriter Writer; Writer.BeginArray("FixedChunkingExtensions"); { - for (const std::string& Extension : m_FixedChunkingExtensions) + for (const std::string& Extension : m_Settings.FixedChunkingExtensions) { Writer.AddString(Extension); } } Writer.EndArray(); // ChunkExcludeExtensions - Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); - Writer.BeginObject("ChunkingParams"sv); - { - Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); - Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); - Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); - Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_Settings.ExcludeExtensions) + { + Writer.AddString(Extension); + } } - Writer.EndObject(); // ChunkingParams - Writer.AddInteger("FixedChunkingChunkSize"sv, m_FixedChunkingChunkSize); + Writer.EndArray(); // ChunkExcludeExtensions + + Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); + Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); + + Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); + + WriteChunkParams(Writer, m_Settings.ChunkingParams); + + Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize); + Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking); return Writer.Save(); } static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; -protected: - const std::vector<std::string> m_FixedChunkingExtensions; - const uint64_t m_ChunkFileSizeLimit; - const ChunkedParams m_ChunkingParams; - const uint32_t m_FixedChunkingChunkSize; +private: + static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters) + { + return ChunkingControllerWithFixedChunkingSettings{ + .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()), + .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), + .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), + .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), + .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), + .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()), + .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize), + .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)}; + } + + const ChunkingControllerWithFixedChunkingSettings m_Settings; }; std::unique_ptr<ChunkingController> -CreateBasicChunkingController(std::span<const std::string_view> ExcludeExtensions, - bool ExcludeElfFiles, - bool ExcludeMachOFiles, - uint64_t ChunkFileSizeLimit, - const ChunkedParams& ChunkingParams) +CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings) { - return std::make_unique<BasicChunkingController>(ExcludeExtensions, - ExcludeElfFiles, - ExcludeMachOFiles, - ChunkFileSizeLimit, - ChunkingParams); + return std::make_unique<BasicChunkingController>(Settings); } std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters) @@ -279,15 +332,9 @@ CreateBasicChunkingController(CbObjectView Parameters) } std::unique_ptr<ChunkingController> -CreateChunkingControllerWithFixedChunking(std::span<const std::string_view> FixedChunkingExtensions, - uint64_t ChunkFileSizeLimit, - const ChunkedParams& ChunkingParams, - uint32_t FixedChunkingChunkSize) +CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting) { - return std::make_unique<ChunkingControllerWithFixedChunking>(FixedChunkingExtensions, - ChunkFileSizeLimit, - ChunkingParams, - FixedChunkingChunkSize); + return std::make_unique<ChunkingControllerWithFixedChunking>(Setting); } std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index badfb4840..c389d16c5 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -678,13 +678,24 @@ protected: { return false; } - CompositeBuffer Decompressed = ValidateBuffer.DecompressToComposite(); - if (!Decompressed) + + IoHashStream Hash; + bool CouldDecompress = ValidateBuffer.DecompressToStream( + 0, + (uint64_t)-1, + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize, Offset); + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + return true; + }); + if (!CouldDecompress) { return false; } - IoHash Hash = IoHash::HashBuffer(Decompressed); - if (Hash != RawHash) + if (Hash.GetHash() != VerifyHash) { return false; } diff --git a/src/zenutil/include/zenutil/bufferedwritefilecache.h b/src/zenutil/include/zenutil/bufferedwritefilecache.h new file mode 100644 index 000000000..68d6c375e --- /dev/null +++ b/src/zenutil/include/zenutil/bufferedwritefilecache.h @@ -0,0 +1,106 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/basicfile.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CompositeBuffer; + +class BufferedWriteFileCache +{ +public: + BufferedWriteFileCache(const BufferedWriteFileCache&) = delete; + BufferedWriteFileCache& operator=(const BufferedWriteFileCache&) = delete; + + BufferedWriteFileCache(); + + ~BufferedWriteFileCache(); + + std::unique_ptr<BasicFile> Get(uint32_t FileIndex); + + void Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer); + + void Close(std::span<uint32_t> FileIndexes); + + class Local + { + public: + struct Writer + { + std::unique_ptr<BasicFile> File; + std::unique_ptr<BasicFileWriter> Writer; + + inline void Write(const CompositeBuffer& Chunk, uint64_t FileOffset) + { + if (Writer) + { + Writer->Write(Chunk, FileOffset); + } + else + { + File->Write(Chunk, FileOffset); + } + } + }; + + Local(const Local&) = delete; + Local& operator=(const Local&) = delete; + + explicit Local(BufferedWriteFileCache& Cache); + ~Local(); + + Writer* GetWriter(uint32_t FileIndex); + Writer* PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer); + + private: + tsl::robin_map<uint32_t, uint32_t> m_FileIndexToWriterIndex; + std::vector<std::unique_ptr<Writer>> m_ChunkWriters; + BufferedWriteFileCache& m_Cache; + }; + +private: + static constexpr size_t MaxHandlesPerPath = 7; + static constexpr size_t MaxBufferedCount = 1024; + struct TOpenHandles + { + BasicFile* Files[MaxHandlesPerPath]; + uint64_t Size = 0; + inline BasicFile* Pop() + { + if (Size > 0) + { + return Files[--Size]; + } + else + { + return nullptr; + } + } + inline bool Push(BasicFile* File) + { + if (Size < MaxHandlesPerPath) + { + Files[Size++] = File; + return true; + } + return false; + } + }; + static_assert(sizeof(TOpenHandles) == 64); + + RwLock m_WriterLock; + tsl::robin_map<uint32_t, uint32_t> m_ChunkWriters; + std::vector<TOpenHandles> m_OpenFiles; + std::atomic<uint32_t> m_CacheHitCount; + std::atomic<uint32_t> m_CacheMissCount; + std::atomic<uint32_t> m_OpenHandleCount; + std::atomic<uint32_t> m_DroppedHandleCount; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index d33869be2..03f52e5f6 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -135,6 +135,7 @@ struct ChunkedContentLookup ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash + std::vector<uint32_t> PathExtensionHash; }; ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h index 970917fb0..315502265 100644 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -11,9 +11,11 @@ namespace zen { -const std::vector<std::string_view> DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self", ".mp4"}; -const bool DefaultChunkingExcludeElfFiles = true; -const bool DefaultChunkingExcludeMachOFiles = true; +const std::vector<std::string> DefaultChunkingExcludeExtensions = + {".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip"}; +const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"}; +const bool DefaultChunkingExcludeElfFiles = true; +const bool DefaultChunkingExcludeMachOFiles = true; const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, .MaxSize = 128u * 1024u, @@ -21,7 +23,8 @@ const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128 const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; -const uint32_t DefaultFixedChunkingChunkSize = 16u * 1024u * 1024u; +const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u; +const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u; struct ChunkedInfoWithSource; @@ -40,19 +43,31 @@ public: virtual CbObject GetParameters() const = 0; }; -std::unique_ptr<ChunkingController> CreateBasicChunkingController( - std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions, - bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles, - bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles, - uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, - const ChunkedParams& ChunkingParams = DefaultChunkedParams); +struct BasicChunkingControllerSettings +{ + std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; + bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; + bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; + ChunkedParams ChunkingParams = DefaultChunkedParams; +}; + +std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings); std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters); -std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking( - std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions, - uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, - const ChunkedParams& ChunkingParams = DefaultChunkedParams, - uint32_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize); +struct ChunkingControllerWithFixedChunkingSettings +{ + std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions; + std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; + bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; + bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; + ChunkedParams ChunkingParams = DefaultChunkedParams; + uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize; + uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking; +}; + +std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting); std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters); |