diff options
| author | Dan Engelbrecht <[email protected]> | 2025-06-03 16:21:01 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-06-03 16:21:01 +0200 |
| commit | a0b10b046095d57ffbdb46c83084601a832f4562 (patch) | |
| tree | fe015645ea07d83c2784e3e28d0e976a37054859 /src/zenutil/include | |
| parent | minor: fix unused variable warning on some compilers (diff) | |
| download | zen-a0b10b046095d57ffbdb46c83084601a832f4562.tar.xz zen-a0b10b046095d57ffbdb46c83084601a832f4562.zip | |
fixed size chunking for encrypted files (#410)
- Improvement: Use fixed size block chunking for known encrypted/compressed file types
- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed
- Improvement: Add a global open-file cache for written files, increasing throughput during download by reducing the overhead of opening/closing files by 80%
Diffstat (limited to 'src/zenutil/include')
| -rw-r--r-- | src/zenutil/include/zenutil/bufferedwritefilecache.h | 106 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkedcontent.h | 1 | ||||
| -rw-r--r-- | src/zenutil/include/zenutil/chunkingcontroller.h | 45 |
3 files changed, 137 insertions, 15 deletions
diff --git a/src/zenutil/include/zenutil/bufferedwritefilecache.h b/src/zenutil/include/zenutil/bufferedwritefilecache.h new file mode 100644 index 000000000..68d6c375e --- /dev/null +++ b/src/zenutil/include/zenutil/bufferedwritefilecache.h @@ -0,0 +1,106 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/basicfile.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CompositeBuffer; + +class BufferedWriteFileCache +{ +public: + BufferedWriteFileCache(const BufferedWriteFileCache&) = delete; + BufferedWriteFileCache& operator=(const BufferedWriteFileCache&) = delete; + + BufferedWriteFileCache(); + + ~BufferedWriteFileCache(); + + std::unique_ptr<BasicFile> Get(uint32_t FileIndex); + + void Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer); + + void Close(std::span<uint32_t> FileIndexes); + + class Local + { + public: + struct Writer + { + std::unique_ptr<BasicFile> File; + std::unique_ptr<BasicFileWriter> Writer; + + inline void Write(const CompositeBuffer& Chunk, uint64_t FileOffset) + { + if (Writer) + { + Writer->Write(Chunk, FileOffset); + } + else + { + File->Write(Chunk, FileOffset); + } + } + }; + + Local(const Local&) = delete; + Local& operator=(const Local&) = delete; + + explicit Local(BufferedWriteFileCache& Cache); + ~Local(); + + Writer* GetWriter(uint32_t FileIndex); + Writer* PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer); + + private: + tsl::robin_map<uint32_t, uint32_t> m_FileIndexToWriterIndex; + std::vector<std::unique_ptr<Writer>> m_ChunkWriters; + BufferedWriteFileCache& m_Cache; + }; + +private: + static constexpr size_t MaxHandlesPerPath = 7; + static constexpr size_t MaxBufferedCount = 1024; + struct TOpenHandles + { + BasicFile* Files[MaxHandlesPerPath]; + uint64_t Size = 0; + inline BasicFile* Pop() + { + if (Size > 0) + { + return Files[--Size]; + } + else + { + return nullptr; + } + } + inline bool 
Push(BasicFile* File) + { + if (Size < MaxHandlesPerPath) + { + Files[Size++] = File; + return true; + } + return false; + } + }; + static_assert(sizeof(TOpenHandles) == 64); + + RwLock m_WriterLock; + tsl::robin_map<uint32_t, uint32_t> m_ChunkWriters; + std::vector<TOpenHandles> m_OpenFiles; + std::atomic<uint32_t> m_CacheHitCount; + std::atomic<uint32_t> m_CacheMissCount; + std::atomic<uint32_t> m_OpenHandleCount; + std::atomic<uint32_t> m_DroppedHandleCount; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index d33869be2..03f52e5f6 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -135,6 +135,7 @@ struct ChunkedContentLookup ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash + std::vector<uint32_t> PathExtensionHash; }; ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h index 970917fb0..315502265 100644 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -11,9 +11,11 @@ namespace zen { -const std::vector<std::string_view> DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self", ".mp4"}; -const bool DefaultChunkingExcludeElfFiles = true; -const bool DefaultChunkingExcludeMachOFiles = true; +const std::vector<std::string> DefaultChunkingExcludeExtensions = + {".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip"}; +const std::vector<std::string> 
DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"}; +const bool DefaultChunkingExcludeElfFiles = true; +const bool DefaultChunkingExcludeMachOFiles = true; const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, .MaxSize = 128u * 1024u, @@ -21,7 +23,8 @@ const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128 const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; -const uint32_t DefaultFixedChunkingChunkSize = 16u * 1024u * 1024u; +const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u; +const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u; struct ChunkedInfoWithSource; @@ -40,19 +43,31 @@ public: virtual CbObject GetParameters() const = 0; }; -std::unique_ptr<ChunkingController> CreateBasicChunkingController( - std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions, - bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles, - bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles, - uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, - const ChunkedParams& ChunkingParams = DefaultChunkedParams); +struct BasicChunkingControllerSettings +{ + std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; + bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; + bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; + ChunkedParams ChunkingParams = DefaultChunkedParams; +}; + +std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings); std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters); -std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking( - std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions, - uint64_t ChunkFileSizeLimit = 
DefaultChunkingFileSizeLimit, - const ChunkedParams& ChunkingParams = DefaultChunkedParams, - uint32_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize); +struct ChunkingControllerWithFixedChunkingSettings +{ + std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions; + std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; + bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; + bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; + ChunkedParams ChunkingParams = DefaultChunkedParams; + uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize; + uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking; +}; + +std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting); std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters); |