diff options
| author | Dan Engelbrecht <[email protected]> | 2025-06-03 16:21:01 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-06-03 16:21:01 +0200 |
| commit | a0b10b046095d57ffbdb46c83084601a832f4562 (patch) | |
| tree | fe015645ea07d83c2784e3e28d0e976a37054859 /src/zencore | |
| parent | minor: fix unused variable warning on some compilers (diff) | |
| download | zen-a0b10b046095d57ffbdb46c83084601a832f4562.tar.xz zen-a0b10b046095d57ffbdb46c83084601a832f4562.zip | |
fixed size chunking for encrypted files (#410)
- Improvement: Use fixed size block chunking for know encrypted/compressed file types
- Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed
- Improvement: Add global open file cache for written files increasing throughput during download by reducing overhead of open/close of file by 80%
Diffstat (limited to 'src/zencore')
| -rw-r--r-- | src/zencore/basicfile.cpp | 10 | ||||
| -rw-r--r-- | src/zencore/blake3.cpp | 22 | ||||
| -rw-r--r-- | src/zencore/compress.cpp | 50 | ||||
| -rw-r--r-- | src/zencore/filesystem.cpp | 23 | ||||
| -rw-r--r-- | src/zencore/include/zencore/blake3.h | 1 | ||||
| -rw-r--r-- | src/zencore/include/zencore/iohash.h | 6 |
6 files changed, 86 insertions, 26 deletions
diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index 993f2b616..6989da67e 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -283,7 +283,7 @@ BasicFile::Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec) void BasicFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec) { - const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; + const uint64_t MaxChunkSize = 2u * 1024 * 1024; WriteFile(m_FileHandle, Data, Size, FileOffset, MaxChunkSize, Ec); } @@ -794,7 +794,7 @@ WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path) { uint64_t Offset = 0; static const uint64_t BufferingSize = 256u * 1024u; - // BasicFileWriter BufferedOutput(BlockFile, BufferingSize / 2); + BasicFileWriter BufferedOutput(Temp, Min(BufferingSize, BufferSize)); for (const SharedBuffer& Segment : Buffer.GetSegments()) { size_t SegmentSize = Segment.GetSize(); @@ -806,14 +806,14 @@ WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Temp, &Offset](const void* Data, size_t Size) { - Temp.Write(Data, Size, Offset); + [&BufferedOutput, &Offset](const void* Data, size_t Size) { + BufferedOutput.Write(Data, Size, Offset); Offset += Size; }); } else { - Temp.Write(Segment.GetData(), SegmentSize, Offset); + BufferedOutput.Write(Segment.GetData(), SegmentSize, Offset); Offset += SegmentSize; } } diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp index 4a77aa49a..054f0d3a0 100644 --- a/src/zencore/blake3.cpp +++ b/src/zencore/blake3.cpp @@ -151,6 +151,28 @@ BLAKE3Stream::Append(const void* data, size_t byteCount) return *this; } +BLAKE3Stream& +BLAKE3Stream::Append(const IoBuffer& Buffer) +{ + blake3_hasher* b3h = reinterpret_cast<blake3_hasher*>(m_HashState); + + size_t BufferSize = Buffer.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + IoBufferFileReference FileRef; + if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, [&b3h](const void* Data, size_t Size) { + blake3_hasher_update(b3h, Data, Size); + }); + } + else + { + blake3_hasher_update(b3h, Buffer.GetData(), BufferSize); + } + + return *this; +} + BLAKE3 BLAKE3Stream::GetHash() { diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 62b64bc9d..d9f381811 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -216,23 +216,45 @@ public: std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, uint64_t /* BlockSize */) const final { - UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); - Callback(0, 0, 0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderData.GetData(), HeaderData.GetSize()))); + const uint64_t HeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder(); - IoBufferFileReference FileRef = {nullptr, 0, 0}; - if ((RawData.GetSegments().size() == 1) && RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + uint64_t RawOffset = 0; + BLAKE3Stream HashStream; + + for (const SharedBuffer& Segment : RawData.GetSegments()) { - ZEN_ASSERT(FileRef.FileHandle != nullptr); - uint64_t CallbackOffset = 0; - ScanFile(FileRef.FileHandle, 0, RawData.GetSize(), 512u * 1024u, [&](const void* Data, size_t Size) { - CompositeBuffer Tmp(SharedBuffer(IoBuffer(IoBuffer::Wrap, Data, Size))); - Callback(CallbackOffset, Size, HeaderData.GetSize() + CallbackOffset, Tmp); - CallbackOffset += Size; - }); - return true; + IoBufferFileReference FileRef = {nullptr, 0, 0}; + IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + if (SegmentBuffer.GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + 512u * 1024u, + [&](const void* Data, size_t Size) { + HashStream.Append(Data, Size); + CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size)); + Callback(RawOffset, Size, HeaderSize + RawOffset, Tmp); + RawOffset += Size; + }); + } + else + { + const uint64_t Size = SegmentBuffer.GetSize(); + HashStream.Append(SegmentBuffer); + Callback(RawOffset, Size, HeaderSize + RawOffset, CompositeBuffer(Segment)); + RawOffset += Size; + } } - Callback(0, RawData.GetSize(), HeaderData.GetSize(), RawData); + ZEN_ASSERT(RawOffset == RawData.GetSize()); + + UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), HashStream.GetHash()); + ZEN_ASSERT(HeaderData.GetSize() == HeaderSize); + Callback(0, 0, 0, CompositeBuffer(HeaderData.MoveToShared())); + return true; } }; @@ -323,7 +345,7 @@ public: ScanFile(FileRef.FileHandle, sizeof(BufferHeader) + RawOffset, RawSize, 512u * 1024u, [&](const void* Data, size_t Size) { if (Result) { - CompositeBuffer Tmp(SharedBuffer(IoBuffer(IoBuffer::Wrap, Data, Size))); + CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size)); Result = Callback(sizeof(BufferHeader) + RawOffset + CallbackOffset, Size, CallbackOffset, Tmp); } CallbackOffset += Size; diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index 0a9b2a73a..c4264bc29 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -2275,23 +2275,32 @@ PrepareFileForScatteredWrite(void* FileHandle, uint64_t FinalSize) { bool Result = true; #if ZEN_PLATFORM_WINDOWS - DWORD _ = 0; - BOOL Ok = DeviceIoControl(FileHandle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &_, nullptr); - if (!Ok) + + BY_HANDLE_FILE_INFORMATION Information; + if (GetFileInformationByHandle(FileHandle, &Information)) { - std::error_code DummyEc; - ZEN_DEBUG("Unable to set sparse mode for file '{}'", PathFromHandle(FileHandle, DummyEc)); - Result = false; + if ((Information.dwFileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) == 0) + { + DWORD _ = 0; + BOOL Ok = DeviceIoControl(FileHandle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &_, nullptr); + if (!Ok) + { + std::error_code DummyEc; + ZEN_DEBUG("Unable to set sparse mode for file '{}'", PathFromHandle(FileHandle, DummyEc)); + Result = false; + } + } } FILE_ALLOCATION_INFO AllocationInfo = {}; - AllocationInfo.AllocationSize.QuadPart = FinalSize; + AllocationInfo.AllocationSize.QuadPart = LONGLONG(FinalSize); if (!SetFileInformationByHandle(FileHandle, FileAllocationInfo, &AllocationInfo, DWORD(sizeof(AllocationInfo)))) { std::error_code DummyEc; ZEN_DEBUG("Unable to set file allocation size to {} for file '{}'", FinalSize, PathFromHandle(FileHandle, DummyEc)); Result = false; } + #else // ZEN_PLATFORM_WINDOWS ZEN_UNUSED(FileHandle, FinalSize); #endif // ZEN_PLATFORM_WINDOWS diff --git a/src/zencore/include/zencore/blake3.h b/src/zencore/include/zencore/blake3.h index 28bb348c0..f01e45266 100644 --- a/src/zencore/include/zencore/blake3.h +++ b/src/zencore/include/zencore/blake3.h @@ -53,6 +53,7 @@ struct BLAKE3Stream void Reset(); // Begin streaming hash compute (not needed on freshly constructed instance) BLAKE3Stream& Append(const void* data, size_t byteCount); // Append another chunk BLAKE3Stream& Append(MemoryView DataView) { return Append(DataView.GetData(), DataView.GetSize()); } // Append another chunk + BLAKE3Stream& Append(const IoBuffer& Buffer); // Append another chunk BLAKE3 GetHash(); // Obtain final hash. If you wish to reuse the instance call reset() private: diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h index 7443e17b7..a619b0053 100644 --- a/src/zencore/include/zencore/iohash.h +++ b/src/zencore/include/zencore/iohash.h @@ -102,6 +102,12 @@ struct IoHashStream return *this; } + IoHashStream& Append(const IoBuffer& Buffer) + { + m_Blake3Stream.Append(Buffer); + return *this; + } + /// Append another chunk IoHashStream& Append(MemoryView Data) { |