aboutsummaryrefslogtreecommitdiff
path: root/src/zencore
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-06-03 16:21:01 +0200
committerGitHub Enterprise <[email protected]>2025-06-03 16:21:01 +0200
commita0b10b046095d57ffbdb46c83084601a832f4562 (patch)
treefe015645ea07d83c2784e3e28d0e976a37054859 /src/zencore
parentminor: fix unused variable warning on some compilers (diff)
downloadzen-a0b10b046095d57ffbdb46c83084601a832f4562.tar.xz
zen-a0b10b046095d57ffbdb46c83084601a832f4562.zip
fixed size chunking for encrypted files (#410)
- Improvement: Use fixed-size block chunking for known encrypted/compressed file types - Improvement: Skip trying to compress chunks that are sourced from files that are known to be encrypted/compressed - Improvement: Add global open file cache for written files, increasing throughput during download by reducing overhead of open/close of file by 80%
Diffstat (limited to 'src/zencore')
-rw-r--r--src/zencore/basicfile.cpp10
-rw-r--r--src/zencore/blake3.cpp22
-rw-r--r--src/zencore/compress.cpp50
-rw-r--r--src/zencore/filesystem.cpp23
-rw-r--r--src/zencore/include/zencore/blake3.h1
-rw-r--r--src/zencore/include/zencore/iohash.h6
6 files changed, 86 insertions, 26 deletions
diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp
index 993f2b616..6989da67e 100644
--- a/src/zencore/basicfile.cpp
+++ b/src/zencore/basicfile.cpp
@@ -283,7 +283,7 @@ BasicFile::Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec)
void
BasicFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec)
{
- const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024;
+ const uint64_t MaxChunkSize = 2u * 1024 * 1024;
WriteFile(m_FileHandle, Data, Size, FileOffset, MaxChunkSize, Ec);
}
@@ -794,7 +794,7 @@ WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path)
{
uint64_t Offset = 0;
static const uint64_t BufferingSize = 256u * 1024u;
- // BasicFileWriter BufferedOutput(BlockFile, BufferingSize / 2);
+ BasicFileWriter BufferedOutput(Temp, Min(BufferingSize, BufferSize));
for (const SharedBuffer& Segment : Buffer.GetSegments())
{
size_t SegmentSize = Segment.GetSize();
@@ -806,14 +806,14 @@ WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path)
FileRef.FileChunkOffset,
FileRef.FileChunkSize,
BufferingSize,
- [&Temp, &Offset](const void* Data, size_t Size) {
- Temp.Write(Data, Size, Offset);
+ [&BufferedOutput, &Offset](const void* Data, size_t Size) {
+ BufferedOutput.Write(Data, Size, Offset);
Offset += Size;
});
}
else
{
- Temp.Write(Segment.GetData(), SegmentSize, Offset);
+ BufferedOutput.Write(Segment.GetData(), SegmentSize, Offset);
Offset += SegmentSize;
}
}
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp
index 4a77aa49a..054f0d3a0 100644
--- a/src/zencore/blake3.cpp
+++ b/src/zencore/blake3.cpp
@@ -151,6 +151,28 @@ BLAKE3Stream::Append(const void* data, size_t byteCount)
return *this;
}
+BLAKE3Stream&
+BLAKE3Stream::Append(const IoBuffer& Buffer)
+{
+ blake3_hasher* b3h = reinterpret_cast<blake3_hasher*>(m_HashState);
+
+ size_t BufferSize = Buffer.GetSize();
+ static const uint64_t BufferingSize = 256u * 1024u;
+ IoBufferFileReference FileRef;
+ if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef))
+ {
+ ScanFile(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, [&b3h](const void* Data, size_t Size) {
+ blake3_hasher_update(b3h, Data, Size);
+ });
+ }
+ else
+ {
+ blake3_hasher_update(b3h, Buffer.GetData(), BufferSize);
+ }
+
+ return *this;
+}
+
BLAKE3
BLAKE3Stream::GetHash()
{
diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp
index 62b64bc9d..d9f381811 100644
--- a/src/zencore/compress.cpp
+++ b/src/zencore/compress.cpp
@@ -216,23 +216,45 @@ public:
std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
uint64_t /* BlockSize */) const final
{
- UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData));
- Callback(0, 0, 0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderData.GetData(), HeaderData.GetSize())));
+ const uint64_t HeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder();
- IoBufferFileReference FileRef = {nullptr, 0, 0};
- if ((RawData.GetSegments().size() == 1) && RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef))
+ uint64_t RawOffset = 0;
+ BLAKE3Stream HashStream;
+
+ for (const SharedBuffer& Segment : RawData.GetSegments())
{
- ZEN_ASSERT(FileRef.FileHandle != nullptr);
- uint64_t CallbackOffset = 0;
- ScanFile(FileRef.FileHandle, 0, RawData.GetSize(), 512u * 1024u, [&](const void* Data, size_t Size) {
- CompositeBuffer Tmp(SharedBuffer(IoBuffer(IoBuffer::Wrap, Data, Size)));
- Callback(CallbackOffset, Size, HeaderData.GetSize() + CallbackOffset, Tmp);
- CallbackOffset += Size;
- });
- return true;
+ IoBufferFileReference FileRef = {nullptr, 0, 0};
+ IoBuffer SegmentBuffer = Segment.AsIoBuffer();
+ if (SegmentBuffer.GetFileReference(FileRef))
+ {
+ ZEN_ASSERT(FileRef.FileHandle != nullptr);
+
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ 512u * 1024u,
+ [&](const void* Data, size_t Size) {
+ HashStream.Append(Data, Size);
+ CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size));
+ Callback(RawOffset, Size, HeaderSize + RawOffset, Tmp);
+ RawOffset += Size;
+ });
+ }
+ else
+ {
+ const uint64_t Size = SegmentBuffer.GetSize();
+ HashStream.Append(SegmentBuffer);
+ Callback(RawOffset, Size, HeaderSize + RawOffset, CompositeBuffer(Segment));
+ RawOffset += Size;
+ }
}
- Callback(0, RawData.GetSize(), HeaderData.GetSize(), RawData);
+ ZEN_ASSERT(RawOffset == RawData.GetSize());
+
+ UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), HashStream.GetHash());
+ ZEN_ASSERT(HeaderData.GetSize() == HeaderSize);
+ Callback(0, 0, 0, CompositeBuffer(HeaderData.MoveToShared()));
+
return true;
}
};
@@ -323,7 +345,7 @@ public:
ScanFile(FileRef.FileHandle, sizeof(BufferHeader) + RawOffset, RawSize, 512u * 1024u, [&](const void* Data, size_t Size) {
if (Result)
{
- CompositeBuffer Tmp(SharedBuffer(IoBuffer(IoBuffer::Wrap, Data, Size)));
+ CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size));
Result = Callback(sizeof(BufferHeader) + RawOffset + CallbackOffset, Size, CallbackOffset, Tmp);
}
CallbackOffset += Size;
diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp
index 0a9b2a73a..c4264bc29 100644
--- a/src/zencore/filesystem.cpp
+++ b/src/zencore/filesystem.cpp
@@ -2275,23 +2275,32 @@ PrepareFileForScatteredWrite(void* FileHandle, uint64_t FinalSize)
{
bool Result = true;
#if ZEN_PLATFORM_WINDOWS
- DWORD _ = 0;
- BOOL Ok = DeviceIoControl(FileHandle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &_, nullptr);
- if (!Ok)
+
+ BY_HANDLE_FILE_INFORMATION Information;
+ if (GetFileInformationByHandle(FileHandle, &Information))
{
- std::error_code DummyEc;
- ZEN_DEBUG("Unable to set sparse mode for file '{}'", PathFromHandle(FileHandle, DummyEc));
- Result = false;
+ if ((Information.dwFileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) == 0)
+ {
+ DWORD _ = 0;
+ BOOL Ok = DeviceIoControl(FileHandle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &_, nullptr);
+ if (!Ok)
+ {
+ std::error_code DummyEc;
+ ZEN_DEBUG("Unable to set sparse mode for file '{}'", PathFromHandle(FileHandle, DummyEc));
+ Result = false;
+ }
+ }
}
FILE_ALLOCATION_INFO AllocationInfo = {};
- AllocationInfo.AllocationSize.QuadPart = FinalSize;
+ AllocationInfo.AllocationSize.QuadPart = LONGLONG(FinalSize);
if (!SetFileInformationByHandle(FileHandle, FileAllocationInfo, &AllocationInfo, DWORD(sizeof(AllocationInfo))))
{
std::error_code DummyEc;
ZEN_DEBUG("Unable to set file allocation size to {} for file '{}'", FinalSize, PathFromHandle(FileHandle, DummyEc));
Result = false;
}
+
#else // ZEN_PLATFORM_WINDOWS
ZEN_UNUSED(FileHandle, FinalSize);
#endif // ZEN_PLATFORM_WINDOWS
diff --git a/src/zencore/include/zencore/blake3.h b/src/zencore/include/zencore/blake3.h
index 28bb348c0..f01e45266 100644
--- a/src/zencore/include/zencore/blake3.h
+++ b/src/zencore/include/zencore/blake3.h
@@ -53,6 +53,7 @@ struct BLAKE3Stream
void Reset(); // Begin streaming hash compute (not needed on freshly constructed instance)
BLAKE3Stream& Append(const void* data, size_t byteCount); // Append another chunk
BLAKE3Stream& Append(MemoryView DataView) { return Append(DataView.GetData(), DataView.GetSize()); } // Append another chunk
+ BLAKE3Stream& Append(const IoBuffer& Buffer); // Append another chunk
BLAKE3 GetHash(); // Obtain final hash. If you wish to reuse the instance call reset()
private:
diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h
index 7443e17b7..a619b0053 100644
--- a/src/zencore/include/zencore/iohash.h
+++ b/src/zencore/include/zencore/iohash.h
@@ -102,6 +102,12 @@ struct IoHashStream
return *this;
}
+ IoHashStream& Append(const IoBuffer& Buffer)
+ {
+ m_Blake3Stream.Append(Buffer);
+ return *this;
+ }
+
/// Append another chunk
IoHashStream& Append(MemoryView Data)
{