diff options
| author | Dan Engelbrecht <[email protected]> | 2024-04-26 10:12:03 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-04-26 10:12:03 +0200 |
| commit | ef1b4234c851131cf5a6d249684c14c5c27d2878 (patch) | |
| tree | afd972c077b2585c2dc336b79eb1d31d02372295 /src/zencore/iohash.cpp | |
| parent | fix oplog import during gcv2 (#62) (diff) | |
| download | zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.tar.xz zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.zip | |
use direct file access for large file hash (#63)
- Improvement: Refactor `IoHash::HashBuffer` and `BLAKE3::HashBuffer` to use direct file access instead of memory-mapped files when hashing large file-backed buffers. This performs better and saves ~10% of oplog export time on CI.
Diffstat (limited to 'src/zencore/iohash.cpp')
| -rw-r--r-- | src/zencore/iohash.cpp | 47 |
1 file changed, 35 insertions, 12 deletions
diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp index a6bf25f6c..1bf2c033d 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -4,6 +4,7 @@ #include <zencore/blake3.h> #include <zencore/compositebuffer.h> +#include <zencore/filesystem.h> #include <zencore/string.h> #include <zencore/testing.h> @@ -31,19 +32,17 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - size_t SegmentSize = Segment.GetSize(); - static const size_t BufferingSize = 512 * 1024; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference()) + size_t SegmentSize = Segment.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) { - const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); - size_t Offset = 0; - while (Offset < SegmentSize) - { - size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize); - IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); - Hasher.Append(SubRange.GetData(), ChunkSize); - Offset += ChunkSize; - } + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); } else { @@ -55,6 +54,30 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) } IoHash +IoHash::HashBuffer(const IoBuffer& Buffer) +{ + IoHashStream Hasher; + + size_t BufferSize = Buffer.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + IoBufferFileReference FileRef; + if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + } + else + { + Hasher.Append(Buffer.GetData(), BufferSize); + } + + return 
Hasher.GetHash(); +} + +IoHash IoHash::FromHexString(const char* string) { return FromHexString({string, sizeof(IoHash::Hash) * 2}); |