diff options
| author | Dan Engelbrecht <[email protected]> | 2024-04-26 10:12:03 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-04-26 10:12:03 +0200 |
| commit | ef1b4234c851131cf5a6d249684c14c5c27d2878 (patch) | |
| tree | afd972c077b2585c2dc336b79eb1d31d02372295 /src/zencore/blake3.cpp | |
| parent | fix oplog import during gcv2 (#62) (diff) | |
| download | zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.tar.xz zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.zip | |
use direct file access for large file hash (#63)
- Improvement: Refactor `IoHash::HashBuffer` and `BLAKE3::HashBuffer` to not use memory-mapped files. Direct file access performs better and saves ~10% of oplog export time on CI.
Diffstat (limited to 'src/zencore/blake3.cpp')
| -rw-r--r-- | src/zencore/blake3.cpp | 52 |
1 file changed, 40 insertions, 12 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp index e4edff227..4a77aa49a 100644 --- a/src/zencore/blake3.cpp +++ b/src/zencore/blake3.cpp @@ -3,6 +3,7 @@ #include <zencore/blake3.h> #include <zencore/compositebuffer.h> +#include <zencore/filesystem.h> #include <zencore/string.h> #include <zencore/testing.h> #include <zencore/zencore.h> @@ -45,19 +46,17 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - size_t SegmentSize = Segment.GetSize(); - static const size_t BufferingSize = 512 * 1024; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference()) + size_t SegmentSize = Segment.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) { - const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); - size_t Offset = 0; - while (Offset < SegmentSize) - { - size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize); - IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); - blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize); - Offset += ChunkSize; - } + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { blake3_hasher_update(&Hasher, Data, Size); }); } else { @@ -71,6 +70,35 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer) } BLAKE3 +BLAKE3::HashBuffer(const IoBuffer& Buffer) +{ + BLAKE3 Hash; + + blake3_hasher Hasher; + blake3_hasher_init(&Hasher); + + size_t BufferSize = Buffer.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + IoBufferFileReference FileRef; + if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { 
blake3_hasher_update(&Hasher, Data, Size); }); + } + else + { + blake3_hasher_update(&Hasher, Buffer.GetData(), BufferSize); + } + + blake3_hasher_finalize(&Hasher, Hash.Hash, sizeof Hash.Hash); + + return Hash; +} + +BLAKE3 BLAKE3::FromHexString(const char* string) { BLAKE3 b3; |