about | summary | refs | log | tree | commit | diff
path: root/src/zencore/blake3.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2024-04-26 10:12:03 +0200
committer: GitHub Enterprise <[email protected]> 2024-04-26 10:12:03 +0200
commit: ef1b4234c851131cf5a6d249684c14c5c27d2878 (patch)
tree: afd972c077b2585c2dc336b79eb1d31d02372295 /src/zencore/blake3.cpp
parent: fix oplog import during gcv2 (#62) (diff)
download: zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.tar.xz
          zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.zip
use direct file access for large file hash (#63)
- Improvement: Refactor `IoHash::HashBuffer` and `BLAKE3::HashBuffer` to not use memory mapped files. Performs better and saves ~10% of oplog export time on CI
Diffstat (limited to 'src/zencore/blake3.cpp')
-rw-r--r-- src/zencore/blake3.cpp 52
1 files changed, 40 insertions, 12 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp
index e4edff227..4a77aa49a 100644
--- a/src/zencore/blake3.cpp
+++ b/src/zencore/blake3.cpp
@@ -3,6 +3,7 @@
#include <zencore/blake3.h>
#include <zencore/compositebuffer.h>
+#include <zencore/filesystem.h>
#include <zencore/string.h>
#include <zencore/testing.h>
#include <zencore/zencore.h>
@@ -45,19 +46,17 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer)
for (const SharedBuffer& Segment : Buffer.GetSegments())
{
- size_t SegmentSize = Segment.GetSize();
- static const size_t BufferingSize = 512 * 1024;
- if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference())
+ size_t SegmentSize = Segment.GetSize();
+ static const uint64_t BufferingSize = 256u * 1024u;
+
+ IoBufferFileReference FileRef;
+ if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef))
{
- const IoBuffer SegmentBuffer = Segment.AsIoBuffer();
- size_t Offset = 0;
- while (Offset < SegmentSize)
- {
- size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize);
- IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize);
- blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize);
- Offset += ChunkSize;
- }
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ BufferingSize,
+ [&Hasher](const void* Data, size_t Size) { blake3_hasher_update(&Hasher, Data, Size); });
}
else
{
@@ -71,6 +70,35 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer)
}
BLAKE3
+BLAKE3::HashBuffer(const IoBuffer& Buffer)
+{
+ BLAKE3 Hash;
+
+ blake3_hasher Hasher;
+ blake3_hasher_init(&Hasher);
+
+ size_t BufferSize = Buffer.GetSize();
+ static const uint64_t BufferingSize = 256u * 1024u;
+ IoBufferFileReference FileRef;
+ if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef))
+ {
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ BufferingSize,
+ [&Hasher](const void* Data, size_t Size) { blake3_hasher_update(&Hasher, Data, Size); });
+ }
+ else
+ {
+ blake3_hasher_update(&Hasher, Buffer.GetData(), BufferSize);
+ }
+
+ blake3_hasher_finalize(&Hasher, Hash.Hash, sizeof Hash.Hash);
+
+ return Hash;
+}
+
+BLAKE3
BLAKE3::FromHexString(const char* string)
{
BLAKE3 b3;