aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2024-04-26 10:12:03 +0200
committer: GitHub Enterprise <[email protected]> 2024-04-26 10:12:03 +0200
commit: ef1b4234c851131cf5a6d249684c14c5c27d2878 (patch)
tree: afd972c077b2585c2dc336b79eb1d31d02372295 /src
parent: fix oplog import during gcv2 (#62) (diff)
download: zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.tar.xz
zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.zip
use direct file access for large file hash (#63)
- Improvement: Refactor `IoHash::HashBuffer` and `BLAKE3::HashBuffer` to not use memory mapped files. Performs better and saves ~10% of oplog export time on CI
Diffstat (limited to 'src')
-rw-r--r-- src/zencore/blake3.cpp 52
-rw-r--r-- src/zencore/filesystem.cpp 42
-rw-r--r-- src/zencore/include/zencore/blake3.h 2
-rw-r--r-- src/zencore/include/zencore/filesystem.h 5
-rw-r--r-- src/zencore/include/zencore/iobuffer.h 2
-rw-r--r-- src/zencore/include/zencore/iohash.h 1
-rw-r--r-- src/zencore/include/zencore/sharedbuffer.h 5
-rw-r--r-- src/zencore/iobuffer.cpp 25
-rw-r--r-- src/zencore/iohash.cpp 47
-rw-r--r-- src/zenserver/projectstore/remoteprojectstore.cpp 24
-rw-r--r-- src/zenstore/cache/structuredcachestore.cpp 8
-rw-r--r-- src/zenstore/compactcas.cpp 8
-rw-r--r-- src/zenstore/filecas.cpp 2
13 files changed, 155 insertions, 68 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp
index e4edff227..4a77aa49a 100644
--- a/src/zencore/blake3.cpp
+++ b/src/zencore/blake3.cpp
@@ -3,6 +3,7 @@
#include <zencore/blake3.h>
#include <zencore/compositebuffer.h>
+#include <zencore/filesystem.h>
#include <zencore/string.h>
#include <zencore/testing.h>
#include <zencore/zencore.h>
@@ -45,19 +46,17 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer)
for (const SharedBuffer& Segment : Buffer.GetSegments())
{
- size_t SegmentSize = Segment.GetSize();
- static const size_t BufferingSize = 512 * 1024;
- if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference())
+ size_t SegmentSize = Segment.GetSize();
+ static const uint64_t BufferingSize = 256u * 1024u;
+
+ IoBufferFileReference FileRef;
+ if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef))
{
- const IoBuffer SegmentBuffer = Segment.AsIoBuffer();
- size_t Offset = 0;
- while (Offset < SegmentSize)
- {
- size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize);
- IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize);
- blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize);
- Offset += ChunkSize;
- }
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ BufferingSize,
+ [&Hasher](const void* Data, size_t Size) { blake3_hasher_update(&Hasher, Data, Size); });
}
else
{
@@ -71,6 +70,35 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer)
}
BLAKE3
+BLAKE3::HashBuffer(const IoBuffer& Buffer)
+{
+    // Buffers of at least one and a half chunks that expose a file reference are
+    // read from the file handle through ScanFile in ChunkBytes-sized pieces; every
+    // other buffer is hashed in a single update from Buffer.GetData().
+    constexpr uint64_t ChunkBytes    = 256u * 1024u;
+    constexpr uint64_t ScanThreshold = ChunkBytes + ChunkBytes / 2;
+
+    blake3_hasher Hasher;
+    blake3_hasher_init(&Hasher);
+
+    // Single sink shared by both paths so the hasher is fed in exactly one place.
+    auto Feed = [&Hasher](const void* Data, size_t Size) { blake3_hasher_update(&Hasher, Data, Size); };
+
+    const size_t          TotalBytes = Buffer.GetSize();
+    IoBufferFileReference FileRef;
+    if (TotalBytes >= ScanThreshold && Buffer.GetFileReference(FileRef))
+    {
+        ScanFile(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, ChunkBytes, Feed);
+    }
+    else
+    {
+        Feed(Buffer.GetData(), TotalBytes);
+    }
+
+    BLAKE3 Result;
+    blake3_hasher_finalize(&Hasher, Result.Hash, sizeof Result.Hash);
+    return Result;
+}
+
+BLAKE3
BLAKE3::FromHexString(const char* string)
{
BLAKE3 b3;
diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp
index ca2b3101f..e41149d64 100644
--- a/src/zencore/filesystem.cpp
+++ b/src/zencore/filesystem.cpp
@@ -1015,6 +1015,46 @@ ReadFile(std::filesystem::path Path)
return Contents;
}
+/// Reads `Size` bytes starting at byte `Offset` from an already-open file and hands
+/// them to `ProcessFunc` in order, in pieces of at most `ChunkSize` bytes.
+///
+/// @param NativeHandle  Open file: passed to ::ReadFile as-is on Windows, otherwise
+///                      cast to an int file descriptor for pread. Must not be null.
+/// @param Offset        Byte offset in the file where scanning starts.
+/// @param Size          Total number of bytes to scan.
+/// @param ChunkSize     Upper bound on the bytes passed per `ProcessFunc` call; must
+///                      be non-zero unless `Size` is zero.
+/// @param ProcessFunc   Invoked once per successful read with the data and the number
+///                      of bytes actually read (a short read yields a smaller piece).
+/// @throws std::system_error if a read fails, or if the file ends before `Size`
+///         bytes could be read.
+ZENCORE_API void
+ScanFile(void* NativeHandle,
+         uint64_t Offset,
+         uint64_t Size,
+         uint64_t ChunkSize,
+         std::function<void(const void* Data, size_t Size)>&& ProcessFunc)
+{
+    ZEN_ASSERT(NativeHandle != nullptr);
+    // A zero chunk size can never make progress on a non-empty range.
+    ZEN_ASSERT(ChunkSize != 0 || Size == 0);
+
+    const uint64_t       BufferSize = Min(ChunkSize, Size);
+    std::vector<uint8_t> ReadBuffer(BufferSize);
+    uint64_t             ReadOffset = 0;
+    while (ReadOffset < Size)
+    {
+        const uint64_t NumberOfBytesToRead = Min(Size - ReadOffset, BufferSize);
+        const uint64_t FileOffset          = Offset + ReadOffset;
+
+#if ZEN_PLATFORM_WINDOWS
+        // Positioned read: the offset travels in the OVERLAPPED structure, so the
+        // handle's own file pointer is not used to choose where to read.
+        OVERLAPPED Ovl{};
+
+        Ovl.Offset     = DWORD(FileOffset & 0xffff'ffffu);
+        Ovl.OffsetHigh = DWORD(FileOffset >> 32);
+
+        DWORD BytesRead = 0;
+        BOOL  Success   = ::ReadFile(NativeHandle, ReadBuffer.data(), DWORD(NumberOfBytesToRead), &BytesRead, &Ovl);
+        if (!Success)
+        {
+            throw std::system_error(std::error_code(::GetLastError(), std::system_category()), "file scan failed");
+        }
+#else
+        // pread returns ssize_t; keep the full width rather than narrowing to int.
+        const ssize_t BytesRead = pread(int(intptr_t(NativeHandle)), ReadBuffer.data(), size_t(NumberOfBytesToRead), off_t(FileOffset));
+        if (BytesRead < 0)
+        {
+            throw std::system_error(std::error_code(errno, std::system_category()), "file scan failed");
+        }
+#endif
+        // A successful read of zero bytes means the file ended before the requested
+        // range did. Without this check ReadOffset would never advance and the loop
+        // would spin forever calling ProcessFunc with empty data.
+        if (BytesRead == 0)
+        {
+            throw std::system_error(std::make_error_code(std::errc::io_error), "file scan failed: unexpected end of file");
+        }
+
+        ProcessFunc(ReadBuffer.data(), size_t(BytesRead));
+        ReadOffset += uint64_t(BytesRead);
+    }
+}
+
bool
ScanFile(std::filesystem::path Path, const uint64_t ChunkSize, std::function<void(const void* Data, size_t Size)>&& ProcessFunc)
{
@@ -1050,7 +1090,7 @@ ScanFile(std::filesystem::path Path, const uint64_t ChunkSize, std::function<voi
ProcessFunc(ReadBuffer.data(), dwBytesRead);
}
#else
- int Fd = open(Path.c_str(), O_RDONLY | O_CLOEXEC);
+ int Fd = open(Path.c_str(), O_RDONLY | O_CLOEXEC);
if (Fd < 0)
{
return false;
diff --git a/src/zencore/include/zencore/blake3.h b/src/zencore/include/zencore/blake3.h
index b31b710a7..86d050e1a 100644
--- a/src/zencore/include/zencore/blake3.h
+++ b/src/zencore/include/zencore/blake3.h
@@ -11,6 +11,7 @@
namespace zen {
class CompositeBuffer;
+class IoBuffer;
class StringBuilderBase;
/**
@@ -23,6 +24,7 @@ struct BLAKE3
inline auto operator<=>(const BLAKE3& Rhs) const = default;
static BLAKE3 HashBuffer(const CompositeBuffer& Buffer);
+ static BLAKE3 HashBuffer(const IoBuffer& Buffer);
static BLAKE3 HashMemory(const void* Data, size_t ByteCount);
static BLAKE3 FromHexString(const char* String);
const char* ToHexString(char* OutString /* 40 characters + NUL terminator */) const;
diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h
index 233941479..0aab6a4ae 100644
--- a/src/zencore/include/zencore/filesystem.h
+++ b/src/zencore/include/zencore/filesystem.h
@@ -78,6 +78,11 @@ ZENCORE_API void WriteFile(std::filesystem::path Path, const IoBuffer* const* Da
ZENCORE_API void WriteFile(std::filesystem::path Path, IoBuffer Data);
ZENCORE_API void WriteFile(std::filesystem::path Path, CompositeBuffer Data);
ZENCORE_API bool MoveToFile(std::filesystem::path Path, IoBuffer Data);
+/// Scans a byte range of an already-open file: reads `Size` bytes starting at
+/// `Offset` from `NativeHandle` in pieces of at most `ChunkSize` bytes and calls
+/// `ProcessFunc` with each piece, in file order.
+/// `NativeHandle` must not be null. Throws std::system_error if a read fails.
+ZENCORE_API void ScanFile(void* NativeHandle,
+ uint64_t Offset,
+ uint64_t Size,
+ uint64_t ChunkSize,
+ std::function<void(const void* Data, size_t Size)>&& ProcessFunc);
struct CopyFileOptions
{
diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h
index dcf1b4db8..759a9b25e 100644
--- a/src/zencore/include/zencore/iobuffer.h
+++ b/src/zencore/include/zencore/iobuffer.h
@@ -442,8 +442,6 @@ public:
inline static IoBuffer MakeCloneFromMemory(MemoryView Memory) { return MakeCloneFromMemory(Memory.GetData(), Memory.GetSize()); }
};
-IoHash HashBuffer(IoBuffer& Buffer);
-
void iobuffer_forcelink();
} // namespace zen
diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h
index 79ed8ea1c..c70e98e47 100644
--- a/src/zencore/include/zencore/iohash.h
+++ b/src/zencore/include/zencore/iohash.h
@@ -48,6 +48,7 @@ struct IoHash
static IoHash HashBuffer(const void* data, size_t byteCount);
static IoHash HashBuffer(MemoryView Data) { return HashBuffer(Data.GetData(), Data.GetSize()); }
static IoHash HashBuffer(const CompositeBuffer& Buffer);
+ static IoHash HashBuffer(const IoBuffer& Buffer);
static IoHash FromHexString(const char* string);
static IoHash FromHexString(const std::string_view string);
const char* ToHexString(char* outString /* 40 characters + NUL terminator */) const;
diff --git a/src/zencore/include/zencore/sharedbuffer.h b/src/zencore/include/zencore/sharedbuffer.h
index e31da27ec..9d77a2814 100644
--- a/src/zencore/include/zencore/sharedbuffer.h
+++ b/src/zencore/include/zencore/sharedbuffer.h
@@ -114,12 +114,11 @@ public:
[[nodiscard]] bool IsOwned() const { return !m_Buffer || m_Buffer->IsOwned(); }
[[nodiscard]] inline bool IsNull() const { return !m_Buffer; }
inline void Reset() { m_Buffer = nullptr; }
- inline bool IsFileReference() const
+ inline bool GetFileReference(IoBufferFileReference& OutRef) const
{
if (const IoBufferExtendedCore* Core = m_Buffer->ExtendedCore())
{
- IoBufferFileReference _;
- return Core->GetFileReference(_);
+ return Core->GetFileReference(OutRef);
}
else
{
diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp
index 96a893082..e1e8750a1 100644
--- a/src/zencore/iobuffer.cpp
+++ b/src/zencore/iobuffer.cpp
@@ -701,31 +701,6 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName)
return IoBuffer(IoBuffer::File, Handle, 0, FileSize, /*IsWholeFile*/ true);
}
-IoHash
-HashBuffer(IoBuffer& Buffer)
-{
- size_t BufferSize = Buffer.Size();
- static const size_t BufferingSize = 512 * 1024;
- if (BufferSize >= (BufferingSize + BufferingSize / 2))
- {
- IoBufferFileReference _;
- if (Buffer.GetFileReference(/* out */ _))
- {
- size_t Offset = 0;
- IoHashStream HashStream;
- while (Offset < BufferSize)
- {
- size_t ChunkSize = Min<size_t>(BufferSize - Offset, BufferingSize);
- IoBuffer SubRange(Buffer, Offset, ChunkSize);
- HashStream.Append(SubRange.GetData(), SubRange.GetSize());
- Offset += ChunkSize;
- }
- return HashStream.GetHash();
- }
- }
- return IoHash::HashBuffer(Buffer.Data(), BufferSize);
-}
-
//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS
diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp
index a6bf25f6c..1bf2c033d 100644
--- a/src/zencore/iohash.cpp
+++ b/src/zencore/iohash.cpp
@@ -4,6 +4,7 @@
#include <zencore/blake3.h>
#include <zencore/compositebuffer.h>
+#include <zencore/filesystem.h>
#include <zencore/string.h>
#include <zencore/testing.h>
@@ -31,19 +32,17 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer)
for (const SharedBuffer& Segment : Buffer.GetSegments())
{
- size_t SegmentSize = Segment.GetSize();
- static const size_t BufferingSize = 512 * 1024;
- if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference())
+ size_t SegmentSize = Segment.GetSize();
+ static const uint64_t BufferingSize = 256u * 1024u;
+
+ IoBufferFileReference FileRef;
+ if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef))
{
- const IoBuffer SegmentBuffer = Segment.AsIoBuffer();
- size_t Offset = 0;
- while (Offset < SegmentSize)
- {
- size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize);
- IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize);
- Hasher.Append(SubRange.GetData(), ChunkSize);
- Offset += ChunkSize;
- }
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ BufferingSize,
+ [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); });
}
else
{
@@ -55,6 +54,30 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer)
}
IoHash
+IoHash::HashBuffer(const IoBuffer& Buffer)
+{
+    // Buffers of at least one and a half chunks that expose a file reference are
+    // read from the file handle through ScanFile in ChunkBytes-sized pieces; every
+    // other buffer is appended in one go from Buffer.GetData().
+    constexpr uint64_t ChunkBytes    = 256u * 1024u;
+    constexpr uint64_t ScanThreshold = ChunkBytes + ChunkBytes / 2;
+
+    IoHashStream Stream;
+
+    // Single sink shared by both paths so the stream is fed in exactly one place.
+    auto Feed = [&Stream](const void* Data, size_t Size) { Stream.Append(Data, Size); };
+
+    const size_t          TotalBytes = Buffer.GetSize();
+    IoBufferFileReference FileRef;
+    if (TotalBytes >= ScanThreshold && Buffer.GetFileReference(FileRef))
+    {
+        ScanFile(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, ChunkBytes, Feed);
+    }
+    else
+    {
+        Feed(Buffer.GetData(), TotalBytes);
+    }
+
+    return Stream.GetHash();
+}
+
+IoHash
IoHash::FromHexString(const char* string)
{
return FromHexString({string, sizeof(IoHash::Hash) * 2});
diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp
index 0aa8df362..42c93f7cd 100644
--- a/src/zenserver/projectstore/remoteprojectstore.cpp
+++ b/src/zenserver/projectstore/remoteprojectstore.cpp
@@ -296,12 +296,28 @@ WriteToTempFile(CompressedBuffer&& CompressedBuffer, std::filesystem::path Path)
uint64_t Offset = 0;
{
CompositeBuffer Compressed = std::move(CompressedBuffer).GetCompressed();
- BasicFileWriter BlockWriter(BlockFile, 64u * 1024u);
for (const SharedBuffer& Segment : Compressed.GetSegments())
{
- size_t SegmentSize = Segment.GetSize();
- BlockWriter.Write(Segment.GetData(), SegmentSize, Offset);
- Offset += SegmentSize;
+ size_t SegmentSize = Segment.GetSize();
+ static const uint64_t BufferingSize = 256u * 1024u;
+
+ IoBufferFileReference FileRef;
+ if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef))
+ {
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ BufferingSize,
+ [&BlockFile, &Offset](const void* Data, size_t Size) {
+ BlockFile.Write(Data, Size, Offset);
+ Offset += Size;
+ });
+ }
+ else
+ {
+ BlockFile.Write(Segment.GetData(), SegmentSize, Offset);
+ Offset += SegmentSize;
+ }
}
}
void* FileHandle = BlockFile.Detach();
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index c4ee6f4d3..e7524271e 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -1233,7 +1233,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // *
while (true)
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- IoHash Hash = HashBuffer(Chunk);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
if (Chunks.contains(Hash))
{
continue;
@@ -1244,7 +1244,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // *
while (true)
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- IoHash Hash = HashBuffer(Chunk);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
if (Chunks.contains(Hash))
{
continue;
@@ -1347,12 +1347,12 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // *
{
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- IoHash Hash = HashBuffer(Chunk);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
NewChunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk};
}
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- IoHash Hash = HashBuffer(Chunk);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
NewChunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk};
}
}
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 7b11200a5..d6e5efdaa 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -1310,7 +1310,7 @@ TEST_CASE("compactcas.compact.gc")
CbObject Obj = Cbo.Save();
IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer();
- const IoHash Hash = HashBuffer(ObjBuffer);
+ const IoHash Hash = IoHash::HashBuffer(ObjBuffer);
Cas.InsertChunk(ObjBuffer, Hash);
@@ -1370,7 +1370,7 @@ TEST_CASE("compactcas.compact.totalsize")
for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- const IoHash Hash = HashBuffer(Chunk);
+ const IoHash Hash = IoHash::HashBuffer(Chunk);
CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash);
ZEN_ASSERT(InsertResult.New);
}
@@ -1823,7 +1823,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType)
while (true)
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- IoHash Hash = HashBuffer(Chunk);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
if (Chunks.contains(Hash))
{
continue;
@@ -1892,7 +1892,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType)
for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
{
IoBuffer Chunk = CreateRandomBlob(kChunkSize);
- IoHash Hash = HashBuffer(Chunk);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
NewChunks[Hash] = Chunk;
GcChunkHashes.insert(Hash);
}
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 88d64eb45..df039d4b6 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1799,7 +1799,7 @@ TEST_CASE("cas.file.gc")
CbObject Obj = Cbo.Save();
IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer();
- IoHash Hash = HashBuffer(ObjBuffer);
+ IoHash Hash = IoHash::HashBuffer(ObjBuffer);
FileCas.InsertChunk(ObjBuffer, Hash);