diff options
| author | Dan Engelbrecht <[email protected]> | 2024-04-26 10:12:03 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-04-26 10:12:03 +0200 |
| commit | ef1b4234c851131cf5a6d249684c14c5c27d2878 (patch) | |
| tree | afd972c077b2585c2dc336b79eb1d31d02372295 /src | |
| parent | fix oplog import during gcv2 (#62) (diff) | |
| download | zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.tar.xz zen-ef1b4234c851131cf5a6d249684c14c5c27d2878.zip | |
use direct file access for large file hash (#63)
- Improvement: Refactor `IoHash::HashBuffer` and `BLAKE3::HashBuffer` to read large file-backed buffers with direct file access instead of memory-mapped files. This performs better and saves ~10% of oplog export time on CI.
Diffstat (limited to 'src')
| -rw-r--r-- | src/zencore/blake3.cpp | 52 | ||||
| -rw-r--r-- | src/zencore/filesystem.cpp | 42 | ||||
| -rw-r--r-- | src/zencore/include/zencore/blake3.h | 2 | ||||
| -rw-r--r-- | src/zencore/include/zencore/filesystem.h | 5 | ||||
| -rw-r--r-- | src/zencore/include/zencore/iobuffer.h | 2 | ||||
| -rw-r--r-- | src/zencore/include/zencore/iohash.h | 1 | ||||
| -rw-r--r-- | src/zencore/include/zencore/sharedbuffer.h | 5 | ||||
| -rw-r--r-- | src/zencore/iobuffer.cpp | 25 | ||||
| -rw-r--r-- | src/zencore/iohash.cpp | 47 | ||||
| -rw-r--r-- | src/zenserver/projectstore/remoteprojectstore.cpp | 24 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 8 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 8 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 2 |
13 files changed, 155 insertions, 68 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp index e4edff227..4a77aa49a 100644 --- a/src/zencore/blake3.cpp +++ b/src/zencore/blake3.cpp @@ -3,6 +3,7 @@ #include <zencore/blake3.h> #include <zencore/compositebuffer.h> +#include <zencore/filesystem.h> #include <zencore/string.h> #include <zencore/testing.h> #include <zencore/zencore.h> @@ -45,19 +46,17 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - size_t SegmentSize = Segment.GetSize(); - static const size_t BufferingSize = 512 * 1024; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference()) + size_t SegmentSize = Segment.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) { - const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); - size_t Offset = 0; - while (Offset < SegmentSize) - { - size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize); - IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); - blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize); - Offset += ChunkSize; - } + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { blake3_hasher_update(&Hasher, Data, Size); }); } else { @@ -71,6 +70,35 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer) } BLAKE3 +BLAKE3::HashBuffer(const IoBuffer& Buffer) +{ + BLAKE3 Hash; + + blake3_hasher Hasher; + blake3_hasher_init(&Hasher); + + size_t BufferSize = Buffer.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + IoBufferFileReference FileRef; + if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { 
blake3_hasher_update(&Hasher, Data, Size); }); + } + else + { + blake3_hasher_update(&Hasher, Buffer.GetData(), BufferSize); + } + + blake3_hasher_finalize(&Hasher, Hash.Hash, sizeof Hash.Hash); + + return Hash; +} + +BLAKE3 BLAKE3::FromHexString(const char* string) { BLAKE3 b3; diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index ca2b3101f..e41149d64 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -1015,6 +1015,46 @@ ReadFile(std::filesystem::path Path) return Contents; } +ZENCORE_API void +ScanFile(void* NativeHandle, + uint64_t Offset, + uint64_t Size, + uint64_t ChunkSize, + std::function<void(const void* Data, size_t Size)>&& ProcessFunc) +{ + ZEN_ASSERT(NativeHandle != nullptr); + uint64_t BufferSize = Min(ChunkSize, Size); + std::vector<uint8_t> ReadBuffer(BufferSize); + uint64_t ReadOffset = 0; + while (ReadOffset < Size) + { + const uint64_t NumberOfBytesToRead = Min(Size - ReadOffset, BufferSize); + uint64_t FileOffset = Offset + ReadOffset; + +#if ZEN_PLATFORM_WINDOWS + OVERLAPPED Ovl{}; + + Ovl.Offset = DWORD(FileOffset & 0xffff'ffffu); + Ovl.OffsetHigh = DWORD(FileOffset >> 32); + + DWORD BytesRead = 0; + BOOL Success = ::ReadFile(NativeHandle, ReadBuffer.data(), DWORD(NumberOfBytesToRead), &BytesRead, &Ovl); + if (!Success) + { + throw std::system_error(std::error_code(::GetLastError(), std::system_category()), "file scan failed"); + } +#else + int BytesRead = pread(int(intptr_t(NativeHandle)), ReadBuffer.data(), size_t(NumberOfBytesToRead), off_t(FileOffset)); + if (BytesRead < 0) + { + throw std::system_error(std::error_code(errno, std::system_category()), "file scan failed"); + } +#endif + ProcessFunc(ReadBuffer.data(), (size_t)BytesRead); + ReadOffset += (uint64_t)BytesRead; + } +} + bool ScanFile(std::filesystem::path Path, const uint64_t ChunkSize, std::function<void(const void* Data, size_t Size)>&& ProcessFunc) { @@ -1050,7 +1090,7 @@ ScanFile(std::filesystem::path Path, const uint64_t 
ChunkSize, std::function<voi ProcessFunc(ReadBuffer.data(), dwBytesRead); } #else - int Fd = open(Path.c_str(), O_RDONLY | O_CLOEXEC); + int Fd = open(Path.c_str(), O_RDONLY | O_CLOEXEC); if (Fd < 0) { return false; diff --git a/src/zencore/include/zencore/blake3.h b/src/zencore/include/zencore/blake3.h index b31b710a7..86d050e1a 100644 --- a/src/zencore/include/zencore/blake3.h +++ b/src/zencore/include/zencore/blake3.h @@ -11,6 +11,7 @@ namespace zen { class CompositeBuffer; +class IoBuffer; class StringBuilderBase; /** @@ -23,6 +24,7 @@ struct BLAKE3 inline auto operator<=>(const BLAKE3& Rhs) const = default; static BLAKE3 HashBuffer(const CompositeBuffer& Buffer); + static BLAKE3 HashBuffer(const IoBuffer& Buffer); static BLAKE3 HashMemory(const void* Data, size_t ByteCount); static BLAKE3 FromHexString(const char* String); const char* ToHexString(char* OutString /* 40 characters + NUL terminator */) const; diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index 233941479..0aab6a4ae 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -78,6 +78,11 @@ ZENCORE_API void WriteFile(std::filesystem::path Path, const IoBuffer* const* Da ZENCORE_API void WriteFile(std::filesystem::path Path, IoBuffer Data); ZENCORE_API void WriteFile(std::filesystem::path Path, CompositeBuffer Data); ZENCORE_API bool MoveToFile(std::filesystem::path Path, IoBuffer Data); +ZENCORE_API void ScanFile(void* NativeHandle, + uint64_t Offset, + uint64_t Size, + uint64_t ChunkSize, + std::function<void(const void* Data, size_t Size)>&& ProcessFunc); struct CopyFileOptions { diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h index dcf1b4db8..759a9b25e 100644 --- a/src/zencore/include/zencore/iobuffer.h +++ b/src/zencore/include/zencore/iobuffer.h @@ -442,8 +442,6 @@ public: inline static IoBuffer MakeCloneFromMemory(MemoryView Memory) { return 
MakeCloneFromMemory(Memory.GetData(), Memory.GetSize()); } }; -IoHash HashBuffer(IoBuffer& Buffer); - void iobuffer_forcelink(); } // namespace zen diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h index 79ed8ea1c..c70e98e47 100644 --- a/src/zencore/include/zencore/iohash.h +++ b/src/zencore/include/zencore/iohash.h @@ -48,6 +48,7 @@ struct IoHash static IoHash HashBuffer(const void* data, size_t byteCount); static IoHash HashBuffer(MemoryView Data) { return HashBuffer(Data.GetData(), Data.GetSize()); } static IoHash HashBuffer(const CompositeBuffer& Buffer); + static IoHash HashBuffer(const IoBuffer& Buffer); static IoHash FromHexString(const char* string); static IoHash FromHexString(const std::string_view string); const char* ToHexString(char* outString /* 40 characters + NUL terminator */) const; diff --git a/src/zencore/include/zencore/sharedbuffer.h b/src/zencore/include/zencore/sharedbuffer.h index e31da27ec..9d77a2814 100644 --- a/src/zencore/include/zencore/sharedbuffer.h +++ b/src/zencore/include/zencore/sharedbuffer.h @@ -114,12 +114,11 @@ public: [[nodiscard]] bool IsOwned() const { return !m_Buffer || m_Buffer->IsOwned(); } [[nodiscard]] inline bool IsNull() const { return !m_Buffer; } inline void Reset() { m_Buffer = nullptr; } - inline bool IsFileReference() const + inline bool GetFileReference(IoBufferFileReference& OutRef) const { if (const IoBufferExtendedCore* Core = m_Buffer->ExtendedCore()) { - IoBufferFileReference _; - return Core->GetFileReference(_); + return Core->GetFileReference(OutRef); } else { diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp index 96a893082..e1e8750a1 100644 --- a/src/zencore/iobuffer.cpp +++ b/src/zencore/iobuffer.cpp @@ -701,31 +701,6 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName) return IoBuffer(IoBuffer::File, Handle, 0, FileSize, /*IsWholeFile*/ true); } -IoHash -HashBuffer(IoBuffer& Buffer) -{ - size_t BufferSize = 
Buffer.Size(); - static const size_t BufferingSize = 512 * 1024; - if (BufferSize >= (BufferingSize + BufferingSize / 2)) - { - IoBufferFileReference _; - if (Buffer.GetFileReference(/* out */ _)) - { - size_t Offset = 0; - IoHashStream HashStream; - while (Offset < BufferSize) - { - size_t ChunkSize = Min<size_t>(BufferSize - Offset, BufferingSize); - IoBuffer SubRange(Buffer, Offset, ChunkSize); - HashStream.Append(SubRange.GetData(), SubRange.GetSize()); - Offset += ChunkSize; - } - return HashStream.GetHash(); - } - } - return IoHash::HashBuffer(Buffer.Data(), BufferSize); -} - ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp index a6bf25f6c..1bf2c033d 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -4,6 +4,7 @@ #include <zencore/blake3.h> #include <zencore/compositebuffer.h> +#include <zencore/filesystem.h> #include <zencore/string.h> #include <zencore/testing.h> @@ -31,19 +32,17 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - size_t SegmentSize = Segment.GetSize(); - static const size_t BufferingSize = 512 * 1024; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.IsFileReference()) + size_t SegmentSize = Segment.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) { - const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); - size_t Offset = 0; - while (Offset < SegmentSize) - { - size_t ChunkSize = Min<size_t>(SegmentSize - Offset, BufferingSize); - IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); - Hasher.Append(SubRange.GetData(), ChunkSize); - Offset += ChunkSize; - } + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { 
Hasher.Append(Data, Size); }); } else { @@ -55,6 +54,30 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) } IoHash +IoHash::HashBuffer(const IoBuffer& Buffer) +{ + IoHashStream Hasher; + + size_t BufferSize = Buffer.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + IoBufferFileReference FileRef; + if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + } + else + { + Hasher.Append(Buffer.GetData(), BufferSize); + } + + return Hasher.GetHash(); +} + +IoHash IoHash::FromHexString(const char* string) { return FromHexString({string, sizeof(IoHash::Hash) * 2}); diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 0aa8df362..42c93f7cd 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -296,12 +296,28 @@ WriteToTempFile(CompressedBuffer&& CompressedBuffer, std::filesystem::path Path) uint64_t Offset = 0; { CompositeBuffer Compressed = std::move(CompressedBuffer).GetCompressed(); - BasicFileWriter BlockWriter(BlockFile, 64u * 1024u); for (const SharedBuffer& Segment : Compressed.GetSegments()) { - size_t SegmentSize = Segment.GetSize(); - BlockWriter.Write(Segment.GetData(), SegmentSize, Offset); - Offset += SegmentSize; + size_t SegmentSize = Segment.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&BlockFile, &Offset](const void* Data, size_t Size) { + BlockFile.Write(Data, Size, Offset); + Offset += Size; + }); + } + else + { + 
BlockFile.Write(Segment.GetData(), SegmentSize, Offset); + Offset += SegmentSize; + } } } void* FileHandle = BlockFile.Detach(); diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index c4ee6f4d3..e7524271e 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -1233,7 +1233,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * while (true) { IoBuffer Chunk = CreateRandomBlob(kChunkSize); - IoHash Hash = HashBuffer(Chunk); + IoHash Hash = IoHash::HashBuffer(Chunk); if (Chunks.contains(Hash)) { continue; @@ -1244,7 +1244,7 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * while (true) { IoBuffer Chunk = CreateRandomBlob(kChunkSize); - IoHash Hash = HashBuffer(Chunk); + IoHash Hash = IoHash::HashBuffer(Chunk); if (Chunks.contains(Hash)) { continue; @@ -1347,12 +1347,12 @@ TEST_CASE_TEMPLATE("cachestore.threadedinsert", GCV2, FalseType, TrueType) // * { { IoBuffer Chunk = CreateRandomBlob(kChunkSize); - IoHash Hash = HashBuffer(Chunk); + IoHash Hash = IoHash::HashBuffer(Chunk); NewChunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk}; } { IoBuffer Chunk = CreateRandomBlob(kChunkSize); - IoHash Hash = HashBuffer(Chunk); + IoHash Hash = IoHash::HashBuffer(Chunk); NewChunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk}; } } diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 7b11200a5..d6e5efdaa 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -1310,7 +1310,7 @@ TEST_CASE("compactcas.compact.gc") CbObject Obj = Cbo.Save(); IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer(); - const IoHash Hash = HashBuffer(ObjBuffer); + const IoHash Hash = IoHash::HashBuffer(ObjBuffer); Cas.InsertChunk(ObjBuffer, Hash); @@ -1370,7 +1370,7 @@ TEST_CASE("compactcas.compact.totalsize") for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { IoBuffer Chunk = 
CreateRandomBlob(kChunkSize); - const IoHash Hash = HashBuffer(Chunk); + const IoHash Hash = IoHash::HashBuffer(Chunk); CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash); ZEN_ASSERT(InsertResult.New); } @@ -1823,7 +1823,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType) while (true) { IoBuffer Chunk = CreateRandomBlob(kChunkSize); - IoHash Hash = HashBuffer(Chunk); + IoHash Hash = IoHash::HashBuffer(Chunk); if (Chunks.contains(Hash)) { continue; @@ -1892,7 +1892,7 @@ TEST_CASE_TEMPLATE("compactcas.threadedinsert", GCV2, FalseType, TrueType) for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { IoBuffer Chunk = CreateRandomBlob(kChunkSize); - IoHash Hash = HashBuffer(Chunk); + IoHash Hash = IoHash::HashBuffer(Chunk); NewChunks[Hash] = Chunk; GcChunkHashes.insert(Hash); } diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 88d64eb45..df039d4b6 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1799,7 +1799,7 @@ TEST_CASE("cas.file.gc") CbObject Obj = Cbo.Save(); IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer(); - IoHash Hash = HashBuffer(ObjBuffer); + IoHash Hash = IoHash::HashBuffer(ObjBuffer); FileCas.InsertChunk(ObjBuffer, Hash); |