diff options
| author | Dan Engelbrecht <[email protected]> | 2024-02-26 19:08:27 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-02-26 19:08:27 +0100 |
| commit | 8672d2235e73545abde15f075934f68495adeaf3 (patch) | |
| tree | 421443b0eb76910c612bb219354a9306081cca10 /src/zencore | |
| parent | adding context to http.sys error message (diff) | |
| download | zen-8672d2235e73545abde15f075934f68495adeaf3.tar.xz zen-8672d2235e73545abde15f075934f68495adeaf3.zip | |
hashing fixes (#657)
* move structuredcachestore tests to zenstore-test
* Don't materialize an entire file when hashing if it is a large file
* rewrite CompositeBuffer::Mid to never materialize buffers
Diffstat (limited to 'src/zencore')
| -rw-r--r-- | src/zencore/blake3.cpp | 18 | ||||
| -rw-r--r-- | src/zencore/compositebuffer.cpp | 32 | ||||
| -rw-r--r-- | src/zencore/include/zencore/sharedbuffer.h | 12 | ||||
| -rw-r--r-- | src/zencore/iobuffer.cpp | 21 | ||||
| -rw-r--r-- | src/zencore/iohash.cpp | 18 |
5 files changed, 94 insertions, 7 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp index 89826ae5d..bdbc8fb3e 100644 --- a/src/zencore/blake3.cpp +++ b/src/zencore/blake3.cpp @@ -45,7 +45,23 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - blake3_hasher_update(&Hasher, Segment.GetData(), Segment.GetSize()); + size_t SegmentSize = Segment.GetSize(); + if (SegmentSize >= (65536 + 32768) && Segment.IsFileReference()) + { + const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + size_t Offset = 0; + while (Offset < SegmentSize) + { + size_t ChunkSize = Min<size_t>(SegmentSize - Offset, 65536u); + IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); + blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize); + Offset += ChunkSize; + } + } + else + { + blake3_hasher_update(&Hasher, Segment.GetData(), SegmentSize); + } } blake3_hasher_finalize(&Hasher, Hash.Hash, sizeof Hash.Hash); diff --git a/src/zencore/compositebuffer.cpp b/src/zencore/compositebuffer.cpp index 583ef19c6..d2b6d97f9 100644 --- a/src/zencore/compositebuffer.cpp +++ b/src/zencore/compositebuffer.cpp @@ -93,10 +93,36 @@ CompositeBuffer::Mid(uint64_t Offset, uint64_t Size) const const uint64_t BufferSize = GetSize(); Offset = Min(Offset, BufferSize); Size = Min(Size, BufferSize - Offset); + CompositeBuffer Buffer; - IterateRange(Offset, Size, [&Buffer](MemoryView View, const SharedBuffer& ViewOuter) { - Buffer.m_Segments.push_back(SharedBuffer::MakeView(View, ViewOuter)); - }); + { + for (const SharedBuffer& Segment : m_Segments) + { + if (const uint64_t SegmentSize = Segment.GetSize(); Offset <= SegmentSize) + { + size_t PartSize = Min(Size, SegmentSize - Offset); + if (PartSize == SegmentSize) + { + Buffer.m_Segments.push_back(Segment); + } + else if (PartSize > 0 || Size == 0) + { + // We need to add the segment even if PartSize is zero if we are picking up zero bytes. 
+ Buffer.m_Segments.push_back(SharedBuffer(IoBuffer(Segment.AsIoBuffer(), Offset, PartSize))); + } + Offset = 0; + Size -= PartSize; + if (Size == 0) + { + break; + } + } + else + { + Offset -= SegmentSize; + } + } + } return Buffer; } diff --git a/src/zencore/include/zencore/sharedbuffer.h b/src/zencore/include/zencore/sharedbuffer.h index 618bd2937..e31da27ec 100644 --- a/src/zencore/include/zencore/sharedbuffer.h +++ b/src/zencore/include/zencore/sharedbuffer.h @@ -114,6 +114,18 @@ public: [[nodiscard]] bool IsOwned() const { return !m_Buffer || m_Buffer->IsOwned(); } [[nodiscard]] inline bool IsNull() const { return !m_Buffer; } inline void Reset() { m_Buffer = nullptr; } + inline bool IsFileReference() const + { + if (const IoBufferExtendedCore* Core = m_Buffer->ExtendedCore()) + { + IoBufferFileReference _; + return Core->GetFileReference(_); + } + else + { + return false; + } + } [[nodiscard]] MemoryView GetView() const { diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp index 54f2aaee8..c8bc4a629 100644 --- a/src/zencore/iobuffer.cpp +++ b/src/zencore/iobuffer.cpp @@ -704,8 +704,25 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName) IoHash HashBuffer(IoBuffer& Buffer) { - // TODO: handle disk buffers with special path - return IoHash::HashBuffer(Buffer.Data(), Buffer.Size()); + size_t BufferSize = Buffer.Size(); + if (BufferSize >= (65536 + 32768)) + { + IoBufferFileReference _; + if (Buffer.GetFileReference(/* out */ _)) + { + size_t Offset = 0; + IoHashStream HashStream; + while (Offset < BufferSize) + { + size_t ChunkSize = Min<size_t>(BufferSize - Offset, 65536u); + IoBuffer SubRange(Buffer, Offset, ChunkSize); + HashStream.Append(SubRange.GetData(), SubRange.GetSize()); + Offset += ChunkSize; + } + return HashStream.GetHash(); + } + } + return IoHash::HashBuffer(Buffer.Data(), BufferSize); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zencore/iohash.cpp 
b/src/zencore/iohash.cpp index 77076c133..cedee913a 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -31,7 +31,23 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - Hasher.Append(Segment.GetData(), Segment.GetSize()); + size_t SegmentSize = Segment.GetSize(); + if (SegmentSize >= (65536 + 32768) && Segment.IsFileReference()) + { + const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + size_t Offset = 0; + while (Offset < SegmentSize) + { + size_t ChunkSize = Min<size_t>(SegmentSize - Offset, 65536u); + IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); + Hasher.Append(SubRange.GetData(), ChunkSize); + Offset += ChunkSize; + } + } + else + { + Hasher.Append(Segment.GetData(), SegmentSize); + } } return Hasher.GetHash(); |