diff options
| author | Dan Engelbrecht <[email protected]> | 2024-04-03 09:58:32 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-04-03 09:58:32 +0200 |
| commit | 246c9bc79e01e84b8f689a230f3eed062fd428f1 (patch) | |
| tree | 7c137a9b80700e0b6abb6892612387935417eff7 | |
| parent | 5.4.3 (diff) | |
| download | zen-246c9bc79e01e84b8f689a230f3eed062fd428f1.tar.xz zen-246c9bc79e01e84b8f689a230f3eed062fd428f1.zip | |
compressed header reading opt (#33)
* refactor so we don't have to re-read data from source to get block sizes
| -rw-r--r-- | CHANGELOG.md | 3 | ||||
| -rw-r--r-- | src/zencore/compress.cpp | 138 |
2 files changed, 88 insertions, 53 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index f16b110cb..dce254fe3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,5 @@ ## - Bugfix: Fix sentry using wrong folder path when data path contains non-ascii characters UE-210530 -- Improvement: Faster reading of compressed buffer headers by not materializing entire source buffer - Bugfix: Get raw size for compressed chunks correctly for `/prj/{project}/oplog/{log}/chunkinfos` - Improvement: It is now possible to control which fields to include in `/prj/{project}/oplog/{log}/chunkinfos` request by adding a comma delimited list of field names for `fieldnames` parameter - Default fields are: `id`, `rawhash` and `rawsize` (translates to `?fieldnames=id,rawhash,rawsize`) @@ -10,6 +9,8 @@ - Use `?fieldnames=*` to get all the fields - Improvement: Use multithreading to fetch size/rawsize of entries in `/prj/{project}/oplog/{log}/chunkinfos` and `/prj/{project}/oplog/{log}/files` - Improvement: Add `GetMediumWorkerPool()` in addition to `LargeWorkerPool()` and `SmallWorkerPool()` +- Improvement: Optimize `CompositeBuffer::ViewOrCopyRange` speeding up reading of compressed buffer headers by not materializing entire source buffer +- Improvement: Optimize `CompressedBuffer::GetRange()` with new `CompressedBuffer::ReadHeader()` that does one less read from source data resulting in a 30% perf increase. 
## 5.4.2 - Bugfix: Shared memory for zenserver state may hang around after all zenserver processes exit - make sure we find a valid entry in `zen up` before bailing diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 58be65f13..143317e65 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -863,7 +863,7 @@ GetDecoder(CompressionMethod Method) ////////////////////////////////////////////////////////////////////////// bool -BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) +ReadHeader(const CompositeBuffer& CompressedData, BufferHeader& OutHeader, UniqueBuffer* OutHeaderData) { const uint64_t CompressedDataSize = CompressedData.GetSize(); if (CompressedDataSize < sizeof(BufferHeader)) @@ -871,61 +871,89 @@ BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, return false; } - const size_t StackBufferSize = 256; - uint8_t StackBuffer[StackBufferSize]; - uint64_t ReadSize = Min(CompressedDataSize, StackBufferSize); - BufferHeader* Header = reinterpret_cast<BufferHeader*>(StackBuffer); + const size_t HeaderBufferSize = 1024; + uint8_t HeaderBuffer[HeaderBufferSize]; + uint64_t ReadSize = Min(CompressedDataSize, HeaderBufferSize); + uint64_t FirstSegmentSize = CompressedData.GetSegments()[0].GetSize(); + if (FirstSegmentSize >= sizeof(BufferHeader)) { - CompositeBuffer::Iterator It; - CompressedData.CopyTo(MutableMemoryView(StackBuffer, StackBuffer + StackBufferSize), It); + // Keep first read inside first segment if possible + ReadSize = Min(ReadSize, FirstSegmentSize); } - Header->ByteSwap(); - if (Header->Magic != BufferHeader::ExpectedMagic) + + MutableMemoryView HeaderMemory(HeaderBuffer, &HeaderBuffer[ReadSize]); + CompositeBuffer::Iterator It = CompressedData.GetIterator(0); + CompressedData.CopyTo(HeaderMemory, It); + + OutHeader = *reinterpret_cast<BufferHeader*>(HeaderMemory.GetData()); + OutHeader.ByteSwap(); + if (OutHeader.Magic != 
BufferHeader::ExpectedMagic) { return false; } - - const BaseDecoder* const Decoder = GetDecoder(Header->Method); + if (OutHeader.TotalCompressedSize > CompressedDataSize) + { + return false; + } + const BaseDecoder* const Decoder = GetDecoder(OutHeader.Method); if (!Decoder) { return false; } - - uint32_t Crc32 = Header->Crc32; - OutRawHash = IoHash::FromBLAKE3(Header->RawHash); - OutRawSize = Header->TotalRawSize; - uint64_t HeaderSize = Decoder->GetHeaderSize(*Header); - - if (Header->TotalCompressedSize > CompressedDataSize) + uint64_t FullHeaderSize = Decoder->GetHeaderSize(OutHeader); + if (FullHeaderSize > CompressedDataSize) { return false; } - - Header->ByteSwap(); - - if (HeaderSize > ReadSize) + if (OutHeaderData) { - UniqueBuffer HeaderCopy = UniqueBuffer::Alloc(HeaderSize); - CompositeBuffer::Iterator It; - CompressedData.CopyTo(HeaderCopy.GetMutableView(), It); - const MemoryView HeaderView = HeaderCopy.GetView(); - if (Crc32 != BufferHeader::CalculateCrc32(HeaderView)) + *OutHeaderData = UniqueBuffer::Alloc(FullHeaderSize); + MutableMemoryView RemainingHeaderView = OutHeaderData->GetMutableView().CopyFrom(HeaderMemory.Mid(0, FullHeaderSize)); + if (!RemainingHeaderView.IsEmpty()) + { + CompressedData.CopyTo(RemainingHeaderView, It); + } + if (OutHeader.Crc32 != BufferHeader::CalculateCrc32(OutHeaderData->GetView())) + { + return false; + } + } + else if (FullHeaderSize < ReadSize) + { + if (OutHeader.Crc32 != BufferHeader::CalculateCrc32(HeaderMemory.Mid(0, FullHeaderSize))) { return false; } } else { - MemoryView FullHeaderView(StackBuffer, StackBuffer + HeaderSize); - if (Crc32 != BufferHeader::CalculateCrc32(FullHeaderView)) + UniqueBuffer HeaderData = UniqueBuffer::Alloc(FullHeaderSize); + MutableMemoryView RemainingHeaderView = HeaderData.GetMutableView().CopyFrom(HeaderMemory.Mid(0, FullHeaderSize)); + if (!RemainingHeaderView.IsEmpty()) + { + CompressedData.CopyTo(RemainingHeaderView, It); + } + if (OutHeader.Crc32 != 
BufferHeader::CalculateCrc32(HeaderData.GetView())) { return false; } } - return true; } +bool +BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) +{ + detail::BufferHeader Header; + if (ReadHeader(CompressedData, Header, nullptr)) + { + OutRawHash = IoHash::FromBLAKE3(Header.RawHash); + OutRawSize = Header.TotalRawSize; + return true; + } + return false; +} + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// static bool @@ -1097,7 +1125,11 @@ ValidBufferOrEmpty(BufferType&& CompressedData, IoHash& OutRawHash, uint64_t& Ou } CompositeBuffer -GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize) +GetCompressedRange(const BufferHeader& Header, + MemoryView HeaderRawData, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize) { if (Header.TotalRawSize < RawOffset + RawSize) { @@ -1118,9 +1150,7 @@ GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& Compressed } else { - UniqueBuffer BlockSizeBuffer; - MemoryView BlockSizeView = - CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + MemoryView BlockSizeView = HeaderRawData.Mid(sizeof(Header), Header.BlockCount * sizeof(uint32_t)); std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; @@ -1179,7 +1209,11 @@ GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& Compressed } CompositeBuffer -CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize) +CopyCompressedRange(const BufferHeader& Header, + MemoryView HeaderRawData, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize) { if 
(Header.TotalRawSize < RawOffset + RawSize) { @@ -1204,9 +1238,7 @@ CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& Compresse } else { - UniqueBuffer BlockSizeBuffer; - MemoryView BlockSizeView = - CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + MemoryView BlockSizeView = HeaderRawData.Mid(sizeof(Header), Header.BlockCount * sizeof(uint32_t)); std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; @@ -1410,26 +1442,28 @@ CompressedBuffer::DecodeRawHash() const CompressedBuffer CompressedBuffer::CopyRange(uint64_t RawOffset, uint64_t RawSize) const { - using namespace detail; - const BufferHeader Header = BufferHeader::Read(CompressedData); - const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; - - CompressedBuffer Range; - Range.CompressedData = CopyCompressedRange(Header, CompressedData, RawOffset, TotalRawSize); - + CompressedBuffer Range; + detail::BufferHeader Header; + UniqueBuffer RawHeaderData; + if (ReadHeader(CompressedData, Header, &RawHeaderData)) + { + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + Range.CompressedData = CopyCompressedRange(Header, RawHeaderData.GetView(), CompressedData, RawOffset, TotalRawSize); + } return Range; } CompressedBuffer CompressedBuffer::GetRange(uint64_t RawOffset, uint64_t RawSize) const { - using namespace detail; - const BufferHeader Header = BufferHeader::Read(CompressedData); - const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? 
RawSize : Header.TotalRawSize - RawOffset; - - CompressedBuffer Range; - Range.CompressedData = GetCompressedRange(Header, CompressedData, RawOffset, TotalRawSize); - + CompressedBuffer Range; + detail::BufferHeader Header; + UniqueBuffer RawHeaderData; + if (ReadHeader(CompressedData, Header, &RawHeaderData)) + { + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + Range.CompressedData = GetCompressedRange(Header, RawHeaderData.GetView(), CompressedData, RawOffset, TotalRawSize); + } return Range; } |