aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-04-03 09:58:32 +0200
committerGitHub Enterprise <[email protected]>2024-04-03 09:58:32 +0200
commit246c9bc79e01e84b8f689a230f3eed062fd428f1 (patch)
tree7c137a9b80700e0b6abb6892612387935417eff7
parent5.4.3 (diff)
downloadzen-246c9bc79e01e84b8f689a230f3eed062fd428f1.tar.xz
zen-246c9bc79e01e84b8f689a230f3eed062fd428f1.zip
compressed header reading opt (#33)
* refactor so we don't have to re-read data from source to get block sizes
-rw-r--r--CHANGELOG.md3
-rw-r--r--src/zencore/compress.cpp138
2 files changed, 88 insertions, 53 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f16b110cb..dce254fe3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,5 @@
##
- Bugfix: Fix sentry using wrong folder path when data path contains non-ascii characters UE-210530
-- Improvement: Faster reading of compressed buffer headers by not materializing entire source buffer
- Bugfix: Get raw size for compressed chunks correctly for `/prj/{project}/oplog/{log}/chunkinfos`
- Improvement: It is now possible to control which fields to include in `/prj/{project}/oplog/{log}/chunkinfos` request by adding a comma-delimited list of field names for the `fieldnames` parameter
- Default fields are: `id`, `rawhash` and `rawsize` (translates to `?fieldnames=id,rawhash,rawsize`)
@@ -10,6 +9,8 @@
- Use `?fieldnames=*` to get all the fields
- Improvement: Use multithreading to fetch size/rawsize of entries in `/prj/{project}/oplog/{log}/chunkinfos` and `/prj/{project}/oplog/{log}/files`
- Improvement: Add `GetMediumWorkerPool()` in addition to `LargeWorkerPool()` and `SmallWorkerPool()`
+- Improvement: Optimize `CompositeBuffer::ViewOrCopyRange`, speeding up reading of compressed buffer headers by not materializing the entire source buffer
+- Improvement: Optimize `CompressedBuffer::GetRange()` with new `CompressedBuffer::ReadHeader()` that does one less read from source data resulting in a 30% perf increase.
## 5.4.2
- Bugfix: Shared memory for zenserver state may hang around after all zenserver processes exit - make sure we find a valid entry in `zen up` before bailing
diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp
index 58be65f13..143317e65 100644
--- a/src/zencore/compress.cpp
+++ b/src/zencore/compress.cpp
@@ -863,7 +863,7 @@ GetDecoder(CompressionMethod Method)
//////////////////////////////////////////////////////////////////////////
bool
-BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
+ReadHeader(const CompositeBuffer& CompressedData, BufferHeader& OutHeader, UniqueBuffer* OutHeaderData)
{
const uint64_t CompressedDataSize = CompressedData.GetSize();
if (CompressedDataSize < sizeof(BufferHeader))
@@ -871,61 +871,89 @@ BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash,
return false;
}
- const size_t StackBufferSize = 256;
- uint8_t StackBuffer[StackBufferSize];
- uint64_t ReadSize = Min(CompressedDataSize, StackBufferSize);
- BufferHeader* Header = reinterpret_cast<BufferHeader*>(StackBuffer);
+ const size_t HeaderBufferSize = 1024;
+ uint8_t HeaderBuffer[HeaderBufferSize];
+ uint64_t ReadSize = Min(CompressedDataSize, HeaderBufferSize);
+ uint64_t FirstSegmentSize = CompressedData.GetSegments()[0].GetSize();
+ if (FirstSegmentSize >= sizeof(BufferHeader))
{
- CompositeBuffer::Iterator It;
- CompressedData.CopyTo(MutableMemoryView(StackBuffer, StackBuffer + StackBufferSize), It);
+ // Keep first read inside first segment if possible
+ ReadSize = Min(ReadSize, FirstSegmentSize);
}
- Header->ByteSwap();
- if (Header->Magic != BufferHeader::ExpectedMagic)
+
+ MutableMemoryView HeaderMemory(HeaderBuffer, &HeaderBuffer[ReadSize]);
+ CompositeBuffer::Iterator It = CompressedData.GetIterator(0);
+ CompressedData.CopyTo(HeaderMemory, It);
+
+ OutHeader = *reinterpret_cast<BufferHeader*>(HeaderMemory.GetData());
+ OutHeader.ByteSwap();
+ if (OutHeader.Magic != BufferHeader::ExpectedMagic)
{
return false;
}
-
- const BaseDecoder* const Decoder = GetDecoder(Header->Method);
+ if (OutHeader.TotalCompressedSize > CompressedDataSize)
+ {
+ return false;
+ }
+ const BaseDecoder* const Decoder = GetDecoder(OutHeader.Method);
if (!Decoder)
{
return false;
}
-
- uint32_t Crc32 = Header->Crc32;
- OutRawHash = IoHash::FromBLAKE3(Header->RawHash);
- OutRawSize = Header->TotalRawSize;
- uint64_t HeaderSize = Decoder->GetHeaderSize(*Header);
-
- if (Header->TotalCompressedSize > CompressedDataSize)
+ uint64_t FullHeaderSize = Decoder->GetHeaderSize(OutHeader);
+ if (FullHeaderSize > CompressedDataSize)
{
return false;
}
-
- Header->ByteSwap();
-
- if (HeaderSize > ReadSize)
+ if (OutHeaderData)
{
- UniqueBuffer HeaderCopy = UniqueBuffer::Alloc(HeaderSize);
- CompositeBuffer::Iterator It;
- CompressedData.CopyTo(HeaderCopy.GetMutableView(), It);
- const MemoryView HeaderView = HeaderCopy.GetView();
- if (Crc32 != BufferHeader::CalculateCrc32(HeaderView))
+ *OutHeaderData = UniqueBuffer::Alloc(FullHeaderSize);
+ MutableMemoryView RemainingHeaderView = OutHeaderData->GetMutableView().CopyFrom(HeaderMemory.Mid(0, FullHeaderSize));
+ if (!RemainingHeaderView.IsEmpty())
+ {
+ CompressedData.CopyTo(RemainingHeaderView, It);
+ }
+ if (OutHeader.Crc32 != BufferHeader::CalculateCrc32(OutHeaderData->GetView()))
+ {
+ return false;
+ }
+ }
+ else if (FullHeaderSize < ReadSize)
+ {
+ if (OutHeader.Crc32 != BufferHeader::CalculateCrc32(HeaderMemory.Mid(0, FullHeaderSize)))
{
return false;
}
}
else
{
- MemoryView FullHeaderView(StackBuffer, StackBuffer + HeaderSize);
- if (Crc32 != BufferHeader::CalculateCrc32(FullHeaderView))
+ UniqueBuffer HeaderData = UniqueBuffer::Alloc(FullHeaderSize);
+ MutableMemoryView RemainingHeaderView = HeaderData.GetMutableView().CopyFrom(HeaderMemory.Mid(0, FullHeaderSize));
+ if (!RemainingHeaderView.IsEmpty())
+ {
+ CompressedData.CopyTo(RemainingHeaderView, It);
+ }
+ if (OutHeader.Crc32 != BufferHeader::CalculateCrc32(HeaderData.GetView()))
{
return false;
}
}
-
return true;
}
+bool
+BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
+{
+ detail::BufferHeader Header;
+ if (ReadHeader(CompressedData, Header, nullptr))
+ {
+ OutRawHash = IoHash::FromBLAKE3(Header.RawHash);
+ OutRawSize = Header.TotalRawSize;
+ return true;
+ }
+ return false;
+}
+
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
static bool
@@ -1097,7 +1125,11 @@ ValidBufferOrEmpty(BufferType&& CompressedData, IoHash& OutRawHash, uint64_t& Ou
}
CompositeBuffer
-GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize)
+GetCompressedRange(const BufferHeader& Header,
+ MemoryView HeaderRawData,
+ const CompositeBuffer& CompressedData,
+ uint64_t RawOffset,
+ uint64_t RawSize)
{
if (Header.TotalRawSize < RawOffset + RawSize)
{
@@ -1118,9 +1150,7 @@ GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& Compressed
}
else
{
- UniqueBuffer BlockSizeBuffer;
- MemoryView BlockSizeView =
- CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer);
+ MemoryView BlockSizeView = HeaderRawData.Mid(sizeof(Header), Header.BlockCount * sizeof(uint32_t));
std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount);
const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent;
@@ -1179,7 +1209,11 @@ GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& Compressed
}
CompositeBuffer
-CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize)
+CopyCompressedRange(const BufferHeader& Header,
+ MemoryView HeaderRawData,
+ const CompositeBuffer& CompressedData,
+ uint64_t RawOffset,
+ uint64_t RawSize)
{
if (Header.TotalRawSize < RawOffset + RawSize)
{
@@ -1204,9 +1238,7 @@ CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& Compresse
}
else
{
- UniqueBuffer BlockSizeBuffer;
- MemoryView BlockSizeView =
- CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer);
+ MemoryView BlockSizeView = HeaderRawData.Mid(sizeof(Header), Header.BlockCount * sizeof(uint32_t));
std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount);
const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent;
@@ -1410,26 +1442,28 @@ CompressedBuffer::DecodeRawHash() const
CompressedBuffer
CompressedBuffer::CopyRange(uint64_t RawOffset, uint64_t RawSize) const
{
- using namespace detail;
- const BufferHeader Header = BufferHeader::Read(CompressedData);
- const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
-
- CompressedBuffer Range;
- Range.CompressedData = CopyCompressedRange(Header, CompressedData, RawOffset, TotalRawSize);
-
+ CompressedBuffer Range;
+ detail::BufferHeader Header;
+ UniqueBuffer RawHeaderData;
+ if (ReadHeader(CompressedData, Header, &RawHeaderData))
+ {
+ const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
+ Range.CompressedData = CopyCompressedRange(Header, RawHeaderData.GetView(), CompressedData, RawOffset, TotalRawSize);
+ }
return Range;
}
CompressedBuffer
CompressedBuffer::GetRange(uint64_t RawOffset, uint64_t RawSize) const
{
- using namespace detail;
- const BufferHeader Header = BufferHeader::Read(CompressedData);
- const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
-
- CompressedBuffer Range;
- Range.CompressedData = GetCompressedRange(Header, CompressedData, RawOffset, TotalRawSize);
-
+ CompressedBuffer Range;
+ detail::BufferHeader Header;
+ UniqueBuffer RawHeaderData;
+ if (ReadHeader(CompressedData, Header, &RawHeaderData))
+ {
+ const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
+ Range.CompressedData = GetCompressedRange(Header, RawHeaderData.GetView(), CompressedData, RawOffset, TotalRawSize);
+ }
return Range;
}