aboutsummaryrefslogtreecommitdiff
path: root/zencore/compress.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2022-12-07 11:21:41 +0100
committer GitHub <[email protected]> 2022-12-07 02:21:41 -0800
commit 100c8f966b1c5b2fb190748f0177600562d1c5fe (patch)
tree fc85e350dea47330149a1d42eb7a6c7ae0a06111 /zencore/compress.cpp
parent Cache request record/replay (#198) (diff)
download zen-100c8f966b1c5b2fb190748f0177600562d1c5fe.tar.xz
zen-100c8f966b1c5b2fb190748f0177600562d1c5fe.zip
optimizations (#200)
* Use direct file read and direct buffer allocation for small IoBuffer materialization
* Reduce range of materialized data in CompositeBuffer reading
  CompressedBuffer header reading often only needs a small part and not the whole file
* Reduce lock contention in IoBuffer::Materialize
* Reduce parsing of compressed headers
  Validate header type at decompression
* Faster CreateDirectories - start from leaf going up and recurse back
* Optimized BufferHeader::IsValid
* Add ValidateCompressedHeader to use when we don't need the actual compressed data
  Validate that we always get compressed data in CidStore::AddChunk
* Changelog
Diffstat (limited to 'zencore/compress.cpp')
-rw-r--r-- zencore/compress.cpp 179
1 file changed, 124 insertions, 55 deletions
diff --git a/zencore/compress.cpp b/zencore/compress.cpp
index 15cc5f6a7..6e06739ea 100644
--- a/zencore/compress.cpp
+++ b/zencore/compress.cpp
@@ -6,6 +6,7 @@
#include <zencore/compositebuffer.h>
#include <zencore/crc32.h>
#include <zencore/endian.h>
+#include <zencore/iohash.h>
#include <zencore/testing.h>
#include "../thirdparty/Oodle/include/oodle2.h"
@@ -56,8 +57,11 @@ struct BufferHeader
BLAKE3 RawHash; // The hash of the uncompressed data
/** Checks validity of the buffer based on the magic number, method, and CRC-32. */
- static bool IsValid(const CompositeBuffer& CompressedData);
- static bool IsValid(const SharedBuffer& CompressedData) { return IsValid(CompositeBuffer(CompressedData)); }
+ static bool IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize);
+ static bool IsValid(const SharedBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
+ {
+ return IsValid(CompositeBuffer(CompressedData), OutRawHash, OutRawSize);
+ }
/** Read a header from a buffer that is at least sizeof(BufferHeader) without any validation. */
static BufferHeader Read(const CompositeBuffer& CompressedData)
@@ -65,6 +69,10 @@ struct BufferHeader
BufferHeader Header;
if (sizeof(BufferHeader) <= CompressedData.GetSize())
{
+ // if (CompressedData.GetSegments()[0].AsIoBuffer().IsWholeFile())
+ // {
+ // ZEN_ASSERT(true);
+ // }
CompositeBuffer::Iterator It;
CompressedData.CopyTo(MakeMutableMemoryView(&Header, &Header + 1), It);
Header.ByteSwap();
@@ -664,36 +672,68 @@ GetDecoder(CompressionMethod Method)
//////////////////////////////////////////////////////////////////////////
bool
-BufferHeader::IsValid(const CompositeBuffer& CompressedData)
+BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
{
- if (sizeof(BufferHeader) <= CompressedData.GetSize())
+ uint64_t Size = CompressedData.GetSize();
+ if (Size < sizeof(BufferHeader))
{
- const BufferHeader Header = Read(CompressedData);
- if (Header.Magic == BufferHeader::ExpectedMagic)
+ return false;
+ }
+ const size_t StackBufferSize = 256;
+ uint8_t StackBuffer[StackBufferSize];
+ uint64_t ReadSize = Min(Size, StackBufferSize);
+ BufferHeader* Header = reinterpret_cast<BufferHeader*>(StackBuffer);
+ {
+ CompositeBuffer::Iterator It;
+ CompressedData.CopyTo(MutableMemoryView(StackBuffer, StackBuffer + StackBufferSize), It);
+ }
+ Header->ByteSwap();
+ if (Header->Magic != BufferHeader::ExpectedMagic)
+ {
+ return false;
+ }
+ const BaseDecoder* const Decoder = GetDecoder(Header->Method);
+ if (!Decoder)
+ {
+ return false;
+ }
+ uint32_t Crc32 = Header->Crc32;
+ OutRawHash = IoHash::FromBLAKE3(Header->RawHash);
+ OutRawSize = Header->TotalRawSize;
+ uint64_t HeaderSize = Decoder->GetHeaderSize(*Header);
+ Header->ByteSwap();
+
+ if (HeaderSize > ReadSize)
+ {
+ // 0.004% of cases on a Fortnite hot cache cook
+ UniqueBuffer HeaderCopy = UniqueBuffer::Alloc(HeaderSize);
+ CompositeBuffer::Iterator It;
+ CompressedData.CopyTo(HeaderCopy.GetMutableView(), It);
+ const MemoryView HeaderView = HeaderCopy.GetView();
+ if (Crc32 != BufferHeader::CalculateCrc32(HeaderView))
{
- if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
- {
- UniqueBuffer HeaderCopy = UniqueBuffer::Alloc(Decoder->GetHeaderSize(Header));
- CompositeBuffer::Iterator It;
- CompressedData.CopyTo(HeaderCopy.GetMutableView(), It);
- const MemoryView HeaderView = HeaderCopy.GetView();
- if (Header.Crc32 == BufferHeader::CalculateCrc32(HeaderView))
- {
- return true;
- }
- }
+ return false;
}
}
- return false;
+ else
+ {
+ MemoryView FullHeaderView(StackBuffer, StackBuffer + HeaderSize);
+ if (Crc32 != BufferHeader::CalculateCrc32(FullHeaderView))
+ {
+ return false;
+ }
+ }
+ return true;
}
//////////////////////////////////////////////////////////////////////////
template<typename BufferType>
inline CompositeBuffer
-ValidBufferOrEmpty(BufferType&& CompressedData)
+ValidBufferOrEmpty(BufferType&& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
{
- return BufferHeader::IsValid(CompressedData) ? CompositeBuffer(std::forward<BufferType>(CompressedData)) : CompositeBuffer();
+ return BufferHeader::IsValid(CompressedData, OutRawHash, OutRawSize) ? CompositeBuffer(std::forward<BufferType>(CompressedData))
+ : CompositeBuffer();
}
CompositeBuffer
@@ -826,34 +866,34 @@ CompressedBuffer::Compress(const SharedBuffer& RawData,
}
CompressedBuffer
-CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData)
+CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
{
CompressedBuffer Local;
- Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData);
+ Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData, OutRawHash, OutRawSize);
return Local;
}
CompressedBuffer
-CompressedBuffer::FromCompressed(CompositeBuffer&& InCompressedData)
+CompressedBuffer::FromCompressed(CompositeBuffer&& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
{
CompressedBuffer Local;
- Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData));
+ Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData), OutRawHash, OutRawSize);
return Local;
}
CompressedBuffer
-CompressedBuffer::FromCompressed(const SharedBuffer& InCompressedData)
+CompressedBuffer::FromCompressed(const SharedBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
{
CompressedBuffer Local;
- Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData);
+ Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData, OutRawHash, OutRawSize);
return Local;
}
CompressedBuffer
-CompressedBuffer::FromCompressed(SharedBuffer&& InCompressedData)
+CompressedBuffer::FromCompressed(SharedBuffer&& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
{
CompressedBuffer Local;
- Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData));
+ Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData), OutRawHash, OutRawSize);
return Local;
}
@@ -881,14 +921,26 @@ CompressedBuffer::FromCompressedNoValidate(CompositeBuffer&& InCompressedData)
return Local;
}
+bool
+CompressedBuffer::ValidateCompressedHeader(IoBuffer&& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
+{
+ return detail::BufferHeader::IsValid(SharedBuffer(std::move(CompressedData)), OutRawHash, OutRawSize);
+}
+
+bool
+CompressedBuffer::ValidateCompressedHeader(const IoBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize)
+{
+ return detail::BufferHeader::IsValid(SharedBuffer(CompressedData), OutRawHash, OutRawSize);
+}
+
uint64_t
-CompressedBuffer::GetRawSize() const
+CompressedBuffer::DecodeRawSize() const
{
return CompressedData ? detail::BufferHeader::Read(CompressedData).TotalRawSize : 0;
}
BLAKE3
-CompressedBuffer::GetRawHash() const
+CompressedBuffer::DecodeRawHash() const
{
return CompressedData ? detail::BufferHeader::Read(CompressedData).RawHash : BLAKE3();
}
@@ -913,9 +965,12 @@ CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset)
if (CompressedData)
{
const BufferHeader Header = BufferHeader::Read(CompressedData);
- if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
+ if (Header.Magic == BufferHeader::ExpectedMagic)
{
- return Decoder->TryDecompressTo(Header, CompressedData, RawView, RawOffset);
+ if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
+ {
+ return Decoder->TryDecompressTo(Header, CompressedData, RawView, RawOffset);
+ }
}
}
return false;
@@ -928,13 +983,16 @@ CompressedBuffer::Decompress(uint64_t RawOffset, uint64_t RawSize) const
if (CompressedData && RawSize > 0)
{
const BufferHeader Header = BufferHeader::Read(CompressedData);
- if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
+ if (Header.Magic == BufferHeader::ExpectedMagic)
{
- const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
- UniqueBuffer RawData = UniqueBuffer::Alloc(TotalRawSize);
- if (Decoder->TryDecompressTo(Header, CompressedData, RawData, RawOffset))
+ if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
{
- return RawData.MoveToShared();
+ const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
+ UniqueBuffer RawData = UniqueBuffer::Alloc(TotalRawSize);
+ if (Decoder->TryDecompressTo(Header, CompressedData, RawData, RawOffset))
+ {
+ return RawData.MoveToShared();
+ }
}
}
}
@@ -948,9 +1006,12 @@ CompressedBuffer::DecompressToComposite() const
if (CompressedData)
{
const BufferHeader Header = BufferHeader::Read(CompressedData);
- if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
+ if (Header.Magic == BufferHeader::ExpectedMagic)
{
- return Decoder->Decompress(Header, CompressedData);
+ if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
+ {
+ return Decoder->Decompress(Header, CompressedData);
+ }
}
}
return CompositeBuffer();
@@ -1005,18 +1066,20 @@ TEST_CASE("CompressedBuffer")
OodleCompressor::NotSet,
OodleCompressionLevel::None);
- CHECK(Buffer.GetRawSize() == sizeof(Zeroes));
+ CHECK(Buffer.DecodeRawSize() == sizeof(Zeroes));
CHECK(Buffer.GetCompressedSize() == (sizeof(Zeroes) + sizeof(detail::BufferHeader)));
CompositeBuffer Compressed = Buffer.GetCompressed();
- CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed);
+ IoHash DecodedHash;
+ uint64_t DecodedRawSize;
+ CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize);
CHECK(BufferD.IsNull() == false);
CompositeBuffer Decomp = BufferD.DecompressToComposite();
- CHECK(Decomp.GetSize() == Buffer.GetRawSize());
- CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash());
+ CHECK(Decomp.GetSize() == DecodedRawSize);
+ CHECK(IoHash::HashBuffer(Decomp) == DecodedHash);
}
{
@@ -1025,53 +1088,59 @@ TEST_CASE("CompressedBuffer")
OodleCompressor::NotSet,
OodleCompressionLevel::None);
- CHECK(Buffer.GetRawSize() == (sizeof(Zeroes) + sizeof(Ones)));
+ CHECK(Buffer.DecodeRawSize() == (sizeof(Zeroes) + sizeof(Ones)));
CHECK(Buffer.GetCompressedSize() == (sizeof(Zeroes) + sizeof(Ones) + sizeof(detail::BufferHeader)));
CompositeBuffer Compressed = Buffer.GetCompressed();
- CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed);
+ IoHash DecodedHash;
+ uint64_t DecodedRawSize;
+ CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize);
CHECK(BufferD.IsNull() == false);
CompositeBuffer Decomp = BufferD.DecompressToComposite();
- CHECK(Decomp.GetSize() == Buffer.GetRawSize());
- CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash());
+ CHECK(Decomp.GetSize() == DecodedRawSize);
+ CHECK(IoHash::HashBuffer(Decomp) == DecodedHash);
}
{
CompressedBuffer Buffer = CompressedBuffer::Compress(CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes))));
- CHECK(Buffer.GetRawSize() == sizeof(Zeroes));
+ CHECK(Buffer.DecodeRawSize() == sizeof(Zeroes));
CHECK(Buffer.GetCompressedSize() < sizeof(Zeroes));
CompositeBuffer Compressed = Buffer.GetCompressed();
- CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed);
+ IoHash DecodedHash;
+ uint64_t DecodedRawSize;
+ CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize);
CHECK(BufferD.IsNull() == false);
CompositeBuffer Decomp = BufferD.DecompressToComposite();
- CHECK(Decomp.GetSize() == Buffer.GetRawSize());
- CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash());
+ CHECK(Decomp.GetSize() == DecodedRawSize);
+ CHECK(IoHash::HashBuffer(Decomp) == DecodedHash);
}
{
CompressedBuffer Buffer = CompressedBuffer::Compress(
CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)), SharedBuffer::MakeView(MakeMemoryView(Ones))));
- CHECK(Buffer.GetRawSize() == (sizeof(Zeroes) + sizeof(Ones)));
+ CHECK(Buffer.DecodeRawSize() == (sizeof(Zeroes) + sizeof(Ones)));
CHECK(Buffer.GetCompressedSize() < (sizeof(Zeroes) + sizeof(Ones)));
CompositeBuffer Compressed = Buffer.GetCompressed();
- CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed);
+ IoHash DecodedHash;
+ uint64_t DecodedRawSize;
+ CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize);
CHECK(BufferD.IsNull() == false);
CompositeBuffer Decomp = BufferD.DecompressToComposite();
- CHECK(Decomp.GetSize() == Buffer.GetRawSize());
- CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash());
+ CHECK(Decomp.GetSize() == DecodedRawSize);
+ CHECK(IoHash::HashBuffer(Decomp) == DecodedHash);
}
auto GenerateData = [](uint64_t N) -> std::vector<uint64_t> {