diff options
| author | Dan Engelbrecht <[email protected]> | 2022-12-07 11:21:41 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2022-12-07 02:21:41 -0800 |
| commit | 100c8f966b1c5b2fb190748f0177600562d1c5fe (patch) | |
| tree | fc85e350dea47330149a1d42eb7a6c7ae0a06111 /zencore/compress.cpp | |
| parent | Cache request record/replay (#198) (diff) | |
| download | zen-100c8f966b1c5b2fb190748f0177600562d1c5fe.tar.xz zen-100c8f966b1c5b2fb190748f0177600562d1c5fe.zip | |
optimizations (#200)
* Use direct file read and direct buffer allocation for small IoBuffer materialization
* Reduce range of materialized data in CompositeBuffer reading
CompressedBuffer header reading often only needs a small part and not the whole file
* reduce lock contention in IoBuffer::Materialize
* Reduce parsing of compressed headers
Validate header type at decompression
* faster CreateDirectories - start from leaf going up and recurse back
* optimized BufferHeader::IsValid
* Add ValidateCompressedHeader to use when we don't need the actual compressed data
Validate that we always get compressed data in CidStore::AddChunk
* changelog
Diffstat (limited to 'zencore/compress.cpp')
| -rw-r--r-- | zencore/compress.cpp | 179 |
1 files changed, 124 insertions, 55 deletions
diff --git a/zencore/compress.cpp b/zencore/compress.cpp index 15cc5f6a7..6e06739ea 100644 --- a/zencore/compress.cpp +++ b/zencore/compress.cpp @@ -6,6 +6,7 @@ #include <zencore/compositebuffer.h> #include <zencore/crc32.h> #include <zencore/endian.h> +#include <zencore/iohash.h> #include <zencore/testing.h> #include "../thirdparty/Oodle/include/oodle2.h" @@ -56,8 +57,11 @@ struct BufferHeader BLAKE3 RawHash; // The hash of the uncompressed data /** Checks validity of the buffer based on the magic number, method, and CRC-32. */ - static bool IsValid(const CompositeBuffer& CompressedData); - static bool IsValid(const SharedBuffer& CompressedData) { return IsValid(CompositeBuffer(CompressedData)); } + static bool IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize); + static bool IsValid(const SharedBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) + { + return IsValid(CompositeBuffer(CompressedData), OutRawHash, OutRawSize); + } /** Read a header from a buffer that is at least sizeof(BufferHeader) without any validation. 
*/ static BufferHeader Read(const CompositeBuffer& CompressedData) @@ -65,6 +69,10 @@ struct BufferHeader BufferHeader Header; if (sizeof(BufferHeader) <= CompressedData.GetSize()) { + // if (CompressedData.GetSegments()[0].AsIoBuffer().IsWholeFile()) + // { + // ZEN_ASSERT(true); + // } CompositeBuffer::Iterator It; CompressedData.CopyTo(MakeMutableMemoryView(&Header, &Header + 1), It); Header.ByteSwap(); @@ -664,36 +672,68 @@ GetDecoder(CompressionMethod Method) ////////////////////////////////////////////////////////////////////////// bool -BufferHeader::IsValid(const CompositeBuffer& CompressedData) +BufferHeader::IsValid(const CompositeBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { - if (sizeof(BufferHeader) <= CompressedData.GetSize()) + uint64_t Size = CompressedData.GetSize(); + if (Size < sizeof(BufferHeader)) { - const BufferHeader Header = Read(CompressedData); - if (Header.Magic == BufferHeader::ExpectedMagic) + return false; + } + const size_t StackBufferSize = 256; + uint8_t StackBuffer[StackBufferSize]; + uint64_t ReadSize = Min(Size, StackBufferSize); + BufferHeader* Header = reinterpret_cast<BufferHeader*>(StackBuffer); + { + CompositeBuffer::Iterator It; + CompressedData.CopyTo(MutableMemoryView(StackBuffer, StackBuffer + StackBufferSize), It); + } + Header->ByteSwap(); + if (Header->Magic != BufferHeader::ExpectedMagic) + { + return false; + } + const BaseDecoder* const Decoder = GetDecoder(Header->Method); + if (!Decoder) + { + return false; + } + uint32_t Crc32 = Header->Crc32; + OutRawHash = IoHash::FromBLAKE3(Header->RawHash); + OutRawSize = Header->TotalRawSize; + uint64_t HeaderSize = Decoder->GetHeaderSize(*Header); + Header->ByteSwap(); + + if (HeaderSize > ReadSize) + { + // 0.004% of cases on a Fortnite hot cache cook + UniqueBuffer HeaderCopy = UniqueBuffer::Alloc(HeaderSize); + CompositeBuffer::Iterator It; + CompressedData.CopyTo(HeaderCopy.GetMutableView(), It); + const MemoryView HeaderView = 
HeaderCopy.GetView(); + if (Crc32 != BufferHeader::CalculateCrc32(HeaderView)) { - if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) - { - UniqueBuffer HeaderCopy = UniqueBuffer::Alloc(Decoder->GetHeaderSize(Header)); - CompositeBuffer::Iterator It; - CompressedData.CopyTo(HeaderCopy.GetMutableView(), It); - const MemoryView HeaderView = HeaderCopy.GetView(); - if (Header.Crc32 == BufferHeader::CalculateCrc32(HeaderView)) - { - return true; - } - } + return false; } } - return false; + else + { + MemoryView FullHeaderView(StackBuffer, StackBuffer + HeaderSize); + if (Crc32 != BufferHeader::CalculateCrc32(FullHeaderView)) + { + return false; + } + } + return true; } ////////////////////////////////////////////////////////////////////////// template<typename BufferType> inline CompositeBuffer -ValidBufferOrEmpty(BufferType&& CompressedData) +ValidBufferOrEmpty(BufferType&& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { - return BufferHeader::IsValid(CompressedData) ? CompositeBuffer(std::forward<BufferType>(CompressedData)) : CompositeBuffer(); + return BufferHeader::IsValid(CompressedData, OutRawHash, OutRawSize) ? 
CompositeBuffer(std::forward<BufferType>(CompressedData)) + : CompositeBuffer(); } CompositeBuffer @@ -826,34 +866,34 @@ CompressedBuffer::Compress(const SharedBuffer& RawData, } CompressedBuffer -CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData) +CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { CompressedBuffer Local; - Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData); + Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData, OutRawHash, OutRawSize); return Local; } CompressedBuffer -CompressedBuffer::FromCompressed(CompositeBuffer&& InCompressedData) +CompressedBuffer::FromCompressed(CompositeBuffer&& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { CompressedBuffer Local; - Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData)); + Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData), OutRawHash, OutRawSize); return Local; } CompressedBuffer -CompressedBuffer::FromCompressed(const SharedBuffer& InCompressedData) +CompressedBuffer::FromCompressed(const SharedBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { CompressedBuffer Local; - Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData); + Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData, OutRawHash, OutRawSize); return Local; } CompressedBuffer -CompressedBuffer::FromCompressed(SharedBuffer&& InCompressedData) +CompressedBuffer::FromCompressed(SharedBuffer&& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { CompressedBuffer Local; - Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData)); + Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData), OutRawHash, OutRawSize); return Local; } @@ -881,14 +921,26 @@ CompressedBuffer::FromCompressedNoValidate(CompositeBuffer&& InCompressedData) return Local; } +bool 
+CompressedBuffer::ValidateCompressedHeader(IoBuffer&& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) +{ + return detail::BufferHeader::IsValid(SharedBuffer(std::move(CompressedData)), OutRawHash, OutRawSize); +} + +bool +CompressedBuffer::ValidateCompressedHeader(const IoBuffer& CompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) +{ + return detail::BufferHeader::IsValid(SharedBuffer(CompressedData), OutRawHash, OutRawSize); +} + uint64_t -CompressedBuffer::GetRawSize() const +CompressedBuffer::DecodeRawSize() const { return CompressedData ? detail::BufferHeader::Read(CompressedData).TotalRawSize : 0; } BLAKE3 -CompressedBuffer::GetRawHash() const +CompressedBuffer::DecodeRawHash() const { return CompressedData ? detail::BufferHeader::Read(CompressedData).RawHash : BLAKE3(); } @@ -913,9 +965,12 @@ CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) if (CompressedData) { const BufferHeader Header = BufferHeader::Read(CompressedData); - if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + if (Header.Magic == BufferHeader::ExpectedMagic) { - return Decoder->TryDecompressTo(Header, CompressedData, RawView, RawOffset); + if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + { + return Decoder->TryDecompressTo(Header, CompressedData, RawView, RawOffset); + } } } return false; @@ -928,13 +983,16 @@ CompressedBuffer::Decompress(uint64_t RawOffset, uint64_t RawSize) const if (CompressedData && RawSize > 0) { const BufferHeader Header = BufferHeader::Read(CompressedData); - if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + if (Header.Magic == BufferHeader::ExpectedMagic) { - const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? 
RawSize : Header.TotalRawSize - RawOffset; - UniqueBuffer RawData = UniqueBuffer::Alloc(TotalRawSize); - if (Decoder->TryDecompressTo(Header, CompressedData, RawData, RawOffset)) + if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) { - return RawData.MoveToShared(); + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + UniqueBuffer RawData = UniqueBuffer::Alloc(TotalRawSize); + if (Decoder->TryDecompressTo(Header, CompressedData, RawData, RawOffset)) + { + return RawData.MoveToShared(); + } } } } @@ -948,9 +1006,12 @@ CompressedBuffer::DecompressToComposite() const if (CompressedData) { const BufferHeader Header = BufferHeader::Read(CompressedData); - if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + if (Header.Magic == BufferHeader::ExpectedMagic) { - return Decoder->Decompress(Header, CompressedData); + if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + { + return Decoder->Decompress(Header, CompressedData); + } } } return CompositeBuffer(); @@ -1005,18 +1066,20 @@ TEST_CASE("CompressedBuffer") OodleCompressor::NotSet, OodleCompressionLevel::None); - CHECK(Buffer.GetRawSize() == sizeof(Zeroes)); + CHECK(Buffer.DecodeRawSize() == sizeof(Zeroes)); CHECK(Buffer.GetCompressedSize() == (sizeof(Zeroes) + sizeof(detail::BufferHeader))); CompositeBuffer Compressed = Buffer.GetCompressed(); - CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed); + IoHash DecodedHash; + uint64_t DecodedRawSize; + CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize); CHECK(BufferD.IsNull() == false); CompositeBuffer Decomp = BufferD.DecompressToComposite(); - CHECK(Decomp.GetSize() == Buffer.GetRawSize()); - CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash()); + CHECK(Decomp.GetSize() == DecodedRawSize); + CHECK(IoHash::HashBuffer(Decomp) == DecodedHash); } { @@ -1025,53 +1088,59 @@ TEST_CASE("CompressedBuffer") 
OodleCompressor::NotSet, OodleCompressionLevel::None); - CHECK(Buffer.GetRawSize() == (sizeof(Zeroes) + sizeof(Ones))); + CHECK(Buffer.DecodeRawSize() == (sizeof(Zeroes) + sizeof(Ones))); CHECK(Buffer.GetCompressedSize() == (sizeof(Zeroes) + sizeof(Ones) + sizeof(detail::BufferHeader))); CompositeBuffer Compressed = Buffer.GetCompressed(); - CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed); + IoHash DecodedHash; + uint64_t DecodedRawSize; + CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize); CHECK(BufferD.IsNull() == false); CompositeBuffer Decomp = BufferD.DecompressToComposite(); - CHECK(Decomp.GetSize() == Buffer.GetRawSize()); - CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash()); + CHECK(Decomp.GetSize() == DecodedRawSize); + CHECK(IoHash::HashBuffer(Decomp) == DecodedHash); } { CompressedBuffer Buffer = CompressedBuffer::Compress(CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)))); - CHECK(Buffer.GetRawSize() == sizeof(Zeroes)); + CHECK(Buffer.DecodeRawSize() == sizeof(Zeroes)); CHECK(Buffer.GetCompressedSize() < sizeof(Zeroes)); CompositeBuffer Compressed = Buffer.GetCompressed(); - CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed); + IoHash DecodedHash; + uint64_t DecodedRawSize; + CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize); CHECK(BufferD.IsNull() == false); CompositeBuffer Decomp = BufferD.DecompressToComposite(); - CHECK(Decomp.GetSize() == Buffer.GetRawSize()); - CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash()); + CHECK(Decomp.GetSize() == DecodedRawSize); + CHECK(IoHash::HashBuffer(Decomp) == DecodedHash); } { CompressedBuffer Buffer = CompressedBuffer::Compress( CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)), SharedBuffer::MakeView(MakeMemoryView(Ones)))); - CHECK(Buffer.GetRawSize() == (sizeof(Zeroes) + sizeof(Ones))); + CHECK(Buffer.DecodeRawSize() == 
(sizeof(Zeroes) + sizeof(Ones))); CHECK(Buffer.GetCompressedSize() < (sizeof(Zeroes) + sizeof(Ones))); CompositeBuffer Compressed = Buffer.GetCompressed(); - CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed); + IoHash DecodedHash; + uint64_t DecodedRawSize; + CompressedBuffer BufferD = CompressedBuffer::FromCompressed(Compressed, DecodedHash, DecodedRawSize); CHECK(BufferD.IsNull() == false); CompositeBuffer Decomp = BufferD.DecompressToComposite(); - CHECK(Decomp.GetSize() == Buffer.GetRawSize()); - CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash()); + CHECK(Decomp.GetSize() == DecodedRawSize); + CHECK(IoHash::HashBuffer(Decomp) == DecodedHash); } auto GenerateData = [](uint64_t N) -> std::vector<uint64_t> { |