// Copyright Epic Games, Inc. All Rights Reserved. #include #include #include #include #include #include #include #include namespace zen::detail { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// static constexpr uint64_t DefaultBlockSize = 256 * 1024; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** Header used on every compressed buffer. Always stored in big-endian format. */ struct BufferHeader { static constexpr uint32_t ExpectedMagic = 0xb7756362; // ucb uint32_t Magic = ExpectedMagic; // A magic number to identify a compressed buffer. Always 0xb7756362. uint32_t Crc32 = 0; // A CRC-32 used to check integrity of the buffer. Uses the polynomial 0x04c11db7 CompressionMethod Method = CompressionMethod::None; // The method used to compress the buffer. Affects layout of data following the header uint8_t Reserved[2]{}; // The reserved bytes must be initialized to zero uint8_t BlockSizeExponent = 0; // The power of two size of every uncompressed block except the last. Size is 1 << BlockSizeExponent uint32_t BlockCount = 0; // The number of blocks that follow the header uint64_t TotalRawSize = 0; // The total size of the uncompressed data uint64_t TotalCompressedSize = 0; // The total size of the compressed data including the header BLAKE3 RawHash; // The hash of the uncompressed data /** Checks validity of the buffer based on the magic number, method, and CRC-32. */ static bool IsValid(const CompositeBuffer& CompressedData); static bool IsValid(const SharedBuffer& CompressedData) { return IsValid(CompositeBuffer(CompressedData)); } /** Read a header from a buffer that is at least sizeof(BufferHeader) without any validation. */ static BufferHeader Read(const CompositeBuffer& CompressedData) { BufferHeader Header; if (sizeof(BufferHeader) <= CompressedData.GetSize()) { CompressedData.CopyTo(MakeMutableMemoryView(&Header, &Header + 1)); Header.ByteSwap(); } return Header; } /** * Write a header to a memory view that is at least sizeof(BufferHeader). * * @param HeaderView View of the header to write, including any method-specific header data. */ void Write(MutableMemoryView HeaderView) const { BufferHeader Header = *this; Header.ByteSwap(); HeaderView.CopyFrom(MakeMemoryView(&Header, &Header + 1)); Header.ByteSwap(); Header.Crc32 = CalculateCrc32(HeaderView); Header.ByteSwap(); HeaderView.CopyFrom(MakeMemoryView(&Header, &Header + 1)); } void ByteSwap() { Magic = zen::ByteSwap(Magic); Crc32 = zen::ByteSwap(Crc32); BlockCount = zen::ByteSwap(BlockCount); TotalRawSize = zen::ByteSwap(TotalRawSize); TotalCompressedSize = zen::ByteSwap(TotalCompressedSize); } /** Calculate the CRC-32 from a view of a header including any method-specific header data. */ static uint32_t CalculateCrc32(MemoryView HeaderView) { uint32_t Crc32 = 0; constexpr uint64_t MethodOffset = offsetof(BufferHeader, Method); for (MemoryView View = HeaderView + MethodOffset; const uint64_t ViewSize = View.GetSize();) { const int32_t Size = static_cast(zen::Min(ViewSize, INT_MAX)); Crc32 = zen::MemCrc32(View.GetData(), Size, Crc32); View += Size; } return Crc32; } }; static_assert(sizeof(BufferHeader) == 64, "BufferHeader is the wrong size."); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CompressionMethodBase { public: virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const = 0; virtual CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const = 0; virtual bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const = 0; virtual uint64_t GetHeaderSize(const BufferHeader& Header) const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class MethodNone final : public CompressionMethodBase { public: [[nodiscard]] CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final { BufferHeader Header; Header.Method = CompressionMethod::None; Header.BlockCount = 1; Header.TotalRawSize = RawData.GetSize(); Header.TotalCompressedSize = Header.TotalRawSize + sizeof(BufferHeader); Header.RawHash = BLAKE3::HashBuffer(RawData); UniqueBuffer HeaderData = UniqueBuffer::Alloc(sizeof(BufferHeader)); Header.Write(HeaderData); return CompositeBuffer(HeaderData.MoveToShared(), RawData.MakeOwned()); } [[nodiscard]] CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const final { if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() && Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader)) { return CompressedData.Mid(sizeof(BufferHeader), Header.TotalRawSize).MakeOwned(); } return CompositeBuffer(); } [[nodiscard]] bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const final { if (Header.Method == CompressionMethod::None && Header.TotalRawSize == RawView.GetSize() && Header.TotalCompressedSize == CompressedData.GetSize() && Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader)) { CompressedData.CopyTo(RawView, sizeof(BufferHeader)); return true; } return false; } [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader&) const final { return sizeof(BufferHeader); } }; ////////////////////////////////////////////////////////////////////////// class MethodBlockBase : public CompressionMethodBase { public: CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const final; CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const final; [[nodiscard]] bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const final; [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader& Header) const final { return sizeof(BufferHeader) + sizeof(uint32_t) * uint64_t(Header.BlockCount); } protected: virtual CompressionMethod GetMethod() const = 0; virtual uint64_t CompressBlockBound(uint64_t RawSize) const = 0; virtual bool CompressBlock(MutableMemoryView& CompressedData, MemoryView RawData) const = 0; virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const = 0; private: uint64_t GetCompressedBlocksBound(uint64_t BlockCount, uint64_t BlockSize, uint64_t RawSize) const { switch (BlockCount) { case 0: return 0; case 1: return CompressBlockBound(RawSize); default: return CompressBlockBound(BlockSize) - BlockSize + RawSize; } } }; CompositeBuffer MethodBlockBase::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) const { ZEN_ASSERT(IsPow2(BlockSize) && BlockSize <= (1 << 31)); const uint64_t RawSize = RawData.GetSize(); BLAKE3Stream RawHash; const uint64_t BlockCount = RoundUp(RawSize, BlockSize) / BlockSize; ZEN_ASSERT(BlockCount <= ~uint32_t(0)); // Allocate the buffer for the header, metadata, and compressed blocks. const uint64_t MetaSize = BlockCount * sizeof(uint32_t); const uint64_t CompressedDataSize = sizeof(BufferHeader) + MetaSize + GetCompressedBlocksBound(BlockCount, BlockSize, RawSize); UniqueBuffer CompressedData = UniqueBuffer::Alloc(CompressedDataSize); // Compress the raw data in blocks and store the raw data for incompressible blocks. std::vector CompressedBlockSizes; CompressedBlockSizes.reserve(BlockCount); uint64_t CompressedSize = 0; { UniqueBuffer RawBlockCopy; MutableMemoryView CompressedBlocksView = CompressedData.GetMutableView() + sizeof(BufferHeader) + MetaSize; for (uint64_t RawOffset = 0; RawOffset < RawSize;) { const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); const MemoryView RawBlock = RawData.ViewOrCopyRange(RawOffset, RawBlockSize, RawBlockCopy); RawHash.Append(RawBlock); MutableMemoryView CompressedBlock = CompressedBlocksView; if (!CompressBlock(CompressedBlock, RawBlock)) { return CompositeBuffer(); } uint64_t CompressedBlockSize = CompressedBlock.GetSize(); if (RawBlockSize <= CompressedBlockSize) { CompressedBlockSize = RawBlockSize; CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); } else { CompressedBlocksView += CompressedBlockSize; } CompressedBlockSizes.push_back(static_cast(CompressedBlockSize)); CompressedSize += CompressedBlockSize; RawOffset += RawBlockSize; } } // Return an uncompressed buffer if the compressed data is larger than the raw data. if (RawSize <= MetaSize + CompressedSize) { CompressedData.Reset(); return MethodNone().Compress(RawData, BlockSize); } // Write the header and calculate the CRC-32. for (uint32_t& Size : CompressedBlockSizes) { Size = ByteSwap(Size); } CompressedData.GetMutableView().Mid(sizeof(BufferHeader), MetaSize).CopyFrom(MakeMemoryView(CompressedBlockSizes)); BufferHeader Header; Header.Method = GetMethod(); Header.BlockSizeExponent = static_cast(zen::FloorLog2_64(BlockSize)); Header.BlockCount = static_cast(BlockCount); Header.TotalRawSize = RawSize; Header.TotalCompressedSize = sizeof(BufferHeader) + MetaSize + CompressedSize; Header.RawHash = RawHash.GetHash(); Header.Write(CompressedData.GetMutableView().Left(sizeof(BufferHeader) + MetaSize)); const MemoryView CompositeView = CompressedData.GetView().Left(Header.TotalCompressedSize); return CompositeBuffer(SharedBuffer::MakeView(CompositeView, CompressedData.MoveToShared())); } CompositeBuffer MethodBlockBase::Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const { if (Header.BlockCount == 0 || Header.TotalCompressedSize != CompressedData.GetSize()) { return CompositeBuffer(); } // The raw data cannot reference the compressed data unless it is owned. // An empty raw buffer requires an empty segment, which this path creates. if (!CompressedData.IsOwned() || Header.TotalRawSize == 0) { UniqueBuffer Buffer = UniqueBuffer::Alloc(Header.TotalRawSize); return TryDecompressTo(Header, CompressedData, Buffer) ? CompositeBuffer(Buffer.MoveToShared()) : CompositeBuffer(); } std::vector CompressedBlockSizes; CompressedBlockSizes.resize(Header.BlockCount); CompressedData.CopyTo(MakeMutableMemoryView(CompressedBlockSizes), sizeof(BufferHeader)); for (uint32_t& Size : CompressedBlockSizes) { Size = ByteSwap(Size); } // Allocate the buffer for the raw blocks that were compressed. SharedBuffer RawData; MutableMemoryView RawDataView; const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; { uint64_t RawDataSize = 0; uint64_t RemainingRawSize = Header.TotalRawSize; for (const uint32_t CompressedBlockSize : CompressedBlockSizes) { const uint64_t RawBlockSize = zen::Min(RemainingRawSize, BlockSize); if (CompressedBlockSize < BlockSize) { RawDataSize += RawBlockSize; } RemainingRawSize -= RawBlockSize; } UniqueBuffer RawDataBuffer = UniqueBuffer::Alloc(RawDataSize); RawDataView = RawDataBuffer; RawData = RawDataBuffer.MoveToShared(); } // Decompress the compressed data in blocks and reference the uncompressed blocks. uint64_t PendingCompressedSegmentOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); uint64_t PendingCompressedSegmentSize = 0; uint64_t PendingRawSegmentOffset = 0; uint64_t PendingRawSegmentSize = 0; std::vector Segments; const auto CommitPendingCompressedSegment = [&PendingCompressedSegmentOffset, &PendingCompressedSegmentSize, &CompressedData, &Segments] { if (PendingCompressedSegmentSize) { CompressedData.IterateRange(PendingCompressedSegmentOffset, PendingCompressedSegmentSize, [&Segments](MemoryView View, const SharedBuffer& ViewOuter) { Segments.push_back(SharedBuffer::MakeView(View, ViewOuter)); }); PendingCompressedSegmentOffset += PendingCompressedSegmentSize; PendingCompressedSegmentSize = 0; } }; const auto CommitPendingRawSegment = [&PendingRawSegmentOffset, &PendingRawSegmentSize, &RawData, &Segments] { if (PendingRawSegmentSize) { const MemoryView PendingSegment = RawData.GetView().Mid(PendingRawSegmentOffset, PendingRawSegmentSize); Segments.push_back(SharedBuffer::MakeView(PendingSegment, RawData)); PendingRawSegmentOffset += PendingRawSegmentSize; PendingRawSegmentSize = 0; } }; UniqueBuffer CompressedBlockCopy; uint64_t RemainingRawSize = Header.TotalRawSize; uint64_t RemainingCompressedSize = CompressedData.GetSize(); for (const uint32_t CompressedBlockSize : CompressedBlockSizes) { if (RemainingCompressedSize < CompressedBlockSize) { return CompositeBuffer(); } const uint64_t RawBlockSize = zen::Min(RemainingRawSize, BlockSize); if (RawBlockSize == CompressedBlockSize) { CommitPendingRawSegment(); PendingCompressedSegmentSize += RawBlockSize; } else { CommitPendingCompressedSegment(); const MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(PendingCompressedSegmentOffset, CompressedBlockSize, CompressedBlockCopy); if (!DecompressBlock(RawDataView.Left(RawBlockSize), CompressedBlock)) { return CompositeBuffer(); } PendingCompressedSegmentOffset += CompressedBlockSize; PendingRawSegmentSize += RawBlockSize; RawDataView += RawBlockSize; } RemainingCompressedSize -= CompressedBlockSize; RemainingRawSize -= RawBlockSize; } CommitPendingCompressedSegment(); CommitPendingRawSegment(); return CompositeBuffer(std::move(Segments)); } bool MethodBlockBase::TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const { if (Header.TotalRawSize != RawView.GetSize() || Header.TotalCompressedSize != CompressedData.GetSize()) { return false; } std::vector CompressedBlockSizes; CompressedBlockSizes.resize(Header.BlockCount); CompressedData.CopyTo(MakeMutableMemoryView(CompressedBlockSizes), sizeof(BufferHeader)); for (uint32_t& Size : CompressedBlockSizes) { Size = ByteSwap(Size); } UniqueBuffer CompressedBlockCopy; const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); uint64_t RemainingRawSize = Header.TotalRawSize; uint64_t RemainingCompressedSize = CompressedData.GetSize(); for (uint32_t CompressedBlockSize : CompressedBlockSizes) { if (RemainingCompressedSize < CompressedBlockSize) { return false; } const uint64_t RawBlockSize = zen::Min(RemainingRawSize, BlockSize); if (RawBlockSize == CompressedBlockSize) { CompressedData.CopyTo(RawView.Left(RawBlockSize), CompressedOffset); } else { const MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); if (!DecompressBlock(RawView.Left(RawBlockSize), CompressedBlock)) { return false; } } RemainingCompressedSize -= CompressedBlockSize; RemainingRawSize -= RawBlockSize; CompressedOffset += CompressedBlockSize; RawView += RawBlockSize; } return RemainingRawSize == 0; } ////////////////////////////////////////////////////////////////////////// class MethodLZ4 final : public MethodBlockBase { protected: CompressionMethod GetMethod() const final { return CompressionMethod::LZ4; } uint64_t CompressBlockBound(uint64_t RawSize) const final { if (RawSize <= LZ4_MAX_INPUT_SIZE) { return static_cast(LZ4_compressBound(static_cast(RawSize))); } return 0; } bool CompressBlock(MutableMemoryView& CompressedData, MemoryView RawData) const final { if (RawData.GetSize() <= LZ4_MAX_INPUT_SIZE) { const int Size = LZ4_compress_default(static_cast(RawData.GetData()), static_cast(CompressedData.GetData()), static_cast(RawData.GetSize()), static_cast(zen::Min(CompressedData.GetSize(), std::numeric_limits::max()))); CompressedData.LeftInline(static_cast(Size)); return Size > 0; } return false; } bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const final { if (CompressedData.GetSize() <= std::numeric_limits::max()) { const int Size = LZ4_decompress_safe(static_cast(CompressedData.GetData()), static_cast(RawData.GetData()), static_cast(CompressedData.GetSize()), static_cast(zen::Min(RawData.GetSize(), LZ4_MAX_INPUT_SIZE))); return static_cast(Size) == RawData.GetSize(); } return false; } }; ////////////////////////////////////////////////////////////////////////// static const CompressionMethodBase* GetMethod(CompressionMethod Method) { static MethodNone None; static MethodLZ4 LZ4; switch (Method) { default: return nullptr; case CompressionMethod::None: return &None; case CompressionMethod::LZ4: return &LZ4; } } static const char* GetMethodName(CompressionMethod Method) { switch (Method) { default: return "error"; case CompressionMethod::None: return "None"; case CompressionMethod::LZ4: return "LZ4"; } } ////////////////////////////////////////////////////////////////////////// bool BufferHeader::IsValid(const CompositeBuffer& CompressedData) { if (sizeof(BufferHeader) <= CompressedData.GetSize()) { const BufferHeader Header = Read(CompressedData); if (Header.Magic == BufferHeader::ExpectedMagic) { if (const CompressionMethodBase* const Method = GetMethod(Header.Method)) { UniqueBuffer HeaderCopy; const MemoryView HeaderView = CompressedData.ViewOrCopyRange(0, Method->GetHeaderSize(Header), HeaderCopy); if (Header.Crc32 == BufferHeader::CalculateCrc32(HeaderView)) { return true; } } } } return false; } ////////////////////////////////////////////////////////////////////////// template inline CompositeBuffer ValidBufferOrEmpty(BufferType&& CompressedData) { return BufferHeader::IsValid(CompressedData) ? CompositeBuffer(std::forward(CompressedData)) : CompositeBuffer(); } } // namespace zen::detail namespace zen { CompressedBuffer CompressedBuffer::Compress(CompressionMethod Method, const CompositeBuffer& RawData) { using namespace detail; CompressedBuffer Local; if (const CompressionMethodBase* const Impl = GetMethod(Method)) { Local.CompressedData = Impl->Compress(RawData); } return Local; } CompressedBuffer CompressedBuffer::Compress(CompressionMethod Method, const SharedBuffer& RawData) { return Compress(Method, CompositeBuffer(RawData)); } CompressedBuffer CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData) { CompressedBuffer Local; Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData); return Local; } CompressedBuffer CompressedBuffer::FromCompressed(CompositeBuffer&& InCompressedData) { CompressedBuffer Local; Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData)); return Local; } CompressedBuffer CompressedBuffer::FromCompressed(const SharedBuffer& InCompressedData) { CompressedBuffer Local; Local.CompressedData = detail::ValidBufferOrEmpty(InCompressedData); return Local; } CompressedBuffer CompressedBuffer::FromCompressed(SharedBuffer&& InCompressedData) { CompressedBuffer Local; Local.CompressedData = detail::ValidBufferOrEmpty(std::move(InCompressedData)); return Local; } uint64_t CompressedBuffer::GetRawSize() const { return CompressedData ? detail::BufferHeader::Read(CompressedData).TotalRawSize : 0; } BLAKE3 CompressedBuffer::GetRawHash() const { return CompressedData ? detail::BufferHeader::Read(CompressedData).RawHash : BLAKE3(); } bool CompressedBuffer::TryDecompressTo(MutableMemoryView RawView) const { using namespace detail; if (CompressedData) { const BufferHeader Header = BufferHeader::Read(CompressedData); if (const CompressionMethodBase* const Method = GetMethod(Header.Method)) { return Method->TryDecompressTo(Header, CompressedData, RawView); } } return false; } SharedBuffer CompressedBuffer::Decompress() const { using namespace detail; if (CompressedData) { const BufferHeader Header = BufferHeader::Read(CompressedData); if (const CompressionMethodBase* const Method = GetMethod(Header.Method)) { if (Header.Method == CompressionMethod::None) { return Method->Decompress(Header, CompressedData).Flatten(); } UniqueBuffer RawData = UniqueBuffer::Alloc(Header.TotalRawSize); if (Method->TryDecompressTo(Header, CompressedData, RawData)) { return RawData.MoveToShared(); } } } return SharedBuffer(); } CompositeBuffer CompressedBuffer::DecompressToComposite() const { using namespace detail; if (CompressedData) { const BufferHeader Header = BufferHeader::Read(CompressedData); if (const CompressionMethodBase* const Method = GetMethod(Header.Method)) { return Method->Decompress(Header, CompressedData); } } return CompositeBuffer(); } const char* CompressedBuffer::GetFormatName() const { return detail::GetMethodName(CompressedData ? detail::BufferHeader::Read(CompressedData).Method : CompressionMethod::None); } /** ______________________ _____________________________ \__ ___/\_ _____// _____/\__ ___/ _____/ | | | __)_ \_____ \ | | \_____ \ | | | \/ \ | | / \ |____| /_______ /_______ / |____| /_______ / \/ \/ \/ */ TEST_CASE("CompressedBuffer") { uint8_t Zeroes[256]{}; uint8_t Ones[256]; memset(Ones, 1, sizeof Ones); CompressedBuffer Buffer1 = CompressedBuffer::Compress(CompressionMethod::None, CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)))); CHECK(Buffer1.GetRawSize() == sizeof(Zeroes)); CHECK(Buffer1.GetCompressedSize() == (sizeof(Zeroes) + sizeof(detail::BufferHeader))); CompressedBuffer Buffer2 = CompressedBuffer::Compress( CompressionMethod::None, CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)), SharedBuffer::MakeView(MakeMemoryView(Ones)))); CHECK(Buffer2.GetRawSize() == (sizeof(Zeroes) + sizeof(Ones))); CHECK(Buffer2.GetCompressedSize() == (sizeof(Zeroes) + sizeof(Ones) + sizeof(detail::BufferHeader))); CompressedBuffer Buffer3 = CompressedBuffer::Compress(CompressionMethod::LZ4, CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)))); CHECK(Buffer3.GetRawSize() == sizeof(Zeroes)); CHECK(Buffer3.GetCompressedSize() == (15 + sizeof(detail::BufferHeader))); CompressedBuffer Buffer4 = CompressedBuffer::Compress( CompressionMethod::LZ4, CompositeBuffer(SharedBuffer::MakeView(MakeMemoryView(Zeroes)), SharedBuffer::MakeView(MakeMemoryView(Ones)))); CHECK(Buffer4.GetRawSize() == (sizeof(Zeroes) + sizeof(Ones))); CHECK(Buffer4.GetCompressedSize() == (20 + sizeof(detail::BufferHeader))); } void compress_forcelink() { } } // namespace zen