diff options
| author | Per Larsson <[email protected]> | 2021-10-13 11:41:18 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2021-10-13 11:41:18 +0200 |
| commit | 738e245fb3403a425d658ce11204210ea83cacdc (patch) | |
| tree | ce5b193e9bda29bd00be0d62088e0236a4c39659 /zencore/compress.cpp | |
| parent | Ratios should not be percentages (this should be done in presentation) (diff) | |
| download | zen-738e245fb3403a425d658ce11204210ea83cacdc.tar.xz zen-738e245fb3403a425d658ce11204210ea83cacdc.zip | |
Added support for decompressing from offset.
Diffstat (limited to 'zencore/compress.cpp')
| -rw-r--r-- | zencore/compress.cpp | 228 |
1 file changed, 182 insertions, 46 deletions
diff --git a/zencore/compress.cpp b/zencore/compress.cpp index 8ca799e39..8d309e010 100644 --- a/zencore/compress.cpp +++ b/zencore/compress.cpp @@ -124,9 +124,12 @@ public: class BaseDecoder { public: - virtual CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const = 0; - virtual bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const = 0; - virtual uint64_t GetHeaderSize(const BufferHeader& Header) const = 0; + virtual CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const = 0; + virtual bool TryDecompressTo(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + MutableMemoryView RawView, + uint64_t RawOffset) const = 0; + virtual uint64_t GetHeaderSize(const BufferHeader& Header) const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -164,13 +167,14 @@ public: [[nodiscard]] bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, - MutableMemoryView RawView) const final + MutableMemoryView RawView, + uint64_t RawOffset) const final { - if (Header.Method == CompressionMethod::None && Header.TotalRawSize == RawView.GetSize() && + if (Header.Method == CompressionMethod::None && RawOffset + RawView.GetSize() <= Header.TotalRawSize && Header.TotalCompressedSize == CompressedData.GetSize() && Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader)) { - CompressedData.CopyTo(RawView, sizeof(BufferHeader)); + CompressedData.CopyTo(RawView, sizeof(BufferHeader) + RawOffset); return true; } return false; @@ -295,7 +299,8 @@ public: CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const final; [[nodiscard]] bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, - MutableMemoryView RawView) const 
final; + MutableMemoryView RawView, + uint64_t RawOffset) const final; [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader& Header) const final { return sizeof(BufferHeader) + sizeof(uint32_t) * uint64_t(Header.BlockCount); @@ -318,7 +323,7 @@ BlockDecoder::Decompress(const BufferHeader& Header, const CompositeBuffer& Comp if (!CompressedData.IsOwned() || Header.TotalRawSize == 0) { UniqueBuffer Buffer = UniqueBuffer::Alloc(Header.TotalRawSize); - return TryDecompressTo(Header, CompressedData, Buffer) ? CompositeBuffer(Buffer.MoveToShared()) : CompositeBuffer(); + return TryDecompressTo(Header, CompressedData, Buffer, 0) ? CompositeBuffer(Buffer.MoveToShared()) : CompositeBuffer(); } std::vector<uint32_t> CompressedBlockSizes; @@ -423,52 +428,83 @@ BlockDecoder::Decompress(const BufferHeader& Header, const CompositeBuffer& Comp } bool -BlockDecoder::TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const +BlockDecoder::TryDecompressTo(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + MutableMemoryView RawView, + uint64_t RawOffset) const { - if (Header.TotalRawSize != RawView.GetSize() || Header.TotalCompressedSize != CompressedData.GetSize()) + if (Header.TotalRawSize < RawOffset + RawView.GetSize() || Header.TotalCompressedSize != CompressedData.GetSize()) { return false; } - std::vector<uint32_t> CompressedBlockSizes; - CompressedBlockSizes.resize(Header.BlockCount); - CompressedData.CopyTo(MakeMutableMemoryView(CompressedBlockSizes), sizeof(BufferHeader)); + const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; - for (uint32_t& Size : CompressedBlockSizes) + UniqueBuffer BlockSizeBuffer; + MemoryView BlockSizeView = CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); + + 
UniqueBuffer CompressedBlockCopy; + UniqueBuffer UncompressedBlockCopy; + + const size_t FirstBlockIndex = uint64_t(RawOffset / BlockSize); + const size_t LastBlockIndex = uint64_t((RawOffset + RawView.GetSize() - 1) / BlockSize); + const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize); + uint64_t OffsetInFirstBlock = RawOffset % BlockSize; + uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); + uint64_t RemainingRawSize = RawView.GetSize(); + + for (size_t BlockIndex = 0; BlockIndex < FirstBlockIndex; BlockIndex++) { - Size = ByteSwap(Size); + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + CompressedOffset += CompressedBlockSize; } - UniqueBuffer CompressedBlockCopy; - const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; - uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); - uint64_t RemainingRawSize = Header.TotalRawSize; - uint64_t RemainingCompressedSize = CompressedData.GetSize(); - for (uint32_t CompressedBlockSize : CompressedBlockSizes) + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) { - if (RemainingCompressedSize < CompressedBlockSize) - { - return false; - } + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - const uint64_t RawBlockSize = zen::Min(RemainingRawSize, BlockSize); - if (RawBlockSize == CompressedBlockSize) - { - CompressedData.CopyTo(RawView.Left(RawBlockSize), CompressedOffset); - } - else + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? 
zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) { - const MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); - if (!DecompressBlock(RawView.Left(RawBlockSize), CompressedBlock)) + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) + { + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); + } + + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) { return false; } + + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else + { + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); } - RemainingCompressedSize -= CompressedBlockSize; - RemainingRawSize -= RawBlockSize; + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; CompressedOffset += CompressedBlockSize; - RawView += RawBlockSize; + RawView += BytesToUncompress; } return RemainingRawSize == 0; @@ -739,7 +775,7 @@ CompressedBuffer::GetRawHash() const } bool -CompressedBuffer::TryDecompressTo(MutableMemoryView RawView) const +CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) const { using namespace detail; if (CompressedData) @@ -747,27 +783,24 @@ CompressedBuffer::TryDecompressTo(MutableMemoryView RawView) const const BufferHeader Header = BufferHeader::Read(CompressedData); if (const BaseDecoder* const Decoder = 
GetDecoder(Header.Method)) { - return Decoder->TryDecompressTo(Header, CompressedData, RawView); + return Decoder->TryDecompressTo(Header, CompressedData, RawView, RawOffset); } } return false; } SharedBuffer -CompressedBuffer::Decompress() const +CompressedBuffer::Decompress(uint64_t RawOffset, uint64_t RawSize) const { using namespace detail; - if (CompressedData) + if (CompressedData && RawSize > 0) { const BufferHeader Header = BufferHeader::Read(CompressedData); if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) { - if (Header.Method == CompressionMethod::None) - { - return Decoder->Decompress(Header, CompressedData).Flatten(); - } - UniqueBuffer RawData = UniqueBuffer::Alloc(Header.TotalRawSize); - if (Decoder->TryDecompressTo(Header, CompressedData, RawData)) + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + UniqueBuffer RawData = UniqueBuffer::Alloc(TotalRawSize); + if (Decoder->TryDecompressTo(Header, CompressedData, RawData, RawOffset)) { return RawData.MoveToShared(); } @@ -904,6 +937,109 @@ TEST_CASE("CompressedBuffer") CHECK(Decomp.GetSize() == Buffer.GetRawSize()); CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash()); } + + auto GenerateData = [](uint64_t N) -> std::vector<uint64_t> { + std::vector<uint64_t> Data; + Data.resize(N); + for (size_t Idx = 0; Idx < Data.size(); ++Idx) + { + Data[Idx] = Idx; + } + return Data; + }; + + auto ValidateData = [](std::span<uint64_t const> Values, std::span<uint64_t const> ExpectedValues, uint64_t Offset) { + for (size_t Idx = Offset; uint64_t Value : Values) + { + const uint64_t ExpectedValue = ExpectedValues[Idx++]; + CHECK(Value == ExpectedValue); + } + }; + + SUBCASE("decompress with offset and size") + { + auto UncompressAndValidate = [&ValidateData](CompressedBuffer Compressed, + uint64_t OffsetCount, + uint64_t Count, + const std::vector<uint64_t>& ExpectedValues) { + SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * 
sizeof(uint64_t), Count * sizeof(uint64_t)); + CHECK(Uncompressed.GetSize() == Count * sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + ValidateData(Values, ExpectedValues, OffsetCount); + }; + + const uint64_t BlockSize = 64 * sizeof(uint64_t); + const uint64_t N = 5000; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::Mermaid, + OodleCompressionLevel::Optimal4, + BlockSize); + UncompressAndValidate(Compressed, 0, N, ExpectedValues); + UncompressAndValidate(Compressed, 1, N - 1, ExpectedValues); + UncompressAndValidate(Compressed, N - 1, 1, ExpectedValues); + UncompressAndValidate(Compressed, 0, 1, ExpectedValues); + UncompressAndValidate(Compressed, 2, 4, ExpectedValues); + UncompressAndValidate(Compressed, 0, 512, ExpectedValues); + UncompressAndValidate(Compressed, 3, 514, ExpectedValues); + UncompressAndValidate(Compressed, 256, 512, ExpectedValues); + UncompressAndValidate(Compressed, 512, 512, ExpectedValues); + } + + SUBCASE("decompress with offset only") + { + const uint64_t BlockSize = 64 * sizeof(uint64_t); + const uint64_t N = 1000; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::Mermaid, + OodleCompressionLevel::Optimal4, + BlockSize); + const uint64_t OffsetCount = 150; + SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + SUBCASE("decompress buffer with one block") + { + const uint64_t BlockSize = 256 * sizeof(uint64_t); + const uint64_t N = 100; + std::vector<uint64_t> 
ExpectedValues = GenerateData(N); + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::Mermaid, + OodleCompressionLevel::Optimal4, + BlockSize); + const uint64_t OffsetCount = 2; + const uint64_t Count = 50; + SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + SUBCASE("decompress uncompressed buffer") + { + const uint64_t N = 4242; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::NotSet, + OodleCompressionLevel::None); + { + const uint64_t OffsetCount = 0; + const uint64_t Count = N; + SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 21; + const uint64_t Count = 999; + SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + ValidateData(Values, ExpectedValues, OffsetCount); + } + } } void |