about | summary | refs | log | tree | commit | diff
path: root/zencore/compress.cpp
diff options
context:
space:
mode:
author: Per Larsson <[email protected]> 2021-10-13 11:41:18 +0200
committer: GitHub <[email protected]> 2021-10-13 11:41:18 +0200
commit: 738e245fb3403a425d658ce11204210ea83cacdc (patch)
tree: ce5b193e9bda29bd00be0d62088e0236a4c39659 /zencore/compress.cpp
parent: Ratios should not be percentages (this should be done in presentation) (diff)
download: zen-738e245fb3403a425d658ce11204210ea83cacdc.tar.xz
download: zen-738e245fb3403a425d658ce11204210ea83cacdc.zip
Added support for decompressing from offset.
Diffstat (limited to 'zencore/compress.cpp')
-rw-r--r-- zencore/compress.cpp 228
1 files changed, 182 insertions, 46 deletions
diff --git a/zencore/compress.cpp b/zencore/compress.cpp
index 8ca799e39..8d309e010 100644
--- a/zencore/compress.cpp
+++ b/zencore/compress.cpp
@@ -124,9 +124,12 @@ public:
class BaseDecoder
{
public:
- virtual CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const = 0;
- virtual bool TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const = 0;
- virtual uint64_t GetHeaderSize(const BufferHeader& Header) const = 0;
+ virtual CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const = 0;
+ virtual bool TryDecompressTo(const BufferHeader& Header,
+ const CompositeBuffer& CompressedData,
+ MutableMemoryView RawView,
+ uint64_t RawOffset) const = 0;
+ virtual uint64_t GetHeaderSize(const BufferHeader& Header) const = 0;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -164,13 +167,14 @@ public:
[[nodiscard]] bool TryDecompressTo(const BufferHeader& Header,
const CompositeBuffer& CompressedData,
- MutableMemoryView RawView) const final
+ MutableMemoryView RawView,
+ uint64_t RawOffset) const final
{
- if (Header.Method == CompressionMethod::None && Header.TotalRawSize == RawView.GetSize() &&
+ if (Header.Method == CompressionMethod::None && RawOffset + RawView.GetSize() <= Header.TotalRawSize &&
Header.TotalCompressedSize == CompressedData.GetSize() &&
Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader))
{
- CompressedData.CopyTo(RawView, sizeof(BufferHeader));
+ CompressedData.CopyTo(RawView, sizeof(BufferHeader) + RawOffset);
return true;
}
return false;
@@ -295,7 +299,8 @@ public:
CompositeBuffer Decompress(const BufferHeader& Header, const CompositeBuffer& CompressedData) const final;
[[nodiscard]] bool TryDecompressTo(const BufferHeader& Header,
const CompositeBuffer& CompressedData,
- MutableMemoryView RawView) const final;
+ MutableMemoryView RawView,
+ uint64_t RawOffset) const final;
[[nodiscard]] uint64_t GetHeaderSize(const BufferHeader& Header) const final
{
return sizeof(BufferHeader) + sizeof(uint32_t) * uint64_t(Header.BlockCount);
@@ -318,7 +323,7 @@ BlockDecoder::Decompress(const BufferHeader& Header, const CompositeBuffer& Comp
if (!CompressedData.IsOwned() || Header.TotalRawSize == 0)
{
UniqueBuffer Buffer = UniqueBuffer::Alloc(Header.TotalRawSize);
- return TryDecompressTo(Header, CompressedData, Buffer) ? CompositeBuffer(Buffer.MoveToShared()) : CompositeBuffer();
+ return TryDecompressTo(Header, CompressedData, Buffer, 0) ? CompositeBuffer(Buffer.MoveToShared()) : CompositeBuffer();
}
std::vector<uint32_t> CompressedBlockSizes;
@@ -423,52 +428,83 @@ BlockDecoder::Decompress(const BufferHeader& Header, const CompositeBuffer& Comp
}
bool
-BlockDecoder::TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView) const
+BlockDecoder::TryDecompressTo(const BufferHeader& Header,
+ const CompositeBuffer& CompressedData,
+ MutableMemoryView RawView,
+ uint64_t RawOffset) const
{
- if (Header.TotalRawSize != RawView.GetSize() || Header.TotalCompressedSize != CompressedData.GetSize())
+ if (Header.TotalRawSize < RawOffset + RawView.GetSize() || Header.TotalCompressedSize != CompressedData.GetSize())
{
return false;
}
- std::vector<uint32_t> CompressedBlockSizes;
- CompressedBlockSizes.resize(Header.BlockCount);
- CompressedData.CopyTo(MakeMutableMemoryView(CompressedBlockSizes), sizeof(BufferHeader));
+ const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent;
- for (uint32_t& Size : CompressedBlockSizes)
+ UniqueBuffer BlockSizeBuffer;
+ MemoryView BlockSizeView = CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer);
+ std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount);
+
+ UniqueBuffer CompressedBlockCopy;
+ UniqueBuffer UncompressedBlockCopy;
+
+ const size_t FirstBlockIndex = uint64_t(RawOffset / BlockSize);
+ const size_t LastBlockIndex = uint64_t((RawOffset + RawView.GetSize() - 1) / BlockSize);
+ const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize);
+ uint64_t OffsetInFirstBlock = RawOffset % BlockSize;
+ uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t);
+ uint64_t RemainingRawSize = RawView.GetSize();
+
+ for (size_t BlockIndex = 0; BlockIndex < FirstBlockIndex; BlockIndex++)
{
- Size = ByteSwap(Size);
+ const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]);
+ CompressedOffset += CompressedBlockSize;
}
- UniqueBuffer CompressedBlockCopy;
- const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent;
- uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t);
- uint64_t RemainingRawSize = Header.TotalRawSize;
- uint64_t RemainingCompressedSize = CompressedData.GetSize();
- for (uint32_t CompressedBlockSize : CompressedBlockSizes)
+ for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++)
{
- if (RemainingCompressedSize < CompressedBlockSize)
- {
- return false;
- }
+ const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize;
+ const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]);
+ const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize;
- const uint64_t RawBlockSize = zen::Min(RemainingRawSize, BlockSize);
- if (RawBlockSize == CompressedBlockSize)
- {
- CompressedData.CopyTo(RawView.Left(RawBlockSize), CompressedOffset);
- }
- else
+ const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock)
+ : zen::Min(RemainingRawSize, BlockSize);
+
+ MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy);
+
+ if (IsCompressed)
{
- const MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy);
- if (!DecompressBlock(RawView.Left(RawBlockSize), CompressedBlock))
+ MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress);
+
+ const bool IsAligned = BytesToUncompress == UncompressedBlockSize;
+ if (!IsAligned)
+ {
+ // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries.
+ if (UncompressedBlockCopy.IsNull())
+ {
+ UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize);
+ }
+ UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize);
+ }
+
+ if (!DecompressBlock(UncompressedBlock, CompressedBlock))
{
return false;
}
+
+ if (!IsAligned)
+ {
+ RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress));
+ }
+ }
+ else
+ {
+ RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress));
}
- RemainingCompressedSize -= CompressedBlockSize;
- RemainingRawSize -= RawBlockSize;
+ OffsetInFirstBlock = 0;
+ RemainingRawSize -= BytesToUncompress;
CompressedOffset += CompressedBlockSize;
- RawView += RawBlockSize;
+ RawView += BytesToUncompress;
}
return RemainingRawSize == 0;
@@ -739,7 +775,7 @@ CompressedBuffer::GetRawHash() const
}
bool
-CompressedBuffer::TryDecompressTo(MutableMemoryView RawView) const
+CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) const
{
using namespace detail;
if (CompressedData)
@@ -747,27 +783,24 @@ CompressedBuffer::TryDecompressTo(MutableMemoryView RawView) const
const BufferHeader Header = BufferHeader::Read(CompressedData);
if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
{
- return Decoder->TryDecompressTo(Header, CompressedData, RawView);
+ return Decoder->TryDecompressTo(Header, CompressedData, RawView, RawOffset);
}
}
return false;
}
SharedBuffer
-CompressedBuffer::Decompress() const
+CompressedBuffer::Decompress(uint64_t RawOffset, uint64_t RawSize) const
{
using namespace detail;
- if (CompressedData)
+ if (CompressedData && RawSize > 0)
{
const BufferHeader Header = BufferHeader::Read(CompressedData);
if (const BaseDecoder* const Decoder = GetDecoder(Header.Method))
{
- if (Header.Method == CompressionMethod::None)
- {
- return Decoder->Decompress(Header, CompressedData).Flatten();
- }
- UniqueBuffer RawData = UniqueBuffer::Alloc(Header.TotalRawSize);
- if (Decoder->TryDecompressTo(Header, CompressedData, RawData))
+ const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
+ UniqueBuffer RawData = UniqueBuffer::Alloc(TotalRawSize);
+ if (Decoder->TryDecompressTo(Header, CompressedData, RawData, RawOffset))
{
return RawData.MoveToShared();
}
@@ -904,6 +937,109 @@ TEST_CASE("CompressedBuffer")
CHECK(Decomp.GetSize() == Buffer.GetRawSize());
CHECK(BLAKE3::HashBuffer(Decomp) == BufferD.GetRawHash());
}
+
+ auto GenerateData = [](uint64_t N) -> std::vector<uint64_t> {
+ std::vector<uint64_t> Data;
+ Data.resize(N);
+ for (size_t Idx = 0; Idx < Data.size(); ++Idx)
+ {
+ Data[Idx] = Idx;
+ }
+ return Data;
+ };
+
+ auto ValidateData = [](std::span<uint64_t const> Values, std::span<uint64_t const> ExpectedValues, uint64_t Offset) {
+ for (size_t Idx = Offset; uint64_t Value : Values)
+ {
+ const uint64_t ExpectedValue = ExpectedValues[Idx++];
+ CHECK(Value == ExpectedValue);
+ }
+ };
+
+ SUBCASE("decompress with offset and size")
+ {
+ auto UncompressAndValidate = [&ValidateData](CompressedBuffer Compressed,
+ uint64_t OffsetCount,
+ uint64_t Count,
+ const std::vector<uint64_t>& ExpectedValues) {
+ SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t));
+ CHECK(Uncompressed.GetSize() == Count * sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ };
+
+ const uint64_t BlockSize = 64 * sizeof(uint64_t);
+ const uint64_t N = 5000;
+ std::vector<uint64_t> ExpectedValues = GenerateData(N);
+ CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::Optimal4,
+ BlockSize);
+ UncompressAndValidate(Compressed, 0, N, ExpectedValues);
+ UncompressAndValidate(Compressed, 1, N - 1, ExpectedValues);
+ UncompressAndValidate(Compressed, N - 1, 1, ExpectedValues);
+ UncompressAndValidate(Compressed, 0, 1, ExpectedValues);
+ UncompressAndValidate(Compressed, 2, 4, ExpectedValues);
+ UncompressAndValidate(Compressed, 0, 512, ExpectedValues);
+ UncompressAndValidate(Compressed, 3, 514, ExpectedValues);
+ UncompressAndValidate(Compressed, 256, 512, ExpectedValues);
+ UncompressAndValidate(Compressed, 512, 512, ExpectedValues);
+ }
+
+ SUBCASE("decompress with offset only")
+ {
+ const uint64_t BlockSize = 64 * sizeof(uint64_t);
+ const uint64_t N = 1000;
+ std::vector<uint64_t> ExpectedValues = GenerateData(N);
+ CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::Optimal4,
+ BlockSize);
+ const uint64_t OffsetCount = 150;
+ SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ SUBCASE("decompress buffer with one block")
+ {
+ const uint64_t BlockSize = 256 * sizeof(uint64_t);
+ const uint64_t N = 100;
+ std::vector<uint64_t> ExpectedValues = GenerateData(N);
+ CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::Optimal4,
+ BlockSize);
+ const uint64_t OffsetCount = 2;
+ const uint64_t Count = 50;
+ SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ SUBCASE("decompress uncompressed buffer")
+ {
+ const uint64_t N = 4242;
+ std::vector<uint64_t> ExpectedValues = GenerateData(N);
+ CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+ OodleCompressor::NotSet,
+ OodleCompressionLevel::None);
+ {
+ const uint64_t OffsetCount = 0;
+ const uint64_t Count = N;
+ SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ {
+ const uint64_t OffsetCount = 21;
+ const uint64_t Count = 999;
+ SharedBuffer Uncompressed = Compressed.Decompress(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+ }
}
void