aboutsummaryrefslogtreecommitdiff
path: root/src/zencore/compress.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-03-20 15:13:03 +0100
committerGitHub Enterprise <[email protected]>2024-03-20 15:13:03 +0100
commitd6071e029b7cb9eec6abfa612b16abc16c84e6a3 (patch)
tree21745ab3bb73594a56b2fc548022d900df8ea62f /src/zencore/compress.cpp
parentremove hv tags on actions since they are no longer useful (diff)
downloadzen-d6071e029b7cb9eec6abfa612b16abc16c84e6a3.tar.xz
zen-d6071e029b7cb9eec6abfa612b16abc16c84e6a3.zip
non memory copy compressed range (#13)
* Add CompressedBuffer::GetRange that references source data rather than make a memory copy * Use Compressed.CopyRange in project store GetChunkRange * docs for CompressedBuffer::CopyRange and CompressedBuffer::GetRange
Diffstat (limited to 'src/zencore/compress.cpp')
-rw-r--r--src/zencore/compress.cpp192
1 files changed, 192 insertions, 0 deletions
diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp
index a8e8a79f4..58be65f13 100644
--- a/src/zencore/compress.cpp
+++ b/src/zencore/compress.cpp
@@ -1097,6 +1097,88 @@ ValidBufferOrEmpty(BufferType&& CompressedData, IoHash& OutRawHash, uint64_t& Ou
}
CompositeBuffer
+GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize)
+{
+ if (Header.TotalRawSize < RawOffset + RawSize)
+ {
+ return CompositeBuffer();
+ }
+ if (Header.Method == CompressionMethod::None)
+ {
+ BufferHeader NewHeader = Header;
+ NewHeader.Crc32 = 0;
+ NewHeader.TotalRawSize = RawSize;
+ NewHeader.TotalCompressedSize = NewHeader.TotalRawSize + sizeof(BufferHeader);
+ NewHeader.RawHash = BLAKE3();
+
+ UniqueBuffer HeaderData = UniqueBuffer::Alloc(sizeof(BufferHeader));
+ NewHeader.Write(HeaderData);
+
+ return CompositeBuffer(HeaderData.MoveToShared(), CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize).MakeOwned());
+ }
+ else
+ {
+ UniqueBuffer BlockSizeBuffer;
+ MemoryView BlockSizeView =
+ CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer);
+ std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount);
+
+ const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent;
+ const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize);
+ const size_t FirstBlock = uint64_t(RawOffset / BlockSize);
+ const size_t LastBlock = uint64_t((RawOffset + RawSize - 1) / BlockSize);
+ uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t);
+
+ const uint64_t NewBlockCount = LastBlock - FirstBlock + 1;
+ const uint64_t NewMetaSize = NewBlockCount * sizeof(uint32_t);
+ uint64_t NewCompressedSize = 0;
+ uint64_t NewTotalRawSize = 0;
+ std::vector<uint32_t> NewCompressedBlockSizes;
+
+ NewCompressedBlockSizes.reserve(NewBlockCount);
+ for (size_t BlockIndex = FirstBlock; BlockIndex <= LastBlock; ++BlockIndex)
+ {
+ const uint64_t UncompressedBlockSize = (BlockIndex == Header.BlockCount - 1) ? LastBlockSize : BlockSize;
+ NewTotalRawSize += UncompressedBlockSize;
+
+ const uint32_t CompressedBlockSize = CompressedBlockSizes[BlockIndex];
+ NewCompressedBlockSizes.push_back(CompressedBlockSize);
+ NewCompressedSize += ByteSwap(CompressedBlockSize);
+ }
+
+ const uint64_t NewTotalCompressedSize = sizeof(BufferHeader) + NewBlockCount * sizeof(uint32_t) + NewCompressedSize;
+ const uint64_t NewCompressedHeaderSize = sizeof(BufferHeader) + NewBlockCount * sizeof(uint32_t);
+ UniqueBuffer NewCompressedHeaderData = UniqueBuffer::Alloc(NewCompressedHeaderSize);
+
+ // Seek to first compressed block
+ for (size_t BlockIndex = 0; BlockIndex < FirstBlock; ++BlockIndex)
+ {
+ const uint64_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]);
+ CompressedOffset += CompressedBlockSize;
+ }
+
+ CompositeBuffer NewCompressedData = CompressedData.Mid(CompressedOffset, NewCompressedSize).MakeOwned();
+
+ // Copy block sizes
+ NewCompressedHeaderData.GetMutableView().Mid(sizeof(BufferHeader), NewMetaSize).CopyFrom(MakeMemoryView(NewCompressedBlockSizes));
+
+ BufferHeader NewHeader;
+ NewHeader.Crc32 = 0;
+ NewHeader.Method = Header.Method;
+ NewHeader.Compressor = Header.Compressor;
+ NewHeader.CompressionLevel = Header.CompressionLevel;
+ NewHeader.BlockSizeExponent = Header.BlockSizeExponent;
+ NewHeader.BlockCount = static_cast<uint32_t>(NewBlockCount);
+ NewHeader.TotalRawSize = NewTotalRawSize;
+ NewHeader.TotalCompressedSize = NewTotalCompressedSize;
+ NewHeader.RawHash = BLAKE3();
+ NewHeader.Write(NewCompressedHeaderData.GetMutableView().Left(sizeof(BufferHeader) + NewMetaSize));
+
+ return CompositeBuffer(NewCompressedHeaderData.MoveToShared(), NewCompressedData);
+ }
+}
+
+CompositeBuffer
CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize)
{
if (Header.TotalRawSize < RawOffset + RawSize)
@@ -1338,6 +1420,19 @@ CompressedBuffer::CopyRange(uint64_t RawOffset, uint64_t RawSize) const
return Range;
}
+CompressedBuffer
+CompressedBuffer::GetRange(uint64_t RawOffset, uint64_t RawSize) const
+{
+ using namespace detail;
+ const BufferHeader Header = BufferHeader::Read(CompressedData);
+ const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset;
+
+ CompressedBuffer Range;
+ Range.CompressedData = GetCompressedRange(Header, CompressedData, RawOffset, TotalRawSize);
+
+ return Range;
+}
+
bool
CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) const
{
@@ -1920,6 +2015,66 @@ TEST_CASE("CompressedBuffer")
}
}
+ SUBCASE("get range")
+ {
+ const uint64_t BlockSize = 64 * sizeof(uint64_t);
+ const uint64_t N = 1000;
+ std::vector<uint64_t> ExpectedValues = GenerateData(N);
+
+ CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::Optimal4,
+ BlockSize);
+
+ {
+ const uint64_t OffsetCount = 0;
+ const uint64_t Count = N;
+ SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ {
+ const uint64_t OffsetCount = 64;
+ const uint64_t Count = N - 64;
+ SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ {
+ const uint64_t OffsetCount = 64 * 2 + 32;
+ const uint64_t Count = N - OffsetCount;
+ const uint64_t RawOffset = OffsetCount * sizeof(uint64_t);
+ const uint64_t RawSize = Count * sizeof(uint64_t);
+ uint64_t FirstBlockOffset = RawOffset % BlockSize;
+
+ SharedBuffer Uncompressed = Compressed.GetRange(RawOffset, RawSize).Decompress();
+ std::span<uint64_t const> AllValues((const uint64_t*)Uncompressed.GetData(), RawSize / sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)),
+ RawSize / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ {
+ const uint64_t OffsetCount = 64 * 2 + 63;
+ const uint64_t Count = N - OffsetCount - 5;
+ const uint64_t RawOffset = OffsetCount * sizeof(uint64_t);
+ const uint64_t RawSize = Count * sizeof(uint64_t);
+ uint64_t FirstBlockOffset = RawOffset % BlockSize;
+
+ SharedBuffer Uncompressed = Compressed.GetRange(RawOffset, RawSize).Decompress();
+ std::span<uint64_t const> AllValues((const uint64_t*)Uncompressed.GetData(), RawSize / sizeof(uint64_t));
+ std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)),
+ RawSize / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+ }
+
SUBCASE("copy uncompressed range")
{
const uint64_t N = 1000;
@@ -1956,6 +2111,43 @@ TEST_CASE("CompressedBuffer")
ValidateData(Values, ExpectedValues, OffsetCount);
}
}
+
+ SUBCASE("get uncompressed range")
+ {
+ const uint64_t N = 1000;
+ std::vector<uint64_t> ExpectedValues = GenerateData(N);
+
+ CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+ OodleCompressor::NotSet,
+ OodleCompressionLevel::None);
+
+ {
+ const uint64_t OffsetCount = 0;
+ const uint64_t Count = N;
+ SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ {
+ const uint64_t OffsetCount = 1;
+ const uint64_t Count = N - OffsetCount;
+ SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+
+ {
+ const uint64_t OffsetCount = 42;
+ const uint64_t Count = 100;
+ SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+ std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+ CHECK(Values.size() == Count);
+ ValidateData(Values, ExpectedValues, OffsetCount);
+ }
+ }
}
TEST_CASE("CompressedBufferReader")