diff options
| author | Dan Engelbrecht <[email protected]> | 2024-03-20 15:13:03 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-03-20 15:13:03 +0100 |
| commit | d6071e029b7cb9eec6abfa612b16abc16c84e6a3 (patch) | |
| tree | 21745ab3bb73594a56b2fc548022d900df8ea62f /src/zencore/compress.cpp | |
| parent | remove hv tags on actions since they are no longer useful (diff) | |
| download | zen-d6071e029b7cb9eec6abfa612b16abc16c84e6a3.tar.xz zen-d6071e029b7cb9eec6abfa612b16abc16c84e6a3.zip | |
non memory copy compressed range (#13)
* Add CompressedBuffer::GetRange that references source data rather than make a memory copy
* Use Compressed.CopyRange in project store GetChunkRange
* docs for CompressedBuffer::CopyRange and CompressedBuffer::GetRange
Diffstat (limited to 'src/zencore/compress.cpp')
| -rw-r--r-- | src/zencore/compress.cpp | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index a8e8a79f4..58be65f13 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -1097,6 +1097,88 @@ ValidBufferOrEmpty(BufferType&& CompressedData, IoHash& OutRawHash, uint64_t& Ou } CompositeBuffer +GetCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize) +{ + if (Header.TotalRawSize < RawOffset + RawSize) + { + return CompositeBuffer(); + } + if (Header.Method == CompressionMethod::None) + { + BufferHeader NewHeader = Header; + NewHeader.Crc32 = 0; + NewHeader.TotalRawSize = RawSize; + NewHeader.TotalCompressedSize = NewHeader.TotalRawSize + sizeof(BufferHeader); + NewHeader.RawHash = BLAKE3(); + + UniqueBuffer HeaderData = UniqueBuffer::Alloc(sizeof(BufferHeader)); + NewHeader.Write(HeaderData); + + return CompositeBuffer(HeaderData.MoveToShared(), CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize).MakeOwned()); + } + else + { + UniqueBuffer BlockSizeBuffer; + MemoryView BlockSizeView = + CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); + + const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; + const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize); + const size_t FirstBlock = uint64_t(RawOffset / BlockSize); + const size_t LastBlock = uint64_t((RawOffset + RawSize - 1) / BlockSize); + uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); + + const uint64_t NewBlockCount = LastBlock - FirstBlock + 1; + const uint64_t NewMetaSize = NewBlockCount * sizeof(uint32_t); + uint64_t NewCompressedSize = 0; + uint64_t NewTotalRawSize = 0; + std::vector<uint32_t> NewCompressedBlockSizes; + + NewCompressedBlockSizes.reserve(NewBlockCount); + for (size_t BlockIndex = FirstBlock; BlockIndex <= LastBlock; ++BlockIndex) + { + const uint64_t UncompressedBlockSize = (BlockIndex == Header.BlockCount - 1) ? LastBlockSize : BlockSize; + NewTotalRawSize += UncompressedBlockSize; + + const uint32_t CompressedBlockSize = CompressedBlockSizes[BlockIndex]; + NewCompressedBlockSizes.push_back(CompressedBlockSize); + NewCompressedSize += ByteSwap(CompressedBlockSize); + } + + const uint64_t NewTotalCompressedSize = sizeof(BufferHeader) + NewBlockCount * sizeof(uint32_t) + NewCompressedSize; + const uint64_t NewCompressedHeaderSize = sizeof(BufferHeader) + NewBlockCount * sizeof(uint32_t); + UniqueBuffer NewCompressedHeaderData = UniqueBuffer::Alloc(NewCompressedHeaderSize); + + // Seek to first compressed block + for (size_t BlockIndex = 0; BlockIndex < FirstBlock; ++BlockIndex) + { + const uint64_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + CompressedOffset += CompressedBlockSize; + } + + CompositeBuffer NewCompressedData = CompressedData.Mid(CompressedOffset, NewCompressedSize).MakeOwned(); + + // Copy block sizes + NewCompressedHeaderData.GetMutableView().Mid(sizeof(BufferHeader), NewMetaSize).CopyFrom(MakeMemoryView(NewCompressedBlockSizes)); + + BufferHeader NewHeader; + NewHeader.Crc32 = 0; + NewHeader.Method = Header.Method; + NewHeader.Compressor = Header.Compressor; + NewHeader.CompressionLevel = Header.CompressionLevel; + NewHeader.BlockSizeExponent = Header.BlockSizeExponent; + NewHeader.BlockCount = static_cast<uint32_t>(NewBlockCount); + NewHeader.TotalRawSize = NewTotalRawSize; + NewHeader.TotalCompressedSize = NewTotalCompressedSize; + NewHeader.RawHash = BLAKE3(); + NewHeader.Write(NewCompressedHeaderData.GetMutableView().Left(sizeof(BufferHeader) + NewMetaSize)); + + return CompositeBuffer(NewCompressedHeaderData.MoveToShared(), NewCompressedData); + } +} + +CompositeBuffer CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize) { if (Header.TotalRawSize < RawOffset + RawSize) @@ -1338,6 +1420,19 @@ CompressedBuffer::CopyRange(uint64_t RawOffset, uint64_t RawSize) const return Range; } +CompressedBuffer +CompressedBuffer::GetRange(uint64_t RawOffset, uint64_t RawSize) const +{ + using namespace detail; + const BufferHeader Header = BufferHeader::Read(CompressedData); + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + + CompressedBuffer Range; + Range.CompressedData = GetCompressedRange(Header, CompressedData, RawOffset, TotalRawSize); + + return Range; +} + bool CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) const { @@ -1920,6 +2015,66 @@ TEST_CASE("CompressedBuffer") } } + SUBCASE("get range") + { + const uint64_t BlockSize = 64 * sizeof(uint64_t); + const uint64_t N = 1000; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::Mermaid, + OodleCompressionLevel::Optimal4, + BlockSize); + + { + const uint64_t OffsetCount = 0; + const uint64_t Count = N; + SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 64; + const uint64_t Count = N - 64; + SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 64 * 2 + 32; + const uint64_t Count = N - OffsetCount; + const uint64_t RawOffset = OffsetCount * sizeof(uint64_t); + const uint64_t RawSize = Count * sizeof(uint64_t); + uint64_t FirstBlockOffset = RawOffset % BlockSize; + + SharedBuffer Uncompressed = Compressed.GetRange(RawOffset, RawSize).Decompress(); + std::span<uint64_t const> AllValues((const uint64_t*)Uncompressed.GetData(), RawSize / sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)), + RawSize / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 64 * 2 + 63; + const uint64_t Count = N - OffsetCount - 5; + const uint64_t RawOffset = OffsetCount * sizeof(uint64_t); + const uint64_t RawSize = Count * sizeof(uint64_t); + uint64_t FirstBlockOffset = RawOffset % BlockSize; + + SharedBuffer Uncompressed = Compressed.GetRange(RawOffset, RawSize).Decompress(); + std::span<uint64_t const> AllValues((const uint64_t*)Uncompressed.GetData(), RawSize / sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)), + RawSize / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + } + SUBCASE("copy uncompressed range") { const uint64_t N = 1000; @@ -1956,6 +2111,43 @@ TEST_CASE("CompressedBuffer") ValidateData(Values, ExpectedValues, OffsetCount); } } + + SUBCASE("get uncompressed range") + { + const uint64_t N = 1000; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::NotSet, + OodleCompressionLevel::None); + + { + const uint64_t OffsetCount = 0; + const uint64_t Count = N; + SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 1; + const uint64_t Count = N - OffsetCount; + SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 42; + const uint64_t Count = 100; + SharedBuffer Uncompressed = Compressed.GetRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + } } TEST_CASE("CompressedBufferReader") |