aboutsummaryrefslogtreecommitdiff
path: root/zencore/compress.cpp
diff options
context:
space:
mode:
authorPer Larsson <[email protected]>2021-10-21 08:21:45 +0200
committerGitHub <[email protected]>2021-10-21 08:21:45 +0200
commita63dc510c62830382f243e965be45b705d396879 (patch)
tree63799eb1b05788c93a177a422a94af670aa77b84 /zencore/compress.cpp
parentAdded missing include. (diff)
downloadzen-a63dc510c62830382f243e965be45b705d396879.tar.xz
zen-a63dc510c62830382f243e965be45b705d396879.zip
Compressed oplog attachments
Diffstat (limited to 'zencore/compress.cpp')
-rw-r--r--zencore/compress.cpp193
1 files changed, 193 insertions, 0 deletions
diff --git a/zencore/compress.cpp b/zencore/compress.cpp
index 8d309e010..4a8d116fa 100644
--- a/zencore/compress.cpp
+++ b/zencore/compress.cpp
@@ -690,6 +690,90 @@ ValidBufferOrEmpty(BufferType&& CompressedData)
return BufferHeader::IsValid(CompressedData) ? CompositeBuffer(std::forward<BufferType>(CompressedData)) : CompositeBuffer();
}
+// Builds a new, self-contained compressed buffer covering the raw byte range
+// [RawOffset, RawOffset + RawSize) of the buffer described by Header / CompressedData.
+//
+// - Method == None: the result holds exactly the requested bytes behind a fresh header.
+// - Any other method: whole blocks are copied without recompression, so the result
+//   starts at the beginning of the block containing RawOffset and ends at the end of
+//   the block containing the last requested byte. Callers must skip
+//   (RawOffset % BlockSize) bytes of the decompressed output to reach RawOffset.
+//
+// The new header's RawHash is a default-constructed BLAKE3 value; the hash of the
+// copied range is not computed here.
+//
+// Returns an empty CompositeBuffer when the requested range does not lie within
+// Header.TotalRawSize, or when a block-compressed buffer is asked for an empty range.
+CompositeBuffer
+CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize)
+{
+    // Reject ranges outside the raw data. Phrased as a subtraction so that
+    // RawOffset + RawSize cannot wrap around.
+    if (RawOffset > Header.TotalRawSize || RawSize > Header.TotalRawSize - RawOffset)
+    {
+        return CompositeBuffer();
+    }
+
+    if (Header.Method == CompressionMethod::None)
+    {
+        // Raw bytes follow the header directly, so the range can be copied as-is.
+        UniqueBuffer NewCompressedData = UniqueBuffer::Alloc(RawSize);
+        CompressedData.CopyTo(NewCompressedData.GetMutableView(), sizeof(Header) + RawOffset);
+
+        BufferHeader NewHeader        = Header;
+        NewHeader.Crc32               = 0;
+        NewHeader.TotalRawSize        = RawSize;
+        NewHeader.TotalCompressedSize = NewHeader.TotalRawSize + sizeof(BufferHeader);
+        NewHeader.RawHash             = BLAKE3();
+
+        UniqueBuffer HeaderData = UniqueBuffer::Alloc(sizeof(BufferHeader));
+        NewHeader.Write(HeaderData);
+
+        return CompositeBuffer(HeaderData.MoveToShared(), NewCompressedData.MoveToShared());
+    }
+    else
+    {
+        // No block overlaps an empty range, and (RawOffset + RawSize - 1) below
+        // would wrap around for RawOffset == 0.
+        if (RawSize == 0)
+        {
+            return CompositeBuffer();
+        }
+
+        // The per-block compressed sizes are stored right after the header.
+        UniqueBuffer BlockSizeBuffer;
+        MemoryView   BlockSizeView =
+            CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer);
+        std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount);
+
+        const uint64_t BlockSize        = uint64_t(1) << Header.BlockSizeExponent;
+        const uint64_t LastBlockSize    = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize);
+        const size_t   FirstBlock       = uint64_t(RawOffset / BlockSize);
+        const size_t   LastBlock        = uint64_t((RawOffset + RawSize - 1) / BlockSize);
+        uint64_t       CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t);
+
+        const uint64_t        NewBlockCount     = LastBlock - FirstBlock + 1;
+        const uint64_t        NewMetaSize       = NewBlockCount * sizeof(uint32_t);
+        uint64_t              NewCompressedSize = 0;
+        uint64_t              NewTotalRawSize   = 0;
+        std::vector<uint32_t> NewCompressedBlockSizes;
+
+        // Collect the size entries of the selected blocks and total up their raw and
+        // compressed sizes. Entries are copied in their stored byte order; ByteSwap is
+        // applied only where the value is used for arithmetic.
+        NewCompressedBlockSizes.reserve(NewBlockCount);
+        for (size_t BlockIndex = FirstBlock; BlockIndex <= LastBlock; ++BlockIndex)
+        {
+            // Only the final block of the source buffer may be shorter than BlockSize.
+            const uint64_t UncompressedBlockSize = (BlockIndex == Header.BlockCount - 1) ? LastBlockSize : BlockSize;
+            NewTotalRawSize += UncompressedBlockSize;
+
+            const uint32_t CompressedBlockSize = CompressedBlockSizes[BlockIndex];
+            NewCompressedBlockSizes.push_back(CompressedBlockSize);
+            NewCompressedSize += ByteSwap(CompressedBlockSize);
+        }
+
+        // Layout of the new buffer: header, block size table, compressed blocks.
+        const uint64_t    NewTotalCompressedSize = sizeof(BufferHeader) + NewMetaSize + NewCompressedSize;
+        UniqueBuffer      NewCompressedData      = UniqueBuffer::Alloc(NewTotalCompressedSize);
+        MutableMemoryView NewCompressedBlocks    = NewCompressedData.GetMutableView() + sizeof(BufferHeader) + NewMetaSize;
+
+        // Seek to first compressed block
+        for (size_t BlockIndex = 0; BlockIndex < FirstBlock; ++BlockIndex)
+        {
+            const uint64_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]);
+            CompressedOffset += CompressedBlockSize;
+        }
+
+        // Copy blocks
+        UniqueBuffer     CompressedBlockCopy;
+        const MemoryView CompressedRange = CompressedData.ViewOrCopyRange(CompressedOffset, NewCompressedSize, CompressedBlockCopy);
+        NewCompressedBlocks.CopyFrom(CompressedRange);
+
+        // Copy block sizes
+        NewCompressedData.GetMutableView().Mid(sizeof(BufferHeader), NewMetaSize).CopyFrom(MakeMemoryView(NewCompressedBlockSizes));
+
+        BufferHeader NewHeader;
+        NewHeader.Crc32               = 0;
+        NewHeader.Method              = Header.Method;
+        NewHeader.Compressor          = Header.Compressor;
+        NewHeader.CompressionLevel    = Header.CompressionLevel;
+        NewHeader.BlockSizeExponent   = Header.BlockSizeExponent;
+        NewHeader.BlockCount          = static_cast<uint32_t>(NewBlockCount);
+        NewHeader.TotalRawSize        = NewTotalRawSize;
+        NewHeader.TotalCompressedSize = NewTotalCompressedSize;
+        NewHeader.RawHash             = BLAKE3();
+        // The view handed to Write spans the header plus the block size table.
+        // NOTE(review): presumably Write fills in Crc32 over that span - confirm.
+        NewHeader.Write(NewCompressedData.GetMutableView().Left(sizeof(BufferHeader) + NewMetaSize));
+
+        return CompositeBuffer(NewCompressedData.MoveToShared());
+    }
+}
+
} // namespace zen::detail
namespace zen {
@@ -774,6 +858,17 @@ CompressedBuffer::GetRawHash() const
return CompressedData ? detail::BufferHeader::Read(CompressedData).RawHash : BLAKE3();
}
+// Returns a new CompressedBuffer covering the raw byte range
+// [RawOffset, RawOffset + RawSize) of this buffer, produced by
+// detail::CopyCompressedRange.
+//
+// A buffer with no compressed data yields a default-constructed CompressedBuffer,
+// matching the null handling in GetRawHash(), instead of reading a header from
+// an empty buffer.
+CompressedBuffer
+CompressedBuffer::CopyRange(uint64_t RawOffset, uint64_t RawSize) const
+{
+    using namespace detail;
+
+    CompressedBuffer Range;
+    if (CompressedData)
+    {
+        const BufferHeader Header = BufferHeader::Read(CompressedData);
+        Range.CompressedData      = CopyCompressedRange(Header, CompressedData, RawOffset, RawSize);
+    }
+
+    return Range;
+}
+
bool
CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) const
{
@@ -1040,6 +1135,104 @@ TEST_CASE("CompressedBuffer")
ValidateData(Values, ExpectedValues, OffsetCount);
}
}
+
+    // Exercises CompressedBuffer::CopyRange on a block-compressed buffer of N uint64_t
+    // values with 64 values per block.
+    SUBCASE("copy range")
+    {
+        const uint64_t        BlockSize      = 64 * sizeof(uint64_t);
+        const uint64_t        N              = 1000;
+        std::vector<uint64_t> ExpectedValues = GenerateData(N);
+
+        CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+                                                                 OodleCompressor::Mermaid,
+                                                                 OodleCompressionLevel::Optimal4,
+                                                                 BlockSize);
+
+        // Whole buffer.
+        {
+            const uint64_t OffsetCount  = 0;
+            const uint64_t Count        = N;
+            SharedBuffer   Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+            std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+
+        // Range starting exactly on the second block boundary and running to the end.
+        {
+            const uint64_t OffsetCount  = 64;
+            const uint64_t Count        = N - 64;
+            SharedBuffer   Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+            std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+
+        // Range starting in the middle of a block: the copy begins at the block
+        // boundary, so the requested data sits FirstBlockOffset bytes into the output.
+        {
+            const uint64_t OffsetCount      = 64 * 2 + 32;
+            const uint64_t Count            = N - OffsetCount;
+            const uint64_t RawOffset        = OffsetCount * sizeof(uint64_t);
+            const uint64_t RawSize          = Count * sizeof(uint64_t);
+            const uint64_t FirstBlockOffset = RawOffset % BlockSize;
+
+            SharedBuffer Uncompressed = Compressed.CopyRange(RawOffset, RawSize).Decompress();
+            // Stop here if the output is too small; the span below would read out of bounds.
+            REQUIRE(Uncompressed.GetSize() >= FirstBlockOffset + RawSize);
+            std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)),
+                                             RawSize / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+
+        // Range starting on the last value of a block and ending before the end of the data.
+        {
+            const uint64_t OffsetCount      = 64 * 2 + 63;
+            const uint64_t Count            = N - OffsetCount - 5;
+            const uint64_t RawOffset        = OffsetCount * sizeof(uint64_t);
+            const uint64_t RawSize          = Count * sizeof(uint64_t);
+            const uint64_t FirstBlockOffset = RawOffset % BlockSize;
+
+            SharedBuffer Uncompressed = Compressed.CopyRange(RawOffset, RawSize).Decompress();
+            // Stop here if the output is too small; the span below would read out of bounds.
+            REQUIRE(Uncompressed.GetSize() >= FirstBlockOffset + RawSize);
+            std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)),
+                                             RawSize / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+    }
+
+    // Exercises CompressedBuffer::CopyRange on a buffer created with
+    // OodleCompressor::NotSet / OodleCompressionLevel::None. Each case expects the
+    // decompressed copy to hold exactly the requested values.
+    SUBCASE("copy uncompressed range")
+    {
+        const uint64_t        N              = 1000;
+        std::vector<uint64_t> ExpectedValues = GenerateData(N);
+
+        CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)),
+                                                                 OodleCompressor::NotSet,
+                                                                 OodleCompressionLevel::None);
+
+        // Whole buffer.
+        {
+            const uint64_t OffsetCount  = 0;
+            const uint64_t Count        = N;
+            SharedBuffer   Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+            std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+
+        // Everything except the first value.
+        {
+            const uint64_t OffsetCount  = 1;
+            const uint64_t Count        = N - OffsetCount;
+            SharedBuffer   Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+            std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+
+        // A range strictly inside the data.
+        {
+            const uint64_t OffsetCount  = 42;
+            const uint64_t Count        = 100;
+            SharedBuffer   Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress();
+            std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t));
+            CHECK(Values.size() == Count);
+            ValidateData(Values, ExpectedValues, OffsetCount);
+        }
+    }
}
void