diff options
| author | Per Larsson <[email protected]> | 2021-10-21 08:21:45 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2021-10-21 08:21:45 +0200 |
| commit | a63dc510c62830382f243e965be45b705d396879 (patch) | |
| tree | 63799eb1b05788c93a177a422a94af670aa77b84 /zencore | |
| parent | Added missing include. (diff) | |
| download | zen-a63dc510c62830382f243e965be45b705d396879.tar.xz zen-a63dc510c62830382f243e965be45b705d396879.zip | |
Compressed oplog attachments
Diffstat (limited to 'zencore')
| -rw-r--r-- | zencore/compactbinarypackage.cpp | 46 | ||||
| -rw-r--r-- | zencore/compress.cpp | 193 | ||||
| -rw-r--r-- | zencore/include/zencore/compress.h | 2 |
3 files changed, 235 insertions, 6 deletions
diff --git a/zencore/compactbinarypackage.cpp b/zencore/compactbinarypackage.cpp index 88757d47f..f7ce371c8 100644 --- a/zencore/compactbinarypackage.cpp +++ b/zencore/compactbinarypackage.cpp @@ -635,6 +635,11 @@ namespace legacy { Writer.AddBinary(Attachment.AsBinary()); Writer.AddBinaryAttachment(Attachment.GetHash()); } + else if (Attachment.IsCompressedBinary()) + { + Writer.AddBinary(Attachment.AsCompressedBinary().GetCompressed()); + Writer.AddBinaryAttachment(Attachment.GetHash()); + } else if (Attachment.IsNull()) { Writer.AddBinary(MemoryView()); @@ -695,17 +700,32 @@ namespace legacy { SharedBuffer Buffer = SharedBuffer::MakeView(View, ValueField.GetOuterBuffer()).MakeOwned(); CbField HashField = LoadCompactBinary(Reader, Allocator); const IoHash& Hash = HashField.AsAttachment(); - if (HashField.HasError() || IoHash::HashBuffer(Buffer) != Hash) + if (HashField.HasError()) { return false; } - if (HashField.IsObjectAttachment()) + if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(Buffer)) { - Package.AddAttachment(CbAttachment(CbObject(std::move(Buffer)), Hash)); + if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash) + { + return false; + } + Package.AddAttachment(CbAttachment(Compressed)); } else { - Package.AddAttachment(CbAttachment(CompositeBuffer(std::move(Buffer)), Hash)); + if (IoHash::HashBuffer(Buffer) != Hash) + { + return false; + } + if (HashField.IsObjectAttachment()) + { + Package.AddAttachment(CbAttachment(CbObject(std::move(Buffer)), Hash)); + } + else + { + Package.AddAttachment(CbAttachment(CompositeBuffer(std::move(Buffer)), Hash)); + } } } } @@ -714,8 +734,22 @@ namespace legacy { const IoHash Hash = ValueField.AsHash(); ZEN_ASSERT(Mapper); - - Package.AddAttachment(CbAttachment((*Mapper)(Hash), Hash)); + if (SharedBuffer AttachmentData = (*Mapper)(Hash)) + { + if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(AttachmentData)) + { + Package.AddAttachment(CbAttachment(Compressed)); + } + else + { + const CbValidateError ValidationResult = ValidateCompactBinary(AttachmentData.GetView(), CbValidateMode::All); + if (ValidationResult != CbValidateError::None) + { + return false; + } + Package.AddAttachment(CbAttachment(CbObject(std::move(AttachmentData)), Hash)); + } + } } else { diff --git a/zencore/compress.cpp b/zencore/compress.cpp index 8d309e010..4a8d116fa 100644 --- a/zencore/compress.cpp +++ b/zencore/compress.cpp @@ -690,6 +690,90 @@ ValidBufferOrEmpty(BufferType&& CompressedData) return BufferHeader::IsValid(CompressedData) ? CompositeBuffer(std::forward<BufferType>(CompressedData)) : CompositeBuffer(); } +CompositeBuffer +CopyCompressedRange(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize) +{ + if (Header.Method == CompressionMethod::None) + { + UniqueBuffer NewCompressedData = UniqueBuffer::Alloc(RawSize); + CompressedData.CopyTo(NewCompressedData.GetMutableView(), sizeof(Header) + RawOffset); + + BufferHeader NewHeader = Header; + NewHeader.Crc32 = 0; + NewHeader.TotalRawSize = RawSize; + NewHeader.TotalCompressedSize = NewHeader.TotalRawSize + sizeof(BufferHeader); + NewHeader.RawHash = BLAKE3(); + + UniqueBuffer HeaderData = UniqueBuffer::Alloc(sizeof(BufferHeader)); + NewHeader.Write(HeaderData); + + return CompositeBuffer(HeaderData.MoveToShared(), NewCompressedData.MoveToShared()); + } + else + { + UniqueBuffer BlockSizeBuffer; + MemoryView BlockSizeView = + CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); + + const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; + const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize); + const size_t FirstBlock = uint64_t(RawOffset / BlockSize); + const size_t LastBlock = uint64_t((RawOffset + RawSize - 1) / BlockSize); + uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); + + const uint64_t NewBlockCount = LastBlock - FirstBlock + 1; + const uint64_t NewMetaSize = NewBlockCount * sizeof(uint32_t); + uint64_t NewCompressedSize = 0; + uint64_t NewTotalRawSize = 0; + std::vector<uint32_t> NewCompressedBlockSizes; + + NewCompressedBlockSizes.reserve(NewBlockCount); + for (size_t BlockIndex = FirstBlock; BlockIndex <= LastBlock; ++BlockIndex) + { + const uint64_t UncompressedBlockSize = (BlockIndex == Header.BlockCount - 1) ? LastBlockSize : BlockSize; + NewTotalRawSize += UncompressedBlockSize; + + const uint32_t CompressedBlockSize = CompressedBlockSizes[BlockIndex]; + NewCompressedBlockSizes.push_back(CompressedBlockSize); + NewCompressedSize += ByteSwap(CompressedBlockSize); + } + + const uint64_t NewTotalCompressedSize = sizeof(BufferHeader) + NewBlockCount * sizeof(uint32_t) + NewCompressedSize; + UniqueBuffer NewCompressedData = UniqueBuffer::Alloc(NewTotalCompressedSize); + MutableMemoryView NewCompressedBlocks = NewCompressedData.GetMutableView() + sizeof(BufferHeader) + NewMetaSize; + + // Seek to first compressed block + for (size_t BlockIndex = 0; BlockIndex < FirstBlock; ++BlockIndex) + { + const uint64_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + CompressedOffset += CompressedBlockSize; + } + + // Copy blocks + UniqueBuffer CompressedBlockCopy; + const MemoryView CompressedRange = CompressedData.ViewOrCopyRange(CompressedOffset, NewCompressedSize, CompressedBlockCopy); + NewCompressedBlocks.CopyFrom(CompressedRange); + + // Copy block sizes + NewCompressedData.GetMutableView().Mid(sizeof(BufferHeader), NewMetaSize).CopyFrom(MakeMemoryView(NewCompressedBlockSizes)); + + BufferHeader NewHeader; + NewHeader.Crc32 = 0; + NewHeader.Method = Header.Method; + NewHeader.Compressor = Header.Compressor; + NewHeader.CompressionLevel = Header.CompressionLevel; + NewHeader.BlockSizeExponent = Header.BlockSizeExponent; + NewHeader.BlockCount = static_cast<uint32_t>(NewBlockCount); + NewHeader.TotalRawSize = NewTotalRawSize; + NewHeader.TotalCompressedSize = NewTotalCompressedSize; + NewHeader.RawHash = BLAKE3(); + NewHeader.Write(NewCompressedData.GetMutableView().Left(sizeof(BufferHeader) + NewMetaSize)); + + return CompositeBuffer(NewCompressedData.MoveToShared()); + } +} + } // namespace zen::detail namespace zen { @@ -774,6 +858,17 @@ CompressedBuffer::GetRawHash() const return CompressedData ? detail::BufferHeader::Read(CompressedData).RawHash : BLAKE3(); } +CompressedBuffer +CompressedBuffer::CopyRange(uint64_t RawOffset, uint64_t RawSize) const +{ + using namespace detail; + const BufferHeader Header = BufferHeader::Read(CompressedData); + CompressedBuffer Range; + Range.CompressedData = CopyCompressedRange(Header, CompressedData, RawOffset, RawSize); + + return Range; +} + bool CompressedBuffer::TryDecompressTo(MutableMemoryView RawView, uint64_t RawOffset) const { @@ -1040,6 +1135,104 @@ TEST_CASE("CompressedBuffer") ValidateData(Values, ExpectedValues, OffsetCount); } } + + SUBCASE("copy range") + { + const uint64_t BlockSize = 64 * sizeof(uint64_t); + const uint64_t N = 1000; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::Mermaid, + OodleCompressionLevel::Optimal4, + BlockSize); + + { + const uint64_t OffsetCount = 0; + const uint64_t Count = N; + SharedBuffer Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 64; + const uint64_t Count = N - 64; + SharedBuffer Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 64 * 2 + 32; + const uint64_t Count = N - OffsetCount; + const uint64_t RawOffset = OffsetCount * sizeof(uint64_t); + const uint64_t RawSize = Count * sizeof(uint64_t); + uint64_t FirstBlockOffset = RawOffset % BlockSize; + + SharedBuffer Uncompressed = Compressed.CopyRange(RawOffset, RawSize).Decompress(); + std::span<uint64_t const> AllValues((const uint64_t*)Uncompressed.GetData(), RawSize / sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)), + RawSize / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 64 * 2 + 63; + const uint64_t Count = N - OffsetCount - 5; + const uint64_t RawOffset = OffsetCount * sizeof(uint64_t); + const uint64_t RawSize = Count * sizeof(uint64_t); + uint64_t FirstBlockOffset = RawOffset % BlockSize; + + SharedBuffer Uncompressed = Compressed.CopyRange(RawOffset, RawSize).Decompress(); + std::span<uint64_t const> AllValues((const uint64_t*)Uncompressed.GetData(), RawSize / sizeof(uint64_t)); + std::span<uint64_t const> Values((const uint64_t*)(((const uint8_t*)(Uncompressed.GetData()) + FirstBlockOffset)), + RawSize / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + } + + SUBCASE("copy uncompressed range") + { + const uint64_t BlockSize = 64 * sizeof(uint64_t); + const uint64_t N = 1000; + std::vector<uint64_t> ExpectedValues = GenerateData(N); + + CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer::MakeView(MakeMemoryView(ExpectedValues)), + OodleCompressor::NotSet, + OodleCompressionLevel::None); + + { + const uint64_t OffsetCount = 0; + const uint64_t Count = N; + SharedBuffer Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 1; + const uint64_t Count = N - OffsetCount; + SharedBuffer Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + + { + const uint64_t OffsetCount = 42; + const uint64_t Count = 100; + SharedBuffer Uncompressed = Compressed.CopyRange(OffsetCount * sizeof(uint64_t), Count * sizeof(uint64_t)).Decompress(); + std::span<uint64_t const> Values((const uint64_t*)Uncompressed.GetData(), Uncompressed.GetSize() / sizeof(uint64_t)); + CHECK(Values.size() == Count); + ValidateData(Values, ExpectedValues, OffsetCount); + } + } } void diff --git a/zencore/include/zencore/compress.h b/zencore/include/zencore/compress.h index 426b4981a..d37ecfa79 100644 --- a/zencore/include/zencore/compress.h +++ b/zencore/include/zencore/compress.h @@ -105,6 +105,8 @@ public: /** Returns the hash of the raw data. Zero on error or if this is null. */ [[nodiscard]] ZENCORE_API BLAKE3 GetRawHash() const; + [[nodiscard]] ZENCORE_API CompressedBuffer CopyRange(uint64_t RawOffset, uint64_t RawSize = ~uint64_t(0)) const; + /** * Returns the compressor and compression level used by this buffer. * |