diff options
Diffstat (limited to 'src/zencore/compress.cpp')
| -rw-r--r-- | src/zencore/compress.cpp | 191 |
1 files changed, 138 insertions, 53 deletions
diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 88c3bb5b9..d9f381811 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -7,6 +7,7 @@ #include <zencore/compositebuffer.h> #include <zencore/crc32.h> #include <zencore/endian.h> +#include <zencore/filesystem.h> #include <zencore/intmath.h> #include <zencore/iohash.h> #include <zencore/stream.h> @@ -158,9 +159,10 @@ class BaseEncoder { public: [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const = 0; - [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData, - std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, - uint64_t BlockSize = DefaultBlockSize) const = 0; + [[nodiscard]] virtual bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize = DefaultBlockSize) const = 0; }; class BaseDecoder @@ -189,11 +191,13 @@ public: uint64_t RawOffset, uint64_t RawSize) const = 0; - virtual bool DecompressToStream(const BufferHeader& Header, - const CompositeBuffer& CompressedData, - uint64_t RawOffset, - uint64_t RawSize, - std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const = 0; + virtual bool DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) + const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -207,13 +211,50 @@ public: return CompositeBuffer(HeaderData.MoveToShared(), RawData.MakeOwned()); } - [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData, - std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, - uint64_t /* BlockSize */) const final + [[nodiscard]] virtual bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t /* BlockSize */) const final { - UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); - Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderData.GetData(), HeaderData.GetSize()))); - Callback(HeaderData.GetSize(), RawData); + const uint64_t HeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder(); + + uint64_t RawOffset = 0; + BLAKE3Stream HashStream; + + for (const SharedBuffer& Segment : RawData.GetSegments()) + { + IoBufferFileReference FileRef = {nullptr, 0, 0}; + IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + if (SegmentBuffer.GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + 512u * 1024u, + [&](const void* Data, size_t Size) { + HashStream.Append(Data, Size); + CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size)); + Callback(RawOffset, Size, HeaderSize + RawOffset, Tmp); + RawOffset += Size; + }); + } + else + { + const uint64_t Size = SegmentBuffer.GetSize(); + HashStream.Append(SegmentBuffer); + Callback(RawOffset, Size, HeaderSize + RawOffset, CompositeBuffer(Segment)); + RawOffset += Size; + } + } + + ZEN_ASSERT(RawOffset == RawData.GetSize()); + + UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), HashStream.GetHash()); + ZEN_ASSERT(HeaderData.GetSize() == HeaderSize); + Callback(0, 0, 0, CompositeBuffer(HeaderData.MoveToShared())); + return true; } }; @@ -283,21 +324,41 @@ public: [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader&) const final { return sizeof(BufferHeader); } - virtual bool DecompressToStream(const BufferHeader& Header, - const CompositeBuffer& CompressedData, - uint64_t RawOffset, - uint64_t RawSize, - std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const final + virtual bool DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) + const final { if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() && Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader) && RawOffset < Header.TotalRawSize && (RawOffset + RawSize) <= Header.TotalRawSize) { - if (!Callback(0, CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize))) + bool Result = true; + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - return false; + ZEN_ASSERT(FileRef.FileHandle != nullptr); + uint64_t CallbackOffset = 0; + ScanFile(FileRef.FileHandle, sizeof(BufferHeader) + RawOffset, RawSize, 512u * 1024u, [&](const void* Data, size_t Size) { + if (Result) + { + CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size)); + Result = Callback(sizeof(BufferHeader) + RawOffset + CallbackOffset, Size, CallbackOffset, Tmp); + } + CallbackOffset += Size; + }); + return Result; + } + else + { + return Callback(sizeof(BufferHeader) + RawOffset, + RawSize, + 0, + CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize)); } - return true; } return false; } @@ -309,9 +370,10 @@ class BlockEncoder : public BaseEncoder { public: virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize) const final; - virtual bool CompressToStream(const CompositeBuffer& RawData, - std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, - uint64_t BlockSize) const final; + virtual bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize) const final; protected: virtual CompressionMethod GetMethod() const = 0; @@ -460,9 +522,10 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) } bool -BlockEncoder::CompressToStream(const CompositeBuffer& RawData, - std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, - uint64_t BlockSize = DefaultBlockSize) const +BlockEncoder::CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize = DefaultBlockSize) const { ZEN_ASSERT(IsPow2(BlockSize) && (BlockSize <= (1u << 31))); @@ -504,13 +567,17 @@ BlockEncoder::CompressToStream(const CompositeBuffer& RawData, uint64_t CompressedBlockSize = CompressedBlock.GetSize(); if (RawBlockSize <= CompressedBlockSize) { - Callback(FullHeaderSize + CompressedSize, + Callback(FileRef.FileChunkOffset + RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlockCopy.GetView().GetData(), RawBlockSize))); CompressedBlockSize = RawBlockSize; } else { - Callback(FullHeaderSize + CompressedSize, + Callback(FileRef.FileChunkOffset + RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); } @@ -540,12 +607,17 @@ BlockEncoder::CompressToStream(const CompositeBuffer& RawData, uint64_t CompressedBlockSize = CompressedBlock.GetSize(); if (RawBlockSize <= CompressedBlockSize) { - Callback(FullHeaderSize + CompressedSize, CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize))); + Callback(RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize))); CompressedBlockSize = RawBlockSize; } else { - Callback(FullHeaderSize + CompressedSize, + Callback(RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); } @@ -582,7 +654,7 @@ BlockEncoder::CompressToStream(const CompositeBuffer& RawData, HeaderBuffer.GetMutableView().Mid(sizeof(BufferHeader), MetaSize).CopyFrom(MakeMemoryView(CompressedBlockSizes)); Header.Write(HeaderBuffer.GetMutableView()); - Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize()))); + Callback(0, 0, 0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize()))); return true; } @@ -615,11 +687,13 @@ public: MutableMemoryView RawView, uint64_t RawOffset) const final; - virtual bool DecompressToStream(const BufferHeader& Header, - const CompositeBuffer& CompressedData, - uint64_t RawOffset, - uint64_t RawSize, - std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const final; + virtual bool DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) + const final; protected: virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const = 0; @@ -743,11 +817,12 @@ BlockDecoder::DecompressToComposite(const BufferHeader& Header, const CompositeB } bool -BlockDecoder::DecompressToStream(const BufferHeader& Header, - const CompositeBuffer& CompressedData, - uint64_t RawOffset, - uint64_t RawSize, - std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const +BlockDecoder::DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const { if (Header.TotalCompressedSize != CompressedData.GetSize()) { @@ -817,7 +892,9 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, Source.Detach(); return false; } - if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + if (!Callback(FileRef.FileChunkOffset + CompressedOffset, + CompressedBlockSize, + BlockIndex * BlockSize + OffsetInFirstBlock, CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) { Source.Detach(); @@ -827,6 +904,8 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, else { if (!Callback( + FileRef.FileChunkOffset + CompressedOffset, + BytesToUncompress, BlockIndex * BlockSize + OffsetInFirstBlock, CompositeBuffer( IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) @@ -870,7 +949,9 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, { return false; } - if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + if (!Callback(CompressedOffset, + UncompressedBlockSize, + BlockIndex * BlockSize + OffsetInFirstBlock, CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) { return false; @@ -879,6 +960,8 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, else { if (!Callback( + CompressedOffset, + BytesToUncompress, BlockIndex * BlockSize + OffsetInFirstBlock, CompositeBuffer( IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) @@ -1778,11 +1861,12 @@ CompressedBuffer::Compress(const SharedBuffer& RawData, } bool -CompressedBuffer::CompressToStream(const CompositeBuffer& RawData, - std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, - OodleCompressor Compressor, - OodleCompressionLevel CompressionLevel, - uint64_t BlockSize) +CompressedBuffer::CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + OodleCompressor Compressor, + OodleCompressionLevel CompressionLevel, + uint64_t BlockSize) { using namespace detail; @@ -1995,9 +2079,10 @@ CompressedBuffer::DecompressToComposite() const } bool -CompressedBuffer::DecompressToStream(uint64_t RawOffset, - uint64_t RawSize, - std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const +CompressedBuffer::DecompressToStream( + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const { using namespace detail; if (CompressedData) |