aboutsummaryrefslogtreecommitdiff
path: root/src/zencore/compress.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zencore/compress.cpp')
-rw-r--r--src/zencore/compress.cpp191
1 files changed, 138 insertions, 53 deletions
diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp
index 88c3bb5b9..d9f381811 100644
--- a/src/zencore/compress.cpp
+++ b/src/zencore/compress.cpp
@@ -7,6 +7,7 @@
#include <zencore/compositebuffer.h>
#include <zencore/crc32.h>
#include <zencore/endian.h>
+#include <zencore/filesystem.h>
#include <zencore/intmath.h>
#include <zencore/iohash.h>
#include <zencore/stream.h>
@@ -158,9 +159,10 @@ class BaseEncoder
{
public:
[[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const = 0;
- [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData,
- std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
- uint64_t BlockSize = DefaultBlockSize) const = 0;
+ [[nodiscard]] virtual bool CompressToStream(
+ const CompositeBuffer& RawData,
+ std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
+ uint64_t BlockSize = DefaultBlockSize) const = 0;
};
class BaseDecoder
@@ -189,11 +191,13 @@ public:
uint64_t RawOffset,
uint64_t RawSize) const = 0;
- virtual bool DecompressToStream(const BufferHeader& Header,
- const CompositeBuffer& CompressedData,
- uint64_t RawOffset,
- uint64_t RawSize,
- std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const = 0;
+ virtual bool DecompressToStream(
+ const BufferHeader& Header,
+ const CompositeBuffer& CompressedData,
+ uint64_t RawOffset,
+ uint64_t RawSize,
+ std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback)
+ const = 0;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -207,13 +211,50 @@ public:
return CompositeBuffer(HeaderData.MoveToShared(), RawData.MakeOwned());
}
- [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData,
- std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
- uint64_t /* BlockSize */) const final
+ [[nodiscard]] virtual bool CompressToStream(
+ const CompositeBuffer& RawData,
+ std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
+ uint64_t /* BlockSize */) const final
{
- UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData));
- Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderData.GetData(), HeaderData.GetSize())));
- Callback(HeaderData.GetSize(), RawData);
+ const uint64_t HeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder();
+
+ uint64_t RawOffset = 0;
+ BLAKE3Stream HashStream;
+
+ for (const SharedBuffer& Segment : RawData.GetSegments())
+ {
+ IoBufferFileReference FileRef = {nullptr, 0, 0};
+ IoBuffer SegmentBuffer = Segment.AsIoBuffer();
+ if (SegmentBuffer.GetFileReference(FileRef))
+ {
+ ZEN_ASSERT(FileRef.FileHandle != nullptr);
+
+ ScanFile(FileRef.FileHandle,
+ FileRef.FileChunkOffset,
+ FileRef.FileChunkSize,
+ 512u * 1024u,
+ [&](const void* Data, size_t Size) {
+ HashStream.Append(Data, Size);
+ CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size));
+ Callback(RawOffset, Size, HeaderSize + RawOffset, Tmp);
+ RawOffset += Size;
+ });
+ }
+ else
+ {
+ const uint64_t Size = SegmentBuffer.GetSize();
+ HashStream.Append(SegmentBuffer);
+ Callback(RawOffset, Size, HeaderSize + RawOffset, CompositeBuffer(Segment));
+ RawOffset += Size;
+ }
+ }
+
+ ZEN_ASSERT(RawOffset == RawData.GetSize());
+
+ UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), HashStream.GetHash());
+ ZEN_ASSERT(HeaderData.GetSize() == HeaderSize);
+ Callback(0, 0, 0, CompositeBuffer(HeaderData.MoveToShared()));
+
return true;
}
};
@@ -283,21 +324,41 @@ public:
[[nodiscard]] uint64_t GetHeaderSize(const BufferHeader&) const final { return sizeof(BufferHeader); }
- virtual bool DecompressToStream(const BufferHeader& Header,
- const CompositeBuffer& CompressedData,
- uint64_t RawOffset,
- uint64_t RawSize,
- std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const final
+ virtual bool DecompressToStream(
+ const BufferHeader& Header,
+ const CompositeBuffer& CompressedData,
+ uint64_t RawOffset,
+ uint64_t RawSize,
+ std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback)
+ const final
{
if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() &&
Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader) && RawOffset < Header.TotalRawSize &&
(RawOffset + RawSize) <= Header.TotalRawSize)
{
- if (!Callback(0, CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize)))
+ bool Result = true;
+ IoBufferFileReference FileRef = {nullptr, 0, 0};
+ if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef))
{
- return false;
+ ZEN_ASSERT(FileRef.FileHandle != nullptr);
+ uint64_t CallbackOffset = 0;
+ ScanFile(FileRef.FileHandle, sizeof(BufferHeader) + RawOffset, RawSize, 512u * 1024u, [&](const void* Data, size_t Size) {
+ if (Result)
+ {
+ CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size));
+ Result = Callback(sizeof(BufferHeader) + RawOffset + CallbackOffset, Size, CallbackOffset, Tmp);
+ }
+ CallbackOffset += Size;
+ });
+ return Result;
+ }
+ else
+ {
+ return Callback(sizeof(BufferHeader) + RawOffset,
+ RawSize,
+ 0,
+ CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize));
}
- return true;
}
return false;
}
@@ -309,9 +370,10 @@ class BlockEncoder : public BaseEncoder
{
public:
virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize) const final;
- virtual bool CompressToStream(const CompositeBuffer& RawData,
- std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
- uint64_t BlockSize) const final;
+ virtual bool CompressToStream(
+ const CompositeBuffer& RawData,
+ std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
+ uint64_t BlockSize) const final;
protected:
virtual CompressionMethod GetMethod() const = 0;
@@ -460,9 +522,10 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize)
}
bool
-BlockEncoder::CompressToStream(const CompositeBuffer& RawData,
- std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
- uint64_t BlockSize = DefaultBlockSize) const
+BlockEncoder::CompressToStream(
+ const CompositeBuffer& RawData,
+ std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
+ uint64_t BlockSize = DefaultBlockSize) const
{
ZEN_ASSERT(IsPow2(BlockSize) && (BlockSize <= (1u << 31)));
@@ -504,13 +567,17 @@ BlockEncoder::CompressToStream(const CompositeBuffer& RawData,
uint64_t CompressedBlockSize = CompressedBlock.GetSize();
if (RawBlockSize <= CompressedBlockSize)
{
- Callback(FullHeaderSize + CompressedSize,
+ Callback(FileRef.FileChunkOffset + RawOffset,
+ RawBlockSize,
+ FullHeaderSize + CompressedSize,
CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlockCopy.GetView().GetData(), RawBlockSize)));
CompressedBlockSize = RawBlockSize;
}
else
{
- Callback(FullHeaderSize + CompressedSize,
+ Callback(FileRef.FileChunkOffset + RawOffset,
+ RawBlockSize,
+ FullHeaderSize + CompressedSize,
CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize)));
}
@@ -540,12 +607,17 @@ BlockEncoder::CompressToStream(const CompositeBuffer& RawData,
uint64_t CompressedBlockSize = CompressedBlock.GetSize();
if (RawBlockSize <= CompressedBlockSize)
{
- Callback(FullHeaderSize + CompressedSize, CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize)));
+ Callback(RawOffset,
+ RawBlockSize,
+ FullHeaderSize + CompressedSize,
+ CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize)));
CompressedBlockSize = RawBlockSize;
}
else
{
- Callback(FullHeaderSize + CompressedSize,
+ Callback(RawOffset,
+ RawBlockSize,
+ FullHeaderSize + CompressedSize,
CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize)));
}
@@ -582,7 +654,7 @@ BlockEncoder::CompressToStream(const CompositeBuffer& RawData,
HeaderBuffer.GetMutableView().Mid(sizeof(BufferHeader), MetaSize).CopyFrom(MakeMemoryView(CompressedBlockSizes));
Header.Write(HeaderBuffer.GetMutableView());
- Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize())));
+ Callback(0, 0, 0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize())));
return true;
}
@@ -615,11 +687,13 @@ public:
MutableMemoryView RawView,
uint64_t RawOffset) const final;
- virtual bool DecompressToStream(const BufferHeader& Header,
- const CompositeBuffer& CompressedData,
- uint64_t RawOffset,
- uint64_t RawSize,
- std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const final;
+ virtual bool DecompressToStream(
+ const BufferHeader& Header,
+ const CompositeBuffer& CompressedData,
+ uint64_t RawOffset,
+ uint64_t RawSize,
+ std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback)
+ const final;
protected:
virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const = 0;
@@ -743,11 +817,12 @@ BlockDecoder::DecompressToComposite(const BufferHeader& Header, const CompositeB
}
bool
-BlockDecoder::DecompressToStream(const BufferHeader& Header,
- const CompositeBuffer& CompressedData,
- uint64_t RawOffset,
- uint64_t RawSize,
- std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const
+BlockDecoder::DecompressToStream(
+ const BufferHeader& Header,
+ const CompositeBuffer& CompressedData,
+ uint64_t RawOffset,
+ uint64_t RawSize,
+ std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const
{
if (Header.TotalCompressedSize != CompressedData.GetSize())
{
@@ -817,7 +892,9 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header,
Source.Detach();
return false;
}
- if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock,
+ if (!Callback(FileRef.FileChunkOffset + CompressedOffset,
+ CompressedBlockSize,
+ BlockIndex * BlockSize + OffsetInFirstBlock,
CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress))))
{
Source.Detach();
@@ -827,6 +904,8 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header,
else
{
if (!Callback(
+ FileRef.FileChunkOffset + CompressedOffset,
+ BytesToUncompress,
BlockIndex * BlockSize + OffsetInFirstBlock,
CompositeBuffer(
IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress))))
@@ -870,7 +949,9 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header,
{
return false;
}
- if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock,
+ if (!Callback(CompressedOffset,
+ UncompressedBlockSize,
+ BlockIndex * BlockSize + OffsetInFirstBlock,
CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress))))
{
return false;
@@ -879,6 +960,8 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header,
else
{
if (!Callback(
+ CompressedOffset,
+ BytesToUncompress,
BlockIndex * BlockSize + OffsetInFirstBlock,
CompositeBuffer(
IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress))))
@@ -1778,11 +1861,12 @@ CompressedBuffer::Compress(const SharedBuffer& RawData,
}
bool
-CompressedBuffer::CompressToStream(const CompositeBuffer& RawData,
- std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
- OodleCompressor Compressor,
- OodleCompressionLevel CompressionLevel,
- uint64_t BlockSize)
+CompressedBuffer::CompressToStream(
+ const CompositeBuffer& RawData,
+ std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback,
+ OodleCompressor Compressor,
+ OodleCompressionLevel CompressionLevel,
+ uint64_t BlockSize)
{
using namespace detail;
@@ -1995,9 +2079,10 @@ CompressedBuffer::DecompressToComposite() const
}
bool
-CompressedBuffer::DecompressToStream(uint64_t RawOffset,
- uint64_t RawSize,
- std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const
+CompressedBuffer::DecompressToStream(
+ uint64_t RawOffset,
+ uint64_t RawSize,
+ std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const
{
using namespace detail;
if (CompressedData)