diff options
| author | Dan Engelbrecht <[email protected]> | 2024-02-26 19:08:27 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-02-26 19:08:27 +0100 |
| commit | 8672d2235e73545abde15f075934f68495adeaf3 (patch) | |
| tree | 421443b0eb76910c612bb219354a9306081cca10 /src | |
| parent | adding context to http.sys error message (diff) | |
| download | zen-8672d2235e73545abde15f075934f68495adeaf3.tar.xz zen-8672d2235e73545abde15f075934f68495adeaf3.zip | |
hashing fixes (#657)
* move structuredcachestore tests to zenstore-test
* Don't materialize entire files when hashing large files
* rewrite CompositeBuffer::Mid to never materialize buffers
Diffstat (limited to 'src')
| -rw-r--r-- | src/zencore/blake3.cpp | 18 | ||||
| -rw-r--r-- | src/zencore/compositebuffer.cpp | 32 | ||||
| -rw-r--r-- | src/zencore/include/zencore/sharedbuffer.h | 12 | ||||
| -rw-r--r-- | src/zencore/iobuffer.cpp | 21 | ||||
| -rw-r--r-- | src/zencore/iohash.cpp | 18 | ||||
| -rw-r--r-- | src/zenserver/main.cpp | 1 | ||||
| -rw-r--r-- | src/zenstore-test/zenstore-test.cpp | 2 | ||||
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 90 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 73 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cacheshared.h | 1 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/structuredcachestore.h | 2 |
11 files changed, 173 insertions, 97 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp index 89826ae5d..bdbc8fb3e 100644 --- a/src/zencore/blake3.cpp +++ b/src/zencore/blake3.cpp @@ -45,7 +45,23 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - blake3_hasher_update(&Hasher, Segment.GetData(), Segment.GetSize()); + size_t SegmentSize = Segment.GetSize(); + if (SegmentSize >= (65536 + 32768) && Segment.IsFileReference()) + { + const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + size_t Offset = 0; + while (Offset < SegmentSize) + { + size_t ChunkSize = Min<size_t>(SegmentSize - Offset, 65536u); + IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); + blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize); + Offset += ChunkSize; + } + } + else + { + blake3_hasher_update(&Hasher, Segment.GetData(), SegmentSize); + } } blake3_hasher_finalize(&Hasher, Hash.Hash, sizeof Hash.Hash); diff --git a/src/zencore/compositebuffer.cpp b/src/zencore/compositebuffer.cpp index 583ef19c6..d2b6d97f9 100644 --- a/src/zencore/compositebuffer.cpp +++ b/src/zencore/compositebuffer.cpp @@ -93,10 +93,36 @@ CompositeBuffer::Mid(uint64_t Offset, uint64_t Size) const const uint64_t BufferSize = GetSize(); Offset = Min(Offset, BufferSize); Size = Min(Size, BufferSize - Offset); + CompositeBuffer Buffer; - IterateRange(Offset, Size, [&Buffer](MemoryView View, const SharedBuffer& ViewOuter) { - Buffer.m_Segments.push_back(SharedBuffer::MakeView(View, ViewOuter)); - }); + { + for (const SharedBuffer& Segment : m_Segments) + { + if (const uint64_t SegmentSize = Segment.GetSize(); Offset <= SegmentSize) + { + size_t PartSize = Min(Size, SegmentSize - Offset); + if (PartSize == SegmentSize) + { + Buffer.m_Segments.push_back(Segment); + } + else if (PartSize > 0 || Size == 0) + { + // We need to add the segment even if PartSize is zero if we are picking up zero bytes. 
+ Buffer.m_Segments.push_back(SharedBuffer(IoBuffer(Segment.AsIoBuffer(), Offset, PartSize))); + } + Offset = 0; + Size -= PartSize; + if (Size == 0) + { + break; + } + } + else + { + Offset -= SegmentSize; + } + } + } return Buffer; } diff --git a/src/zencore/include/zencore/sharedbuffer.h b/src/zencore/include/zencore/sharedbuffer.h index 618bd2937..e31da27ec 100644 --- a/src/zencore/include/zencore/sharedbuffer.h +++ b/src/zencore/include/zencore/sharedbuffer.h @@ -114,6 +114,18 @@ public: [[nodiscard]] bool IsOwned() const { return !m_Buffer || m_Buffer->IsOwned(); } [[nodiscard]] inline bool IsNull() const { return !m_Buffer; } inline void Reset() { m_Buffer = nullptr; } + inline bool IsFileReference() const + { + if (const IoBufferExtendedCore* Core = m_Buffer->ExtendedCore()) + { + IoBufferFileReference _; + return Core->GetFileReference(_); + } + else + { + return false; + } + } [[nodiscard]] MemoryView GetView() const { diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp index 54f2aaee8..c8bc4a629 100644 --- a/src/zencore/iobuffer.cpp +++ b/src/zencore/iobuffer.cpp @@ -704,8 +704,25 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName) IoHash HashBuffer(IoBuffer& Buffer) { - // TODO: handle disk buffers with special path - return IoHash::HashBuffer(Buffer.Data(), Buffer.Size()); + size_t BufferSize = Buffer.Size(); + if (BufferSize >= (65536 + 32768)) + { + IoBufferFileReference _; + if (Buffer.GetFileReference(/* out */ _)) + { + size_t Offset = 0; + IoHashStream HashStream; + while (Offset < BufferSize) + { + size_t ChunkSize = Min<size_t>(BufferSize - Offset, 65536u); + IoBuffer SubRange(Buffer, Offset, ChunkSize); + HashStream.Append(SubRange.GetData(), SubRange.GetSize()); + Offset += ChunkSize; + } + return HashStream.GetHash(); + } + } + return IoHash::HashBuffer(Buffer.Data(), BufferSize); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zencore/iohash.cpp 
b/src/zencore/iohash.cpp index 77076c133..cedee913a 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -31,7 +31,23 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) for (const SharedBuffer& Segment : Buffer.GetSegments()) { - Hasher.Append(Segment.GetData(), Segment.GetSize()); + size_t SegmentSize = Segment.GetSize(); + if (SegmentSize >= (65536 + 32768) && Segment.IsFileReference()) + { + const IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + size_t Offset = 0; + while (Offset < SegmentSize) + { + size_t ChunkSize = Min<size_t>(SegmentSize - Offset, 65536u); + IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize); + Hasher.Append(SubRange.GetData(), ChunkSize); + Offset += ChunkSize; + } + } + else + { + Hasher.Append(Segment.GetData(), SegmentSize); + } } return Hasher.GetHash(); diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index be2cdcc2d..8faefca88 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -297,7 +297,6 @@ test_main(int argc, char** argv) zen::zenhttp_forcelinktests(); zen::zenstore_forcelinktests(); zen::zenutil_forcelinktests(); - zen::z$_forcelink(); zen::z$service_forcelink(); zen::logging::InitializeLogging(); diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp index 6ef311324..c301c04a3 100644 --- a/src/zenstore-test/zenstore-test.cpp +++ b/src/zenstore-test/zenstore-test.cpp @@ -3,6 +3,7 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> #include <zencore/zencore.h> +#include <zenstore/cache/structuredcachestore.h> #include <zenstore/zenstore.h> #include <zenutil/zenutil.h> @@ -23,6 +24,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenstore_forcelinktests(); zen::zenutil_forcelinktests(); + zen::structured_cachestore_forcelink(); zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 
4d6b9f89e..615f8640f 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -22,21 +22,6 @@ namespace zen { -bool -IsKnownBadBucketName(std::string_view Bucket) -{ - if (Bucket.size() == 32) - { - uint8_t BucketHex[16]; - if (ParseHexBytes(Bucket, BucketHex)) - { - return true; - } - } - - return false; -} - namespace { #pragma pack(push) @@ -1577,75 +1562,6 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl } } -IoHash -HashBuffer(const CompositeBuffer& Buffer) -{ - IoHashStream Hasher; - - for (const SharedBuffer& Segment : Buffer.GetSegments()) - { - Hasher.Append(Segment.GetView()); - } - - return Hasher.GetHash(); -} - -bool -ValidateCacheBucketEntryValue(ZenContentType ContentType, IoBuffer Buffer) -{ - ZEN_ASSERT_SLOW(Buffer.GetContentType() == ContentType); - - if (ContentType == ZenContentType::kCbObject) - { - CbValidateError Error = ValidateCompactBinary(Buffer, CbValidateMode::All); - - if (Error == CbValidateError::None) - { - return true; - } - - ZEN_SCOPED_ERROR("compact binary validation failed: '{}'", ToString(Error)); - - return false; - } - else if (ContentType == ZenContentType::kCompressedBinary) - { - IoBuffer MemoryBuffer = IoBufferBuilder::ReadFromFileMaybe(Buffer); - - IoHash HeaderRawHash; - uint64_t RawSize = 0; - if (!CompressedBuffer::ValidateCompressedHeader(MemoryBuffer, /* out */ HeaderRawHash, /* out */ RawSize)) - { - ZEN_SCOPED_ERROR("compressed buffer header validation failed"); - - return false; - } - - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(MemoryBuffer), /* out */ HeaderRawHash, /* out */ RawSize); - CompositeBuffer Decompressed = Compressed.DecompressToComposite(); - IoHash DecompressedHash = HashBuffer(Decompressed); - - if (HeaderRawHash != DecompressedHash) - { - ZEN_SCOPED_ERROR("decompressed hash {} differs from header hash {}", DecompressedHash, HeaderRawHash); - - return false; - } - } - else - { - // No 
way to verify this kind of content (what is it exactly?) - - static int Once = [&] { - ZEN_WARN("ValidateCacheBucketEntryValue called with unknown content type ({})", ToString(ContentType)); - return 42; - }(); - } - - return true; -}; - void ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) { @@ -1729,7 +1645,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) ReportBadKey(HashKey); continue; } - if (!ValidateCacheBucketEntryValue(Loc.GetContentType(), Buffer)) + if (!ValidateIoBuffer(Loc.GetContentType(), Buffer)) { ReportBadKey(HashKey); continue; @@ -1768,7 +1684,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)]; ZenContentType ContentType = Payload.Location.GetContentType(); Buffer.SetContentType(ContentType); - if (!ValidateCacheBucketEntryValue(ContentType, Buffer)) + if (!ValidateIoBuffer(ContentType, Buffer)) { ReportBadKey(Hash); return; @@ -1790,7 +1706,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)]; ZenContentType ContentType = Payload.Location.GetContentType(); Buffer.SetContentType(ContentType); - if (!ValidateCacheBucketEntryValue(ContentType, Buffer)) + if (!ValidateIoBuffer(ContentType, Buffer)) { ReportBadKey(Hash); return; diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index fd04af2a3..49183600d 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -44,6 +44,77 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { +bool +IsKnownBadBucketName(std::string_view Bucket) +{ + if (Bucket.size() == 32) + { + uint8_t BucketHex[16]; + if (ParseHexBytes(Bucket, BucketHex)) + { + return true; + } + } + + return false; +} + +bool +ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer) +{ + ZEN_ASSERT_SLOW(Buffer.GetContentType() == ContentType); + + if 
(ContentType == ZenContentType::kCbObject) + { + CbValidateError Error = ValidateCompactBinary(Buffer, CbValidateMode::All); + + if (Error == CbValidateError::None) + { + return true; + } + + ZEN_SCOPED_ERROR("compact binary validation failed: '{}'", ToString(Error)); + + return false; + } + else if (ContentType == ZenContentType::kCompressedBinary) + { + IoBuffer MemoryBuffer = IoBufferBuilder::ReadFromFileMaybe(Buffer); + + IoHash HeaderRawHash; + uint64_t RawSize = 0; + if (!CompressedBuffer::ValidateCompressedHeader(MemoryBuffer, /* out */ HeaderRawHash, /* out */ RawSize)) + { + ZEN_SCOPED_ERROR("compressed buffer header validation failed"); + + return false; + } + + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(MemoryBuffer), /* out */ HeaderRawHash, /* out */ RawSize); + CompositeBuffer Decompressed = Compressed.DecompressToComposite(); + IoHash DecompressedHash = IoHash::HashBuffer(Decompressed); + + if (HeaderRawHash != DecompressedHash) + { + ZEN_SCOPED_ERROR("decompressed hash {} differs from header hash {}", DecompressedHash, HeaderRawHash); + + return false; + } + } + else + { + // No way to verify this kind of content (what is it exactly?) 
+ + static int Once = [&] { + ZEN_WARN("ValidateIoBuffer called with unknown content type ({})", ToString(ContentType)); + return 42; + }(); + } + + return true; +}; + ZenCacheNamespace::ZenCacheNamespace(GcManager& Gc, JobQueue& JobQueue, const std::filesystem::path& RootDir, const Configuration& Config) : m_Gc(Gc) , m_JobQueue(JobQueue) @@ -2433,7 +2504,7 @@ TEST_CASE_TEMPLATE("z$.newgc.basics", ReferenceCaching, testutils::FalseType, te #endif void -z$_forcelink() +structured_cachestore_forcelink() { } diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index e3e8a2f84..2d5b9cbc3 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -58,6 +58,7 @@ struct CacheValueDetails }; bool IsKnownBadBucketName(std::string_view BucketName); +bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer); ////////////////////////////////////////////////////////////////////////// diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 89d2abd11..3bed93d70 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -266,6 +266,6 @@ private: std::atomic_bool m_AccessLogEnabled; }; -void z$_forcelink(); +void structured_cachestore_forcelink(); } // namespace zen |