aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2024-02-26 19:08:27 +0100
committer: GitHub <[email protected]> 2024-02-26 19:08:27 +0100
commit: 8672d2235e73545abde15f075934f68495adeaf3 (patch)
tree: 421443b0eb76910c612bb219354a9306081cca10 /src
parent: adding context to http.sys error message (diff)
download: zen-8672d2235e73545abde15f075934f68495adeaf3.tar.xz
zen-8672d2235e73545abde15f075934f68495adeaf3.zip
hashing fixes (#657)
* Move structuredcachestore tests to zenstore-test
* Don't materialize entire files when hashing if the file is large
* Rewrite CompositeBuffer::Mid to never materialize buffers
Diffstat (limited to 'src')
-rw-r--r--src/zencore/blake3.cpp18
-rw-r--r--src/zencore/compositebuffer.cpp32
-rw-r--r--src/zencore/include/zencore/sharedbuffer.h12
-rw-r--r--src/zencore/iobuffer.cpp21
-rw-r--r--src/zencore/iohash.cpp18
-rw-r--r--src/zenserver/main.cpp1
-rw-r--r--src/zenstore-test/zenstore-test.cpp2
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp90
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp73
-rw-r--r--src/zenstore/include/zenstore/cache/cacheshared.h1
-rw-r--r--src/zenstore/include/zenstore/cache/structuredcachestore.h2
11 files changed, 173 insertions, 97 deletions
diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp
index 89826ae5d..bdbc8fb3e 100644
--- a/src/zencore/blake3.cpp
+++ b/src/zencore/blake3.cpp
@@ -45,7 +45,23 @@ BLAKE3::HashBuffer(const CompositeBuffer& Buffer)
for (const SharedBuffer& Segment : Buffer.GetSegments())
{
- blake3_hasher_update(&Hasher, Segment.GetData(), Segment.GetSize());
+ size_t SegmentSize = Segment.GetSize();
+ if (SegmentSize >= (65536 + 32768) && Segment.IsFileReference())
+ {
+ const IoBuffer SegmentBuffer = Segment.AsIoBuffer();
+ size_t Offset = 0;
+ while (Offset < SegmentSize)
+ {
+ size_t ChunkSize = Min<size_t>(SegmentSize - Offset, 65536u);
+ IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize);
+ blake3_hasher_update(&Hasher, SubRange.GetData(), ChunkSize);
+ Offset += ChunkSize;
+ }
+ }
+ else
+ {
+ blake3_hasher_update(&Hasher, Segment.GetData(), SegmentSize);
+ }
}
blake3_hasher_finalize(&Hasher, Hash.Hash, sizeof Hash.Hash);
diff --git a/src/zencore/compositebuffer.cpp b/src/zencore/compositebuffer.cpp
index 583ef19c6..d2b6d97f9 100644
--- a/src/zencore/compositebuffer.cpp
+++ b/src/zencore/compositebuffer.cpp
@@ -93,10 +93,36 @@ CompositeBuffer::Mid(uint64_t Offset, uint64_t Size) const
const uint64_t BufferSize = GetSize();
Offset = Min(Offset, BufferSize);
Size = Min(Size, BufferSize - Offset);
+
CompositeBuffer Buffer;
- IterateRange(Offset, Size, [&Buffer](MemoryView View, const SharedBuffer& ViewOuter) {
- Buffer.m_Segments.push_back(SharedBuffer::MakeView(View, ViewOuter));
- });
+ {
+ for (const SharedBuffer& Segment : m_Segments)
+ {
+ if (const uint64_t SegmentSize = Segment.GetSize(); Offset <= SegmentSize)
+ {
+ size_t PartSize = Min(Size, SegmentSize - Offset);
+ if (PartSize == SegmentSize)
+ {
+ Buffer.m_Segments.push_back(Segment);
+ }
+ else if (PartSize > 0 || Size == 0)
+ {
+ // We need to add the segment even if PartSize is zero if we are picking up zero bytes.
+ Buffer.m_Segments.push_back(SharedBuffer(IoBuffer(Segment.AsIoBuffer(), Offset, PartSize)));
+ }
+ Offset = 0;
+ Size -= PartSize;
+ if (Size == 0)
+ {
+ break;
+ }
+ }
+ else
+ {
+ Offset -= SegmentSize;
+ }
+ }
+ }
return Buffer;
}
diff --git a/src/zencore/include/zencore/sharedbuffer.h b/src/zencore/include/zencore/sharedbuffer.h
index 618bd2937..e31da27ec 100644
--- a/src/zencore/include/zencore/sharedbuffer.h
+++ b/src/zencore/include/zencore/sharedbuffer.h
@@ -114,6 +114,18 @@ public:
[[nodiscard]] bool IsOwned() const { return !m_Buffer || m_Buffer->IsOwned(); }
[[nodiscard]] inline bool IsNull() const { return !m_Buffer; }
inline void Reset() { m_Buffer = nullptr; }
+ inline bool IsFileReference() const
+ {
+ if (const IoBufferExtendedCore* Core = m_Buffer->ExtendedCore())
+ {
+ IoBufferFileReference _;
+ return Core->GetFileReference(_);
+ }
+ else
+ {
+ return false;
+ }
+ }
[[nodiscard]] MemoryView GetView() const
{
diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp
index 54f2aaee8..c8bc4a629 100644
--- a/src/zencore/iobuffer.cpp
+++ b/src/zencore/iobuffer.cpp
@@ -704,8 +704,25 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName)
IoHash
HashBuffer(IoBuffer& Buffer)
{
- // TODO: handle disk buffers with special path
- return IoHash::HashBuffer(Buffer.Data(), Buffer.Size());
+ size_t BufferSize = Buffer.Size();
+ if (BufferSize >= (65536 + 32768))
+ {
+ IoBufferFileReference _;
+ if (Buffer.GetFileReference(/* out */ _))
+ {
+ size_t Offset = 0;
+ IoHashStream HashStream;
+ while (Offset < BufferSize)
+ {
+ size_t ChunkSize = Min<size_t>(BufferSize - Offset, 65536u);
+ IoBuffer SubRange(Buffer, Offset, ChunkSize);
+ HashStream.Append(SubRange.GetData(), SubRange.GetSize());
+ Offset += ChunkSize;
+ }
+ return HashStream.GetHash();
+ }
+ }
+ return IoHash::HashBuffer(Buffer.Data(), BufferSize);
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp
index 77076c133..cedee913a 100644
--- a/src/zencore/iohash.cpp
+++ b/src/zencore/iohash.cpp
@@ -31,7 +31,23 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer)
for (const SharedBuffer& Segment : Buffer.GetSegments())
{
- Hasher.Append(Segment.GetData(), Segment.GetSize());
+ size_t SegmentSize = Segment.GetSize();
+ if (SegmentSize >= (65536 + 32768) && Segment.IsFileReference())
+ {
+ const IoBuffer SegmentBuffer = Segment.AsIoBuffer();
+ size_t Offset = 0;
+ while (Offset < SegmentSize)
+ {
+ size_t ChunkSize = Min<size_t>(SegmentSize - Offset, 65536u);
+ IoBuffer SubRange(SegmentBuffer, Offset, ChunkSize);
+ Hasher.Append(SubRange.GetData(), ChunkSize);
+ Offset += ChunkSize;
+ }
+ }
+ else
+ {
+ Hasher.Append(Segment.GetData(), SegmentSize);
+ }
}
return Hasher.GetHash();
diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp
index be2cdcc2d..8faefca88 100644
--- a/src/zenserver/main.cpp
+++ b/src/zenserver/main.cpp
@@ -297,7 +297,6 @@ test_main(int argc, char** argv)
zen::zenhttp_forcelinktests();
zen::zenstore_forcelinktests();
zen::zenutil_forcelinktests();
- zen::z$_forcelink();
zen::z$service_forcelink();
zen::logging::InitializeLogging();
diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp
index 6ef311324..c301c04a3 100644
--- a/src/zenstore-test/zenstore-test.cpp
+++ b/src/zenstore-test/zenstore-test.cpp
@@ -3,6 +3,7 @@
#include <zencore/filesystem.h>
#include <zencore/logging.h>
#include <zencore/zencore.h>
+#include <zenstore/cache/structuredcachestore.h>
#include <zenstore/zenstore.h>
#include <zenutil/zenutil.h>
@@ -23,6 +24,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
#if ZEN_WITH_TESTS
zen::zenstore_forcelinktests();
zen::zenutil_forcelinktests();
+ zen::structured_cachestore_forcelink();
zen::logging::InitializeLogging();
zen::MaximizeOpenFileCount();
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index 4d6b9f89e..615f8640f 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -22,21 +22,6 @@
namespace zen {
-bool
-IsKnownBadBucketName(std::string_view Bucket)
-{
- if (Bucket.size() == 32)
- {
- uint8_t BucketHex[16];
- if (ParseHexBytes(Bucket, BucketHex))
- {
- return true;
- }
- }
-
- return false;
-}
-
namespace {
#pragma pack(push)
@@ -1577,75 +1562,6 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl
}
}
-IoHash
-HashBuffer(const CompositeBuffer& Buffer)
-{
- IoHashStream Hasher;
-
- for (const SharedBuffer& Segment : Buffer.GetSegments())
- {
- Hasher.Append(Segment.GetView());
- }
-
- return Hasher.GetHash();
-}
-
-bool
-ValidateCacheBucketEntryValue(ZenContentType ContentType, IoBuffer Buffer)
-{
- ZEN_ASSERT_SLOW(Buffer.GetContentType() == ContentType);
-
- if (ContentType == ZenContentType::kCbObject)
- {
- CbValidateError Error = ValidateCompactBinary(Buffer, CbValidateMode::All);
-
- if (Error == CbValidateError::None)
- {
- return true;
- }
-
- ZEN_SCOPED_ERROR("compact binary validation failed: '{}'", ToString(Error));
-
- return false;
- }
- else if (ContentType == ZenContentType::kCompressedBinary)
- {
- IoBuffer MemoryBuffer = IoBufferBuilder::ReadFromFileMaybe(Buffer);
-
- IoHash HeaderRawHash;
- uint64_t RawSize = 0;
- if (!CompressedBuffer::ValidateCompressedHeader(MemoryBuffer, /* out */ HeaderRawHash, /* out */ RawSize))
- {
- ZEN_SCOPED_ERROR("compressed buffer header validation failed");
-
- return false;
- }
-
- CompressedBuffer Compressed =
- CompressedBuffer::FromCompressed(SharedBuffer(MemoryBuffer), /* out */ HeaderRawHash, /* out */ RawSize);
- CompositeBuffer Decompressed = Compressed.DecompressToComposite();
- IoHash DecompressedHash = HashBuffer(Decompressed);
-
- if (HeaderRawHash != DecompressedHash)
- {
- ZEN_SCOPED_ERROR("decompressed hash {} differs from header hash {}", DecompressedHash, HeaderRawHash);
-
- return false;
- }
- }
- else
- {
- // No way to verify this kind of content (what is it exactly?)
-
- static int Once = [&] {
- ZEN_WARN("ValidateCacheBucketEntryValue called with unknown content type ({})", ToString(ContentType));
- return 42;
- }();
- }
-
- return true;
-};
-
void
ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
{
@@ -1729,7 +1645,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
ReportBadKey(HashKey);
continue;
}
- if (!ValidateCacheBucketEntryValue(Loc.GetContentType(), Buffer))
+ if (!ValidateIoBuffer(Loc.GetContentType(), Buffer))
{
ReportBadKey(HashKey);
continue;
@@ -1768,7 +1684,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)];
ZenContentType ContentType = Payload.Location.GetContentType();
Buffer.SetContentType(ContentType);
- if (!ValidateCacheBucketEntryValue(ContentType, Buffer))
+ if (!ValidateIoBuffer(ContentType, Buffer))
{
ReportBadKey(Hash);
return;
@@ -1790,7 +1706,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)];
ZenContentType ContentType = Payload.Location.GetContentType();
Buffer.SetContentType(ContentType);
- if (!ValidateCacheBucketEntryValue(ContentType, Buffer))
+ if (!ValidateIoBuffer(ContentType, Buffer))
{
ReportBadKey(Hash);
return;
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index fd04af2a3..49183600d 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -44,6 +44,77 @@ ZEN_THIRD_PARTY_INCLUDES_END
namespace zen {
+bool
+IsKnownBadBucketName(std::string_view Bucket)
+{
+ if (Bucket.size() == 32)
+ {
+ uint8_t BucketHex[16];
+ if (ParseHexBytes(Bucket, BucketHex))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool
+ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer)
+{
+ ZEN_ASSERT_SLOW(Buffer.GetContentType() == ContentType);
+
+ if (ContentType == ZenContentType::kCbObject)
+ {
+ CbValidateError Error = ValidateCompactBinary(Buffer, CbValidateMode::All);
+
+ if (Error == CbValidateError::None)
+ {
+ return true;
+ }
+
+ ZEN_SCOPED_ERROR("compact binary validation failed: '{}'", ToString(Error));
+
+ return false;
+ }
+ else if (ContentType == ZenContentType::kCompressedBinary)
+ {
+ IoBuffer MemoryBuffer = IoBufferBuilder::ReadFromFileMaybe(Buffer);
+
+ IoHash HeaderRawHash;
+ uint64_t RawSize = 0;
+ if (!CompressedBuffer::ValidateCompressedHeader(MemoryBuffer, /* out */ HeaderRawHash, /* out */ RawSize))
+ {
+ ZEN_SCOPED_ERROR("compressed buffer header validation failed");
+
+ return false;
+ }
+
+ CompressedBuffer Compressed =
+ CompressedBuffer::FromCompressed(SharedBuffer(MemoryBuffer), /* out */ HeaderRawHash, /* out */ RawSize);
+ CompositeBuffer Decompressed = Compressed.DecompressToComposite();
+ IoHash DecompressedHash = IoHash::HashBuffer(Decompressed);
+
+ if (HeaderRawHash != DecompressedHash)
+ {
+ ZEN_SCOPED_ERROR("decompressed hash {} differs from header hash {}", DecompressedHash, HeaderRawHash);
+
+ return false;
+ }
+ }
+ else
+ {
+ // No way to verify this kind of content (what is it exactly?)
+
+ static int Once = [&] {
+ ZEN_WARN("ValidateIoBuffer called with unknown content type ({})", ToString(ContentType));
+ return 42;
+ }();
+ }
+
+ return true;
+};
+
ZenCacheNamespace::ZenCacheNamespace(GcManager& Gc, JobQueue& JobQueue, const std::filesystem::path& RootDir, const Configuration& Config)
: m_Gc(Gc)
, m_JobQueue(JobQueue)
@@ -2433,7 +2504,7 @@ TEST_CASE_TEMPLATE("z$.newgc.basics", ReferenceCaching, testutils::FalseType, te
#endif
void
-z$_forcelink()
+structured_cachestore_forcelink()
{
}
diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h
index e3e8a2f84..2d5b9cbc3 100644
--- a/src/zenstore/include/zenstore/cache/cacheshared.h
+++ b/src/zenstore/include/zenstore/cache/cacheshared.h
@@ -58,6 +58,7 @@ struct CacheValueDetails
};
bool IsKnownBadBucketName(std::string_view BucketName);
+bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer);
//////////////////////////////////////////////////////////////////////////
diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h
index 89d2abd11..3bed93d70 100644
--- a/src/zenstore/include/zenstore/cache/structuredcachestore.h
+++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h
@@ -266,6 +266,6 @@ private:
std::atomic_bool m_AccessLogEnabled;
};
-void z$_forcelink();
+void structured_cachestore_forcelink();
} // namespace zen