diff options
| author | Dan Engelbrecht <[email protected]> | 2022-12-07 11:21:41 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2022-12-07 02:21:41 -0800 |
| commit | 100c8f966b1c5b2fb190748f0177600562d1c5fe (patch) | |
| tree | fc85e350dea47330149a1d42eb7a6c7ae0a06111 /zenstore | |
| parent | Cache request record/replay (#198) (diff) | |
| download | zen-100c8f966b1c5b2fb190748f0177600562d1c5fe.tar.xz zen-100c8f966b1c5b2fb190748f0177600562d1c5fe.zip | |
optimizations (#200)
* Use direct file read and direct buffer allocation for small IoBuffer materialization
* Reduce range of materialized data in CompositeBuffer reading
CompressedBuffer header reading often only needs a small part and not the whole file
* reduce lock contention in IoBuffer::Materialize
* Reduce parsing of compressed headers
Validate header type at decompression
* faster CreateDirectories - start from leaf going up and recurse back
* optimized BufferHeader::IsValid
* Add ValidateCompressedHeader to use when we don't need the actual compressed data
Validate that we always get compressed data in CidStore::AddChunk
* changelog
Diffstat (limited to 'zenstore')
| -rw-r--r-- | zenstore/cidstore.cpp | 17 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 13 | ||||
| -rw-r--r-- | zenstore/filecas.cpp | 10 | ||||
| -rw-r--r-- | zenstore/gc.cpp | 5 | ||||
| -rw-r--r-- | zenstore/include/zenstore/cidstore.h | 2 |
5 files changed, 28 insertions, 19 deletions
diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp index 8b2797ce9..5a5116faf 100644 --- a/zenstore/cidstore.cpp +++ b/zenstore/cidstore.cpp @@ -23,14 +23,17 @@ struct CidStore::Impl void Initialize(const CidStoreConfiguration& Config) { m_CasStore.Initialize(Config); } - CidStore::InsertResult AddChunk(const CompressedBuffer& ChunkData, CidStore::InsertMode Mode) + CidStore::InsertResult AddChunk(const IoBuffer& ChunkData, const IoHash& RawHash, CidStore::InsertMode Mode) { - const IoHash DecompressedId = IoHash::FromBLAKE3(ChunkData.GetRawHash()); - IoBuffer Payload = ChunkData.GetCompressed().Flatten().AsIoBuffer(); - +#ifndef NDEBUG + IoHash VerifyRawHash; + uint64_t _; + ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(ChunkData, VerifyRawHash, _) && RawHash == VerifyRawHash); +#endif // NDEBUG + IoBuffer Payload(ChunkData); Payload.SetContentType(ZenContentType::kCompressedBinary); - CasStore::InsertResult Result = m_CasStore.InsertChunk(Payload, DecompressedId, static_cast<CasStore::InsertMode>(Mode)); + CasStore::InsertResult Result = m_CasStore.InsertChunk(Payload, RawHash, static_cast<CasStore::InsertMode>(Mode)); return {.New = Result.New}; } @@ -78,9 +81,9 @@ CidStore::Initialize(const CidStoreConfiguration& Config) } CidStore::InsertResult -CidStore::AddChunk(const CompressedBuffer& ChunkData, InsertMode Mode) +CidStore::AddChunk(const IoBuffer& ChunkData, const IoHash& RawHash, InsertMode Mode) { - return m_Impl->AddChunk(ChunkData, Mode); + return m_Impl->AddChunk(ChunkData, RawHash, Mode); } IoBuffer diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 7507a82f6..8679eb95e 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -269,9 +269,11 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } IoBuffer Buffer(IoBuffer::Wrap, Data, Size); - if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed) + IoHash RawHash; + uint64_t RawSize; + if 
(CompressedBuffer::ValidateCompressedHeader(Buffer, RawHash, RawSize)) { - if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash) + if (RawHash != Hash) { // Hash mismatch BadKeys.push_back(Hash); @@ -295,10 +297,13 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) const IoHash& Hash = ChunkIndexToChunkHash[ChunkIndex]; IoBuffer Buffer(IoBuffer::BorrowedFile, File.GetBasicFile().Handle(), Offset, Size); + + IoHash RawHash; + uint64_t RawSize; // TODO: Add API to verify compressed buffer without having to memorymap the whole file - if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed) + if (CompressedBuffer::ValidateCompressedHeader(Buffer, RawHash, RawSize)) { - if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash) + if (RawHash != Hash) { // Hash mismatch BadKeys.push_back(Hash); diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index 38d7898cf..e67653e8a 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -519,9 +519,7 @@ FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize if (hRes == HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND)) { // Ensure parent directories exist and retry file creation - - std::filesystem::create_directories(std::wstring_view(Name.ShardedPath.c_str(), Name.Shard2len)); - + CreateDirectories(std::wstring_view(Name.ShardedPath.c_str(), Name.Shard2len)); hRes = InternalCreateFile(); } @@ -789,9 +787,11 @@ FileCasStrategy::Scrub(ScrubContext& Ctx) ChunkBytes += Payload.FileSize(); IoBuffer Buffer(IoBuffer::BorrowedFile, Payload.Handle(), 0, Payload.FileSize()); - if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer)); Compressed) + IoHash RawHash; + uint64_t RawSize; + if (CompressedBuffer::ValidateCompressedHeader(Buffer, RawHash, RawSize)) { - if (IoHash::FromBLAKE3(Compressed.GetRawHash()) != Hash) + if (RawHash != Hash) { // Hash mismatch BadHashes.push_back(Hash); diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp 
index 4094716ae..c50f59b64 100644 --- a/zenstore/gc.cpp +++ b/zenstore/gc.cpp @@ -917,7 +917,8 @@ TEST_CASE("gc.basic") IoBuffer Chunk = CreateChunk(128); auto CompressedChunk = Compress(Chunk); - const auto InsertResult = CidStore.AddChunk(CompressedChunk); + const auto InsertResult = + CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), IoHash::FromBLAKE3(CompressedChunk.DecodeRawHash())); CHECK(InsertResult.New); GcContext GcCtx(GcClock::Now() - std::chrono::hours(24)); @@ -926,7 +927,7 @@ TEST_CASE("gc.basic") CidStore.Flush(); Gc.CollectGarbage(GcCtx); - CHECK(!CidStore.ContainsChunk(IoHash::FromBLAKE3(CompressedChunk.GetRawHash()))); + CHECK(!CidStore.ContainsChunk(IoHash::FromBLAKE3(CompressedChunk.DecodeRawHash()))); } TEST_CASE("gc.full") diff --git a/zenstore/include/zenstore/cidstore.h b/zenstore/include/zenstore/cidstore.h index e8984a83d..16ca78225 100644 --- a/zenstore/include/zenstore/cidstore.h +++ b/zenstore/include/zenstore/cidstore.h @@ -70,7 +70,7 @@ public: }; void Initialize(const CidStoreConfiguration& Config); - InsertResult AddChunk(const CompressedBuffer& ChunkData, InsertMode Mode = InsertMode::kMayBeMovedInPlace); + InsertResult AddChunk(const IoBuffer& ChunkData, const IoHash& RawHash, InsertMode Mode = InsertMode::kMayBeMovedInPlace); IoBuffer FindChunkByCid(const IoHash& DecompressedId); bool ContainsChunk(const IoHash& DecompressedId); void FilterChunks(HashKeySet& InOutChunks); |