diff options
| author | Stefan Boberg <[email protected]> | 2026-04-22 12:37:08 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-04-22 12:37:08 +0200 |
| commit | 4081fc5f231bc8450952f94c505ccb987d8eb65c (patch) | |
| tree | 6e37fb5845cfae5edcbd96f8be5107e0df31484b | |
| parent | Zen-style trace log events (#1006) (diff) | |
| download | archived-zen-4081fc5f231bc8450952f94c505ccb987d8eb65c.tar.xz archived-zen-4081fc5f231bc8450952f94c505ccb987d8eb65c.zip | |
BlockStore: fix correctness issues in block storage layer (#996)
1. **Assert invariant in `RemoveActiveWriteBlock`** — `erase(std::find(...))` was UB if the invariant ever broke. Now asserts the iterator before erasing.
2. **Single atomic delta in `SetMetaData`** — was `+= new; -= old` as two atomic ops, briefly inflating `TotalSize()` for concurrent readers. Collapsed into one `fetch_add`.
3. **Consistent `IncludeBlocks` / `IncludeBlock`** — `IncludeBlocks` asserted on duplicate keys while `IncludeBlock` silently skipped. Made both tolerant; also made the `reserve` call additive so a second call can't shrink the capacity request.
4. **Replace `operator[]` reads with `find` on `m_ChunkBlocks`** — `tsl::robin_map::operator[]` default-inserts; several read-intent lookups could produce ghost null entries if invariants broke (especially on compaction rollback paths).
5. **Bound `GetChunk` against actual file size** — `m_IoBuffer.GetSize()` is the mapped capacity (block size, e.g. 256 MiB), not written bytes. Requests inside the mapped region but past the real EOF returned views over zero-filled memory. Now bounds against `FileSize()`.
| -rw-r--r-- | src/zenstore/blockstore.cpp | 79 |
1 files changed, 47 insertions, 32 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 6528fcb2f..f25a0404a 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -106,31 +106,30 @@ BlockStoreFile::Create(uint64_t InitialSize) void* FileHandle = m_File.Handle(); + // File was truncated to 0 bytes; make the size cache reflect that explicitly. + m_CachedFileSize.store(0, std::memory_order::relaxed); + // We map our m_IoBuffer beyond the file size as we will grow it over time and want // to be able to create sub-buffers of all the written range later m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, InitialSize, false); } +// `m_CachedFileSize` is the single source of truth for the file size. It is +// seeded by `Open` (from the filesystem) or `Create` (to 0 after truncate), +// and grown monotonically by `Write` via CAS. The BlockStore owns its block +// files exclusively, so nothing else can change the on-disk size behind our +// back - no filesystem round-trip is needed here. uint64_t BlockStoreFile::FileSize() const { - uint64_t CachedSize = m_CachedFileSize; - if (CachedSize == 0) - { - std::error_code Ec; - uint64_t Size = m_File.FileSize(Ec); - if (Ec) - { - ZEN_WARN("Failed to get file size of block {}. Reason: {}", m_Path, Ec.message()); - return 0; - } - uint64_t Expected = 0; - m_CachedFileSize.compare_exchange_strong(Expected, Size); - return Size; - } - return CachedSize; + return m_CachedFileSize.load(std::memory_order::relaxed); } +// Safe to call while other threads hold a Ref<BlockStoreFile> and read: the +// actual unlink is deferred until the last open handle closes (Windows pending +// delete / POSIX unlinked-but-open), and the Ref keeps this object alive until +// all readers drop it. Callers may therefore release the BlockStore's insert +// lock, hand out a Ref, and concurrently mark the file here. void BlockStoreFile::MarkAsDeleteOnClose() { @@ -141,7 +140,13 @@ BlockStoreFile::MarkAsDeleteOnClose() IoBuffer BlockStoreFile::GetChunk(uint64_t Offset, uint64_t Size) { - if (Offset + Size > m_IoBuffer.GetSize()) + // Bound the request against the actual written file size rather than the + // mapped buffer size. `m_IoBuffer` is created at `InitialSize` (= block + // capacity) on `Create`, so a bare `m_IoBuffer.GetSize()` check would + // accept offsets inside the mapped region but past the real end of the + // file, returning a view over uninitialised / zero-filled bytes. + const uint64_t WrittenSize = FileSize(); + if (Offset > WrittenSize || Size > WrittenSize - Offset) { return {}; } @@ -503,13 +508,17 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks) } for (std::uint32_t BlockIndex : DeleteBlocks) { - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockIndex); - if (m_ChunkBlocks[BlockIndex]) + auto It = m_ChunkBlocks.find(BlockIndex); + if (It == m_ChunkBlocks.end()) { - m_TotalSize.fetch_sub(m_ChunkBlocks[BlockIndex]->TotalSize(), std::memory_order::relaxed); - m_ChunkBlocks[BlockIndex]->MarkAsDeleteOnClose(); + continue; + } + if (It->second) + { + m_TotalSize.fetch_sub(It->second->TotalSize(), std::memory_order::relaxed); + It->second->MarkAsDeleteOnClose(); } - m_ChunkBlocks.erase(BlockIndex); + m_ChunkBlocks.erase(It); } return MissingBlocks; } @@ -657,14 +666,19 @@ BlockStore::RemoveActiveWriteBlock(uint32_t BlockIndex) eastl::fixed_vector<Ref<BlockStoreFile>, 2> FlushBlocks; { RwLock::ExclusiveLockScope _(m_InsertLock); - m_ActiveWriteBlocks.erase(std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), BlockIndex)); + auto ActiveIt = std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), BlockIndex); + ZEN_ASSERT(ActiveIt != m_ActiveWriteBlocks.end()); + m_ActiveWriteBlocks.erase(ActiveIt); for (auto It = m_BlocksToFlush.begin(); It != m_BlocksToFlush.end();) { const uint32_t FlushBlockIndex = *It; if (std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), FlushBlockIndex) == m_ActiveWriteBlocks.end()) { - FlushBlocks.push_back(m_ChunkBlocks[FlushBlockIndex]); - ZEN_DEBUG("Flushing block {} at '{}'", FlushBlockIndex, GetBlockPath(m_BlocksBasePath, FlushBlockIndex)); + if (auto BlockIt = m_ChunkBlocks.find(FlushBlockIndex); BlockIt != m_ChunkBlocks.end() && BlockIt->second) + { + FlushBlocks.push_back(BlockIt->second); + ZEN_DEBUG("Flushing block {} at '{}'", FlushBlockIndex, GetBlockPath(m_BlocksBasePath, FlushBlockIndex)); + } It = m_BlocksToFlush.erase(It); } else @@ -1195,9 +1209,9 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, { { RwLock::ExclusiveLockScope _l(m_InsertLock); - if (m_ChunkBlocks[NewBlockIndex] == NewBlockFile) + if (auto It = m_ChunkBlocks.find(NewBlockIndex); It != m_ChunkBlocks.end() && It->second == NewBlockFile) { - m_ChunkBlocks.erase(NewBlockIndex); + m_ChunkBlocks.erase(It); } } if (NewBlockFile->IsOpen()) @@ -1370,8 +1384,9 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, ZEN_ERROR("{}get disk space in '{}' FAILED, reason: '{}'", LogPrefix, m_BlocksBasePath, Error.message()); { RwLock::ExclusiveLockScope _l(m_InsertLock); - ZEN_ASSERT(m_ChunkBlocks[NextBlockIndex] == NewBlockFile); - m_ChunkBlocks.erase(NextBlockIndex); + auto It = m_ChunkBlocks.find(NextBlockIndex); + ZEN_ASSERT(It != m_ChunkBlocks.end() && It->second == NewBlockFile); + m_ChunkBlocks.erase(It); } ZEN_ASSERT_SLOW(!NewBlockFile->IsOpen()); NewBlockFile = nullptr; @@ -1390,8 +1405,9 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NiceBytes(Space.Free + ReclaimedSpace)); { RwLock::ExclusiveLockScope _l(m_InsertLock); - ZEN_ASSERT(m_ChunkBlocks[NextBlockIndex] == NewBlockFile); - m_ChunkBlocks.erase(NextBlockIndex); + auto It = m_ChunkBlocks.find(NextBlockIndex); + ZEN_ASSERT(It != m_ChunkBlocks.end() && It->second == NewBlockFile); + m_ChunkBlocks.erase(It); } ZEN_ASSERT_SLOW(!NewBlockFile->IsOpen()); NewBlockFile = nullptr; @@ -1537,8 +1553,7 @@ BlockStore::SetMetaData(uint32_t BlockIndex, const IoBuffer& Payload) if (It->second->SetMetaData(Payload)) { uint64_t NewMetaSize = It->second->MetaSize(); - m_TotalSize += NewMetaSize; - m_TotalSize -= OldMetaSize; + m_TotalSize.fetch_add(NewMetaSize - OldMetaSize, std::memory_order::relaxed); } } } |