diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-20 13:31:09 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-20 13:31:09 +0200 |
| commit | 7cdafe520216f01f27094dc4a353256071510922 (patch) | |
| tree | 341e3f79cb5419c3bcb235773790d36aaff572bc /src/zenstore/blockstore.cpp | |
| parent | Cache (rpc) activitity recording improvements (#482) (diff) | |
| download | zen-7cdafe520216f01f27094dc4a353256071510922.tar.xz zen-7cdafe520216f01f27094dc4a353256071510922.zip | |
Don't prune block locations due to missing blocks a startup (#487)
* Don't prune block locations due to missing blocks a startup
This makes the behaviour consistent with FileCas - you can have an index that is not fully backed by data.
Asking for a location that is not backed by data results in getting an empty result back
Also, don't try to GC blocks that are unknown to the block store at the time of snapshot (to avoid removing data that comes in after GatherReferences in GC)
Diffstat (limited to 'src/zenstore/blockstore.cpp')
| -rw-r--r-- | src/zenstore/blockstore.cpp | 129 |
1 files changed, 90 insertions, 39 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 12ea24752..c4453a39f 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -109,6 +109,10 @@ BlockStoreFile::MarkAsDeleteOnClose() IoBuffer BlockStoreFile::GetChunk(uint64_t Offset, uint64_t Size) { + if (Offset + Size > m_IoBuffer.GetSize()) + { + return {}; + } return IoBuffer(m_IoBuffer, Offset, Size); } @@ -154,7 +158,7 @@ BlockStore::~BlockStore() { } -std::unordered_map<uint32_t, uint64_t> +void BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount) { ZEN_TRACE_CPU("BlockStore::Initialize"); @@ -217,40 +221,30 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max { CreateDirectories(m_BlocksBasePath); } - return FoundBlocks; } void -BlockStore::Prune(const std::vector<BlockStoreLocation>& KnownLocations) +BlockStore::CreateMissingBlocks(const std::vector<BlockStoreLocation>& KnownLocations) { - ZEN_TRACE_CPU("BlockStore::Prune"); + ZEN_TRACE_CPU("BlockStore::CreateMissingBlocks"); RwLock::ExclusiveLockScope InsertLock(m_InsertLock); - tsl::robin_set<uint32_t> KnownBlocks; + tsl::robin_set<uint32_t> MissingBlocks; for (const auto& Entry : KnownLocations) { - KnownBlocks.insert(Entry.BlockIndex); - } - std::vector<uint32_t> BlocksToDelete; - for (auto It = m_ChunkBlocks.begin(); It != m_ChunkBlocks.end(); ++It) - { - uint32_t BlockIndex = It->first; - if (!KnownBlocks.contains(BlockIndex)) + if (auto It = m_ChunkBlocks.find(Entry.BlockIndex); It != m_ChunkBlocks.end() && !It->second.IsNull()) { - Ref<BlockStoreFile> BlockFile = m_ChunkBlocks[BlockIndex]; - BlocksToDelete.push_back(BlockIndex); + continue; } + MissingBlocks.insert(Entry.BlockIndex); } - - for (uint32_t BlockIndex : BlocksToDelete) + for (std::uint32_t BlockIndex : MissingBlocks) { - // Clear out unused blocks - Ref<BlockStoreFile> BlockFile = m_ChunkBlocks[BlockIndex]; - m_TotalSize.fetch_sub(BlockFile->FileSize(), std::memory_order::relaxed); - m_ChunkBlocks.erase(BlockIndex); - ZEN_DEBUG("marking block store file '{}' for delete, block #{}", BlockFile->GetPath(), BlockIndex); - BlockFile->MarkAsDeleteOnClose(); + std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockIndex); + Ref<BlockStoreFile> NewBlockFile(new BlockStoreFile(BlockPath)); + NewBlockFile->Create(0); + m_ChunkBlocks[BlockIndex] = NewBlockFile; } } @@ -367,7 +361,10 @@ BlockStore::GetReclaimSnapshotState() { State.m_ActiveWriteBlocks.insert(m_WriteBlockIndex); } - State.BlockCount = m_ChunkBlocks.size(); + for (auto It : m_ChunkBlocks) + { + State.m_BlockIndexes.insert(It.first); + } return State; } @@ -421,11 +418,6 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, const ReclaimCallback& ChangeCallback, const ClaimDiskReserveCallback& DiskReserveCallback) { - if (ChunkLocations.empty()) - { - return; - } - ZEN_TRACE_CPU("BlockStore::ReclaimSpace"); uint64_t WriteBlockTimeUs = 0; @@ -460,7 +452,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NiceBytes(OldTotalSize)); }); - size_t BlockCount = Snapshot.BlockCount; + size_t BlockCount = Snapshot.m_BlockIndexes.size(); if (BlockCount == 0) { ZEN_DEBUG("garbage collect for '{}' SKIPPED, no blocks to process", m_BlocksBasePath); @@ -487,6 +479,11 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, for (size_t Index = 0; Index < TotalChunkCount; ++Index) { const BlockStoreLocation& Location = ChunkLocations[Index]; + if (!Snapshot.m_BlockIndexes.contains(Location.BlockIndex)) + { + // We did not know about the block when we took the snapshot, don't touch it + continue; + } OldTotalSize += Location.Size; auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(Location.BlockIndex); size_t ChunkMapIndex = 0; @@ -516,7 +513,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, DeleteCount++; } - tsl::robin_set<uint32_t> BlocksToReWrite; + std::vector<uint32_t> BlocksToReWrite; BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); for (const auto& Entry : BlockIndexToChunkMapIndex) { @@ -527,7 +524,33 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { continue; } - BlocksToReWrite.insert(BlockIndex); + BlocksToReWrite.push_back(BlockIndex); + } + + { + // Any known block not referenced should be added as well + RwLock::SharedLockScope __(m_InsertLock); + for (std::uint32_t BlockIndex : Snapshot.m_BlockIndexes) + { + if (!m_ChunkBlocks.contains(BlockIndex)) + { + continue; + } + bool WasActiveWriteBlock = Snapshot.m_ActiveWriteBlocks.contains(BlockIndex); + if (WasActiveWriteBlock) + { + continue; + } + if (BlockIndexToChunkMapIndex.contains(BlockIndex)) + { + continue; + } + size_t ChunkMapIndex = ChunkMapIndex = BlockKeepChunks.size(); + BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex; + BlockKeepChunks.resize(ChunkMapIndex + 1); + BlockDeleteChunks.resize(ChunkMapIndex + 1); + BlocksToReWrite.push_back(BlockIndex); + } } if (DryRun) @@ -618,6 +641,17 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, BlockDeleteChunks[ChunkMapIndex].insert(BlockDeleteChunks[ChunkMapIndex].end(), KeepMap.begin(), KeepMap.end()); KeepMap.clear(); } + else if (OldBlockFile && (OldBlockFile->FileSize() == 0)) + { + // Block created to accommodate missing blocks + ZEN_WARN("Missing block {} in {} - backing data for locations is missing, marking {} entries as deleted.", + BlockIndex, + m_BlocksBasePath, + KeepMap.size()); + + BlockDeleteChunks[ChunkMapIndex].insert(BlockDeleteChunks[ChunkMapIndex].end(), KeepMap.begin(), KeepMap.end()); + KeepMap.clear(); + } MovedChunksArray MovedChunks; if (OldBlockFile) @@ -1141,15 +1175,30 @@ TEST_CASE("blockstore.clean.stray.blocks") BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); std::string ThirdChunkData = "This is a much longer string that will not fit in the first block so it should be placed in the second block"; - WriteStringAsChunk(Store, ThirdChunkData, 4); + BlockStoreLocation ThirdChunkLocation = WriteStringAsChunk(Store, ThirdChunkData, 4); Store.Close(); - // Not referencing the second block means that we should be deleted Store.Initialize(RootDirectory / "store", 128, 1024); - Store.Prune({FirstChunkLocation, SecondChunkLocation}); - + CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2); + IoBuffer ThirdChunk = Store.TryGetChunk(ThirdChunkLocation); + CHECK(ThirdChunk); + + // Reclaim space should delete unreferenced block + Store.ReclaimSpace(Store.GetReclaimSnapshotState(), {FirstChunkLocation, SecondChunkLocation}, {0, 1}, 4, false); + // Block lives on as long as we reference it via ThirdChunk + CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2); + ThirdChunk = {}; CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 1); + ThirdChunk = Store.TryGetChunk(ThirdChunkLocation); + CHECK(!ThirdChunk); + + // Recreate a fake block for an missing chunk location + Store.CreateMissingBlocks({FirstChunkLocation, SecondChunkLocation, ThirdChunkLocation}); + // We create a fake block for the location - we should still not be able to get the chunk + CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 2); + ThirdChunk = Store.TryGetChunk(ThirdChunkLocation); + CHECK(!ThirdChunk); } TEST_CASE("blockstore.flush.force.new.block") @@ -1368,11 +1417,13 @@ TEST_CASE("blockstore.reclaim.space") } } - NewChunkLocations = ChunkLocations; - MovedChunkCount = 0; - DeletedChunkCount = 0; + // We need to take a new state since reclaim space add new block when compacting + BlockStore::ReclaimSnapshotState State2 = Store.GetReclaimSnapshotState(); + NewChunkLocations = ChunkLocations; + MovedChunkCount = 0; + DeletedChunkCount = 0; Store.ReclaimSpace( - State1, + State2, ChunkLocations, {}, Alignment, |