diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-22 14:28:47 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:33 +0200 |
| commit | abb565d888cb2bb98f7919c8717f01f8418fd478 (patch) | |
| tree | 86c4e7e76e1fc41ac9e27e0ac37094d5543d0b95 /zenstore/compactcas.cpp | |
| parent | compactcas.threadedinsert test case (diff) | |
| download | zen-abb565d888cb2bb98f7919c8717f01f8418fd478.tar.xz zen-abb565d888cb2bb98f7919c8717f01f8418fd478.zip | |
Reduce lock contention when garbage collecting
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 125 |
1 files changed, 83 insertions, 42 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 4a98fa87c..4138d12ce 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -526,62 +526,70 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName); - std::unordered_map<uint64_t, size_t> BlockIndexToKeepChunksMap; + std::unordered_map<uint32_t, size_t> BlockIndexToChunkMapIndex; std::vector<std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher>> KeepChunks; + std::vector<std::unordered_set<IoHash, IoHash::Hasher>> DeleteChunks; std::vector<IoHash> DeletedChunks; + std::vector<IoHash> PendingDeletedChunks; std::unordered_set<uint32_t> BlocksToReWrite; + std::uint64_t CurrentWritePosition; + std::uint64_t CurrentWriteBlock; { - RwLock::ExclusiveLockScope _i(m_InsertLock); - RwLock::ExclusiveLockScope _l(m_LocationMapLock); + RwLock::SharedLockScope _l(m_LocationMapLock); - m_CasLog.Flush(); - if (auto WriteBlock = m_WriteBlock.lock()) - { - WriteBlock->Flush(); - } + const auto& LocationMap = m_LocationMap; - if (m_LocationMap.empty()) + if (LocationMap.empty()) { ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_Config.RootDirectory / m_ContainerBaseName); return; } - const uint64_t TotalChunkCount = m_LocationMap.size(); + const uint64_t TotalChunkCount = LocationMap.size(); BlocksToReWrite.reserve(m_ChunkBlocks.size()); - BlockIndexToKeepChunksMap.reserve(m_ChunkBlocks.size()); - KeepChunks.reserve(m_LocationMap.size()); - DeletedChunks.reserve(m_LocationMap.size()); + BlockIndexToChunkMapIndex.reserve(m_ChunkBlocks.size()); + KeepChunks.reserve(m_ChunkBlocks.size()); + DeleteChunks.reserve(m_ChunkBlocks.size()); + size_t GuesstimateCountPerBlock = TotalChunkCount / m_ChunkBlocks.size(); std::vector<IoHash> TotalChunkHashes; - TotalChunkHashes.reserve(m_LocationMap.size()); - for (const auto& Entry : m_LocationMap) + TotalChunkHashes.reserve(TotalChunkCount); + for (const auto& Entry : LocationMap) { TotalChunkHashes.push_back(Entry.first); const CasLocation Location = Entry.second.Get(m_PayloadAlignment); - if (BlockIndexToKeepChunksMap.contains(Location.BlockIndex)) + if (BlockIndexToChunkMapIndex.contains(Location.BlockIndex)) { continue; } - BlockIndexToKeepChunksMap[Location.BlockIndex] = KeepChunks.size(); + BlockIndexToChunkMapIndex[Location.BlockIndex] = KeepChunks.size(); KeepChunks.resize(KeepChunks.size() + 1); + KeepChunks.back().reserve(GuesstimateCountPerBlock); + DeleteChunks.resize(DeleteChunks.size() + 1); + DeleteChunks.back().reserve(GuesstimateCountPerBlock); } const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); + uint64_t DeleteCount = {}; uint64_t NewTotalSize = 0; GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { + auto KeyIt = LocationMap.find(ChunkHash); + const CasLocation ChunkLocation = KeyIt->second.Get(m_PayloadAlignment); + size_t ChunkMapIndex = BlockIndexToChunkMapIndex[ChunkLocation.BlockIndex]; if (Keep) { - auto KeyIt = m_LocationMap.find(ChunkHash); - const CasLocation ChunkLocation = KeyIt->second.Get(m_PayloadAlignment); - auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]]; - ChunkMap[ChunkHash] = KeyIt->second; + auto& ChunkMap = KeepChunks[ChunkMapIndex]; + ChunkMap.emplace(ChunkHash, KeyIt->second); NewTotalSize += ChunkLocation.Size; } else { - DeletedChunks.push_back(ChunkHash); + auto& ChunkMap = DeleteChunks[ChunkMapIndex]; + ChunkMap.insert(ChunkHash); + DeleteCount++; + BlocksToReWrite.insert(ChunkLocation.BlockIndex); } }); @@ -590,28 +598,36 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) uint64_t TotalSize = m_TotalSize.load(); ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}", m_Config.RootDirectory / m_ContainerBaseName, - DeletedChunks.size(), + DeleteCount, NiceBytes(TotalSize - NewTotalSize), TotalChunkCount, NiceBytes(TotalSize)); return; } - for (const auto& ChunkHash : DeletedChunks) - { - auto KeyIt = m_LocationMap.find(ChunkHash); - const CasLocation& ChunkLocation = KeyIt->second.Get(m_PayloadAlignment); - BlocksToReWrite.insert(ChunkLocation.BlockIndex); - m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); - m_LocationMap.erase(ChunkHash); - m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.Size)); - } + DeletedChunks.reserve(DeleteCount); + CurrentWritePosition = m_CurrentInsertOffset; + CurrentWriteBlock = m_WriteBlockIndex.load(); + } + + { + RwLock::ExclusiveLockScope _i(m_InsertLock); + m_CasLog.Flush(); - // TODO: Be smarter about terminating current block - we should probably not rewrite if there is just - // a small amount of bytes to gain. - if (BlocksToReWrite.contains(m_WriteBlockIndex.load())) + // Did someone write into the current block while we released the m_InsertLock? + uint32_t WriteBlockIndex = m_WriteBlockIndex.load(); + if (BlocksToReWrite.contains(WriteBlockIndex)) { - m_WriteBlock.reset(); + // No - we can safely terminate appending to block and rewrite it + if (CurrentWritePosition == m_CurrentInsertOffset) + { + m_WriteBlock.reset(); + } + // Yes - don't touch the block! + else + { + BlocksToReWrite.erase(WriteBlockIndex); + } } } @@ -624,12 +640,24 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) for (auto BlockIndex : BlocksToReWrite) { - const auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[BlockIndex]]; - if (ChunkMap.empty()) + const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; + const auto& KeepMap = KeepChunks[ChunkMapIndex]; + if (KeepMap.empty()) { std::shared_ptr<ChunkBlock> BlockFile; { RwLock::ExclusiveLockScope _i(m_LocationMapLock); + const auto& DeleteMap = DeleteChunks[ChunkMapIndex]; + for (const auto& ChunkHash : DeleteMap) + { + auto KeyIt = m_LocationMap.find(ChunkHash); + CHECK(KeyIt != m_LocationMap.end()); + m_LocationMap.erase(KeyIt); + const CasLocation& DeleteChunkLocation = KeyIt->second.Get(m_PayloadAlignment); + m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + m_TotalSize.fetch_sub(static_cast<uint64_t>(DeleteChunkLocation.Size)); + } + DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end()); m_ChunkBlocks[BlockIndex].swap(BlockFile); } ZEN_DEBUG("marking cas store file for delete {}, block {}", m_ContainerBaseName, std::to_string(BlockIndex)); @@ -651,7 +679,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { std::vector<uint8_t> Chunk; - for (auto& Entry : ChunkMap) + for (auto& Entry : KeepMap) { const CasLocation ChunkLocation = Entry.second.Get(m_PayloadAlignment); Chunk.resize(ChunkLocation.Size); @@ -686,8 +714,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { NextBlockIndex = (NextBlockIndex + 1) & CasDiskLocation::MaxBlockIndex; } - auto NewBlockPath = BuildUcasPath(m_BlocksBasePath, NextBlockIndex); - NewBlockFile = std::make_shared<ChunkBlock>(NewBlockPath); + auto NewBlockPath = BuildUcasPath(m_BlocksBasePath, NextBlockIndex); + NewBlockFile = std::make_shared<ChunkBlock>(NewBlockPath); m_ChunkBlocks[NextBlockIndex] = NewBlockFile; } @@ -732,7 +760,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size()); MovedBlocks.emplace(Entry.first, CasDiskLocation(NewChunkLocation, m_PayloadAlignment)); - WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment); + WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment); } Chunk.clear(); @@ -745,6 +773,19 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_LocationMap[MovedEntry.first] = MovedEntry.second; m_CasLog.Append({.Key = MovedEntry.first, .Location = MovedEntry.second}); } + + const auto& DeleteMap = DeleteChunks[ChunkMapIndex]; + for (const auto& ChunkHash : DeleteMap) + { + auto KeyIt = m_LocationMap.find(ChunkHash); + CHECK(KeyIt != m_LocationMap.end()); + + m_LocationMap.erase(KeyIt); + const CasLocation& DeleteChunkLocation = KeyIt->second.Get(m_PayloadAlignment); + m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + m_TotalSize.fetch_sub(static_cast<uint64_t>(DeleteChunkLocation.Size)); + } + DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end()); } ZEN_DEBUG("marking cas store file for delete {}, block index {}", m_ContainerBaseName, BlockIndex); std::error_code Ec; |