diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/blockstore.cpp | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving out the actual disk work from deleting items in the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make compacting of stores non-parallel so that it does not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/blockstore.cpp')
| -rw-r--r-- | src/zenstore/blockstore.cpp | 107 |
1 files changed, 86 insertions, 21 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 063d38707..ec299092d 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -15,6 +15,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> #include <tsl/robin_set.h> +#include <gsl/gsl-lite.hpp> ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS @@ -267,6 +268,59 @@ BlockStore::SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& Know } } +std::vector<uint32_t> +BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent) +{ + std::unordered_set<uint32_t> Result; + { + RwLock::SharedLockScope InsertLock(m_InsertLock); + for (const auto& It : m_ChunkBlocks) + { + uint32_t BlockIndex = It.first; + if (std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), BlockIndex) != m_ActiveWriteBlocks.end()) + { + continue; + } + uint64_t BlockSize = It.second ? It.second->FileSize() : 0u; + if (BlockSize == 0) + { + Result.insert(BlockIndex); + continue; + } + + uint64_t UsedSize = 0; + if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end()) + { + UsedSize = UsageIt->second; + } + + if (BlockUsageThresholdPercent == 100) + { + if (UsedSize < BlockSize) + { + Result.insert(BlockIndex); + } + } + else if (BlockUsageThresholdPercent == 0) + { + if (UsedSize == 0) + { + Result.insert(BlockIndex); + } + } + else + { + const uint32_t UsedPercent = UsedSize < BlockSize ? 
gsl::narrow<uint32_t>((100 * UsedSize) / BlockSize) : 100u; + if (UsedPercent < BlockUsageThresholdPercent) + { + Result.insert(BlockIndex); + } + } + } + } + return std::vector<uint32_t>(Result.begin(), Result.end()); +} + void BlockStore::Close() { @@ -971,7 +1025,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, Stopwatch TotalTimer; const auto _ = MakeGuard([&] { - ZEN_DEBUG("compact blocks for '{}' DONE after {}, deleted {} and moved {} chunks ({}) ", + ZEN_DEBUG("Compact blocks for '{}' DONE after {}, deleted {} and moved {} chunks ({}) ", m_BlocksBasePath, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), NiceBytes(DeletedSize), @@ -983,13 +1037,14 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint32_t NewBlockIndex = 0; MovedChunksArray MovedChunks; + uint64_t AddedSize = 0; uint64_t RemovedSize = 0; Ref<BlockStoreFile> NewBlockFile; auto NewBlockFileGuard = MakeGuard([&]() { if (NewBlockFile) { - ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); + ZEN_DEBUG("Dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); { RwLock::ExclusiveLockScope _l(m_InsertLock); if (m_ChunkBlocks[NewBlockIndex] == NewBlockFile) @@ -1001,6 +1056,18 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, } }); + auto ReportChanges = [&]() { + if (!MovedChunks.empty() || RemovedSize > 0) + { + ChangeCallback(MovedChunks, RemovedSize > AddedSize ? 
RemovedSize - AddedSize : 0); + DeletedSize += RemovedSize; + RemovedSize = 0; + AddedSize = 0; + MovedCount += MovedChunks.size(); + MovedChunks.clear(); + } + }; + std::vector<uint32_t> RemovedBlocks; CompactState.IterateBlocks( @@ -1030,12 +1097,23 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint64_t OldBlockSize = OldBlockFile->FileSize(); - // TODO: Add heuristics for determining if it is worth to compact a block (if only a very small part is removed) - std::vector<uint8_t> Chunk; for (const size_t& ChunkIndex : KeepChunkIndexes) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; + if (ChunkLocation.Offset + ChunkLocation.Size > OldBlockSize) + { + ZEN_WARN( + "Compact Block skipping chunk outside of block range in '{}', Chunk start {}, Chunk size {} in Block {}, Block " + "size {}", + m_BlocksBasePath, + ChunkLocation.Offset, + ChunkLocation.Size, + OldBlockFile->GetPath(), + OldBlockSize); + continue; + } + Chunk.resize(ChunkLocation.Size); OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); @@ -1113,18 +1191,11 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); MovedChunks.push_back({ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}}); WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); + AddedSize += Chunk.size(); } Chunk.clear(); - // Report what we have moved so we can purge the old block - if (!MovedChunks.empty() || RemovedSize > 0) - { - ChangeCallback(MovedChunks, RemovedSize); - DeletedSize += RemovedSize; - RemovedSize = 0; - MovedCount += MovedChunks.size(); - MovedChunks.clear(); - } + ReportChanges(); { RwLock::ExclusiveLockScope InsertLock(m_InsertLock); @@ -1135,6 +1206,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, RemovedSize += OldBlockSize; } }); + if (NewBlockFile) { NewBlockFile->Flush(); @@ -1142,14 +1214,7 
@@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NewBlockFile = nullptr; } - if (!MovedChunks.empty() || RemovedSize > 0) - { - ChangeCallback(MovedChunks, RemovedSize); - DeletedSize += RemovedSize; - RemovedSize = 0; - MovedCount += MovedChunks.size(); - MovedChunks.clear(); - } + ReportChanges(); } const char* |