diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenserver/cache/cachedisklayer.cpp | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving the actual disk work out of the step that deletes items from the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make compacting of stores non-parallel so that it does not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenserver/cache/cachedisklayer.cpp')
| -rw-r--r-- | src/zenserver/cache/cachedisklayer.cpp | 380 |
1 files changed, 231 insertions, 149 deletions
diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp index afb974d76..32ef420d1 100644 --- a/src/zenserver/cache/cachedisklayer.cpp +++ b/src/zenserver/cache/cachedisklayer.cpp @@ -2351,12 +2351,212 @@ ZenCacheDiskLayer::CacheBucket::GetGcName(GcCtx&) return fmt::format("cachebucket:'{}'", m_BucketDir.string()); } -void -ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) +class DiskBucketStoreCompactor : public GcStoreCompactor { - size_t TotalEntries = 0; - tsl::robin_set<IoHash, IoHash::Hasher> ExpiredInlineKeys; - std::vector<std::pair<IoHash, uint64_t>> ExpiredStandaloneKeys; +public: + DiskBucketStoreCompactor(ZenCacheDiskLayer::CacheBucket& Bucket, std::vector<std::pair<IoHash, uint64_t>>&& ExpiredStandaloneKeys) + : m_Bucket(Bucket) + , m_ExpiredStandaloneKeys(std::move(ExpiredStandaloneKeys)) + { + m_ExpiredStandaloneKeys.shrink_to_fit(); + } + + virtual ~DiskBucketStoreCompactor() {} + + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override + { + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: cachebucket [COMPACT] '{}': RemovedDisk: {} in {}", + m_Bucket.m_BucketDir, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + if (!m_ExpiredStandaloneKeys.empty()) + { + // Compact standalone items + size_t Skipped = 0; + ExtendablePathBuilder<256> Path; + for (const std::pair<IoHash, uint64_t>& ExpiredKey : m_ExpiredStandaloneKeys) + { + Path.Reset(); + m_Bucket.BuildPath(Path, ExpiredKey.first); + fs::path FilePath = Path.ToPath(); + + RwLock::SharedLockScope IndexLock(m_Bucket.m_IndexLock); + if (m_Bucket.m_Index.contains(ExpiredKey.first)) + { + // Someone added it back, let the file on disk be + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': skipping z$ delete standalone of file '{}' FAILED, it has been 
added back", + m_Bucket.m_BucketDir, + Path.ToUtf8()); + continue; + } + + if (Ctx.Settings.IsDeleteMode) + { + RwLock::ExclusiveLockScope ValueLock(m_Bucket.LockForHash(ExpiredKey.first)); + IndexLock.ReleaseNow(); + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': deleting standalone cache file '{}'", m_Bucket.m_BucketDir, Path.ToUtf8()); + + std::error_code Ec; + if (!fs::remove(FilePath, Ec)) + { + continue; + } + if (Ec) + { + ZEN_WARN("GCV2: cachebucket [COMPACT] '{}': delete expired z$ standalone file '{}' FAILED, reason: '{}'", + m_Bucket.m_BucketDir, + Path.ToUtf8(), + Ec.message()); + continue; + } + Stats.RemovedDisk += ExpiredKey.second; + } + else + { + std::error_code Ec; + bool Existed = std::filesystem::is_regular_file(FilePath, Ec); + if (Ec) + { + ZEN_WARN("GCV2: cachebucket [COMPACT] '{}': failed checking cache payload file '{}'. Reason '{}'", + m_Bucket.m_BucketDir, + FilePath, + Ec.message()); + continue; + } + if (!Existed) + { + continue; + } + Skipped++; + } + } + if (Skipped > 0) + { + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': skipped deleting of {} eligible files", m_Bucket.m_BucketDir, Skipped); + } + } + + if (Ctx.Settings.CollectSmallObjects) + { + std::unordered_map<uint32_t, uint64_t> BlockUsage; + { + for (const auto& Entry : m_Bucket.m_Index) + { + ZenCacheDiskLayer::CacheBucket::PayloadIndex Index = Entry.second; + const ZenCacheDiskLayer::CacheBucket::BucketPayload& Payload = m_Bucket.m_Payloads[Index]; + const DiskLocation& Loc = Payload.Location; + + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + { + continue; + } + uint32_t BlockIndex = Loc.Location.BlockLocation.GetBlockIndex(); + uint64_t ChunkSize = RoundUp(Loc.Size(), m_Bucket.m_Configuration.PayloadAlignment); + auto It = BlockUsage.find(BlockIndex); + if (It == BlockUsage.end()) + { + BlockUsage.insert_or_assign(BlockIndex, ChunkSize); + } + else + { + It->second += ChunkSize; + } + } + } + + { + BlockStoreCompactState BlockCompactState; + std::vector<IoHash> 
BlockCompactStateKeys; + + std::vector<uint32_t> BlocksToCompact = + m_Bucket.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent); + BlockCompactState.IncludeBlocks(BlocksToCompact); + + { + RwLock::SharedLockScope __(m_Bucket.m_IndexLock); + for (const auto& Entry : m_Bucket.m_Index) + { + ZenCacheDiskLayer::CacheBucket::PayloadIndex Index = Entry.second; + const ZenCacheDiskLayer::CacheBucket::BucketPayload& Payload = m_Bucket.m_Payloads[Index]; + const DiskLocation& Loc = Payload.Location; + + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + { + continue; + } + if (!BlockCompactState.AddKeepLocation(Loc.GetBlockLocation(m_Bucket.m_Configuration.PayloadAlignment))) + { + continue; + } + BlockCompactStateKeys.push_back(Entry.first); + } + } + + if (Ctx.Settings.IsDeleteMode) + { + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': compacting {} blocks", m_Bucket.m_BucketDir, BlocksToCompact.size()); + + m_Bucket.m_BlockStore.CompactBlocks( + BlockCompactState, + m_Bucket.m_Configuration.PayloadAlignment, + [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + std::vector<DiskIndexEntry> MovedEntries; + RwLock::ExclusiveLockScope _(m_Bucket.m_IndexLock); + for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + { + size_t ChunkIndex = Moved.first; + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + + if (auto It = m_Bucket.m_Index.find(Key); It != m_Bucket.m_Index.end()) + { + ZenCacheDiskLayer::CacheBucket::BucketPayload& Payload = m_Bucket.m_Payloads[It->second]; + const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); + if (Payload.Location.GetBlockLocation(m_Bucket.m_Configuration.PayloadAlignment) != OldLocation) + { + // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d + // at a later time + continue; + } + const BlockStoreLocation& NewLocation = Moved.second; + + Payload.Location = + 
DiskLocation(NewLocation, m_Bucket.m_Configuration.PayloadAlignment, Payload.Location.GetFlags()); + MovedEntries.push_back({.Key = Key, .Location = Payload.Location}); + } + } + m_Bucket.m_SlogFile.Append(MovedEntries); + Stats.RemovedDisk += FreedDiskSpace; + }, + ClaimDiskReserveCallback); + } + else + { + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': skipped compacting of {} eligible blocks", + m_Bucket.m_BucketDir, + BlocksToCompact.size()); + } + } + } + m_ExpiredStandaloneKeys.clear(); + } + +private: + ZenCacheDiskLayer::CacheBucket& m_Bucket; + std::vector<std::pair<IoHash, uint64_t>> m_ExpiredStandaloneKeys; +}; + +GcStoreCompactor* +ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) +{ + size_t TotalEntries = 0; Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -2364,37 +2564,30 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& { return; } - ZEN_INFO("GCV2: cachebucket [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: cachebucket [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, FreedMemory: {} in {}", m_BucketDir, - Stats.Count, - Stats.Expired, - Stats.Deleted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.FoundCount, + Stats.DeletedCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); const GcClock::Tick ExpireTicks = Ctx.Settings.CacheExpireTime.time_since_epoch().count(); - BlockStoreCompactState BlockCompactState; - BlockStore::ReclaimSnapshotState BlockSnapshotState; - std::vector<IoHash> BlockCompactStateKeys; - std::vector<DiskIndexEntry> ExpiredEntries; - uint64_t RemovedStandaloneSize = 0; + std::vector<DiskIndexEntry> ExpiredEntries; + std::vector<std::pair<IoHash, uint64_t>> ExpiredStandaloneKeys; + uint64_t RemovedStandaloneSize = 0; { RwLock::ExclusiveLockScope IndexLock(m_IndexLock); - if 
(Ctx.Settings.CollectSmallObjects) - { - BlockSnapshotState = m_BlockStore.GetReclaimSnapshotState(); - } TotalEntries = m_Index.size(); - // Find out expired keys and affected blocks + // Find out expired keys for (const auto& Entry : m_Index) { - const IoHash& Key = Entry.first; - size_t EntryIndex = Entry.second; - GcClock::Tick AccessTime = m_AccessTimes[EntryIndex]; + const IoHash& Key = Entry.first; + ZenCacheDiskLayer::CacheBucket::PayloadIndex EntryIndex = Entry.second; + GcClock::Tick AccessTime = m_AccessTimes[EntryIndex]; if (AccessTime >= ExpireTicks) { continue; @@ -2412,41 +2605,12 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& } else if (Ctx.Settings.CollectSmallObjects) { - ExpiredInlineKeys.insert(Key); - uint32_t BlockIndex = Payload.Location.Location.BlockLocation.GetBlockIndex(); - bool IsActiveWriteBlock = BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex); - if (!IsActiveWriteBlock) - { - BlockCompactState.IncludeBlock(BlockIndex); - } ExpiredEntries.push_back(ExpiredEntry); } } - Stats.Expired += ExpiredStandaloneKeys.size() + ExpiredInlineKeys.size(); - - // Get all locations we need to keep for affected blocks - if (Ctx.Settings.CollectSmallObjects && !ExpiredInlineKeys.empty()) - { - for (const auto& Entry : m_Index) - { - const IoHash& Key = Entry.first; - if (ExpiredInlineKeys.contains(Key)) - { - continue; - } - size_t EntryIndex = Entry.second; - const BucketPayload& Payload = m_Payloads[EntryIndex]; - if (Payload.Location.Flags & DiskLocation::kStandaloneFile) - { - continue; - } - if (BlockCompactState.AddKeepLocation(Payload.Location.GetBlockLocation(m_Configuration.PayloadAlignment))) - { - BlockCompactStateKeys.push_back(Key); - } - } - } + Stats.CheckedCount += TotalEntries; + Stats.FoundCount += ExpiredEntries.size(); if (Ctx.Settings.IsDeleteMode) { @@ -2456,112 +2620,30 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& ZEN_ASSERT(It != 
m_Index.end()); BucketPayload& Payload = m_Payloads[It->second]; RemoveMetaData(Payload); - Stats.RemovedMemory += RemoveMemCachedData(Payload); + Stats.FreedMemory += RemoveMemCachedData(Payload); m_Index.erase(It); + Stats.DeletedCount++; } m_SlogFile.Append(ExpiredEntries); m_StandaloneSize.fetch_sub(RemovedStandaloneSize, std::memory_order::relaxed); } } - Stats.Count += TotalEntries; - - if (ExpiredEntries.empty()) - { - return; - } - if (!Ctx.Settings.IsDeleteMode) + if (!ExpiredEntries.empty()) { - return; - } - - Stats.Deleted += ExpiredEntries.size(); - - // Compact standalone items - ExtendablePathBuilder<256> Path; - for (const std::pair<IoHash, uint64_t>& ExpiredKey : ExpiredStandaloneKeys) - { - Path.Reset(); - BuildPath(Path, ExpiredKey.first); - fs::path FilePath = Path.ToPath(); - - RwLock::SharedLockScope IndexLock(m_IndexLock); - if (m_Index.contains(ExpiredKey.first)) - { - // Someone added it back, let the file on disk be - ZEN_DEBUG("gc cache bucket '{}': skipping z$ delete standalone of file '{}' FAILED, it has been added back", - m_BucketDir, - Path.ToUtf8()); - continue; - } - - RwLock::ExclusiveLockScope ValueLock(LockForHash(ExpiredKey.first)); - IndexLock.ReleaseNow(); - ZEN_DEBUG("gc cache bucket '{}': deleting standalone cache file '{}'", m_BucketDir, Path.ToUtf8()); - - std::error_code Ec; - if (!fs::remove(FilePath, Ec)) - { - continue; - } - if (Ec) + std::vector<BucketPayload> Payloads; + std::vector<AccessTime> AccessTimes; + std::vector<BucketMetaData> MetaDatas; + std::vector<IoBuffer> MemCachedPayloads; + std::vector<ReferenceIndex> FirstReferenceIndex; + IndexMap Index; { - ZEN_WARN("gc cache bucket '{}': delete expired z$ standalone file '{}' FAILED, reason: '{}'", - m_BucketDir, - Path.ToUtf8(), - Ec.message()); - continue; + RwLock::ExclusiveLockScope IndexLock(m_IndexLock); + CompactState(Payloads, AccessTimes, MetaDatas, MemCachedPayloads, FirstReferenceIndex, Index, IndexLock); } - Stats.RemovedDisk += ExpiredKey.second; 
} - if (Ctx.Settings.CollectSmallObjects && !ExpiredInlineKeys.empty()) - { - // Compact block store - m_BlockStore.CompactBlocks( - BlockCompactState, - m_Configuration.PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { - std::vector<DiskIndexEntry> MovedEntries; - RwLock::ExclusiveLockScope _(m_IndexLock); - for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) - { - size_t ChunkIndex = Moved.first; - const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; - - if (auto It = m_Index.find(Key); It != m_Index.end()) - { - BucketPayload& Payload = m_Payloads[It->second]; - const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); - if (Payload.Location.GetBlockLocation(m_Configuration.PayloadAlignment) != OldLocation) - { - // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a later - // time - continue; - } - - const BlockStoreLocation& NewLocation = Moved.second; - - Payload.Location = DiskLocation(NewLocation, m_Configuration.PayloadAlignment, Payload.Location.GetFlags()); - MovedEntries.push_back({.Key = Key, .Location = Payload.Location}); - } - } - m_SlogFile.Append(MovedEntries); - Stats.RemovedDisk += FreedDiskSpace; - }, - [&]() { return 0; }); - } - - std::vector<BucketPayload> Payloads; - std::vector<AccessTime> AccessTimes; - std::vector<BucketMetaData> MetaDatas; - std::vector<IoBuffer> MemCachedPayloads; - std::vector<ReferenceIndex> FirstReferenceIndex; - IndexMap Index; - { - RwLock::ExclusiveLockScope IndexLock(m_IndexLock); - CompactState(Payloads, AccessTimes, MetaDatas, MemCachedPayloads, FirstReferenceIndex, Index, IndexLock); - } + return new DiskBucketStoreCompactor(*this, std::move(ExpiredStandaloneKeys)); } class DiskBucketReferenceChecker : public GcReferenceChecker |