diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving out the actual disk work from deleting items in the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make Compacting of stores non-parallell to not eat all the disk I/O when running GC
Diffstat (limited to 'src')
| -rw-r--r-- | src/zencore/include/zencore/compactbinarybuilder.h | 18 | ||||
| -rw-r--r-- | src/zenserver/admin/admin.cpp | 26 | ||||
| -rw-r--r-- | src/zenserver/cache/cachedisklayer.cpp | 380 | ||||
| -rw-r--r-- | src/zenserver/cache/cachedisklayer.h | 4 | ||||
| -rw-r--r-- | src/zenserver/cache/structuredcachestore.cpp | 228 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.cpp | 283 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.h | 7 | ||||
| -rw-r--r-- | src/zenstore/blockstore.cpp | 107 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 219 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 113 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 821 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 22 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 84 |
13 files changed, 1321 insertions, 991 deletions
diff --git a/src/zencore/include/zencore/compactbinarybuilder.h b/src/zencore/include/zencore/compactbinarybuilder.h index 9cb8f7193..dcb767d96 100644 --- a/src/zencore/include/zencore/compactbinarybuilder.h +++ b/src/zencore/include/zencore/compactbinarybuilder.h @@ -655,6 +655,24 @@ operator<<(CbWriter& Writer, const Oid& Value) ZENCORE_API CbWriter& operator<<(CbWriter& Writer, DateTime Value); ZENCORE_API CbWriter& operator<<(CbWriter& Writer, TimeSpan Value); +ZENCORE_API inline TimeSpan +ToTimeSpan(std::chrono::seconds Secs) +{ + return TimeSpan(0, 0, gsl::narrow<int>(Secs.count())); +}; +ZENCORE_API inline TimeSpan +ToTimeSpan(std::chrono::milliseconds MS) +{ + return TimeSpan(MS.count() * TimeSpan::TicksPerMillisecond); +} +ZENCORE_API inline DateTime +ToDateTime(std::chrono::system_clock::time_point TimePoint) +{ + time_t Time = std::chrono::system_clock::to_time_t(TimePoint); + tm UTCTime = *gmtime(&Time); + return DateTime(1900 + UTCTime.tm_year, UTCTime.tm_mon, UTCTime.tm_mday, UTCTime.tm_hour, UTCTime.tm_min, UTCTime.tm_sec); +} + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void usonbuilder_forcelink(); // internal diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp index d4c69f41b..0b302c36e 100644 --- a/src/zenserver/admin/admin.cpp +++ b/src/zenserver/admin/admin.cpp @@ -204,25 +204,21 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, Details = true; } - auto SecondsToString = [](std::chrono::seconds Secs) { - return NiceTimeSpanMs(uint64_t(std::chrono::milliseconds(Secs).count())); - }; - CbObjectWriter Response; Response << "Status"sv << (GcSchedulerStatus::kIdle == State.Status ? "Idle"sv : "Running"sv); Response.BeginObject("Config"); { Response << "RootDirectory" << State.Config.RootDirectory.string(); - Response << "MonitorInterval" << SecondsToString(State.Config.MonitorInterval); - Response << "Interval" << SecondsToString(State.Config.Interval); - Response << "MaxCacheDuration" << SecondsToString(State.Config.MaxCacheDuration); - Response << "MaxProjectStoreDuration" << SecondsToString(State.Config.MaxProjectStoreDuration); + Response << "MonitorInterval" << ToTimeSpan(State.Config.MonitorInterval); + Response << "Interval" << ToTimeSpan(State.Config.Interval); + Response << "MaxCacheDuration" << ToTimeSpan(State.Config.MaxCacheDuration); + Response << "MaxProjectStoreDuration" << ToTimeSpan(State.Config.MaxProjectStoreDuration); Response << "CollectSmallObjects" << State.Config.CollectSmallObjects; Response << "Enabled" << State.Config.Enabled; Response << "DiskReserveSize" << NiceBytes(State.Config.DiskReserveSize); Response << "DiskSizeSoftLimit" << NiceBytes(State.Config.DiskSizeSoftLimit); Response << "MinimumFreeDiskSpaceToAllowWrites" << NiceBytes(State.Config.MinimumFreeDiskSpaceToAllowWrites); - Response << "LightweightInterval" << SecondsToString(State.Config.LightweightInterval); + Response << "LightweightInterval" << ToTimeSpan(State.Config.LightweightInterval); } Response.EndObject(); Response << "AreDiskWritesBlocked" << State.AreDiskWritesBlocked; @@ -233,8 +229,8 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, Response.BeginObject("FullGC"); { - Response << "LastTime" << fmt::format("{}", State.LastFullGcTime); - Response << "TimeToNext" << SecondsToString(State.RemainingTimeUntilFullGc); + Response << "LastTime" << ToDateTime(State.LastFullGcTime); + Response << "TimeToNext" << ToTimeSpan(State.RemainingTimeUntilFullGc); if (State.Config.DiskSizeSoftLimit != 0) { Response << "SpaceToNext" << NiceBytes(State.RemainingSpaceUntilFullGC); @@ -246,7 +242,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, } else { - Response << "LastDuration" << NiceTimeSpanMs(State.LastFullGcDuration.count()); + Response << "LastDuration" << ToTimeSpan(State.LastFullGcDuration); Response << "LastDiskFreed" << NiceBytes(State.LastFullGCDiff.DiskSize); Response << "LastMemoryFreed" << NiceBytes(State.LastFullGCDiff.MemorySize); } @@ -254,8 +250,8 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, Response.EndObject(); Response.BeginObject("LightweightGC"); { - Response << "LastTime" << fmt::format("{}", State.LastLightweightGcTime); - Response << "TimeToNext" << SecondsToString(State.RemainingTimeUntilLightweightGc); + Response << "LastTime" << ToDateTime(State.LastLightweightGcTime); + Response << "TimeToNext" << ToTimeSpan(State.RemainingTimeUntilLightweightGc); if (State.LastLightweightGCV2Result) { @@ -264,7 +260,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, } else { - Response << "LastDuration" << NiceTimeSpanMs(State.LastLightweightGcDuration.count()); + Response << "LastDuration" << ToTimeSpan(State.LastLightweightGcDuration); Response << "LastDiskFreed" << NiceBytes(State.LastLightweightGCDiff.DiskSize); Response << "LastMemoryFreed" << NiceBytes(State.LastLightweightGCDiff.MemorySize); } diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp index afb974d76..32ef420d1 100644 --- a/src/zenserver/cache/cachedisklayer.cpp +++ b/src/zenserver/cache/cachedisklayer.cpp @@ -2351,12 +2351,212 @@ ZenCacheDiskLayer::CacheBucket::GetGcName(GcCtx&) return fmt::format("cachebucket:'{}'", m_BucketDir.string()); } -void -ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) +class DiskBucketStoreCompactor : public GcStoreCompactor { - size_t TotalEntries = 0; - tsl::robin_set<IoHash, IoHash::Hasher> ExpiredInlineKeys; - std::vector<std::pair<IoHash, uint64_t>> ExpiredStandaloneKeys; +public: + DiskBucketStoreCompactor(ZenCacheDiskLayer::CacheBucket& Bucket, std::vector<std::pair<IoHash, uint64_t>>&& ExpiredStandaloneKeys) + : m_Bucket(Bucket) + , m_ExpiredStandaloneKeys(std::move(ExpiredStandaloneKeys)) + { + m_ExpiredStandaloneKeys.shrink_to_fit(); + } + + virtual ~DiskBucketStoreCompactor() {} + + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override + { + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: cachebucket [COMPACT] '{}': RemovedDisk: {} in {}", + m_Bucket.m_BucketDir, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + if (!m_ExpiredStandaloneKeys.empty()) + { + // Compact standalone items + size_t Skipped = 0; + ExtendablePathBuilder<256> Path; + for (const std::pair<IoHash, uint64_t>& ExpiredKey : m_ExpiredStandaloneKeys) + { + Path.Reset(); + m_Bucket.BuildPath(Path, ExpiredKey.first); + fs::path FilePath = Path.ToPath(); + + RwLock::SharedLockScope IndexLock(m_Bucket.m_IndexLock); + if (m_Bucket.m_Index.contains(ExpiredKey.first)) + { + // Someone added it back, let the file on disk be + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': skipping z$ delete standalone of file '{}' FAILED, it has been added back", + m_Bucket.m_BucketDir, + Path.ToUtf8()); + continue; + } + + if (Ctx.Settings.IsDeleteMode) + { + RwLock::ExclusiveLockScope ValueLock(m_Bucket.LockForHash(ExpiredKey.first)); + IndexLock.ReleaseNow(); + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': deleting standalone cache file '{}'", m_Bucket.m_BucketDir, Path.ToUtf8()); + + std::error_code Ec; + if (!fs::remove(FilePath, Ec)) + { + continue; + } + if (Ec) + { + ZEN_WARN("GCV2: cachebucket [COMPACT] '{}': delete expired z$ standalone file '{}' FAILED, reason: '{}'", + m_Bucket.m_BucketDir, + Path.ToUtf8(), + Ec.message()); + continue; + } + Stats.RemovedDisk += ExpiredKey.second; + } + else + { + std::error_code Ec; + bool Existed = std::filesystem::is_regular_file(FilePath, Ec); + if (Ec) + { + ZEN_WARN("GCV2: cachebucket [COMPACT] '{}': failed checking cache payload file '{}'. Reason '{}'", + m_Bucket.m_BucketDir, + FilePath, + Ec.message()); + continue; + } + if (!Existed) + { + continue; + } + Skipped++; + } + } + if (Skipped > 0) + { + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': skipped deleting of {} eligible files", m_Bucket.m_BucketDir, Skipped); + } + } + + if (Ctx.Settings.CollectSmallObjects) + { + std::unordered_map<uint32_t, uint64_t> BlockUsage; + { + for (const auto& Entry : m_Bucket.m_Index) + { + ZenCacheDiskLayer::CacheBucket::PayloadIndex Index = Entry.second; + const ZenCacheDiskLayer::CacheBucket::BucketPayload& Payload = m_Bucket.m_Payloads[Index]; + const DiskLocation& Loc = Payload.Location; + + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + { + continue; + } + uint32_t BlockIndex = Loc.Location.BlockLocation.GetBlockIndex(); + uint64_t ChunkSize = RoundUp(Loc.Size(), m_Bucket.m_Configuration.PayloadAlignment); + auto It = BlockUsage.find(BlockIndex); + if (It == BlockUsage.end()) + { + BlockUsage.insert_or_assign(BlockIndex, ChunkSize); + } + else + { + It->second += ChunkSize; + } + } + } + + { + BlockStoreCompactState BlockCompactState; + std::vector<IoHash> BlockCompactStateKeys; + + std::vector<uint32_t> BlocksToCompact = + m_Bucket.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent); + BlockCompactState.IncludeBlocks(BlocksToCompact); + + { + RwLock::SharedLockScope __(m_Bucket.m_IndexLock); + for (const auto& Entry : m_Bucket.m_Index) + { + ZenCacheDiskLayer::CacheBucket::PayloadIndex Index = Entry.second; + const ZenCacheDiskLayer::CacheBucket::BucketPayload& Payload = m_Bucket.m_Payloads[Index]; + const DiskLocation& Loc = Payload.Location; + + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + { + continue; + } + if (!BlockCompactState.AddKeepLocation(Loc.GetBlockLocation(m_Bucket.m_Configuration.PayloadAlignment))) + { + continue; + } + BlockCompactStateKeys.push_back(Entry.first); + } + } + + if (Ctx.Settings.IsDeleteMode) + { + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': compacting {} blocks", m_Bucket.m_BucketDir, BlocksToCompact.size()); + + m_Bucket.m_BlockStore.CompactBlocks( + BlockCompactState, + m_Bucket.m_Configuration.PayloadAlignment, + [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + std::vector<DiskIndexEntry> MovedEntries; + RwLock::ExclusiveLockScope _(m_Bucket.m_IndexLock); + for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + { + size_t ChunkIndex = Moved.first; + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + + if (auto It = m_Bucket.m_Index.find(Key); It != m_Bucket.m_Index.end()) + { + ZenCacheDiskLayer::CacheBucket::BucketPayload& Payload = m_Bucket.m_Payloads[It->second]; + const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); + if (Payload.Location.GetBlockLocation(m_Bucket.m_Configuration.PayloadAlignment) != OldLocation) + { + // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d + // at a later time + continue; + } + const BlockStoreLocation& NewLocation = Moved.second; + + Payload.Location = + DiskLocation(NewLocation, m_Bucket.m_Configuration.PayloadAlignment, Payload.Location.GetFlags()); + MovedEntries.push_back({.Key = Key, .Location = Payload.Location}); + } + } + m_Bucket.m_SlogFile.Append(MovedEntries); + Stats.RemovedDisk += FreedDiskSpace; + }, + ClaimDiskReserveCallback); + } + else + { + ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': skipped compacting of {} eligible blocks", + m_Bucket.m_BucketDir, + BlocksToCompact.size()); + } + } + } + m_ExpiredStandaloneKeys.clear(); + } + +private: + ZenCacheDiskLayer::CacheBucket& m_Bucket; + std::vector<std::pair<IoHash, uint64_t>> m_ExpiredStandaloneKeys; +}; + +GcStoreCompactor* +ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) +{ + size_t TotalEntries = 0; Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -2364,37 +2564,30 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& { return; } - ZEN_INFO("GCV2: cachebucket [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: cachebucket [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, FreedMemory: {} in {}", m_BucketDir, - Stats.Count, - Stats.Expired, - Stats.Deleted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.FoundCount, + Stats.DeletedCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); const GcClock::Tick ExpireTicks = Ctx.Settings.CacheExpireTime.time_since_epoch().count(); - BlockStoreCompactState BlockCompactState; - BlockStore::ReclaimSnapshotState BlockSnapshotState; - std::vector<IoHash> BlockCompactStateKeys; - std::vector<DiskIndexEntry> ExpiredEntries; - uint64_t RemovedStandaloneSize = 0; + std::vector<DiskIndexEntry> ExpiredEntries; + std::vector<std::pair<IoHash, uint64_t>> ExpiredStandaloneKeys; + uint64_t RemovedStandaloneSize = 0; { RwLock::ExclusiveLockScope IndexLock(m_IndexLock); - if (Ctx.Settings.CollectSmallObjects) - { - BlockSnapshotState = m_BlockStore.GetReclaimSnapshotState(); - } TotalEntries = m_Index.size(); - // Find out expired keys and affected blocks + // Find out expired keys for (const auto& Entry : m_Index) { - const IoHash& Key = Entry.first; - size_t EntryIndex = Entry.second; - GcClock::Tick AccessTime = m_AccessTimes[EntryIndex]; + const IoHash& Key = Entry.first; + ZenCacheDiskLayer::CacheBucket::PayloadIndex EntryIndex = Entry.second; + GcClock::Tick AccessTime = m_AccessTimes[EntryIndex]; if (AccessTime >= ExpireTicks) { continue; @@ -2412,41 +2605,12 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& } else if (Ctx.Settings.CollectSmallObjects) { - ExpiredInlineKeys.insert(Key); - uint32_t BlockIndex = Payload.Location.Location.BlockLocation.GetBlockIndex(); - bool IsActiveWriteBlock = BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex); - if (!IsActiveWriteBlock) - { - BlockCompactState.IncludeBlock(BlockIndex); - } ExpiredEntries.push_back(ExpiredEntry); } } - Stats.Expired += ExpiredStandaloneKeys.size() + ExpiredInlineKeys.size(); - - // Get all locations we need to keep for affected blocks - if (Ctx.Settings.CollectSmallObjects && !ExpiredInlineKeys.empty()) - { - for (const auto& Entry : m_Index) - { - const IoHash& Key = Entry.first; - if (ExpiredInlineKeys.contains(Key)) - { - continue; - } - size_t EntryIndex = Entry.second; - const BucketPayload& Payload = m_Payloads[EntryIndex]; - if (Payload.Location.Flags & DiskLocation::kStandaloneFile) - { - continue; - } - if (BlockCompactState.AddKeepLocation(Payload.Location.GetBlockLocation(m_Configuration.PayloadAlignment))) - { - BlockCompactStateKeys.push_back(Key); - } - } - } + Stats.CheckedCount += TotalEntries; + Stats.FoundCount += ExpiredEntries.size(); if (Ctx.Settings.IsDeleteMode) { @@ -2456,112 +2620,30 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& ZEN_ASSERT(It != m_Index.end()); BucketPayload& Payload = m_Payloads[It->second]; RemoveMetaData(Payload); - Stats.RemovedMemory += RemoveMemCachedData(Payload); + Stats.FreedMemory += RemoveMemCachedData(Payload); m_Index.erase(It); + Stats.DeletedCount++; } m_SlogFile.Append(ExpiredEntries); m_StandaloneSize.fetch_sub(RemovedStandaloneSize, std::memory_order::relaxed); } } - Stats.Count += TotalEntries; - - if (ExpiredEntries.empty()) - { - return; - } - if (!Ctx.Settings.IsDeleteMode) + if (!ExpiredEntries.empty()) { - return; - } - - Stats.Deleted += ExpiredEntries.size(); - - // Compact standalone items - ExtendablePathBuilder<256> Path; - for (const std::pair<IoHash, uint64_t>& ExpiredKey : ExpiredStandaloneKeys) - { - Path.Reset(); - BuildPath(Path, ExpiredKey.first); - fs::path FilePath = Path.ToPath(); - - RwLock::SharedLockScope IndexLock(m_IndexLock); - if (m_Index.contains(ExpiredKey.first)) - { - // Someone added it back, let the file on disk be - ZEN_DEBUG("gc cache bucket '{}': skipping z$ delete standalone of file '{}' FAILED, it has been added back", - m_BucketDir, - Path.ToUtf8()); - continue; - } - - RwLock::ExclusiveLockScope ValueLock(LockForHash(ExpiredKey.first)); - IndexLock.ReleaseNow(); - ZEN_DEBUG("gc cache bucket '{}': deleting standalone cache file '{}'", m_BucketDir, Path.ToUtf8()); - - std::error_code Ec; - if (!fs::remove(FilePath, Ec)) - { - continue; - } - if (Ec) + std::vector<BucketPayload> Payloads; + std::vector<AccessTime> AccessTimes; + std::vector<BucketMetaData> MetaDatas; + std::vector<IoBuffer> MemCachedPayloads; + std::vector<ReferenceIndex> FirstReferenceIndex; + IndexMap Index; { - ZEN_WARN("gc cache bucket '{}': delete expired z$ standalone file '{}' FAILED, reason: '{}'", - m_BucketDir, - Path.ToUtf8(), - Ec.message()); - continue; + RwLock::ExclusiveLockScope IndexLock(m_IndexLock); + CompactState(Payloads, AccessTimes, MetaDatas, MemCachedPayloads, FirstReferenceIndex, Index, IndexLock); } - Stats.RemovedDisk += ExpiredKey.second; } - if (Ctx.Settings.CollectSmallObjects && !ExpiredInlineKeys.empty()) - { - // Compact block store - m_BlockStore.CompactBlocks( - BlockCompactState, - m_Configuration.PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { - std::vector<DiskIndexEntry> MovedEntries; - RwLock::ExclusiveLockScope _(m_IndexLock); - for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) - { - size_t ChunkIndex = Moved.first; - const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; - - if (auto It = m_Index.find(Key); It != m_Index.end()) - { - BucketPayload& Payload = m_Payloads[It->second]; - const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); - if (Payload.Location.GetBlockLocation(m_Configuration.PayloadAlignment) != OldLocation) - { - // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a later - // time - continue; - } - - const BlockStoreLocation& NewLocation = Moved.second; - - Payload.Location = DiskLocation(NewLocation, m_Configuration.PayloadAlignment, Payload.Location.GetFlags()); - MovedEntries.push_back({.Key = Key, .Location = Payload.Location}); - } - } - m_SlogFile.Append(MovedEntries); - Stats.RemovedDisk += FreedDiskSpace; - }, - [&]() { return 0; }); - } - - std::vector<BucketPayload> Payloads; - std::vector<AccessTime> AccessTimes; - std::vector<BucketMetaData> MetaDatas; - std::vector<IoBuffer> MemCachedPayloads; - std::vector<ReferenceIndex> FirstReferenceIndex; - IndexMap Index; - { - RwLock::ExclusiveLockScope IndexLock(m_IndexLock); - CompactState(Payloads, AccessTimes, MetaDatas, MemCachedPayloads, FirstReferenceIndex, Index, IndexLock); - } + return new DiskBucketStoreCompactor(*this, std::move(ExpiredStandaloneKeys)); } class DiskBucketReferenceChecker : public GcReferenceChecker diff --git a/src/zenserver/cache/cachedisklayer.h b/src/zenserver/cache/cachedisklayer.h index d9884a7bc..2986cedf8 100644 --- a/src/zenserver/cache/cachedisklayer.h +++ b/src/zenserver/cache/cachedisklayer.h @@ -334,7 +334,7 @@ private: std::atomic_uint64_t m_MemCachedSize{}; virtual std::string GetGcName(GcCtx& Ctx) override; - virtual void RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; void BuildPath(PathBuilderBase& Path, const IoHash& HashKey) const; @@ -403,6 +403,7 @@ private: inline RwLock& LockForHash(const IoHash& Hash) const { return m_ShardedLocks[Hash.Hash[19]]; } friend class DiskBucketReferenceChecker; + friend class DiskBucketStoreCompactor; }; inline void TryMemCacheTrim() @@ -438,6 +439,7 @@ private: ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete; ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; + friend class DiskBucketStoreCompactor; friend class DiskBucketReferenceChecker; }; diff --git a/src/zenserver/cache/structuredcachestore.cpp b/src/zenserver/cache/structuredcachestore.cpp index cc6fefc76..25dfd103d 100644 --- a/src/zenserver/cache/structuredcachestore.cpp +++ b/src/zenserver/cache/structuredcachestore.cpp @@ -1957,14 +1957,14 @@ TEST_CASE("z$.newgc.basics") .CollectSmallObjects = false, .IsDeleteMode = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(0u, Result.ReferencerStat.Expired); - CHECK_EQ(0u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_EQ(0u, Result.RemovedDisk); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_EQ(0u, Result.CompactStoresStatSum.RemovedDisk); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], true, true)); @@ -1991,14 +1991,14 @@ TEST_CASE("z$.newgc.basics") .CollectSmallObjects = false, .IsDeleteMode = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(1u, Result.ReferencerStat.Expired); - CHECK_EQ(0u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_EQ(0u, Result.RemovedDisk); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_EQ(0u, Result.CompactStoresStatSum.RemovedDisk); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], true, true)); @@ -2025,14 +2025,14 @@ TEST_CASE("z$.newgc.basics") .CollectSmallObjects = true, .IsDeleteMode = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(7u, Result.ReferencerStat.Expired); - CHECK_EQ(0u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_EQ(0u, Result.RemovedDisk); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_EQ(0u, Result.CompactStoresStatSum.RemovedDisk); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], true, true)); @@ -2060,14 +2060,14 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = true, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(1u, Result.ReferencerStat.Expired); - CHECK_EQ(1u, Result.ReferencerStat.Deleted); - CHECK_EQ(0u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_EQ(CacheEntries[UnstructuredCacheValues[2]].Data.GetSize(), Result.RemovedDisk); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_EQ(CacheEntries[UnstructuredCacheValues[2]].Data.GetSize(), Result.CompactStoresStatSum.RemovedDisk); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], true, true)); @@ -2095,14 +2095,14 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = true, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(7u, Result.ReferencerStat.Expired); - CHECK_EQ(7u, Result.ReferencerStat.Deleted); - CHECK_EQ(0u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_GE(Result.RemovedDisk, 0); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_GE(Result.CompactStoresStatSum.RemovedDisk, 0); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], false, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], false, true)); @@ -2130,17 +2130,20 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(1u, Result.ReferencerStat.Expired); // Only one cache value is pruned/deleted as that is the only large item in the cache - // (all other large items as in cas) - CHECK_EQ(1u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, + Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); // Only one cache value is pruned/deleted as that is the only + // large item in the cache (all other large items as in cas) + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); CHECK_EQ(0u, - Result.ReferenceStoreStat - .Pruned); // We won't remove any references since all referencers are small which retains all references - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_EQ(CacheEntries[UnstructuredCacheValues[2]].Data.GetSize(), Result.RemovedDisk); - CHECK_EQ(0u, Result.RemovedMemory); + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats + .FoundCount); // We won't remove any references since all referencers are small which retains all references + CHECK_EQ(0u, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats + .DeletedCount); // We won't remove any references since all referencers are small which retains all references + CHECK_EQ(CacheEntries[UnstructuredCacheValues[2]].Data.GetSize(), Result.CompactStoresStatSum.RemovedDisk); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], true, true)); @@ -2168,14 +2171,14 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(7u, Result.ReferencerStat.Expired); - CHECK_EQ(7u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); - CHECK_EQ(5u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(5u, Result.ReferenceStoreStat.Compacted); - CHECK_GT(Result.RemovedDisk, 0); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_GT(Result.CompactStoresStatSum.RemovedDisk, 0); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], false, false)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], false, false)); @@ -2200,20 +2203,22 @@ TEST_CASE("z$.newgc.basics") Zcs.SetAccessTime(TearDrinkerBucket, CacheRecords[0], GcClock::Now() + std::chrono::hours(2)); - GcResult Result = Gc.CollectGarbage(GcSettings{.CacheExpireTime = GcClock::Now() + std::chrono::hours(1), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(1), - .CollectSmallObjects = true, - .IsDeleteMode = true, - .SkipCidDelete = true, - .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(6u, Result.ReferencerStat.Expired); - CHECK_EQ(6u, Result.ReferencerStat.Deleted); - CHECK_EQ(0u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_GT(Result.RemovedDisk, 0); - CHECK_EQ(0u, Result.RemovedMemory); + GcResult Result = Gc.CollectGarbage(GcSettings{.CacheExpireTime = GcClock::Now() + std::chrono::hours(1), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(1), + .CollectSmallObjects = true, + .IsDeleteMode = true, + .SkipCidDelete = true, + .Verbose = true, + .CompactBlockUsageThresholdPercent = 100}); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(6u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(6u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + uint64_t MinExpectedRemoveSize = CacheEntries[UnstructuredCacheValues[2]].Data.GetSize(); + CHECK_LT(MinExpectedRemoveSize, Result.CompactStoresStatSum.RemovedDisk); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], false, true)); @@ -2245,14 +2250,14 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(5u, Result.ReferencerStat.Expired); - CHECK_EQ(5u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_GT(Result.RemovedDisk, 0); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_GT(Result.CompactStoresStatSum.RemovedDisk, 0); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], true, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], true, true)); @@ -2285,14 +2290,14 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = false, .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(4u, Result.ReferencerStat.Expired); - CHECK_EQ(4u, Result.ReferencerStat.Deleted); - CHECK_EQ(5u, Result.ReferenceStoreStat.Count); - CHECK_EQ(5u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(5u, Result.ReferenceStoreStat.Compacted); - CHECK_GT(Result.RemovedDisk, 0); - CHECK_EQ(0u, Result.RemovedMemory); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(5u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_GT(Result.CompactStoresStatSum.RemovedDisk, 0); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], false, false)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], false, false)); @@ -2329,22 +2334,23 @@ TEST_CASE("z$.newgc.basics") Zcs.SetAccessTime(TearDrinkerBucket, UnstructuredCacheValues[2], GcClock::Now() + std::chrono::hours(2)); Zcs.SetAccessTime(TearDrinkerBucket, UnstructuredCacheValues[3], GcClock::Now() + std::chrono::hours(2)); - GcResult Result = Gc.CollectGarbage(GcSettings{.CacheExpireTime = GcClock::Now() + std::chrono::hours(1), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(1), - .CollectSmallObjects = true, - .IsDeleteMode = true, - .SkipCidDelete = true, - .Verbose = true}); - CHECK_EQ(7u, Result.ReferencerStat.Count); - CHECK_EQ(4u, Result.ReferencerStat.Expired); - CHECK_EQ(4u, Result.ReferencerStat.Deleted); - CHECK_EQ(0u, Result.ReferenceStoreStat.Count); - CHECK_EQ(0u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(0u, Result.ReferenceStoreStat.Compacted); - CHECK_GT(Result.RemovedDisk, 0); + GcResult Result = Gc.CollectGarbage(GcSettings{.CacheExpireTime = GcClock::Now() + std::chrono::hours(1), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(1), + .CollectSmallObjects = true, + .IsDeleteMode = true, + .SkipCidDelete = true, + .Verbose = true, + .CompactBlockUsageThresholdPercent = 100}); + CHECK_EQ(7u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_GT(Result.CompactStoresStatSum.RemovedDisk, 0); uint64_t MemoryClean = CacheEntries[CacheRecords[0]].Data.GetSize() + CacheEntries[CacheRecords[1]].Data.GetSize() + CacheEntries[CacheRecords[2]].Data.GetSize() + CacheEntries[UnstructuredCacheValues[0]].Data.GetSize(); - CHECK_EQ(MemoryClean, Result.RemovedMemory); + CHECK_EQ(MemoryClean, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[0], false, true)); CHECK(ValidateCacheEntry(Zcs, CidStore, TearDrinkerBucket, CacheRecords[1], false, true)); @@ -2393,15 +2399,17 @@ TEST_CASE("z$.newgc.basics") .IsDeleteMode = true, .SkipCidDelete = false, .Verbose = true}); - CHECK_EQ(8u, Result.ReferencerStat.Count); - CHECK_EQ(1u, Result.ReferencerStat.Expired); - CHECK_EQ(1u, Result.ReferencerStat.Deleted); - CHECK_EQ(9u, Result.ReferenceStoreStat.Count); - CHECK_EQ(4u, Result.ReferenceStoreStat.Pruned); - CHECK_EQ(4u, Result.ReferenceStoreStat.Compacted); - CHECK_EQ(Attachments[1].second.GetCompressed().GetSize() + Attachments[3].second.GetCompressed().GetSize(), Result.RemovedDisk); + // Write block can't be compacted so Compacted will be less than Deleted + CHECK_EQ(8u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(9u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(4u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount); + CHECK_EQ(4u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK_EQ(Attachments[1].second.GetCompressed().GetSize() + Attachments[3].second.GetCompressed().GetSize(), + Result.CompactStoresStatSum.RemovedDisk); uint64_t MemoryClean = CacheEntries[CacheRecord].Data.GetSize(); - CHECK_EQ(MemoryClean, Result.RemovedMemory); + CHECK_EQ(MemoryClean, Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); } } diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 9fedd9165..617104ddc 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -1435,31 +1435,40 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId) return nullptr; } -void -ProjectStore::Project::DeleteOplog(std::string_view OplogId) +std::filesystem::path +ProjectStore::Project::RemoveOplog(std::string_view OplogId) { + RwLock::ExclusiveLockScope _(m_ProjectLock); + std::filesystem::path DeletePath; + if (auto OplogIt = m_Oplogs.find(std::string(OplogId)); OplogIt == m_Oplogs.end()) { - RwLock::ExclusiveLockScope _(m_ProjectLock); + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); - if (auto OplogIt = m_Oplogs.find(std::string(OplogId)); OplogIt == m_Oplogs.end()) + if (Oplog::ExistsAt(OplogBasePath)) { - std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); - - if (Oplog::ExistsAt(OplogBasePath)) + std::filesystem::path MovedDir; + if (PrepareDirectoryDelete(DeletePath, MovedDir)) { - DeletePath = OplogBasePath; + DeletePath = MovedDir; } } - else - { - std::unique_ptr<Oplog>& Oplog = OplogIt->second; - DeletePath = Oplog->PrepareForDelete(true); - m_DeletedOplogs.emplace_back(std::move(Oplog)); - m_Oplogs.erase(OplogIt); - } - m_LastAccessTimes.erase(std::string(OplogId)); } + else + { + std::unique_ptr<Oplog>& Oplog = OplogIt->second; + DeletePath = Oplog->PrepareForDelete(true); + m_DeletedOplogs.emplace_back(std::move(Oplog)); + m_Oplogs.erase(OplogIt); + } + m_LastAccessTimes.erase(std::string(OplogId)); + return DeletePath; +} + +void +ProjectStore::Project::DeleteOplog(std::string_view OplogId) +{ + std::filesystem::path DeletePath = RemoveOplog(OplogId); // Erase content on disk if (!DeletePath.empty()) @@ -1567,9 +1576,29 @@ ProjectStore::Project::GatherReferences(GcContext& GcCtx) } uint64_t +ProjectStore::Project::TotalSize(const std::filesystem::path& BasePath) +{ + using namespace std::literals; + + uint64_t Size = 0; + std::filesystem::path AccessTimesFilePath = BasePath / "AccessTimes.zcb"sv; + if (std::filesystem::exists(AccessTimesFilePath)) + { + Size += std::filesystem::file_size(AccessTimesFilePath); + } + std::filesystem::path ProjectFilePath = BasePath / "Project.zcb"sv; + if (std::filesystem::exists(ProjectFilePath)) + { + Size += std::filesystem::file_size(ProjectFilePath); + } + + return Size; +} + +uint64_t ProjectStore::Project::TotalSize() const { - uint64_t Result = 0; + uint64_t Result = TotalSize(m_OplogStoragePath); { std::vector<std::string> OpLogs = ScanForOplogs(); for (const std::string& OpLogId : OpLogs) @@ -1954,7 +1983,7 @@ ProjectStore::StorageSize() const std::filesystem::path ProjectStateFilePath = ProjectBasePath / "Project.zcb"sv; if (std::filesystem::exists(ProjectStateFilePath)) { - Result.DiskSize += std::filesystem::file_size(ProjectStateFilePath); + Result.DiskSize += Project::TotalSize(ProjectBasePath); DirectoryContent DirContent; GetDirectoryContent(ProjectBasePath, DirectoryContent::IncludeDirsFlag, DirContent); for (const std::filesystem::path& OplogBasePath : DirContent.Directories) @@ -2068,12 +2097,8 @@ ProjectStore::UpdateProject(std::string_view ProjectId, } bool -ProjectStore::DeleteProject(std::string_view ProjectId) +ProjectStore::RemoveProject(std::string_view ProjectId, std::filesystem::path& OutDeletePath) { - ZEN_TRACE_CPU("Store::DeleteProject"); - - ZEN_INFO("deleting project {}", ProjectId); - RwLock::ExclusiveLockScope ProjectsLock(m_ProjectsLock); auto ProjIt = m_Projects.find(std::string{ProjectId}); @@ -2083,20 +2108,34 @@ ProjectStore::DeleteProject(std::string_view ProjectId) return true; } - std::filesystem::path DeletePath; - bool Success = ProjIt->second->PrepareForDelete(DeletePath); + bool Success = ProjIt->second->PrepareForDelete(OutDeletePath); if (!Success) { return false; } m_Projects.erase(ProjIt); - ProjectsLock.ReleaseNow(); + return true; +} + +bool +ProjectStore::DeleteProject(std::string_view ProjectId) +{ + ZEN_TRACE_CPU("Store::DeleteProject"); + + ZEN_INFO("deleting project {}", ProjectId); + + std::filesystem::path DeletePath; + if (!RemoveProject(ProjectId, DeletePath)) + { + return false; + } if (!DeletePath.empty()) { DeleteDirectories(DeletePath); } + return true; } @@ -3042,29 +3081,106 @@ ProjectStore::GetGcName(GcCtx&) return fmt::format("projectstore:'{}'", m_ProjectBasePath.string()); } -void -ProjectStore::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) +class ProjectStoreGcStoreCompactor : public GcStoreCompactor +{ +public: + ProjectStoreGcStoreCompactor(const std::filesystem::path& BasePath, + std::vector<std::filesystem::path>&& OplogPathsToRemove, + std::vector<std::filesystem::path>&& ProjectPathsToRemove) + : m_BasePath(BasePath) + , m_OplogPathsToRemove(std::move(OplogPathsToRemove)) + , m_ProjectPathsToRemove(std::move(ProjectPathsToRemove)) + { + } + + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>&) + { + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: projectstore [COMPACT] '{}': RemovedDisk: {} in {}", + m_BasePath, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + if (Ctx.Settings.IsDeleteMode) + { + for (const std::filesystem::path& OplogPath : m_OplogPathsToRemove) + { + uint64_t OplogSize = ProjectStore::Oplog::TotalSize(OplogPath); + if (DeleteDirectories(OplogPath)) + { + ZEN_DEBUG("GCV2: projectstore [COMPACT] '{}': removed oplog folder '{}', removed {}", + m_BasePath, + OplogPath, + NiceBytes(OplogSize)); + Stats.RemovedDisk += OplogSize; + } + else + { + ZEN_WARN("GCV2: projectstore [COMPACT] '{}': Failed to remove oplog folder '{}'", m_BasePath, OplogPath); + } + } + + for (const std::filesystem::path& ProjectPath : m_ProjectPathsToRemove) + { + uint64_t ProjectSize = ProjectStore::Project::TotalSize(ProjectPath); + if (DeleteDirectories(ProjectPath)) + { + ZEN_DEBUG("GCV2: projectstore [COMPACT] '{}': removed project folder '{}', removed {}", + m_BasePath, + ProjectPath, + NiceBytes(ProjectSize)); + Stats.RemovedDisk += ProjectSize; + } + else + { + ZEN_WARN("GCV2: projectstore [COMPACT] '{}': Failed to remove project folder '{}'", m_BasePath, ProjectPath); + } + } + } + else + { + ZEN_DEBUG("GCV2: projectstore [COMPACT] '{}': Skipped deleting of {} oplogs and {} projects", + m_BasePath, + m_OplogPathsToRemove.size(), + m_ProjectPathsToRemove.size()); + } + + m_ProjectPathsToRemove.clear(); + m_OplogPathsToRemove.clear(); + } + +private: + std::filesystem::path m_BasePath; + std::vector<std::filesystem::path> m_OplogPathsToRemove; + std::vector<std::filesystem::path> m_ProjectPathsToRemove; +}; + +GcStoreCompactor* +ProjectStore::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) { - size_t ProjectCount = 0; - size_t ExpiredProjectCount = 0; - size_t OplogCount = 0; - size_t ExpiredOplogCount = 0; Stopwatch Timer; const auto _ = MakeGuard([&] { if (!Ctx.Settings.Verbose) { return; } - ZEN_INFO("GCV2: projectstore [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: projectstore [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {} in {}", m_ProjectBasePath, - Stats.Count, - Stats.Expired, - Stats.Deleted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.FoundCount, + Stats.DeletedCount, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + std::vector<std::filesystem::path> OplogPathsToRemove; + std::vector<std::filesystem::path> ProjectPathsToRemove; + std::vector<Ref<Project>> ExpiredProjects; std::vector<Ref<Project>> Projects; @@ -3081,6 +3197,8 @@ ProjectStore::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) } } + size_t OplogCount = 0; + size_t ExpiredOplogCount = 0; for (const Ref<Project>& Project : Projects) { std::vector<std::string> ExpiredOplogs; @@ -3101,70 +3219,69 @@ ProjectStore::RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) { for (const std::string& OplogId : ExpiredOplogs) { - std::filesystem::path OplogBasePath = ProjectPath / OplogId; - uint64_t OplogSize = Oplog::TotalSize(OplogBasePath); - ZEN_DEBUG("gc project store '{}': garbage collected oplog '{}' in project '{}'. Removing storage on disk", - m_ProjectBasePath, - OplogId, - Project->Identifier); - Project->DeleteOplog(OplogId); - Stats.RemovedDisk += OplogSize; + std::filesystem::path RemovePath = Project->RemoveOplog(OplogId); + if (!RemovePath.empty()) + { + OplogPathsToRemove.push_back(RemovePath); + } } - Stats.Deleted += ExpiredOplogs.size(); + Stats.DeletedCount += ExpiredOplogs.size(); Project->Flush(); } } - ProjectCount = Projects.size(); - Stats.Count += ProjectCount + OplogCount; - ExpiredProjectCount = ExpiredProjects.size(); + size_t ProjectCount = Projects.size(); + + Stats.CheckedCount += ProjectCount + OplogCount; if (ExpiredProjects.empty()) { - ZEN_DEBUG("gc project store '{}': no expired projects found", m_ProjectBasePath); - return; + ZEN_DEBUG("GCV2: projectstore [REMOVE EXPIRED] '{}': no expired projects found", m_ProjectBasePath); + return nullptr; } if (Ctx.Settings.IsDeleteMode) { for (const Ref<Project>& Project : ExpiredProjects) { - std::filesystem::path PathToRemove; - std::string ProjectId = Project->Identifier; + std::string ProjectId = Project->Identifier; { { RwLock::SharedLockScope Lock(m_ProjectsLock); if (!Project->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime)) { - ZEN_DEBUG("gc project store '{}': skipped garbage collect of project '{}'. Project no longer expired.", - m_ProjectBasePath, - ProjectId); + ZEN_DEBUG( + "GCV2: projectstore [REMOVE EXPIRED] '{}': skipped garbage collect of project '{}'. Project no longer expired.", + m_ProjectBasePath, + ProjectId); continue; } } - RwLock::ExclusiveLockScope __(m_ProjectsLock); - bool Success = Project->PrepareForDelete(PathToRemove); + std::filesystem::path RemovePath; + bool Success = RemoveProject(ProjectId, RemovePath); if (!Success) { - ZEN_DEBUG("gc project store '{}': skipped garbage collect of project '{}'. Project folder is locked.", - m_ProjectBasePath, - ProjectId); + ZEN_DEBUG( + "GCV2: projectstore [REMOVE EXPIRED] '{}': skipped garbage collect of project '{}'. Project folder is locked.", + m_ProjectBasePath, + ProjectId); continue; } - m_Projects.erase(ProjectId); - } - - ZEN_DEBUG("gc project store '{}': sgarbage collected project '{}'. Removing storage on disk", m_ProjectBasePath, ProjectId); - if (PathToRemove.empty()) - { - continue; + if (!RemovePath.empty()) + { + ProjectPathsToRemove.push_back(RemovePath); + } } - - DeleteDirectories(PathToRemove); } - Stats.Deleted += ExpiredProjects.size(); + Stats.DeletedCount += ExpiredProjects.size(); } - Stats.Expired += ExpiredOplogCount + ExpiredProjectCount; + size_t ExpiredProjectCount = ExpiredProjects.size(); + Stats.FoundCount += ExpiredOplogCount + ExpiredProjectCount; + if (!OplogPathsToRemove.empty() || !ProjectPathsToRemove.empty()) + { + return new ProjectStoreGcStoreCompactor(m_ProjectBasePath, std::move(OplogPathsToRemove), std::move(ProjectPathsToRemove)); + } + return nullptr; } class ProjectStoreReferenceChecker : public GcReferenceChecker @@ -3182,16 +3299,15 @@ public: } ZEN_INFO("GCV2: projectstore [LOCKSTATE] '{}': precached {} references in {} from {}/{}", m_Oplog.m_BasePath, - m_UncachedReferences.size(), + m_References.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs()), m_Oplog.m_OuterProject->Identifier, m_Oplog.OplogId()); }); RwLock::SharedLockScope __(m_Oplog.m_OplogLock); - m_Oplog.IterateOplog([&](CbObjectView Op) { - Op.IterateAttachments([&](CbFieldView Visitor) { m_UncachedReferences.insert(Visitor.AsAttachment()); }); - }); + m_Oplog.IterateOplog( + [&](CbObjectView Op) { Op.IterateAttachments([&](CbFieldView Visitor) { m_References.insert(Visitor.AsAttachment()); }); }); m_PreCachedLsn = m_Oplog.GetMaxOpIndex(); } } @@ -3208,7 +3324,7 @@ public: } ZEN_INFO("GCV2: projectstore [LOCKSTATE] '{}': found {} references in {} from {}/{}", m_Oplog.m_BasePath, - m_UncachedReferences.size(), + m_References.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs()), m_Oplog.m_OuterProject->Identifier, m_Oplog.OplogId()); @@ -3219,23 +3335,22 @@ public: { // TODO: Maybe we could just check the added oplog entries - we might get a few extra references from obsolete entries // but I don't think that would be critical - m_UncachedReferences.clear(); - m_Oplog.IterateOplog([&](CbObjectView Op) { - Op.IterateAttachments([&](CbFieldView Visitor) { m_UncachedReferences.insert(Visitor.AsAttachment()); }); - }); + m_References.clear(); + m_Oplog.IterateOplog( + [&](CbObjectView Op) { Op.IterateAttachments([&](CbFieldView Visitor) { m_References.insert(Visitor.AsAttachment()); }); }); } } virtual void RemoveUsedReferencesFromSet(GcCtx&, HashSet& IoCids) override { - for (const IoHash& ReferenceHash : m_UncachedReferences) + for (const IoHash& ReferenceHash : m_References) { IoCids.erase(ReferenceHash); } } ProjectStore::Oplog& m_Oplog; std::unique_ptr<RwLock::SharedLockScope> m_OplogLock; - HashSet m_UncachedReferences; + HashSet m_References; int m_PreCachedLsn = -1; }; diff --git a/src/zenserver/projectstore/projectstore.h b/src/zenserver/projectstore/projectstore.h index fe1068485..555f8bdf2 100644 --- a/src/zenserver/projectstore/projectstore.h +++ b/src/zenserver/projectstore/projectstore.h @@ -225,6 +225,7 @@ public: Oplog* NewOplog(std::string_view OplogId, const std::filesystem::path& MarkerPath); Oplog* OpenOplog(std::string_view OplogId); void DeleteOplog(std::string_view OplogId); + std::filesystem::path RemoveOplog(std::string_view OplogId); void IterateOplogs(std::function<void(const RwLock::SharedLockScope&, const Oplog&)>&& Fn) const; void IterateOplogs(std::function<void(const RwLock::SharedLockScope&, Oplog&)>&& Fn); std::vector<std::string> ScanForOplogs() const; @@ -245,6 +246,7 @@ public: void ScrubStorage(ScrubContext& Ctx); LoggerRef Log(); void GatherReferences(GcContext& GcCtx); + static uint64_t TotalSize(const std::filesystem::path& BasePath); uint64_t TotalSize() const; bool PrepareForDelete(std::filesystem::path& OutDeletePath); @@ -280,6 +282,7 @@ public: std::string_view EngineRootDir, std::string_view ProjectRootDir, std::string_view ProjectFilePath); + bool RemoveProject(std::string_view ProjectId, std::filesystem::path& OutDeletePath); bool DeleteProject(std::string_view ProjectId); bool Exists(std::string_view ProjectId); void Flush(); @@ -295,7 +298,7 @@ public: virtual GcStorageSize StorageSize() const override; virtual std::string GetGcName(GcCtx& Ctx) override; - virtual void RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; CbArray GetProjectsList(); @@ -379,6 +382,8 @@ private: const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; std::filesystem::path BasePathForProject(std::string_view ProjectId); + + friend class ProjectStoreGcStoreCompactor; }; void prj_forcelink(); diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 063d38707..ec299092d 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -15,6 +15,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> #include <tsl/robin_set.h> +#include <gsl/gsl-lite.hpp> ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS @@ -267,6 +268,59 @@ BlockStore::SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& Know } } +std::vector<uint32_t> +BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent) +{ + std::unordered_set<uint32_t> Result; + { + RwLock::SharedLockScope InsertLock(m_InsertLock); + for (const auto& It : m_ChunkBlocks) + { + uint32_t BlockIndex = It.first; + if (std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), BlockIndex) != m_ActiveWriteBlocks.end()) + { + continue; + } + uint64_t BlockSize = It.second ? It.second->FileSize() : 0u; + if (BlockSize == 0) + { + Result.insert(BlockIndex); + continue; + } + + uint64_t UsedSize = 0; + if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end()) + { + UsedSize = UsageIt->second; + } + + if (BlockUsageThresholdPercent == 100) + { + if (UsedSize < BlockSize) + { + Result.insert(BlockIndex); + } + } + else if (BlockUsageThresholdPercent == 0) + { + if (UsedSize == 0) + { + Result.insert(BlockIndex); + } + } + else + { + const uint32_t UsedPercent = UsedSize < BlockSize ? gsl::narrow<uint32_t>((100 * UsedSize) / BlockSize) : 100u; + if (UsedPercent < BlockUsageThresholdPercent) + { + Result.insert(BlockIndex); + } + } + } + } + return std::vector<uint32_t>(Result.begin(), Result.end()); +} + void BlockStore::Close() { @@ -971,7 +1025,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, Stopwatch TotalTimer; const auto _ = MakeGuard([&] { - ZEN_DEBUG("compact blocks for '{}' DONE after {}, deleted {} and moved {} chunks ({}) ", + ZEN_DEBUG("Compact blocks for '{}' DONE after {}, deleted {} and moved {} chunks ({}) ", m_BlocksBasePath, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), NiceBytes(DeletedSize), @@ -983,13 +1037,14 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint32_t NewBlockIndex = 0; MovedChunksArray MovedChunks; + uint64_t AddedSize = 0; uint64_t RemovedSize = 0; Ref<BlockStoreFile> NewBlockFile; auto NewBlockFileGuard = MakeGuard([&]() { if (NewBlockFile) { - ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); + ZEN_DEBUG("Dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); { RwLock::ExclusiveLockScope _l(m_InsertLock); if (m_ChunkBlocks[NewBlockIndex] == NewBlockFile) @@ -1001,6 +1056,18 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, } }); + auto ReportChanges = [&]() { + if (!MovedChunks.empty() || RemovedSize > 0) + { + ChangeCallback(MovedChunks, RemovedSize > AddedSize ? RemovedSize - AddedSize : 0); + DeletedSize += RemovedSize; + RemovedSize = 0; + AddedSize = 0; + MovedCount += MovedChunks.size(); + MovedChunks.clear(); + } + }; + std::vector<uint32_t> RemovedBlocks; CompactState.IterateBlocks( @@ -1030,12 +1097,23 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint64_t OldBlockSize = OldBlockFile->FileSize(); - // TODO: Add heuristics for determining if it is worth to compact a block (if only a very small part is removed) - std::vector<uint8_t> Chunk; for (const size_t& ChunkIndex : KeepChunkIndexes) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; + if (ChunkLocation.Offset + ChunkLocation.Size > OldBlockSize) + { + ZEN_WARN( + "Compact Block skipping chunk outside of block range in '{}', Chunk start {}, Chunk size {} in Block {}, Block " + "size {}", + m_BlocksBasePath, + ChunkLocation.Offset, + ChunkLocation.Size, + OldBlockFile->GetPath(), + OldBlockSize); + continue; + } + Chunk.resize(ChunkLocation.Size); OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); @@ -1113,18 +1191,11 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); MovedChunks.push_back({ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}}); WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); + AddedSize += Chunk.size(); } Chunk.clear(); - // Report what we have moved so we can purge the old block - if (!MovedChunks.empty() || RemovedSize > 0) - { - ChangeCallback(MovedChunks, RemovedSize); - DeletedSize += RemovedSize; - RemovedSize = 0; - MovedCount += MovedChunks.size(); - MovedChunks.clear(); - } + ReportChanges(); { RwLock::ExclusiveLockScope InsertLock(m_InsertLock); @@ -1135,6 +1206,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, RemovedSize += OldBlockSize; } }); + if (NewBlockFile) { NewBlockFile->Flush(); @@ -1142,14 +1214,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NewBlockFile = nullptr; } - if (!MovedChunks.empty() || RemovedSize > 0) - { - ChangeCallback(MovedChunks, RemovedSize); - DeletedSize += RemovedSize; - RemovedSize = 0; - MovedCount += MovedChunks.size(); - MovedChunks.clear(); - } + ReportChanges(); } const char* diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 00a018948..7b8e930b3 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -553,21 +553,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) GcCtx.AddDeletedCids(DeletedChunks); } -class CasContainerStoreCompactor : public GcReferenceStoreCompactor +class CasContainerStoreCompactor : public GcStoreCompactor { public: - CasContainerStoreCompactor(CasContainerStrategy& Owner, - BlockStoreCompactState&& CompactState, - std::vector<IoHash>&& CompactStateKeys, - std::vector<IoHash>&& PrunedKeys) - : m_CasContainerStrategy(Owner) - , m_CompactState(std::move(CompactState)) - , m_CompactStateKeys(std::move(CompactStateKeys)) - , m_PrunedKeys(std::move(PrunedKeys)) - { - } + CasContainerStoreCompactor(CasContainerStrategy& Owner) : m_CasContainerStrategy(Owner) {} - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -575,60 +566,109 @@ public: { return; } - ZEN_INFO("GCV2: compactcas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [COMPACT] '{}': RemovedDisk: {} in {}", m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); - if (Ctx.Settings.IsDeleteMode && Ctx.Settings.CollectSmallObjects) + if (Ctx.Settings.CollectSmallObjects) { - // Compact block store - m_CasContainerStrategy.m_BlockStore.CompactBlocks( - m_CompactState, - m_CasContainerStrategy.m_PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { - std::vector<CasDiskIndexEntry> MovedEntries; - RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); - for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + std::unordered_map<uint32_t, uint64_t> BlockUsage; + { + RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); + for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) + { + size_t Index = Entry.second; + const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index]; + + uint32_t BlockIndex = Loc.GetBlockIndex(); + uint64_t ChunkSize = RoundUp(Loc.GetSize(), m_CasContainerStrategy.m_PayloadAlignment); + auto It = BlockUsage.find(BlockIndex); + if (It == BlockUsage.end()) + { + BlockUsage.insert_or_assign(BlockIndex, ChunkSize); + } + else { - size_t ChunkIndex = Moved.first; - const IoHash& Key = m_CompactStateKeys[ChunkIndex]; + It->second += ChunkSize; + } + } + } - if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); It != m_CasContainerStrategy.m_LocationMap.end()) - { - BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; - const BlockStoreLocation& OldLocation = m_CompactState.GetLocation(ChunkIndex); - if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) - { - // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d at a - // later time - continue; - } + { + BlockStoreCompactState BlockCompactState; + std::vector<IoHash> BlockCompactStateKeys; + + std::vector<uint32_t> BlocksToCompact = + m_CasContainerStrategy.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent); + BlockCompactState.IncludeBlocks(BlocksToCompact); - const BlockStoreLocation& NewLocation = Moved.second; - Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment); - MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); + { + RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); + for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) + { + size_t Index = Entry.second; + const BlockStoreDiskLocation& Loc = m_CasContainerStrategy.m_Locations[Index]; + + if (!BlockCompactState.AddKeepLocation(Loc.Get(m_CasContainerStrategy.m_PayloadAlignment))) + { + continue; } + BlockCompactStateKeys.push_back(Entry.first); } - m_CasContainerStrategy.m_CasLog.Append(MovedEntries); - Stats.RemovedDisk += FreedDiskSpace; - }, - [&]() { return 0; }); + } - Stats.Compacted += - m_PrunedKeys.size(); // Slightly missleading, it might not be compacted if the block is the currently writing block + if (Ctx.Settings.IsDeleteMode) + { + ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': compacting {} blocks", + m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, + BlocksToCompact.size()); + + m_CasContainerStrategy.m_BlockStore.CompactBlocks( + BlockCompactState, + m_CasContainerStrategy.m_PayloadAlignment, + [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + std::vector<CasDiskIndexEntry> MovedEntries; + RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); + for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + { + size_t ChunkIndex = Moved.first; + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + + if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); + It != m_CasContainerStrategy.m_LocationMap.end()) + { + BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; + const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); + if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) + { + // Someone has moved our chunk so lets just skip the new location we were provided, it will be GC:d + // at a later time + continue; + } + const BlockStoreLocation& NewLocation = Moved.second; + + Location = BlockStoreDiskLocation(NewLocation, m_CasContainerStrategy.m_PayloadAlignment); + MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); + } + } + m_CasContainerStrategy.m_CasLog.Append(MovedEntries); + Stats.RemovedDisk += FreedDiskSpace; + }, + ClaimDiskReserveCallback); + } + else + { + ZEN_DEBUG("GCV2: compactcas [COMPACT] '{}': skipped compacting of {} eligible blocks", + m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, + BlocksToCompact.size()); + } + } } } - CasContainerStrategy& m_CasContainerStrategy; - BlockStoreCompactState m_CompactState; - std::vector<IoHash> m_CompactStateKeys; - std::vector<IoHash> m_PrunedKeys; + CasContainerStrategy& m_CasContainerStrategy; }; class CasContainerReferencePruner : public GcReferencePruner @@ -640,9 +680,7 @@ public: { } - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -650,17 +688,17 @@ public: { return; } - ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [PRUNE] '{}': Checked: {}, Deleted: {}, FreedMemory: {} in {}", m_CasContainerStrategy.m_RootDirectory / m_CasContainerStrategy.m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.DeletedCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); + Stats.FoundCount = UnusedCids.size(); if (UnusedCids.empty()) { @@ -668,19 +706,11 @@ public: return nullptr; } - BlockStoreCompactState CompactState; - BlockStore::ReclaimSnapshotState BlockSnapshotState; - std::vector<IoHash> CompactStateKeys; - std::vector<CasDiskIndexEntry> ExpiredEntries; + std::vector<CasDiskIndexEntry> ExpiredEntries; ExpiredEntries.reserve(UnusedCids.size()); - tsl::robin_set<IoHash, IoHash::Hasher> UnusedKeys; { RwLock::ExclusiveLockScope __(m_CasContainerStrategy.m_LocationMapLock); - if (Ctx.Settings.CollectSmallObjects) - { - BlockSnapshotState = m_CasContainerStrategy.m_BlockStore.GetReclaimSnapshotState(); - } for (const IoHash& Cid : UnusedCids) { @@ -689,59 +719,28 @@ public: { continue; } - CasDiskIndexEntry ExpiredEntry = {.Key = Cid, - .Location = m_CasContainerStrategy.m_Locations[It->second], - .Flags = CasDiskIndexEntry::kTombstone}; - const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; - BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment); if (Ctx.Settings.CollectSmallObjects) { - UnusedKeys.insert(Cid); - uint32_t BlockIndex = BlockLocation.BlockIndex; - bool IsActiveWriteBlock = BlockSnapshotState.m_ActiveWriteBlocks.contains(BlockIndex); - if (!IsActiveWriteBlock) - { - CompactState.IncludeBlock(BlockIndex); - } + CasDiskIndexEntry ExpiredEntry = {.Key = Cid, + .Location = m_CasContainerStrategy.m_Locations[It->second], + .Flags = CasDiskIndexEntry::kTombstone}; ExpiredEntries.push_back(ExpiredEntry); } } - // Get all locations we need to keep for affected blocks - if (Ctx.Settings.CollectSmallObjects && !UnusedKeys.empty()) - { - for (const auto& Entry : m_CasContainerStrategy.m_LocationMap) - { - const IoHash& Key = Entry.first; - if (UnusedKeys.contains(Key)) - { - continue; - } - const BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[Entry.second]; - BlockStoreLocation BlockLocation = Location.Get(m_CasContainerStrategy.m_PayloadAlignment); - if (CompactState.AddKeepLocation(BlockLocation)) - { - CompactStateKeys.push_back(Key); - } - } - } - if (Ctx.Settings.IsDeleteMode) { for (const CasDiskIndexEntry& Entry : ExpiredEntries) { m_CasContainerStrategy.m_LocationMap.erase(Entry.Key); + Stats.DeletedCount++; } m_CasContainerStrategy.m_CasLog.Append(ExpiredEntries); m_CasContainerStrategy.m_CasLog.Flush(); } } - Stats.Pruned += UnusedKeys.size(); - return new CasContainerStoreCompactor(m_CasContainerStrategy, - std::move(CompactState), - std::move(CompactStateKeys), - std::vector<IoHash>(UnusedKeys.begin(), UnusedKeys.end())); + return new CasContainerStoreCompactor(m_CasContainerStrategy); } private: @@ -756,7 +755,7 @@ CasContainerStrategy::GetGcName(GcCtx&) } GcReferencePruner* -CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) +CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -764,13 +763,8 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S { return; } - ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: compactcas [CREATE PRUNERS] '{}' in {}", m_RootDirectory / m_ContainerBaseName, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); @@ -787,7 +781,6 @@ CasContainerStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& S CidsToCheck.push_back(It.first); } } - Stats.Count += CidsToCheck.size(); return new CasContainerReferencePruner(*this, std::move(CidsToCheck)); } diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index a72619e4b..6e432bc9d 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -1331,35 +1331,31 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) return Entries; }; -class FileCasStoreCompactor : public GcReferenceStoreCompactor +class FileCasStoreCompactor : public GcStoreCompactor { public: FileCasStoreCompactor(FileCasStrategy& Owner, std::vector<IoHash>&& ReferencesToClean) : m_FileCasStrategy(Owner) , m_ReferencesToClean(std::move(ReferencesToClean)) { + m_ReferencesToClean.shrink_to_fit(); } - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>&) { - Stopwatch Timer; - const auto _ = MakeGuard([&] { - if (!Ctx.Settings.Verbose) - { - return; - } - ZEN_INFO("GCV2: filecas [COMPACT] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", - m_FileCasStrategy.m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - }); - std::vector<IoHash> ReferencedCleaned; - ReferencedCleaned.reserve(m_ReferencesToClean.size()); + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: filecas [COMPACT] '{}': RemovedDisk: {} in {}", + m_FileCasStrategy.m_RootDirectory, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + size_t Skipped = 0; for (const IoHash& ChunkHash : m_ReferencesToClean) { FileCasStrategy::ShardingHelper Name(m_FileCasStrategy.m_RootDirectory.c_str(), ChunkHash); @@ -1372,7 +1368,9 @@ public: } if (Ctx.Settings.IsDeleteMode) { - ZEN_DEBUG("deleting CAS payload file '{}'", Name.ShardedPath.ToUtf8()); + ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Deleting CAS payload file '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8()); std::error_code Ec; uint64_t SizeOnDisk = std::filesystem::file_size(Name.ShardedPath.c_str(), Ec); if (Ec) @@ -1382,7 +1380,10 @@ public: bool Existed = std::filesystem::remove(Name.ShardedPath.c_str(), Ec); if (Ec) { - ZEN_WARN("failed deleting CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message()); + ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed deleting CAS payload file '{}'. Reason '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8(), + Ec.message()); continue; } if (!Existed) @@ -1397,18 +1398,28 @@ public: bool Existed = std::filesystem::is_regular_file(Name.ShardedPath.c_str(), Ec); if (Ec) { - ZEN_WARN("failed checking CAS payload file '{}'. Reason '{}'", Name.ShardedPath.ToUtf8(), Ec.message()); + ZEN_WARN("GCV2: filecas [COMPACT] '{}': Failed checking CAS payload file '{}'. Reason '{}'", + m_FileCasStrategy.m_RootDirectory, + Name.ShardedPath.ToUtf8(), + Ec.message()); continue; } if (!Existed) { continue; } + Skipped++; } - ReferencedCleaned.push_back(ChunkHash); } } - Stats.Compacted += ReferencedCleaned.size(); + + if (Skipped > 0) + { + ZEN_DEBUG("GCV2: filecas [COMPACT] '{}': Skipped deleting of {} eligible files", m_FileCasStrategy.m_RootDirectory, Skipped); + } + + m_ReferencesToClean.clear(); + m_ReferencesToClean.shrink_to_fit(); } private: @@ -1421,9 +1432,7 @@ class FileCasReferencePruner : public GcReferencePruner public: FileCasReferencePruner(FileCasStrategy& Owner, std::vector<IoHash>&& Cids) : m_FileCasStrategy(Owner), m_Cids(std::move(Cids)) {} - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1431,23 +1440,29 @@ public: { return; } - ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", + ZEN_INFO("GCV2: filecas [PRUNE] '{}': Count: {}, Unreferenced: {}, FreedMemory: {} in {}", m_FileCasStrategy.m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), + Stats.CheckedCount, + Stats.FoundCount, + NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> UnusedCids = GetUnusedReferences(m_Cids); + Stats.CheckedCount = m_Cids.size(); if (UnusedCids.empty()) { // Nothing to collect return nullptr; } + Stats.FoundCount += UnusedCids.size(); + + if (!Ctx.Settings.IsDeleteMode) + { + return nullptr; + } + std::vector<IoHash> PrunedReferences; PrunedReferences.reserve(UnusedCids.size()); { @@ -1459,19 +1474,21 @@ public: { continue; } - if (Ctx.Settings.IsDeleteMode) - { - uint64_t FileSize = It->second.Size; - m_FileCasStrategy.m_Index.erase(It); - m_FileCasStrategy.m_CasLog.Append( - {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize}); - m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed); - } + uint64_t FileSize = It->second.Size; + m_FileCasStrategy.m_Index.erase(It); + m_FileCasStrategy.m_CasLog.Append( + {.Key = ChunkHash, .Flags = FileCasStrategy::FileCasIndexEntry::kTombStone, .Size = FileSize}); + m_FileCasStrategy.m_TotalSize.fetch_sub(It->second.Size, std::memory_order_relaxed); PrunedReferences.push_back(ChunkHash); + Stats.DeletedCount++; } } - Stats.Pruned += PrunedReferences.size(); + if (PrunedReferences.empty()) + { + return nullptr; + } + return new FileCasStoreCompactor(m_FileCasStrategy, std::move(PrunedReferences)); } @@ -1487,7 +1504,7 @@ FileCasStrategy::GetGcName(GcCtx&) } GcReferencePruner* -FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) +FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats&) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1495,14 +1512,7 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) { return; } - ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}': Count: {}, Pruned: {}, Compacted: {}, RemovedDisk: {}, RemovedMemory: {} in {}", - m_RootDirectory, - Stats.Count, - Stats.Pruned, - Stats.Compacted, - NiceBytes(Stats.RemovedDisk), - NiceBytes(Stats.RemovedMemory), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("GCV2: filecas [CREATE PRUNERS] '{}' in {}", m_RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); std::vector<IoHash> CidsToCheck; { @@ -1517,7 +1527,6 @@ FileCasStrategy::CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) CidsToCheck.push_back(It.first); } } - Stats.Count += CidsToCheck.size(); return new FileCasReferencePruner(*this, std::move(CidsToCheck)); } diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 7f9ca5236..b78b23350 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -173,166 +173,6 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) ////////////////////////////////////////////////////////////////////////// -void -WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) -{ - if (Stats.Count == 0) - { - return; - } - Writer << "Count" << Stats.Count; - Writer << "Expired" << Stats.Expired; - Writer << "Deleted" << Stats.Deleted; - - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory); - } - else - { - Writer << "RemovedDiskBytes" << Stats.RemovedDisk; - Writer << "RemovedMemoryBytes" << Stats.RemovedMemory; - } - - if (HumanReadable) - { - Writer << "RemoveExpiredData" << NiceTimeSpanMs(Stats.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Stats.CreateReferenceCheckersMS.count()); - Writer << "LockState" << NiceTimeSpanMs(Stats.LockStateMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count()); - } - else - { - Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Stats.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Stats.CreateReferenceCheckersMS.count()); - Writer << "LockStateMS" << gsl::narrow<int64_t>(Stats.LockStateMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count()); - } -}; - -void -WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) -{ - if (Stats.Count == 0) - { - return; - } - Writer << "Count" << Stats.Count; - Writer << "Pruned" << Stats.Pruned; - Writer << "Compacted" << Stats.Compacted; - - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Stats.RemovedMemory); - } - else - { - Writer << "RemovedDiskBytes" << Stats.RemovedDisk; - Writer << "RemovedMemoryBytes" << Stats.RemovedMemory; - } - - if (HumanReadable) - { - Writer << "CreateReferencePruner" << NiceTimeSpanMs(Stats.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Stats.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStore" << NiceTimeSpanMs(Stats.CompactReferenceStoreMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Stats.ElapsedMS.count()); - } - else - { - Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Stats.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Stats.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Stats.CompactReferenceStoreMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Stats.ElapsedMS.count()); - } -}; - -void -WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) -{ - if (HumanReadable) - { - Writer << "RemovedDisk" << NiceBytes(Result.RemovedDisk); - Writer << "RemovedMemory" << NiceBytes(Result.RemovedMemory); - Writer << "WriteBlock" << NiceTimeSpanMs(Result.WriteBlockMS.count()); - Writer << "Elapsed" << NiceTimeSpanMs(Result.ElapsedMS.count()); - } - else - { - Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.RemovedDisk); - Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.RemovedMemory); - Writer << "WriteBlockMS" << gsl::narrow<int64_t>(Result.WriteBlockMS.count()); - Writer << "ElapsedMS" << gsl::narrow<int64_t>(Result.ElapsedMS.count()); - } - - if (!IncludeDetails) - { - return; - } - - if (HumanReadable) - { - Writer << "RemoveExpiredData" << NiceTimeSpanMs(Result.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckers" << NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count()); - Writer << "LockState" << NiceTimeSpanMs(Result.LockStateMS.count()); - - Writer << "CreateReferencePruner" << NiceTimeSpanMs(Result.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedData" << NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStore" << NiceTimeSpanMs(Result.CompactReferenceStoreMS.count()); - } - else - { - Writer << "RemoveExpiredDataMS" << gsl::narrow<int64_t>(Result.RemoveExpiredDataMS.count()); - Writer << "CreateReferenceCheckersMS" << gsl::narrow<int64_t>(Result.CreateReferenceCheckersMS.count()); - Writer << "LockStateMS" << gsl::narrow<int64_t>(Result.LockStateMS.count()); - - Writer << "CreateReferencePrunerMS" << gsl::narrow<int64_t>(Result.CreateReferencePrunerMS.count()); - Writer << "RemoveUnreferencedDataMS" << gsl::narrow<int64_t>(Result.RemoveUnreferencedDataMS.count()); - Writer << "CompactReferenceStoreMS" << gsl::narrow<int64_t>(Result.CompactReferenceStoreMS.count()); - } - - Writer.BeginObject("ReferencerStats"); - { - WriteReferencerStats(Writer, Result.ReferencerStat, HumanReadable); - } - Writer.EndObject(); - - Writer.BeginObject("ReferenceStoreStats"); - { - WriteReferenceStoreStats(Writer, Result.ReferenceStoreStat, HumanReadable); - } - Writer.EndObject(); - - if (!Result.ReferencerStats.empty()) - { - Writer.BeginArray("Referencers"); - { - for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats) - { - Writer.BeginObject(); - Writer << "Name" << It.first; - WriteReferencerStats(Writer, It.second, HumanReadable); - Writer.EndObject(); - } - } - Writer.EndArray(); - } - if (!Result.ReferenceStoreStats.empty()) - { - Writer.BeginArray("ReferenceStores"); - for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats) - { - Writer.BeginObject(); - Writer << "Name" << It.first; - WriteReferenceStoreStats(Writer, It.second, HumanReadable); - Writer.EndObject(); - } - Writer.EndArray(); - } -}; - struct GcContext::GcState { using CacheKeyContexts = std::unordered_map<std::string, std::vector<IoHash>>; @@ -490,44 +330,235 @@ GcManager::~GcManager() //////// Begin GC V2 void -GcResult::Sum() +WriteGcStats(CbObjectWriter& Writer, const GcStats& Stats, bool HumanReadable) { - for (std::pair<std::string, GcReferencerStats>& Referencer : ReferencerStats) + Writer << "Checked" << Stats.CheckedCount; + Writer << "Found" << Stats.FoundCount; + Writer << "Deleted" << Stats.DeletedCount; + if (HumanReadable) { - GcReferencerStats& SubStat = Referencer.second; - ReferencerStat.Count += SubStat.Count; - ReferencerStat.Expired += SubStat.Expired; - ReferencerStat.Deleted += SubStat.Deleted; - ReferencerStat.RemovedDisk += SubStat.RemovedDisk; - ReferencerStat.RemovedMemory += SubStat.RemovedMemory; - SubStat.ElapsedMS = SubStat.RemoveExpiredDataMS + SubStat.CreateReferenceCheckersMS + SubStat.LockStateMS; + Writer << "FreedMemory" << NiceBytes(Stats.FreedMemory); + } + else + { + Writer << "FreedMemoryBytes" << Stats.FreedMemory; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +} - ReferencerStat.RemoveExpiredDataMS += SubStat.RemoveExpiredDataMS; - ReferencerStat.CreateReferenceCheckersMS += SubStat.CreateReferenceCheckersMS; - ReferencerStat.LockStateMS += SubStat.LockStateMS; - ReferencerStat.ElapsedMS += SubStat.ElapsedMS; +void +WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats, bool HumanReadable) +{ + if (HumanReadable) + { + Writer << "RemovedDisk" << NiceBytes(Stats.RemovedDisk); + } + else + { + Writer << "RemovedDiskBytes" << Stats.RemovedDisk; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +} - RemovedDisk += SubStat.RemovedDisk; - RemovedMemory += SubStat.RemovedMemory; +void +WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) +{ + if (Stats.RemoveExpiredDataStats.CheckedCount == 0) + { + return; } - for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : ReferenceStoreStats) + Writer.BeginObject("RemoveExpired"); { - GcReferenceStoreStats& SubStat = ReferenceStore.second; - ReferenceStoreStat.Count += SubStat.Count; - ReferenceStoreStat.Pruned += SubStat.Pruned; - ReferenceStoreStat.Compacted += SubStat.Compacted; - ReferenceStoreStat.RemovedDisk += SubStat.RemovedDisk; - ReferenceStoreStat.RemovedMemory += SubStat.RemovedMemory; - SubStat.ElapsedMS = SubStat.CreateReferencePrunerMS + SubStat.RemoveUnreferencedDataMS + SubStat.CompactReferenceStoreMS; + WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); + } + Writer.EndObject(); + + Writer << "CreateReferenceCheckers" << ToTimeSpan(Stats.CreateReferenceCheckersMS); + Writer << "LockState" << ToTimeSpan(Stats.LockStateMS); + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void +WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) +{ + if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0) + { + return; + } + Writer.BeginObject("RemoveUnreferenced"); + { + WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Stats.CompactStoreStats, HumanReadable); + } + Writer.EndObject(); + + Writer << "CreateReferencePruners" << ToTimeSpan(Stats.CreateReferencePrunersMS); + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void +WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) +{ + if (!IncludeDetails) + { + if (HumanReadable) + { + Writer << "RemovedDisk" << NiceBytes(Result.CompactStoresStatSum.RemovedDisk); + Writer << "FreedMemory" << NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); + } + else + { + Writer << "RemovedDiskBytes" << gsl::narrow<int64_t>(Result.CompactStoresStatSum.RemovedDisk); + Writer << "RemovedMemoryBytes" << gsl::narrow<int64_t>(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory); + } + Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); + Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); + return; + } + + Writer.BeginObject("Referencer"); + { + WriteReferencerStats(Writer, Result.ReferencerStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("ReferenceStore"); + { + WriteReferenceStoreStats(Writer, Result.ReferenceStoreStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer.BeginObject("Compact"); + { + WriteCompactStoreStats(Writer, Result.CompactStoresStatSum, HumanReadable); + } + Writer.EndObject(); + + Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS); + Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS); + Writer << "LockState" << ToTimeSpan(Result.LockStateMS); + + Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS); + Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS); + Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS); + Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); + Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); + + if (!Result.ReferencerStats.empty()) + { + Writer.BeginArray("Referencers"); + { + for (const std::pair<std::string, GcReferencerStats>& It : Result.ReferencerStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferencerStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + } + Writer.EndArray(); + } + if (!Result.ReferenceStoreStats.empty()) + { + Writer.BeginArray("ReferenceStores"); + for (const std::pair<std::string, GcReferenceStoreStats>& It : Result.ReferenceStoreStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferenceStoreStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + Writer.EndArray(); + } +}; + +void +Add(GcCompactStoreStats& Sum, const GcCompactStoreStats& Sub) +{ + Sum.RemovedDisk += Sub.RemovedDisk; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Add(GcStats& Sum, const GcStats& Sub) +{ + Sum.CheckedCount += Sub.CheckedCount; + Sum.FoundCount += Sub.FoundCount; + Sum.DeletedCount += Sub.DeletedCount; + Sum.FreedMemory += Sub.FreedMemory; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcReferencerStats& Stat) +{ + Stat.ElapsedMS = + Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS + Stat.LockStateMS; +} + +void +Add(GcReferencerStats& Sum, const GcReferencerStats& Sub) +{ + Add(Sum.RemoveExpiredDataStats, Sub.RemoveExpiredDataStats); + Add(Sum.CompactStoreStats, Sub.CompactStoreStats); + + Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS; + Sum.LockStateMS += Sub.LockStateMS; + + Sum.ElapsedMS += Sub.ElapsedMS; +} - ReferenceStoreStat.CreateReferencePrunerMS += SubStat.CreateReferencePrunerMS; - ReferenceStoreStat.RemoveUnreferencedDataMS += SubStat.RemoveUnreferencedDataMS; - ReferenceStoreStat.CompactReferenceStoreMS += SubStat.CompactReferenceStoreMS; - ReferenceStoreStat.ElapsedMS += SubStat.ElapsedMS; +void +Sum(GcReferenceStoreStats& Stat) +{ + Stat.ElapsedMS = Stat.RemoveUnreferencedDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferencePrunersMS; +} - RemovedDisk += SubStat.RemovedDisk; - RemovedMemory += SubStat.RemovedMemory; +void +Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub) +{ + Add(Sum.RemoveUnreferencedDataStats, Sub.RemoveUnreferencedDataStats); + Add(Sum.CompactStoreStats, Sub.CompactStoreStats); + + Sum.CreateReferencePrunersMS += Sub.CreateReferencePrunersMS; + + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcResult& Stat) +{ + for (std::pair<std::string, GcReferencerStats>& Referencer : Stat.ReferencerStats) + { + GcReferencerStats& SubStat = Referencer.second; + Sum(SubStat); + Add(Stat.ReferencerStatSum, SubStat); + } + for (std::pair<std::string, GcReferenceStoreStats>& ReferenceStore : Stat.ReferenceStoreStats) + { + GcReferenceStoreStats& SubStat = ReferenceStore.second; + Sum(SubStat); + Add(Stat.ReferenceStoreStatSum, SubStat); } + + Sum(Stat.ReferencerStatSum); + Sum(Stat.ReferenceStoreStatSum); + + Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats); + Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats); } void @@ -581,7 +612,9 @@ GcManager::CollectGarbage(const GcSettings& Settings) Result.ReferencerStats.resize(m_GcReferencers.size()); - WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2"); + std::unordered_map<std::unique_ptr<GcStoreCompactor>, GcCompactStoreStats*> StoreCompactors; + RwLock StoreCompactorsLock; + WorkerThreadPool ThreadPool(WorkerThreadPoolCount, "GCV2"); ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); if (!m_GcReferencers.empty()) @@ -594,52 +627,15 @@ GcManager::CollectGarbage(const GcSettings& Settings) GcReferencer* Owner = m_GcReferencers[Index]; std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Owner, &Stats, &WorkLeft]() { + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Owner, &Stats, &StoreCompactorsLock, &StoreCompactors]() { auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); Stats.first = Owner->GetGcName(Ctx); - SCOPED_TIMER(Stats.second.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Owner->RemoveExpiredData(Ctx, Stats.second); - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - if (Ctx.Settings.SkipCidDelete) - { - Result.Sum(); - return Result; - } - - Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); - - ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); - std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; - if (!m_GcReferenceStores.empty()) - { - ReferencePruners.reserve(m_GcReferenceStores.size()); - Latch WorkLeft(1); - RwLock ReferencePrunersLock; - // CreateReferencePruner is usually not very heavy but big data sets change that - SCOPED_TIMER(Result.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) - { - GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; - std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - Stats.first = ReferenceStore->GetGcName(Ctx); - std::unique_ptr<GcReferencePruner> ReferencePruner; - { - SCOPED_TIMER(Stats.second.CreateReferencePrunerMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - // The ReferenceStore will pick a list of CId entries to check, returning a collector - ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); - } - if (ReferencePruner) + SCOPED_TIMER(Stats.second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + std::unique_ptr<GcStoreCompactor> StoreCompactor(Owner->RemoveExpiredData(Ctx, Stats.second.RemoveExpiredDataStats)); + if (StoreCompactor) { - RwLock::ExclusiveLockScope __(ReferencePrunersLock); - ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.second.CompactStoreStats); } }); } @@ -647,180 +643,217 @@ GcManager::CollectGarbage(const GcSettings& Settings) WorkLeft.Wait(); } - ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); - std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; - if (!m_GcReferencers.empty()) + if (!Ctx.Settings.SkipCidDelete) { - ReferenceCheckers.reserve(m_GcReferencers.size()); - Latch WorkLeft(1); - RwLock ReferenceCheckersLock; - SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - // Lock all reference owners from changing the reference data and get access to check for referenced data - for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) + Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); + + ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); + std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; + if (!m_GcReferenceStores.empty()) { - GcReferencer* Referencer = m_GcReferencers[Index]; - std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The Referencer will create a reference checker that guarrantees that the references do not change as long as it lives - std::vector<GcReferenceChecker*> Checkers; - { - SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checkers = Referencer->CreateReferenceCheckers(Ctx); - } - try - { - if (!Checkers.empty()) + ReferencePruners.reserve(m_GcReferenceStores.size()); + Latch WorkLeft(1); + RwLock ReferencePrunersLock; + // CreateReferencePruner is usually not very heavy but big data sets change that + SCOPED_TIMER(Result.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (size_t Index = 0; Index < m_GcReferenceStores.size(); Index++) + { + GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; + std::pair<std::string, GcReferenceStoreStats>& Stats = Result.ReferenceStoreStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, ReferenceStore, &Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + Stats.first = ReferenceStore->GetGcName(Ctx); + std::unique_ptr<GcReferencePruner> ReferencePruner; { - RwLock::ExclusiveLockScope __(ReferenceCheckersLock); - for (auto& Checker : Checkers) - { - ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); - Checker = nullptr; - } + SCOPED_TIMER(Stats.second.CreateReferencePrunersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // The ReferenceStore will pick a list of CId entries to check, returning a collector + ReferencePruner = std::unique_ptr<GcReferencePruner>(ReferenceStore->CreateReferencePruner(Ctx, Stats.second)); } - } - catch (std::exception&) - { - while (!Checkers.empty()) + if (ReferencePruner) { - delete Checkers.back(); - Checkers.pop_back(); + RwLock::ExclusiveLockScope __(ReferencePrunersLock); + ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); } - throw; - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); - } - - std::unordered_map<std::unique_ptr<GcReferenceStoreCompactor>, size_t> ReferenceStoreCompactors; - ReferenceStoreCompactors.reserve(ReferencePruners.size()); - - ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); - { - SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); - ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); - if (!ReferenceCheckers.empty()) - { - // Locking all references checkers so we have a steady state of which references are used - // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until - // we delete the ReferenceCheckers - Latch WorkLeft(1); - - SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferenceCheckers) - { - GcReferenceChecker* Checker = It.first.get(); - size_t Index = It.second; - std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checker->LockState(Ctx); }); } WorkLeft.CountDown(); WorkLeft.Wait(); } - ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); if (!ReferencePruners.empty()) { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { - HashSet UnusedCids(References.begin(), References.end()); - for (const auto& It : ReferenceCheckers) + ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); + std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; + if (!m_GcReferencers.empty()) + { + ReferenceCheckers.reserve(m_GcReferencers.size()); + Latch WorkLeft(1); + RwLock ReferenceCheckersLock; + SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + // Lock all reference owners from changing the reference data and get access to check for referenced data + for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) { - GcReferenceChecker* ReferenceChecker = It.first.get(); - ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); - if (UnusedCids.empty()) + GcReferencer* Referencer = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, &WorkLeft, Referencer, Index, &Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // The Referencer will create a reference checker that guarrantees that the references do not change as long as + // it lives + std::vector<GcReferenceChecker*> Checkers; + { + SCOPED_TIMER(Stats.second.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checkers = Referencer->CreateReferenceCheckers(Ctx); + } + try + { + if (!Checkers.empty()) + { + RwLock::ExclusiveLockScope __(ReferenceCheckersLock); + for (auto& Checker : Checkers) + { + ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); + Checker = nullptr; + } + } + } + catch (std::exception&) + { + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + throw; + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + + ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); + { + SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); + ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); + if (!ReferenceCheckers.empty()) + { + // Locking all references checkers so we have a steady state of which references are used + // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until + // we delete the ReferenceCheckers + Latch WorkLeft(1); + + SCOPED_TIMER(Result.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferenceCheckers) { - return {}; + GcReferenceChecker* Checker = It.first.get(); + size_t Index = It.second; + std::pair<std::string, GcReferencerStats>& Stats = Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork([&Ctx, Checker, Index, &Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + SCOPED_TIMER(Stats.second.LockStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checker->LockState(Ctx); + }); } + WorkLeft.CountDown(); + WorkLeft.Wait(); } - return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); - }; - // checking all Cids agains references in cache - // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight operation - // that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors + ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); + { + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::vector<IoHash> { + HashSet UnusedCids(References.begin(), References.end()); + for (const auto& It : ReferenceCheckers) + { + GcReferenceChecker* ReferenceChecker = It.first.get(); + ReferenceChecker->RemoveUsedReferencesFromSet(Ctx, UnusedCids); + if (UnusedCids.empty()) + { + return {}; + } + } + return std::vector<IoHash>(UnusedCids.begin(), UnusedCids.end()); + }; - Latch WorkLeft(1); - RwLock ReferenceStoreCompactorsLock; + // checking all Cids agains references in cache + // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight + // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors - SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferencePruners) - { - GcReferencePruner* Pruner = It.second.get(); - size_t Index = It.first; - GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, - Pruner, - &Stats, - &WorkLeft, - Index, - &GetUnusedReferences, - &ReferenceStoreCompactorsLock, - &ReferenceStoreCompactors]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. - std::unique_ptr<GcReferenceStoreCompactor> ReferenceCompactor; - { - SCOPED_TIMER(Stats.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - ReferenceCompactor = - std::unique_ptr<GcReferenceStoreCompactor>(Pruner->RemoveUnreferencedData(Ctx, Stats, GetUnusedReferences)); - } - if (ReferenceCompactor) + Latch WorkLeft(1); + + SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : ReferencePruners) { - RwLock::ExclusiveLockScope __(ReferenceStoreCompactorsLock); - ReferenceStoreCompactors.insert_or_assign(std::move(ReferenceCompactor), Index); + GcReferencePruner* Pruner = It.second.get(); + size_t Index = It.first; + GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; + WorkLeft.AddCount(1); + ThreadPool.ScheduleWork( + [&Ctx, Pruner, &Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced + // or not. + std::unique_ptr<GcStoreCompactor> StoreCompactor; + { + SCOPED_TIMER(Stats.RemoveUnreferencedDataStats.ElapsedMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + StoreCompactor = std::unique_ptr<GcStoreCompactor>( + Pruner->RemoveUnreferencedData(Ctx, Stats.RemoveUnreferencedDataStats, GetUnusedReferences)); + } + if (StoreCompactor) + { + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats.CompactStoreStats); + } + }); } - }); + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed + ReferenceCheckers.clear(); + ReferencePruners.clear(); } - WorkLeft.CountDown(); - WorkLeft.Wait(); } - // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed - ReferenceCheckers.clear(); } - // Let go of the pruners - ReferencePruners.clear(); - - ZEN_INFO("GCV2: Compacting reference stores for {} reference store compactors", ReferenceStoreCompactors.size()); - if (!ReferenceStoreCompactors.empty()) + ZEN_INFO("GCV2: Compacting reference stores for {} store compactors", StoreCompactors.size()); + if (!StoreCompactors.empty()) { - Latch WorkLeft(1); - + auto ClaimDiskReserve = [&]() -> uint64_t { + if (!std::filesystem::is_regular_file(Settings.DiskReservePath)) + { + return 0; + } + uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath); + if (std::filesystem::remove(Settings.DiskReservePath)) + { + return ReclaimedSize; + } + return 0; + }; // Remove the stuff we deemed unreferenced from disk - may be heavy operation - SCOPED_TIMER(Result.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - for (auto& It : ReferenceStoreCompactors) + // Don't do in parallel, we don't want to steal CPU/Disk from regular operation + SCOPED_TIMER(Result.CompactStoresMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + for (auto& It : StoreCompactors) { - GcReferenceStoreCompactor* Compactor = It.first.get(); - size_t Index = It.second; - GcReferenceStoreStats& Stats = Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - ThreadPool.ScheduleWork([&Ctx, Compactor, &Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + GcStoreCompactor* Compactor = It.first.get(); + GcCompactStoreStats& Stats = *It.second; + { // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or not. - SCOPED_TIMER(Stats.CompactReferenceStoreMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Compactor->CompactReferenceStore(Ctx, Stats); - }); + SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve); + } } - WorkLeft.CountDown(); - WorkLeft.Wait(); + StoreCompactors.clear(); } - ReferenceStoreCompactors.clear(); - ZEN_INFO("GCV2: Completed in {}", NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs())); } - Result.Sum(); + Sum(Result); return Result; } @@ -1227,22 +1260,17 @@ GcScheduler::AppendGCLog(GcClock::TimePoint StartTime, const GcSettings& Setting std::string Id = fmt::format("{}", gsl::narrow<int64_t>(StartTime.time_since_epoch().count())); Writer.BeginObject(Id); { - Writer << "StartTimeSec"sv - << gsl::narrow<int64_t>(std::chrono::duration_cast<std::chrono::seconds>(StartTime.time_since_epoch()).count()); + Writer << "StartTime"sv << ToDateTime(StartTime); Writer.BeginObject("Settings"sv); { - Writer << "CacheExpireTimeSec"sv - << gsl::narrow<int64_t>( - std::chrono::duration_cast<std::chrono::seconds>(Settings.CacheExpireTime.time_since_epoch()).count()); - Writer << "ProjectStoreExpireTimeSec"sv - << gsl::narrow<int64_t>( - std::chrono::duration_cast<std::chrono::seconds>(Settings.ProjectStoreExpireTime.time_since_epoch()) - .count()); + Writer << "CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime); + Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime); Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects; Writer << "IsDeleteMode"sv << Settings.IsDeleteMode; Writer << "SkipCidDelete"sv << Settings.SkipCidDelete; Writer << "Verbose"sv << Settings.Verbose; Writer << "SingleThread"sv << Settings.SingleThread; + Writer << "CompactBlockUsageThresholdPercent"sv << Settings.CompactBlockUsageThresholdPercent; } Writer.EndObject(); @@ -1817,26 +1845,27 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .ProjectStoreExpireTime = ProjectStoreExpireTime, .CollectSmallObjects = CollectSmallObjects, .IsDeleteMode = Delete, - .SkipCidDelete = SkipCid}; + .SkipCidDelete = SkipCid, + .DiskReservePath = m_Config.RootDirectory / "reserve.gc"}; GcClock::TimePoint GcStartTime = GcClock::Now(); GcResult Result = m_GcManager.CollectGarbage(Settings); ZEN_INFO( - "GCV2: Removed {} items out of {}, deleted {} out of {}. Pruned {} Cid entries out of {}, compacted {} Cid entries " - "out of {}, " - "freed " - "{} on disk and {} of memory in {}. CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " + "GCV2: Found {} expired items out of {}, deleted {}. " + "Found {} unreferenced Cid entries out of {}, deleted {}. " + "Freed {} on disk and {} of memory in {}. " + "CacheExpireTime: {}, ProjectStoreExpireTime: {}, CollectSmallObjects: {}, " "IsDeleteMode: {}, SkipCidDelete: {}", - Result.ReferencerStat.Expired, - Result.ReferencerStat.Count, - Result.ReferencerStat.Deleted, - Result.ReferencerStat.Expired, - Result.ReferenceStoreStat.Pruned, - Result.ReferenceStoreStat.Count, - Result.ReferenceStoreStat.Compacted, - Result.ReferenceStoreStat.Pruned, - NiceBytes(Result.RemovedDisk), - NiceBytes(Result.RemovedMemory), + Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount, + Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount, + Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount, + + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount, + Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount, + + NiceBytes(Result.CompactStoresStatSum.RemovedDisk), + NiceBytes(Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory), NiceTimeSpanMs(Result.ElapsedMS.count()), Settings.CacheExpireTime, Settings.ProjectStoreExpireTime, @@ -1854,8 +1883,8 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, { m_LastFullGCV2Result = Result; } - Diff.DiskSize = Result.RemovedDisk; - Diff.MemorySize = Result.RemovedMemory; + Diff.DiskSize = Result.CompactStoresStatSum.RemovedDisk; + Diff.MemorySize = Result.ReferencerStatSum.RemoveExpiredDataStats.FreedMemory; } break; } diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 75accd9b8..1429a6b02 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -136,7 +136,8 @@ public: // Ask the store to create empty blocks for all locations that does not have a block // Remove any block that is not referenced - void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations); + void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations); + std::vector<uint32_t> GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent); void Close(); @@ -193,16 +194,25 @@ class BlockStoreCompactState public: BlockStoreCompactState() = default; - void IncludeBlock(uint32_t BlockIndex) + void IncludeBlocks(const std::span<const uint32_t> BlockIndexes) { - auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); - if (It == m_BlockIndexToChunkMapIndex.end()) + for (uint32_t BlockIndex : BlockIndexes) { - m_KeepChunks.emplace_back(std::vector<size_t>()); - m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); + if (It == m_BlockIndexToChunkMapIndex.end()) + { + m_KeepChunks.emplace_back(std::vector<size_t>()); + m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + } } } + void IncludeBlock(uint32_t BlockIndex) + { + const uint32_t Blocks[1] = {BlockIndex}; + IncludeBlocks(Blocks); + } + bool AddKeepLocation(const BlockStoreLocation& Location) { auto It = m_BlockIndexToChunkMapIndex.find(Location.BlockIndex); diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index d4c7bba25..4cd01bc2c 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -60,17 +60,32 @@ struct GcSettings bool SkipCidDelete = false; bool Verbose = false; bool SingleThread = false; + uint32_t CompactBlockUsageThresholdPercent = + 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less + // usage than CompactBlockUsageThresholdPercent + std::filesystem::path DiskReservePath; +}; + +struct GcCompactStoreStats +{ + std::uint64_t RemovedDisk = 0; + std::chrono::milliseconds ElapsedMS = {}; +}; + +struct GcStats +{ + std::uint64_t CheckedCount = 0; + std::uint64_t FoundCount = 0; + std::uint64_t DeletedCount = 0; + std::uint64_t FreedMemory = 0; + std::chrono::milliseconds ElapsedMS = {}; }; struct GcReferencerStats { - std::uint64_t Count = 0; - std::uint64_t Expired = 0; - std::uint64_t Deleted = 0; - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; + GcStats RemoveExpiredDataStats; + GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds RemoveExpiredDataMS = {}; std::chrono::milliseconds CreateReferenceCheckersMS = {}; std::chrono::milliseconds LockStateMS = {}; std::chrono::milliseconds ElapsedMS = {}; @@ -78,43 +93,34 @@ struct GcReferencerStats struct GcReferenceStoreStats { - std::uint64_t Count = 0; - std::uint64_t Pruned = 0; - std::uint64_t Compacted = 0; - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; + GcStats RemoveUnreferencedDataStats; + GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds CreateReferencePrunerMS = {}; - std::chrono::milliseconds RemoveUnreferencedDataMS = {}; - std::chrono::milliseconds CompactReferenceStoreMS = {}; + std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds ElapsedMS = {}; }; struct GcResult { - GcReferencerStats ReferencerStat; - GcReferenceStoreStats ReferenceStoreStat; - - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; - std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats; std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats; + GcReferencerStats ReferencerStatSum; + GcReferenceStoreStats ReferenceStoreStatSum; + GcCompactStoreStats CompactStoresStatSum; + // Wall times, not sum of each std::chrono::milliseconds RemoveExpiredDataMS = {}; std::chrono::milliseconds CreateReferenceCheckersMS = {}; std::chrono::milliseconds LockStateMS = {}; - std::chrono::milliseconds CreateReferencePrunerMS = {}; + std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds RemoveUnreferencedDataMS = {}; - std::chrono::milliseconds CompactReferenceStoreMS = {}; + std::chrono::milliseconds CompactStoresMS = {}; std::chrono::milliseconds WriteBlockMS = {}; std::chrono::milliseconds ElapsedMS = {}; - - void Sum(); }; class CbObjectWriter; @@ -129,22 +135,23 @@ struct GcCtx typedef tsl::robin_set<IoHash> HashSet; /** - * @brief An interface to remove the stored data on disk after a GcReferencePruner::RemoveUnreferencedData + * @brief An interface to remove the stored data on disk after a GcReferencer::RemoveExpiredData and + * GcReferencePruner::RemoveUnreferencedData * - * CompactReferenceStore is called after pruning (GcReferencePruner::RemoveUnreferencedData) and state locking is - * complete so implementor must take care to only remove data that has not been altered since the prune operation. + * CompactStore is called after state locking is complete so implementor must take care to only remove + * data that has not been altered since the prune operation. * - * Instance will be deleted after CompactReferenceStore has completed execution. + * Instance will be deleted after CompactStore has completed execution. * * The subclass constructor should be provided with information on what is intended to be removed. */ -class GcReferenceStoreCompactor +class GcStoreCompactor { public: - virtual ~GcReferenceStoreCompactor() = default; + virtual ~GcStoreCompactor() = default; // Remove data on disk based on results from GcReferencePruner::RemoveUnreferencedData - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) = 0; + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) = 0; }; /** @@ -175,10 +182,6 @@ public: /** * @brief Interface to handle GC of data that references Cid data * - * TODO: Maybe we should split up being a referencer and something that holds cache values? - * - * GcCacheStore and GcReferencer? - * * This interface is registered/unregistered to GcManager vua AddGcReferencer() and RemoveGcReferencer() */ class GcReferencer @@ -190,10 +193,7 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; // Remove expired data based on either GcCtx::Settings CacheExpireTime/ProjectExpireTime - // TODO: For disk layer we need to first update it with access times from the memory layer - // The implementer of GcReferencer (in our case a disk bucket) does not know about any - // potential memory cache layer :( - virtual void RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) = 0; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) = 0; // Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of // returned instances @@ -213,14 +213,12 @@ public: // Check a set of references to see if they are in use. // Use the GetUnusedReferences input function to check if references are used and update any pointers // so any query for references determined to be unreferences will not be found. - // If any references a found to be unused, return a GcReferenceStoreCompactor instance which will + // If any references a found to be unused, return a GcStoreCompactor instance which will // clean up any stored bulk data mapping to the pruned references. // Caller will manage lifetime of returned instance // This function should execute as fast as possible, so try to prepare a list of references to check ahead of // call to this function and make sure the removal of unreferences items is as lightweight as possible. - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) = 0; + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) = 0; }; /** |