diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-30 09:32:54 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-30 09:32:54 +0100 |
| commit | 3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch) | |
| tree | 593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenserver/projectstore/projectstore.cpp | |
| parent | set up arch properly when running tests (mac) (#505) (diff) | |
| download | zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip | |
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenserver/projectstore/projectstore.cpp')
| -rw-r--r-- | src/zenserver/projectstore/projectstore.cpp | 258 |
1 files changed, 258 insertions, 0 deletions
diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index df23db1bd..274876123 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -452,6 +452,8 @@ struct ProjectStore::OplogStorage : public RefCounted m_OpBlobs.Flush(); } + uint32_t GetMaxLsn() const { return m_MaxLsn.load(); } + spdlog::logger& Log() { return m_OwnerOplog->Log(); } private: @@ -855,6 +857,17 @@ ProjectStore::Oplog::GetOpIndexByKey(const Oid& Key) return -1; } +int +ProjectStore::Oplog::GetMaxOpIndex() const +{ + RwLock::SharedLockScope _(m_OplogLock); + if (!m_Storage) + { + return -1; + } + return gsl::narrow<int>(m_Storage->GetMaxLsn()); +} + std::optional<CbObject> ProjectStore::Oplog::GetOpByKey(const Oid& Key) { @@ -1661,6 +1674,17 @@ ProjectStore::Project::TouchOplog(std::string_view Oplog) const m_LastAccessTimes.insert_or_assign(std::string(Oplog), GcClock::TickCount()); }; +GcClock::TimePoint +ProjectStore::Project::LastOplogAccessTime(std::string_view Oplog) const +{ + RwLock::SharedLockScope Lock(m_ProjectLock); + if (auto It = m_LastAccessTimes.find(std::string(Oplog)); It != m_LastAccessTimes.end()) + { + return GcClock::TimePointFromTick(It->second); + } + return GcClock::TimePoint::min(); +} + ////////////////////////////////////////////////////////////////////////// ProjectStore::ProjectStore(CidStore& Store, std::filesystem::path BasePath, GcManager& Gc, JobQueue& JobQueue) @@ -1675,11 +1699,13 @@ ProjectStore::ProjectStore(CidStore& Store, std::filesystem::path BasePath, GcMa // m_Log.set_level(spdlog::level::debug); m_Gc.AddGcContributor(this); m_Gc.AddGcStorage(this); + m_Gc.AddGcReferencer(*this); } ProjectStore::~ProjectStore() { ZEN_INFO("closing project store at '{}'", m_ProjectBasePath); + m_Gc.RemoveGcReferencer(*this); m_Gc.RemoveGcStorage(this); m_Gc.RemoveGcContributor(this); } @@ -3010,6 +3036,238 @@ ProjectStore::AreDiskWritesAllowed() const return (m_DiskWriteBlocker == nullptr || m_DiskWriteBlocker->AreDiskWritesAllowed()); } +void +ProjectStore::RemoveExpiredData(GcCtx& Ctx) +{ + size_t ProjectCount = 0; + size_t ExpiredProjectCount = 0; + size_t OplogCount = 0; + size_t ExpiredOplogCount = 0; + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_DEBUG("gc project store '{}': removed {} expired projects out of {}, {} expired oplogs out of {} in {}", + m_ProjectBasePath, + ExpiredProjectCount, + ProjectCount, + ExpiredOplogCount, + OplogCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + std::vector<Ref<Project>> ExpiredProjects; + std::vector<Ref<Project>> Projects; + + { + RwLock::SharedLockScope Lock(m_ProjectsLock); + for (auto& Kv : m_Projects) + { + if (Kv.second->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime)) + { + ExpiredProjects.push_back(Kv.second); + continue; + } + Projects.push_back(Kv.second); + } + } + + for (const Ref<Project>& Project : Projects) + { + std::vector<std::string> ExpiredOplogs; + { + RwLock::ExclusiveLockScope __(m_ProjectsLock); + Project->IterateOplogs( + [&Ctx, &Project, &ExpiredOplogs, &OplogCount](const RwLock::SharedLockScope& Lock, ProjectStore::Oplog& Oplog) { + OplogCount++; + if (Project->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime, Oplog)) + { + ExpiredOplogs.push_back(Oplog.OplogId()); + } + }); + } + std::filesystem::path ProjectPath = BasePathForProject(Project->Identifier); + ExpiredOplogCount += ExpiredOplogs.size(); + if (Ctx.Settings.IsDeleteMode) + { + for (const std::string& OplogId : ExpiredOplogs) + { + std::filesystem::path OplogBasePath = ProjectPath / OplogId; + uint64_t OplogSize = Oplog::TotalSize(OplogBasePath); + ZEN_DEBUG("gc project store '{}': garbage collected oplog '{}' in project '{}'. Removing storage on disk", + m_ProjectBasePath, + OplogId, + Project->Identifier); + Project->DeleteOplog(OplogId); + Ctx.RemovedDiskSpace.fetch_add(OplogSize); + } + Ctx.DeletedItems.fetch_add(ExpiredOplogs.size()); + Project->Flush(); + } + } + ProjectCount = Projects.size(); + Ctx.Items.fetch_add(ProjectCount + OplogCount); + ExpiredProjectCount = ExpiredProjects.size(); + + if (ExpiredProjects.empty()) + { + ZEN_DEBUG("gc project store '{}': no expired projects found", m_ProjectBasePath); + return; + } + + if (Ctx.Settings.IsDeleteMode) + { + for (const Ref<Project>& Project : ExpiredProjects) + { + std::filesystem::path PathToRemove; + std::string ProjectId = Project->Identifier; + { + { + RwLock::SharedLockScope Lock(m_ProjectsLock); + if (!Project->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime)) + { + ZEN_DEBUG("gc project store '{}': skipped garbage collect of project '{}'. Project no longer expired.", + m_ProjectBasePath, + ProjectId); + continue; + } + } + RwLock::ExclusiveLockScope __(m_ProjectsLock); + bool Success = Project->PrepareForDelete(PathToRemove); + if (!Success) + { + ZEN_DEBUG("gc project store '{}': skipped garbage collect of project '{}'. Project folder is locked.", + m_ProjectBasePath, + ProjectId); + continue; + } + m_Projects.erase(ProjectId); + } + + ZEN_DEBUG("gc project store '{}': sgarbage collected project '{}'. Removing storage on disk", m_ProjectBasePath, ProjectId); + if (PathToRemove.empty()) + { + continue; + } + + DeleteDirectories(PathToRemove); + } + Ctx.DeletedItems.fetch_add(ExpiredProjects.size()); + } + + Ctx.ExpiredItems.fetch_add(ExpiredOplogCount + ExpiredProjectCount); +} + +class ProjectStoreReferenceChecker : public GcReferenceChecker +{ +public: + ProjectStoreReferenceChecker(ProjectStore::Oplog& Owner, bool PreCache) : m_Oplog(Owner) + { + if (PreCache) + { + RwLock::SharedLockScope _(m_Oplog.m_OplogLock); + m_Oplog.IterateOplog([&](CbObjectView Op) { + Op.IterateAttachments([&](CbFieldView Visitor) { m_UncachedReferences.insert(Visitor.AsAttachment()); }); + }); + m_PreCachedLsn = m_Oplog.GetMaxOpIndex(); + } + } + + virtual ~ProjectStoreReferenceChecker() {} + + virtual void LockState(GcCtx&) override + { + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_DEBUG("gc project oplog '{}': found {} references in {} from {}/{}", + m_Oplog.m_BasePath, + m_UncachedReferences.size(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + m_Oplog.m_OuterProject->Identifier, + m_Oplog.OplogId()); + }); + + m_OplogLock = std::make_unique<RwLock::SharedLockScope>(m_Oplog.m_OplogLock); + if (m_Oplog.GetMaxOpIndex() != m_PreCachedLsn) + { + // TODO: Maybe we could just check the added oplog entries - we might get a few extra references from obsolete entries + // but I don't think that would be critical + m_UncachedReferences.clear(); + m_Oplog.IterateOplog([&](CbObjectView Op) { + Op.IterateAttachments([&](CbFieldView Visitor) { m_UncachedReferences.insert(Visitor.AsAttachment()); }); + }); + } + } + + virtual void RemoveUsedReferencesFromSet(GcCtx&, HashSet& IoCids) override + { + for (const IoHash& ReferenceHash : m_UncachedReferences) + { + IoCids.erase(ReferenceHash); + } + } + ProjectStore::Oplog& m_Oplog; + std::unique_ptr<RwLock::SharedLockScope> m_OplogLock; + HashSet m_UncachedReferences; + int m_PreCachedLsn = -1; +}; + +std::vector<GcReferenceChecker*> +ProjectStore::CreateReferenceCheckers(GcCtx&) +{ + size_t ProjectCount = 0; + size_t OplogCount = 0; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_DEBUG("gc project store '{}': opened {} projects and {} oplogs in {}", + m_ProjectBasePath, + ProjectCount, + OplogCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + DiscoverProjects(); + + std::vector<Ref<ProjectStore::Project>> Projects; + { + RwLock::SharedLockScope Lock(m_ProjectsLock); + Projects.reserve(m_Projects.size()); + + for (auto& Kv : m_Projects) + { + Projects.push_back(Kv.second); + } + } + ProjectCount += Projects.size(); + std::vector<GcReferenceChecker*> Checkers; + try + { + for (const Ref<ProjectStore::Project>& Project : Projects) + { + std::vector<std::string> OpLogs = Project->ScanForOplogs(); + Checkers.reserve(OpLogs.size()); + for (const std::string& OpLogId : OpLogs) + { + ProjectStore::Oplog* Oplog = Project->OpenOplog(OpLogId); + GcClock::TimePoint Now = GcClock::Now(); + bool TryPreCache = Project->LastOplogAccessTime(OpLogId) < (Now - std::chrono::minutes(5)); + Checkers.emplace_back(new ProjectStoreReferenceChecker(*Oplog, TryPreCache)); + } + OplogCount += OpLogs.size(); + } + } + catch (std::exception&) + { + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + throw; + } + + return Checkers; +} + ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS |