aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/projectstore/projectstore.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2023-10-30 09:32:54 +0100
committer: GitHub <[email protected]> 2023-10-30 09:32:54 +0100
commit: 3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch)
tree: 593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenserver/projectstore/projectstore.cpp
parent: set up arch properly when running tests (mac) (#505) (diff)
download: zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz
zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenserver/projectstore/projectstore.cpp')
-rw-r--r-- src/zenserver/projectstore/projectstore.cpp 258
1 file changed, 258 insertions, 0 deletions
diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp
index df23db1bd..274876123 100644
--- a/src/zenserver/projectstore/projectstore.cpp
+++ b/src/zenserver/projectstore/projectstore.cpp
@@ -452,6 +452,8 @@ struct ProjectStore::OplogStorage : public RefCounted
m_OpBlobs.Flush();
}
+    /// Returns the value currently held in m_MaxLsn.
+    uint32_t GetMaxLsn() const
+    {
+        return m_MaxLsn.load();
+    }
+
spdlog::logger& Log() { return m_OwnerOplog->Log(); }  // Forwards to the logger of the owning oplog (m_OwnerOplog).
private:
@@ -855,6 +857,17 @@ ProjectStore::Oplog::GetOpIndexByKey(const Oid& Key)
return -1;
}
+/// Returns the highest LSN reported by the oplog storage, or -1 when no storage is attached.
+/// The oplog lock is held in shared mode for the duration of the call.
+int
+ProjectStore::Oplog::GetMaxOpIndex() const
+{
+    RwLock::SharedLockScope ReadLock(m_OplogLock);
+    if (!m_Storage)
+    {
+        // No backing storage, report the "no ops" sentinel
+        return -1;
+    }
+    const uint32_t MaxLsn = m_Storage->GetMaxLsn();
+    return gsl::narrow<int>(MaxLsn);
+}
+
std::optional<CbObject>
ProjectStore::Oplog::GetOpByKey(const Oid& Key)
{
@@ -1661,6 +1674,17 @@ ProjectStore::Project::TouchOplog(std::string_view Oplog) const
m_LastAccessTimes.insert_or_assign(std::string(Oplog), GcClock::TickCount());
};
+/// Looks up the recorded last-access tick for the named oplog and converts it to a time point.
+/// Returns GcClock::TimePoint::min() when no access has been recorded for that oplog.
+GcClock::TimePoint
+ProjectStore::Project::LastOplogAccessTime(std::string_view Oplog) const
+{
+    RwLock::SharedLockScope ReadLock(m_ProjectLock);
+    const auto              Entry = m_LastAccessTimes.find(std::string(Oplog));
+    if (Entry == m_LastAccessTimes.end())
+    {
+        return GcClock::TimePoint::min();
+    }
+    return GcClock::TimePointFromTick(Entry->second);
+}
+
//////////////////////////////////////////////////////////////////////////
ProjectStore::ProjectStore(CidStore& Store, std::filesystem::path BasePath, GcManager& Gc, JobQueue& JobQueue)
@@ -1675,11 +1699,13 @@ ProjectStore::ProjectStore(CidStore& Store, std::filesystem::path BasePath, GcMa
// m_Log.set_level(spdlog::level::debug);
m_Gc.AddGcContributor(this);
m_Gc.AddGcStorage(this);
+ m_Gc.AddGcReferencer(*this);
}
// Logs the shutdown and unregisters this instance from m_Gc.
ProjectStore::~ProjectStore()
{
ZEN_INFO("closing project store at '{}'", m_ProjectBasePath);
+ m_Gc.RemoveGcReferencer(*this);  // removed first: it is the last registration made in the constructor
m_Gc.RemoveGcStorage(this);
m_Gc.RemoveGcContributor(this);
}
@@ -3010,6 +3036,238 @@ ProjectStore::AreDiskWritesAllowed() const
return (m_DiskWriteBlocker == nullptr || m_DiskWriteBlocker->AreDiskWritesAllowed());
}
+/// Finds projects and oplogs that are expired according to Ctx.Settings.ProjectStoreExpireTime and,
+/// when Ctx.Settings.IsDeleteMode is set, deletes them.
+///
+/// Statistics written to Ctx:
+///  - Items:            non-expired projects plus the oplogs visited in them
+///  - ExpiredItems:     expired oplogs plus expired projects (always reported, also when nothing is deleted)
+///  - DeletedItems:     oplogs passed to DeleteOplog plus projects actually removed from m_Projects
+///  - RemovedDiskSpace: size of each deleted oplog as measured just before deletion
+void
+ProjectStore::RemoveExpiredData(GcCtx& Ctx)
+{
+    size_t     ProjectCount        = 0;
+    size_t     ExpiredProjectCount = 0;
+    size_t     OplogCount          = 0;
+    size_t     ExpiredOplogCount   = 0;
+    Stopwatch  Timer;
+    const auto _ = MakeGuard([&] {
+        ZEN_DEBUG("gc project store '{}': removed {} expired projects out of {}, {} expired oplogs out of {} in {}",
+                  m_ProjectBasePath,
+                  ExpiredProjectCount,
+                  ProjectCount,
+                  ExpiredOplogCount,
+                  OplogCount,
+                  NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+    });
+
+    std::vector<Ref<Project>> ExpiredProjects;
+    std::vector<Ref<Project>> Projects;
+
+    // Partition the known projects into expired and live ones under the shared lock
+    {
+        RwLock::SharedLockScope Lock(m_ProjectsLock);
+        for (auto& Kv : m_Projects)
+        {
+            if (Kv.second->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime))
+            {
+                ExpiredProjects.push_back(Kv.second);
+                continue;
+            }
+            Projects.push_back(Kv.second);
+        }
+    }
+
+    // Live projects may still contain individual oplogs that have expired
+    for (const Ref<Project>& Project : Projects)
+    {
+        std::vector<std::string> ExpiredOplogs;
+        {
+            RwLock::ExclusiveLockScope __(m_ProjectsLock);
+            Project->IterateOplogs(
+                [&Ctx, &Project, &ExpiredOplogs, &OplogCount](const RwLock::SharedLockScope& Lock, ProjectStore::Oplog& Oplog) {
+                    OplogCount++;
+                    if (Project->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime, Oplog))
+                    {
+                        ExpiredOplogs.push_back(Oplog.OplogId());
+                    }
+                });
+        }
+        std::filesystem::path ProjectPath = BasePathForProject(Project->Identifier);
+        ExpiredOplogCount += ExpiredOplogs.size();
+        if (Ctx.Settings.IsDeleteMode)
+        {
+            for (const std::string& OplogId : ExpiredOplogs)
+            {
+                std::filesystem::path OplogBasePath = ProjectPath / OplogId;
+                // Measure before deleting, the files are gone afterwards
+                uint64_t OplogSize = Oplog::TotalSize(OplogBasePath);
+                ZEN_DEBUG("gc project store '{}': garbage collected oplog '{}' in project '{}'. Removing storage on disk",
+                          m_ProjectBasePath,
+                          OplogId,
+                          Project->Identifier);
+                Project->DeleteOplog(OplogId);
+                Ctx.RemovedDiskSpace.fetch_add(OplogSize);
+            }
+            Ctx.DeletedItems.fetch_add(ExpiredOplogs.size());
+            Project->Flush();
+        }
+    }
+    ProjectCount = Projects.size();
+    Ctx.Items.fetch_add(ProjectCount + OplogCount);
+    ExpiredProjectCount = ExpiredProjects.size();
+
+    // Report expired items before any early exit so expired oplogs are counted even when
+    // no project as a whole has expired
+    Ctx.ExpiredItems.fetch_add(ExpiredOplogCount + ExpiredProjectCount);
+
+    if (ExpiredProjects.empty())
+    {
+        ZEN_DEBUG("gc project store '{}': no expired projects found", m_ProjectBasePath);
+        return;
+    }
+
+    if (!Ctx.Settings.IsDeleteMode)
+    {
+        return;
+    }
+
+    // Only projects that were actually removed from m_Projects count as deleted
+    size_t DeletedProjectCount = 0;
+    for (const Ref<Project>& Project : ExpiredProjects)
+    {
+        std::filesystem::path PathToRemove;
+        std::string           ProjectId = Project->Identifier;
+        {
+            {
+                // The project may have been accessed since the first scan, so check again
+                RwLock::SharedLockScope Lock(m_ProjectsLock);
+                if (!Project->IsExpired(Lock, Ctx.Settings.ProjectStoreExpireTime))
+                {
+                    ZEN_DEBUG("gc project store '{}': skipped garbage collect of project '{}'. Project no longer expired.",
+                              m_ProjectBasePath,
+                              ProjectId);
+                    continue;
+                }
+            }
+            RwLock::ExclusiveLockScope __(m_ProjectsLock);
+            bool                       Success = Project->PrepareForDelete(PathToRemove);
+            if (!Success)
+            {
+                ZEN_DEBUG("gc project store '{}': skipped garbage collect of project '{}'. Project folder is locked.",
+                          m_ProjectBasePath,
+                          ProjectId);
+                continue;
+            }
+            m_Projects.erase(ProjectId);
+            ++DeletedProjectCount;
+        }
+
+        ZEN_DEBUG("gc project store '{}': garbage collected project '{}'. Removing storage on disk", m_ProjectBasePath, ProjectId);
+        if (PathToRemove.empty())
+        {
+            continue;
+        }
+
+        DeleteDirectories(PathToRemove);
+    }
+    Ctx.DeletedItems.fetch_add(DeletedProjectCount);
+}
+
+/// Reference checker for a single oplog: collects the attachment hashes found in the oplog's ops
+/// and erases them from the hash set handed to RemoveUsedReferencesFromSet().
+///
+/// When PreCache is requested the attachments are gathered at construction time and the max op
+/// index at that moment is remembered. LockState() only rescans if the index has changed since.
+class ProjectStoreReferenceChecker : public GcReferenceChecker
+{
+public:
+    ProjectStoreReferenceChecker(ProjectStore::Oplog& Owner, bool PreCache) : m_Oplog(Owner)
+    {
+        if (!PreCache)
+        {
+            return;
+        }
+        RwLock::SharedLockScope ReadLock(m_Oplog.m_OplogLock);
+        CollectReferences();
+        // NOTE(review): GetMaxOpIndex() takes m_OplogLock in shared mode itself, so the lock is
+        // acquired a second time here - confirm RwLock tolerates recursive shared acquisition
+        m_PreCachedLsn = m_Oplog.GetMaxOpIndex();
+    }
+
+    virtual ~ProjectStoreReferenceChecker() {}
+
+    /// Takes the oplog lock in shared mode and keeps it for the lifetime of this checker, then
+    /// refreshes the reference set if the oplog's max op index differs from the pre-cached one.
+    virtual void LockState(GcCtx&) override
+    {
+        Stopwatch  Timer;
+        const auto _ = MakeGuard([&] {
+            ZEN_DEBUG("gc project oplog '{}': found {} references in {} from {}/{}",
+                      m_Oplog.m_BasePath,
+                      m_UncachedReferences.size(),
+                      NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
+                      m_Oplog.m_OuterProject->Identifier,
+                      m_Oplog.OplogId());
+        });
+
+        m_OplogLock = std::make_unique<RwLock::SharedLockScope>(m_Oplog.m_OplogLock);
+        // NOTE(review): same recursive shared acquisition of m_OplogLock as in the constructor
+        const bool CacheIsCurrent = (m_Oplog.GetMaxOpIndex() == m_PreCachedLsn);
+        if (CacheIsCurrent)
+        {
+            return;
+        }
+        // TODO: Maybe we could just check the added oplog entries - we might get a few extra references from obsolete entries
+        //       but I don't think that would be critical
+        m_UncachedReferences.clear();
+        CollectReferences();
+    }
+
+    /// Erases every hash collected from the oplog from IoCids.
+    virtual void RemoveUsedReferencesFromSet(GcCtx&, HashSet& IoCids) override
+    {
+        for (const IoHash& UsedHash : m_UncachedReferences)
+        {
+            IoCids.erase(UsedHash);
+        }
+    }
+
+    ProjectStore::Oplog&                     m_Oplog;
+    std::unique_ptr<RwLock::SharedLockScope> m_OplogLock;
+    HashSet                                  m_UncachedReferences;
+    int                                      m_PreCachedLsn = -1;
+
+private:
+    /// Walks every op in the oplog and inserts each attachment it references into m_UncachedReferences.
+    void CollectReferences()
+    {
+        m_Oplog.IterateOplog([&](CbObjectView Op) {
+            Op.IterateAttachments([&](CbFieldView Field) { m_UncachedReferences.insert(Field.AsAttachment()); });
+        });
+    }
+};
+
+/// Creates one ProjectStoreReferenceChecker per oplog that can be opened, across all projects
+/// known after DiscoverProjects().
+///
+/// An oplog whose last recorded access is more than five minutes old has its references
+/// pre-cached when the checker is constructed.
+///
+/// Ownership of the returned checkers passes to the caller. If anything throws while they are
+/// being built, every checker created so far is destroyed before the exception propagates.
+std::vector<GcReferenceChecker*>
+ProjectStore::CreateReferenceCheckers(GcCtx&)
+{
+    size_t ProjectCount = 0;
+    size_t OplogCount   = 0;
+
+    Stopwatch  Timer;
+    const auto _ = MakeGuard([&] {
+        ZEN_DEBUG("gc project store '{}': opened {} projects and {} oplogs in {}",
+                  m_ProjectBasePath,
+                  ProjectCount,
+                  OplogCount,
+                  NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+    });
+
+    DiscoverProjects();
+
+    // Snapshot the project list so the projects lock is not held while oplogs are opened
+    std::vector<Ref<ProjectStore::Project>> Projects;
+    {
+        RwLock::SharedLockScope Lock(m_ProjectsLock);
+        Projects.reserve(m_Projects.size());
+
+        for (auto& Kv : m_Projects)
+        {
+            Projects.push_back(Kv.second);
+        }
+    }
+    ProjectCount += Projects.size();
+
+    // Checkers are held by unique_ptr while being collected so that any exception (not only
+    // std::exception) releases them, including a failure to grow the vector itself
+    std::vector<std::unique_ptr<GcReferenceChecker>> OwnedCheckers;
+    for (const Ref<ProjectStore::Project>& Project : Projects)
+    {
+        const std::vector<std::string> OpLogs = Project->ScanForOplogs();
+        for (const std::string& OpLogId : OpLogs)
+        {
+            ProjectStore::Oplog* Oplog = Project->OpenOplog(OpLogId);
+            if (Oplog == nullptr)
+            {
+                // The oplog could not be opened, so there is nothing to build a checker on
+                ZEN_DEBUG("gc project store '{}': unable to open oplog '{}' in project '{}', skipping reference check",
+                          m_ProjectBasePath,
+                          OpLogId,
+                          Project->Identifier);
+                continue;
+            }
+            const GcClock::TimePoint Now         = GcClock::Now();
+            const bool               TryPreCache = Project->LastOplogAccessTime(OpLogId) < (Now - std::chrono::minutes(5));
+            OwnedCheckers.push_back(std::make_unique<ProjectStoreReferenceChecker>(*Oplog, TryPreCache));
+            ++OplogCount;
+        }
+    }
+
+    // Reserve up front: after this the raw pointers can be handed over without any throwing operation
+    std::vector<GcReferenceChecker*> Checkers;
+    Checkers.reserve(OwnedCheckers.size());
+    for (std::unique_ptr<GcReferenceChecker>& Owned : OwnedCheckers)
+    {
+        Checkers.push_back(Owned.release());
+    }
+
+    return Checkers;
+}
+
//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS