aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2023-05-16 12:23:42 +0200
committer GitHub <[email protected]> 2023-05-16 12:23:42 +0200
commit a4ff07d68eeae66c008bfac28cb87c94a92cf257 (patch)
tree 630940f228c35d29fac31ced2ba7f9fd16fca1c8 /src/zenstore
parent Added CHANGELOG.md descriptions for recent changes (diff)
download zen-a4ff07d68eeae66c008bfac28cb87c94a92cf257.tar.xz
zen-a4ff07d68eeae66c008bfac28cb87c94a92cf257.zip
Add `--gc-projectstore-duration-seconds` option (#281)
* Add `--gc-projectstore-duration-seconds` option
* Cleanup lua gc options parsing
* Remove dead configuration values
* changelog
Diffstat (limited to 'src/zenstore')
-rw-r--r-- src/zenstore/compactcas.cpp 29
-rw-r--r-- src/zenstore/filecas.cpp 4
-rw-r--r-- src/zenstore/gc.cpp 77
-rw-r--r-- src/zenstore/include/zenstore/gc.h 18
4 files changed, 83 insertions, 45 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index e4c2c2ecf..0f6f011e1 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -645,6 +645,13 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint
uint64_t
CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntryCount)
{
+ if (!TCasLogFile<CasDiskIndexEntry>::IsValid(LogPath))
+ {
+ ZEN_WARN("removing invalid cas log at '{}'", LogPath);
+ std::filesystem::remove(LogPath);
+ return 0;
+ }
+
size_t LogEntryCount = 0;
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -972,7 +979,7 @@ TEST_CASE("compactcas.gc.basic")
CHECK(InsertResult.New);
Cas.Flush();
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
Cas.CollectGarbage(GcCtx);
@@ -1002,7 +1009,7 @@ TEST_CASE("compactcas.gc.removefile")
CasContainerStrategy Cas(Gc);
Cas.Initialize(TempDir.Path(), "cb", 65536, 1 << 4, false);
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
Cas.CollectGarbage(GcCtx);
@@ -1057,7 +1064,7 @@ TEST_CASE("compactcas.gc.compact")
// Keep first and last
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
@@ -1092,7 +1099,7 @@ TEST_CASE("compactcas.gc.compact")
// Keep last
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[8]);
@@ -1124,7 +1131,7 @@ TEST_CASE("compactcas.gc.compact")
// Keep mixed
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[1]);
@@ -1159,7 +1166,7 @@ TEST_CASE("compactcas.gc.compact")
// Keep multiple at end
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[6]);
@@ -1194,7 +1201,7 @@ TEST_CASE("compactcas.gc.compact")
// Keep every other
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[0]);
@@ -1273,7 +1280,7 @@ TEST_CASE("compactcas.gc.deleteblockonopen")
// GC every other block
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
for (size_t i = 0; i < 20; i += 2)
@@ -1340,7 +1347,7 @@ TEST_CASE("compactcas.gc.handleopeniobuffer")
Cas.Flush();
// GC everything
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
Cas.CollectGarbage(GcCtx);
@@ -1496,7 +1503,7 @@ TEST_CASE("compactcas.threadedinsert")
C++;
}
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
GcCtx.AddRetainedCids(KeepHashes);
Cas.CollectGarbage(GcCtx);
@@ -1537,7 +1544,7 @@ TEST_CASE("compactcas.threadedinsert")
C++;
}
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
GcCtx.AddRetainedCids(KeepHashes);
Cas.CollectGarbage(GcCtx);
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 2b64bd202..88b847c51 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -1417,7 +1417,7 @@ TEST_CASE("cas.file.gc")
{
InsertChunks();
- GcContext Ctx(GcClock::Now() - std::chrono::hours(24));
+ GcContext Ctx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
FileCas.CollectGarbage(Ctx);
for (const IoHash& Key : Keys)
@@ -1433,7 +1433,7 @@ TEST_CASE("cas.file.gc")
{
InsertChunks();
- GcContext Ctx(GcClock::Now() - std::chrono::hours(24));
+ GcContext Ctx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
for (const IoHash& Key : Keys)
{
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index a7c757877..2d7e0e02f 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -189,16 +189,19 @@ struct GcContext::GcState
CacheKeyContexts m_ExpiredCacheKeys;
HashKeySet m_RetainedCids;
HashKeySet m_DeletedCids;
- GcClock::TimePoint m_ExpireTime;
+ GcClock::TimePoint m_CacheExpireTime;
+ GcClock::TimePoint m_ProjectStoreExpireTime;
bool m_DeletionMode = true;
bool m_CollectSmallObjects = false;
std::filesystem::path DiskReservePath;
};
-GcContext::GcContext(const GcClock::TimePoint& ExpireTime) : m_State(std::make_unique<GcState>())
+GcContext::GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime)
+: m_State(std::make_unique<GcState>())
{
- m_State->m_ExpireTime = ExpireTime;
+ m_State->m_CacheExpireTime = CacheExpireTime;
+ m_State->m_ProjectStoreExpireTime = ProjectStoreExpireTime;
}
GcContext::~GcContext()
@@ -278,9 +281,15 @@ GcContext::CollectSmallObjects(bool NewState)
}
GcClock::TimePoint
-GcContext::ExpireTime() const
+GcContext::CacheExpireTime() const
{
- return m_State->m_ExpireTime;
+ return m_State->m_CacheExpireTime;
+}
+
+GcClock::TimePoint
+GcContext::ProjectStoreExpireTime() const
+{
+ return m_State->m_ProjectStoreExpireTime;
}
void
@@ -710,11 +719,12 @@ GcScheduler::SchedulerThread()
continue;
}
- bool Delete = true;
- bool CollectSmallObjects = m_Config.CollectSmallObjects;
- std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration;
- uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit;
- GcClock::TimePoint Now = GcClock::Now();
+ bool Delete = true;
+ bool CollectSmallObjects = m_Config.CollectSmallObjects;
+ std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration;
+ std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration;
+ uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit;
+ GcClock::TimePoint Now = GcClock::Now();
if (m_TriggerGcParams)
{
const auto TriggerParams = m_TriggerGcParams.value();
@@ -725,13 +735,20 @@ GcScheduler::SchedulerThread()
{
MaxCacheDuration = TriggerParams.MaxCacheDuration;
}
+ if (TriggerParams.MaxProjectStoreDuration != std::chrono::seconds::max())
+ {
+ MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration;
+ }
if (TriggerParams.DiskSizeSoftLimit != 0)
{
DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit;
}
}
- GcClock::TimePoint ExpireTime = MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
+ GcClock::TimePoint CacheExpireTime =
+ MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
+ GcClock::TimePoint ProjectStoreExpireTime =
+ MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration;
const GcStorageSize TotalSize = m_GcManager.TotalStorageSize();
@@ -786,9 +803,13 @@ GcScheduler::SchedulerThread()
std::unique_lock Lock(m_GcMutex);
GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count());
GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick);
- if (SizeBasedExpireTime > ExpireTime)
+ if (SizeBasedExpireTime > CacheExpireTime)
+ {
+ CacheExpireTime = SizeBasedExpireTime;
+ }
+ if (SizeBasedExpireTime > ProjectStoreExpireTime)
{
- ExpireTime = SizeBasedExpireTime;
+ ProjectStoreExpireTime = SizeBasedExpireTime;
}
}
@@ -832,7 +853,7 @@ GcScheduler::SchedulerThread()
}
}
- CollectGarbage(ExpireTime, Delete, CollectSmallObjects);
+ CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, Delete, CollectSmallObjects);
uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle)))
@@ -859,17 +880,20 @@ GcScheduler::NextGcTime(GcClock::TimePoint CurrentTime)
}
void
-GcScheduler::CollectGarbage(const GcClock::TimePoint& ExpireTime, bool Delete, bool CollectSmallObjects)
+GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
+ const GcClock::TimePoint& ProjectStoreExpireTime,
+ bool Delete,
+ bool CollectSmallObjects)
{
- GcContext GcCtx(ExpireTime);
+ GcContext GcCtx(CacheExpireTime, ProjectStoreExpireTime);
GcCtx.SetDeletionMode(Delete);
GcCtx.CollectSmallObjects(CollectSmallObjects);
- // GcCtx.MaxCacheDuration(MaxCacheDuration);
GcCtx.DiskReservePath(m_Config.RootDirectory / "reserve.gc");
- ZEN_INFO("garbage collection STARTING, small objects gc {}, cutoff time {}",
+ ZEN_INFO("garbage collection STARTING, small objects gc {}, cache cutoff time {}, project store cutoff time {}",
GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv,
- ExpireTime);
+ CacheExpireTime,
+ ProjectStoreExpireTime);
{
Stopwatch Timer;
const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
@@ -878,9 +902,10 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& ExpireTime, bool Delete, b
if (Delete)
{
- m_LastGcExpireTime = ExpireTime;
+ GcClock::TimePoint KeepRangeStart = Min(CacheExpireTime, ProjectStoreExpireTime);
+ m_LastGcExpireTime = KeepRangeStart;
std::unique_lock Lock(m_GcMutex);
- m_DiskUsageWindow.KeepRange(ExpireTime.time_since_epoch().count(), GcClock::Duration::max().count());
+ m_DiskUsageWindow.KeepRange(KeepRangeStart.time_since_epoch().count(), GcClock::Duration::max().count());
}
m_LastGcTime = GcClock::Now();
@@ -953,7 +978,7 @@ TEST_CASE("gc.basic")
const auto InsertResult = CidStore.AddChunk(CompressedChunk.GetCompressed().Flatten().AsIoBuffer(), CompressedChunk.DecodeRawHash());
CHECK(InsertResult.New);
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
CidStore.Flush();
@@ -1012,7 +1037,7 @@ TEST_CASE("gc.full")
// Keep first and last
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
@@ -1047,7 +1072,7 @@ TEST_CASE("gc.full")
// Keep last
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[8]);
@@ -1079,7 +1104,7 @@ TEST_CASE("gc.full")
// Keep mixed
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[1]);
@@ -1114,7 +1139,7 @@ TEST_CASE("gc.full")
// Keep multiple at end
{
- GcContext GcCtx(GcClock::Now() - std::chrono::hours(24));
+ GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
std::vector<IoHash> KeepChunks;
KeepChunks.push_back(ChunkHashes[6]);
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index 4c709b8a2..881936d0f 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -48,7 +48,7 @@ public:
class GcContext
{
public:
- GcContext(const GcClock::TimePoint& ExpireTime);
+ GcContext(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime);
~GcContext();
void AddRetainedCids(std::span<const IoHash> Cid);
@@ -70,7 +70,8 @@ public:
bool CollectSmallObjects() const;
void CollectSmallObjects(bool NewState);
- GcClock::TimePoint ExpireTime() const;
+ GcClock::TimePoint CacheExpireTime() const;
+ GcClock::TimePoint ProjectStoreExpireTime() const;
void DiskReservePath(const std::filesystem::path& Path);
uint64_t ClaimGCReserve();
@@ -174,6 +175,7 @@ struct GcSchedulerConfig
std::chrono::seconds MonitorInterval{30};
std::chrono::seconds Interval{};
std::chrono::seconds MaxCacheDuration{86400};
+ std::chrono::seconds MaxProjectStoreDuration{604800};
bool CollectSmallObjects = true;
bool Enabled = true;
uint64_t DiskReserveSize = 1ul << 28;
@@ -216,16 +218,20 @@ public:
struct TriggerGcParams
{
- bool CollectSmallObjects = false;
- std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max();
- uint64_t DiskSizeSoftLimit = 0;
+ bool CollectSmallObjects = false;
+ std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max();
+ std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max();
+ uint64_t DiskSizeSoftLimit = 0;
};
bool TriggerGc(const TriggerGcParams& Params);
private:
void SchedulerThread();
- void CollectGarbage(const GcClock::TimePoint& ExpireTime, bool Delete, bool CollectSmallObjects);
+ void CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
+ const GcClock::TimePoint& ProjectStoreExpireTime,
+ bool Delete,
+ bool CollectSmallObjects);
GcClock::TimePoint NextGcTime(GcClock::TimePoint CurrentTime);
spdlog::logger& Log() { return m_Log; }
virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); }