aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2023-05-09 15:11:10 +0200
committer GitHub <[email protected]> 2023-05-09 15:11:10 +0200
commit 2542797c56b84473395a877376b68fcc77687ea9 (patch)
tree 698ebb1e4e6fb33ba9b8be973f8a851b2ee46c83 /src/zenstore
parent Validate that entries points inside valid blocks at startup (#280) (diff)
download zen-2542797c56b84473395a877376b68fcc77687ea9.tar.xz
zen-2542797c56b84473395a877376b68fcc77687ea9.zip
Low disk space detector (#277)
* Feature: Disk writes are now blocked early and return an insufficient storage error if free disk space falls below the `--low-diskspace-threshold` value
* Never keep an entry in m_ChunkBlocks that points to a nullptr
Diffstat (limited to 'src/zenstore')
-rw-r--r-- src/zenstore/blockstore.cpp 20
-rw-r--r-- src/zenstore/gc.cpp 48
-rw-r--r-- src/zenstore/include/zenstore/gc.h 29
3 files changed, 83 insertions, 14 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp
index 05bc69fcb..378d9fd52 100644
--- a/src/zenstore/blockstore.cpp
+++ b/src/zenstore/blockstore.cpp
@@ -472,7 +472,10 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
WriteBlockTimeUs += ElapsedUs;
WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
});
- OldBlockFile = m_ChunkBlocks[BlockIndex];
+ if (auto It = m_ChunkBlocks.find(BlockIndex); It != m_ChunkBlocks.end())
+ {
+ OldBlockFile = It->second;
+ }
}
if (!OldBlockFile)
@@ -504,8 +507,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
});
if (OldBlockFile)
{
- m_ChunkBlocks[BlockIndex] = nullptr;
ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex);
+ ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile);
+ m_ChunkBlocks.erase(BlockIndex);
m_TotalSize.fetch_sub(OldBlockFile->FileSize(), std::memory_order::relaxed);
OldBlockFile->MarkAsDeleteOnClose();
}
@@ -582,7 +586,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
ReadBlockTimeUs += ElapsedUs;
ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
});
+ ZEN_ASSERT(m_ChunkBlocks[NextBlockIndex] == NewBlockFile);
m_ChunkBlocks.erase(NextBlockIndex);
+ NewBlockFile->MarkAsDeleteOnClose();
return;
}
@@ -627,8 +633,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot,
ReadBlockTimeUs += ElapsedUs;
ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
});
- m_ChunkBlocks[BlockIndex] = nullptr;
ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex);
+ ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile);
+ m_ChunkBlocks.erase(BlockIndex);
m_TotalSize.fetch_sub(OldBlockFile->FileSize(), std::memory_order::relaxed);
OldBlockFile->MarkAsDeleteOnClose();
}
@@ -704,8 +711,8 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
size_t ChunkIndex = LocationIndexes[LocationIndexOffset];
const BlockStoreLocation& FirstLocation = ChunkLocations[ChunkIndex];
- const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[FirstLocation.BlockIndex];
- if (!BlockFile)
+ auto FindBlockIt = m_ChunkBlocks.find(FirstLocation.BlockIndex);
+ if (FindBlockIt == m_ChunkBlocks.end())
{
while (ChunkLocations[ChunkIndex].BlockIndex == FirstLocation.BlockIndex)
{
@@ -719,6 +726,9 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
}
continue;
}
+ const Ref<BlockStoreFile>& BlockFile = FindBlockIt->second;
+ ZEN_ASSERT(BlockFile);
+
size_t BlockSize = BlockFile->FileSize();
size_t RangeCount = GetNextRange(LocationIndexOffset);
if (RangeCount > 0)
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 370c3c965..f9888722b 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -559,10 +559,12 @@ DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick
GcScheduler::GcScheduler(GcManager& GcManager) : m_Log(logging::Get("gc")), m_GcManager(GcManager)
{
+ m_GcManager.SetDiskWriteBlocker(this);
}
GcScheduler::~GcScheduler()
{
+ m_GcManager.SetDiskWriteBlocker(nullptr);
Shutdown();
}
@@ -573,6 +575,18 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
m_Config = Config;
+ std::error_code Ec;
+ DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec);
+ if (Ec)
+ {
+ m_AreDiskWritesBlocked.store(true);
+ ZEN_WARN("get disk space info FAILED, blocking disk writes, reason: '{}'", Ec.message());
+ }
+ else
+ {
+ CheckDiskSpace(Space);
+ }
+
if (m_Config.Interval.count() && m_Config.Interval < m_Config.MonitorInterval)
{
m_Config.Interval = m_Config.MonitorInterval;
@@ -580,7 +594,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
std::filesystem::create_directories(Config.RootDirectory);
- std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+ Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
if (Ec)
{
ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'",
@@ -664,6 +678,29 @@ GcScheduler::Trigger(const GcScheduler::TriggerParams& Params)
}
void
+GcScheduler::CheckDiskSpace(const DiskSpace& Space)
+{
+ bool AreDiskWritesBlocked = m_AreDiskWritesBlocked;
+ bool IsLowOnDiskSpace = (m_Config.MinimumFreeDiskSpaceToAllowWrites) != 0 && (Space.Free < m_Config.MinimumFreeDiskSpaceToAllowWrites);
+ if (IsLowOnDiskSpace != AreDiskWritesBlocked)
+ {
+ m_AreDiskWritesBlocked.store(IsLowOnDiskSpace);
+ if (IsLowOnDiskSpace)
+ {
+ ZEN_WARN("Writing to disk is blocked, free disk space: {}, minimum required {}",
+ NiceBytes(Space.Free),
+ NiceBytes(m_Config.MinimumFreeDiskSpaceToAllowWrites));
+ }
+ else
+ {
+ ZEN_INFO("Writing to disk is unblocked, free disk space: {}, minimum required {}",
+ NiceBytes(Space.Free),
+ NiceBytes(m_Config.MinimumFreeDiskSpaceToAllowWrites));
+ }
+ }
+}
+
+void
GcScheduler::SchedulerThread()
{
std::chrono::seconds WaitTime{0};
@@ -716,16 +753,21 @@ GcScheduler::SchedulerThread()
GcClock::TimePoint ExpireTime = MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
- std::error_code Ec;
const GcStorageSize TotalSize = m_GcManager.TotalStorageSize();
if (Timeout && Status() == GcSchedulerStatus::kIdle)
{
- DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec);
+ std::error_code Ec;
+ DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec);
if (Ec)
{
+ m_AreDiskWritesBlocked.store(true);
ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message());
}
+ else
+ {
+ CheckDiskSpace(Space);
+ }
const int64_t PressureGraphLength = 30;
const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval;
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index e0354b331..fe9857e6a 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -27,6 +27,7 @@ class HashKeySet;
class GcManager;
class CidStore;
struct IoHash;
+struct DiskSpace;
/** GC clock
*/
@@ -121,6 +122,14 @@ private:
GcManager& m_Gc;
};
+/** Interface for querying if we are running low on disk space, used to deny put/writes to disk
+ */
+class DiskWriteBlocker
+{
+public:
+ virtual bool AreDiskWritesAllowed() const = 0;
+};
+
/** GC orchestrator
*/
class GcManager
@@ -139,6 +148,9 @@ public:
GcStorageSize TotalStorageSize() const;
+ const DiskWriteBlocker* GetDiskWriteBlocker() { return m_DiskWriteBlocker; }
+ void SetDiskWriteBlocker(const DiskWriteBlocker* Monitor) { m_DiskWriteBlocker = Monitor; }
+
#if ZEN_USE_REF_TRACKING
void OnNewCidReferences(std::span<IoHash> Hashes);
void OnCommittedCidReferences(std::span<IoHash> Hashes);
@@ -151,7 +163,8 @@ private:
mutable RwLock m_Lock;
std::vector<GcContributor*> m_GcContribs;
std::vector<GcStorage*> m_GcStorage;
- CidStore* m_CidStore = nullptr;
+ CidStore* m_CidStore = nullptr;
+ const DiskWriteBlocker* m_DiskWriteBlocker = nullptr;
};
enum class GcSchedulerStatus : uint32_t
@@ -167,10 +180,11 @@ struct GcSchedulerConfig
std::chrono::seconds MonitorInterval{30};
std::chrono::seconds Interval{};
std::chrono::seconds MaxCacheDuration{86400};
- bool CollectSmallObjects = true;
- bool Enabled = true;
- uint64_t DiskReserveSize = 1ul << 28;
- uint64_t DiskSizeSoftLimit = 0;
+ bool CollectSmallObjects = true;
+ bool Enabled = true;
+ uint64_t DiskReserveSize = 1ul << 28;
+ uint64_t DiskSizeSoftLimit = 0;
+ uint64_t MinimumFreeDiskSpaceToAllowWrites = 1ul << 28;
};
class DiskUsageWindow
@@ -196,7 +210,7 @@ public:
/**
* GC scheduler
*/
-class GcScheduler
+class GcScheduler : private DiskWriteBlocker
{
public:
GcScheduler(GcManager& GcManager);
@@ -220,6 +234,8 @@ private:
void CollectGarbage(const GcClock::TimePoint& ExpireTime, bool Delete, bool CollectSmallObjects);
GcClock::TimePoint NextGcTime(GcClock::TimePoint CurrentTime);
spdlog::logger& Log() { return m_Log; }
+ virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); }
+ void CheckDiskSpace(const DiskSpace& Space);
spdlog::logger& m_Log;
GcManager& m_GcManager;
@@ -232,6 +248,7 @@ private:
std::mutex m_GcMutex;
std::condition_variable m_GcSignal;
std::optional<TriggerParams> m_TriggerParams;
+ std::atomic_bool m_AreDiskWritesBlocked = false;
TCasLogFile<DiskUsageWindow::DiskUsageEntry> m_DiskUsageLog;
DiskUsageWindow m_DiskUsageWindow;