diff options
| author | Dan Engelbrecht <[email protected]> | 2023-05-09 15:11:10 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-05-09 15:11:10 +0200 |
| commit | 2542797c56b84473395a877376b68fcc77687ea9 (patch) | |
| tree | 698ebb1e4e6fb33ba9b8be973f8a851b2ee46c83 /src/zenstore | |
| parent | Validate that entries points inside valid blocks at startup (#280) (diff) | |
| download | zen-2542797c56b84473395a877376b68fcc77687ea9.tar.xz zen-2542797c56b84473395a877376b68fcc77687ea9.zip | |
Low disk space detector (#277)
* Feature: Disk writes are now rejected early with an insufficient storage error if free disk space falls below the `--low-diskspace-threshold` value
* Never keep an entry in m_ChunkBlocks that points to a nullptr
Diffstat (limited to 'src/zenstore')
| -rw-r--r-- | src/zenstore/blockstore.cpp | 20 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 48 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 29 |
3 files changed, 83 insertions, 14 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 05bc69fcb..378d9fd52 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -472,7 +472,10 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); }); - OldBlockFile = m_ChunkBlocks[BlockIndex]; + if (auto It = m_ChunkBlocks.find(BlockIndex); It != m_ChunkBlocks.end()) + { + OldBlockFile = It->second; + } } if (!OldBlockFile) @@ -504,8 +507,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, }); if (OldBlockFile) { - m_ChunkBlocks[BlockIndex] = nullptr; ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); + ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile); + m_ChunkBlocks.erase(BlockIndex); m_TotalSize.fetch_sub(OldBlockFile->FileSize(), std::memory_order::relaxed); OldBlockFile->MarkAsDeleteOnClose(); } @@ -582,7 +586,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); + ZEN_ASSERT(m_ChunkBlocks[NextBlockIndex] == NewBlockFile); m_ChunkBlocks.erase(NextBlockIndex); + NewBlockFile->MarkAsDeleteOnClose(); return; } @@ -627,8 +633,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); - m_ChunkBlocks[BlockIndex] = nullptr; ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); + ZEN_ASSERT(m_ChunkBlocks[BlockIndex] == OldBlockFile); + m_ChunkBlocks.erase(BlockIndex); m_TotalSize.fetch_sub(OldBlockFile->FileSize(), std::memory_order::relaxed); OldBlockFile->MarkAsDeleteOnClose(); } @@ -704,8 +711,8 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations, size_t ChunkIndex = 
LocationIndexes[LocationIndexOffset]; const BlockStoreLocation& FirstLocation = ChunkLocations[ChunkIndex]; - const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[FirstLocation.BlockIndex]; - if (!BlockFile) + auto FindBlockIt = m_ChunkBlocks.find(FirstLocation.BlockIndex); + if (FindBlockIt == m_ChunkBlocks.end()) { while (ChunkLocations[ChunkIndex].BlockIndex == FirstLocation.BlockIndex) { @@ -719,6 +726,9 @@ BlockStore::IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations, } continue; } + const Ref<BlockStoreFile>& BlockFile = FindBlockIt->second; + ZEN_ASSERT(BlockFile); + size_t BlockSize = BlockFile->FileSize(); size_t RangeCount = GetNextRange(LocationIndexOffset); if (RangeCount > 0) diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 370c3c965..f9888722b 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -559,10 +559,12 @@ DiskUsageWindow::FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick GcScheduler::GcScheduler(GcManager& GcManager) : m_Log(logging::Get("gc")), m_GcManager(GcManager) { + m_GcManager.SetDiskWriteBlocker(this); } GcScheduler::~GcScheduler() { + m_GcManager.SetDiskWriteBlocker(nullptr); Shutdown(); } @@ -573,6 +575,18 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config) m_Config = Config; + std::error_code Ec; + DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); + if (Ec) + { + m_AreDiskWritesBlocked.store(true); + ZEN_WARN("get disk space info FAILED, blocking disk writes, reason: '{}'", Ec.message()); + } + else + { + CheckDiskSpace(Space); + } + if (m_Config.Interval.count() && m_Config.Interval < m_Config.MonitorInterval) { m_Config.Interval = m_Config.MonitorInterval; @@ -580,7 +594,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config) std::filesystem::create_directories(Config.RootDirectory); - std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); + Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", 
m_Config.DiskReserveSize); if (Ec) { ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'", @@ -664,6 +678,29 @@ GcScheduler::Trigger(const GcScheduler::TriggerParams& Params) } void +GcScheduler::CheckDiskSpace(const DiskSpace& Space) +{ + bool AreDiskWritesBlocked = m_AreDiskWritesBlocked; + bool IsLowOnDiskSpace = (m_Config.MinimumFreeDiskSpaceToAllowWrites) != 0 && (Space.Free < m_Config.MinimumFreeDiskSpaceToAllowWrites); + if (IsLowOnDiskSpace != AreDiskWritesBlocked) + { + m_AreDiskWritesBlocked.store(IsLowOnDiskSpace); + if (IsLowOnDiskSpace) + { + ZEN_WARN("Writing to disk is blocked, free disk space: {}, minimum required {}", + NiceBytes(Space.Free), + NiceBytes(m_Config.MinimumFreeDiskSpaceToAllowWrites)); + } + else + { + ZEN_INFO("Writing to disk is unblocked, free disk space: {}, minimum required {}", + NiceBytes(Space.Free), + NiceBytes(m_Config.MinimumFreeDiskSpaceToAllowWrites)); + } + } +} + +void GcScheduler::SchedulerThread() { std::chrono::seconds WaitTime{0}; @@ -716,16 +753,21 @@ GcScheduler::SchedulerThread() GcClock::TimePoint ExpireTime = MaxCacheDuration == GcClock::Duration::max() ? 
GcClock::TimePoint::min() : Now - MaxCacheDuration; - std::error_code Ec; const GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); if (Timeout && Status() == GcSchedulerStatus::kIdle) { - DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); + std::error_code Ec; + DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Ec); if (Ec) { + m_AreDiskWritesBlocked.store(true); ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message()); } + else + { + CheckDiskSpace(Space); + } const int64_t PressureGraphLength = 30; const std::chrono::duration LoadGraphTime = PressureGraphLength * m_Config.MonitorInterval; diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index e0354b331..fe9857e6a 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -27,6 +27,7 @@ class HashKeySet; class GcManager; class CidStore; struct IoHash; +struct DiskSpace; /** GC clock */ @@ -121,6 +122,14 @@ private: GcManager& m_Gc; }; +/** Interface for querying if we are running low on disk space, used to deny put/writes to disk + */ +class DiskWriteBlocker +{ +public: + virtual bool AreDiskWritesAllowed() const = 0; +}; + /** GC orchestrator */ class GcManager @@ -139,6 +148,9 @@ public: GcStorageSize TotalStorageSize() const; + const DiskWriteBlocker* GetDiskWriteBlocker() { return m_DiskWriteBlocker; } + void SetDiskWriteBlocker(const DiskWriteBlocker* Monitor) { m_DiskWriteBlocker = Monitor; } + #if ZEN_USE_REF_TRACKING void OnNewCidReferences(std::span<IoHash> Hashes); void OnCommittedCidReferences(std::span<IoHash> Hashes); @@ -151,7 +163,8 @@ private: mutable RwLock m_Lock; std::vector<GcContributor*> m_GcContribs; std::vector<GcStorage*> m_GcStorage; - CidStore* m_CidStore = nullptr; + CidStore* m_CidStore = nullptr; + const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; }; enum class GcSchedulerStatus : uint32_t @@ -167,10 +180,11 @@ struct GcSchedulerConfig std::chrono::seconds MonitorInterval{30}; 
std::chrono::seconds Interval{}; std::chrono::seconds MaxCacheDuration{86400}; - bool CollectSmallObjects = true; - bool Enabled = true; - uint64_t DiskReserveSize = 1ul << 28; - uint64_t DiskSizeSoftLimit = 0; + bool CollectSmallObjects = true; + bool Enabled = true; + uint64_t DiskReserveSize = 1ul << 28; + uint64_t DiskSizeSoftLimit = 0; + uint64_t MinimumFreeDiskSpaceToAllowWrites = 1ul << 28; }; class DiskUsageWindow @@ -196,7 +210,7 @@ public: /** * GC scheduler */ -class GcScheduler +class GcScheduler : private DiskWriteBlocker { public: GcScheduler(GcManager& GcManager); @@ -220,6 +234,8 @@ private: void CollectGarbage(const GcClock::TimePoint& ExpireTime, bool Delete, bool CollectSmallObjects); GcClock::TimePoint NextGcTime(GcClock::TimePoint CurrentTime); spdlog::logger& Log() { return m_Log; } + virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); } + void CheckDiskSpace(const DiskSpace& Space); spdlog::logger& m_Log; GcManager& m_GcManager; @@ -232,6 +248,7 @@ private: std::mutex m_GcMutex; std::condition_variable m_GcSignal; std::optional<TriggerParams> m_TriggerParams; + std::atomic_bool m_AreDiskWritesBlocked = false; TCasLogFile<DiskUsageWindow::DiskUsageEntry> m_DiskUsageLog; DiskUsageWindow m_DiskUsageWindow; |