diff options
Diffstat (limited to 'src/zenserver/hub/hub.h')
| -rw-r--r-- | src/zenserver/hub/hub.h | 43 |
1 files changed, 34 insertions, 9 deletions
diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h index c343b19e2..9895f7068 100644 --- a/src/zenserver/hub/hub.h +++ b/src/zenserver/hub/hub.h @@ -6,6 +6,7 @@ #include "resourcemetrics.h" #include "storageserverinstance.h" +#include <zencore/filesystem.h> #include <zencore/system.h> #include <zenutil/zenserverprocess.h> @@ -68,6 +69,8 @@ public: std::string HydrationTargetSpecification; WatchDogConfiguration WatchDog; + + ResourceMetrics ResourceLimits; }; typedef std::function< @@ -86,7 +89,7 @@ public: struct InstanceInfo { HubInstanceState State = HubInstanceState::Unprovisioned; - std::chrono::system_clock::time_point ProvisionTime; + std::chrono::system_clock::time_point StateChangeTime; ProcessMetrics Metrics; uint16_t Port = 0; }; @@ -160,6 +163,8 @@ public: int GetMaxInstanceCount() const { return m_MaxInstanceCount.load(); } + void GetMachineMetrics(SystemMetrics& OutSystemMetrict, DiskSpace& OutDiskSpace) const; + const Configuration& GetConfig() const { return m_Config; } #if ZEN_WITH_TESTS @@ -181,9 +186,25 @@ private: #if ZEN_PLATFORM_WINDOWS JobObject m_JobObject; #endif - RwLock m_Lock; + mutable RwLock m_Lock; std::unordered_map<std::string, size_t> m_InstanceLookup; + // Mirrors ProcessMetrics with atomic fields, enabling lock-free reads alongside watchdog writes. + struct AtomicProcessMetrics + { + std::atomic<uint64_t> MemoryBytes = 0; + std::atomic<uint64_t> KernelTimeMs = 0; + std::atomic<uint64_t> UserTimeMs = 0; + std::atomic<uint64_t> WorkingSetSize = 0; + std::atomic<uint64_t> PeakWorkingSetSize = 0; + std::atomic<uint64_t> PagefileUsage = 0; + std::atomic<uint64_t> PeakPagefileUsage = 0; + + ProcessMetrics Load() const; + void Store(const ProcessMetrics& Metrics); + void Reset(); + }; + struct ActiveInstance { // Invariant: Instance == nullptr if and only if State == Unprovisioned. @@ -192,11 +213,16 @@ private: // without holding the hub lock. std::unique_ptr<StorageServerInstance> Instance; std::atomic<HubInstanceState> State = HubInstanceState::Unprovisioned; - // TODO: We should move current metrics here (from StorageServerInstance) - // Read and updated by WatchDog, updates to State triggers a reset of both + // Process metrics - written by WatchDog (inside instance shared lock), read lock-free. + AtomicProcessMetrics ProcessMetrics; + + // Activity tracking - written by WatchDog, reset on every state transition. std::atomic<uint64_t> LastKnownActivitySum = 0; std::atomic<std::chrono::system_clock::time_point> LastActivityTime = std::chrono::system_clock::time_point::min(); + + // Set in UpdateInstanceStateLocked on every state transition; read lock-free by Find/EnumerateModules. + std::atomic<std::chrono::system_clock::time_point> StateChangeTime = std::chrono::system_clock::time_point::min(); }; // UpdateInstanceState is overloaded to accept a locked instance pointer (exclusive or shared) or the hub exclusive @@ -226,21 +252,20 @@ private: std::vector<ActiveInstance> m_ActiveInstances; std::deque<size_t> m_FreeActiveInstanceIndexes; - ResourceMetrics m_ResourceLimits; - SystemMetrics m_HostMetrics; + SystemMetrics m_SystemMetrics; + DiskSpace m_DiskSpace; std::atomic<int> m_MaxInstanceCount = 0; std::thread m_WatchDog; Event m_WatchDogEvent; void WatchDog(); + void UpdateMachineMetrics(); bool CheckInstanceStatus(HttpClient& ActivityHttpClient, StorageServerInstance::SharedLockedPtr&& LockedInstance, size_t ActiveInstanceIndex); void AttemptRecoverInstance(std::string_view ModuleId); - void UpdateStats(); - void UpdateCapacityMetrics(); - bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason); + bool CanProvisionInstanceLocked(std::string_view ModuleId, std::string& OutReason); uint16_t GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const; Response InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveInstance& Instance)>&& DeprovisionGate); |