about | summary | refs | log | tree | commit | diff
path: root/src/zenserver/hub/hub.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenserver/hub/hub.h')
-rw-r--r-- src/zenserver/hub/hub.h 43
1 files changed, 34 insertions, 9 deletions
diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h
index c343b19e2..9895f7068 100644
--- a/src/zenserver/hub/hub.h
+++ b/src/zenserver/hub/hub.h
@@ -6,6 +6,7 @@
#include "resourcemetrics.h"
#include "storageserverinstance.h"
+#include <zencore/filesystem.h>
#include <zencore/system.h>
#include <zenutil/zenserverprocess.h>
@@ -68,6 +69,8 @@ public:
std::string HydrationTargetSpecification;
WatchDogConfiguration WatchDog;
+
+ ResourceMetrics ResourceLimits;
};
typedef std::function<
@@ -86,7 +89,7 @@ public:
struct InstanceInfo
{
HubInstanceState State = HubInstanceState::Unprovisioned;
- std::chrono::system_clock::time_point ProvisionTime;
+ std::chrono::system_clock::time_point StateChangeTime;  // NOTE(review): presumably a copy of ActiveInstance::StateChangeTime (time of the latest state transition, not only provisioning) - confirm in Find/EnumerateModules
ProcessMetrics Metrics;
uint16_t Port = 0;
};
@@ -160,6 +163,8 @@ public:
int GetMaxInstanceCount() const { return m_MaxInstanceCount.load(); }
+ void GetMachineMetrics(SystemMetrics& OutSystemMetrics, DiskSpace& OutDiskSpace) const;  // Reports machine-level metrics through both out-parameters; presumably copies the cached m_SystemMetrics / m_DiskSpace - confirm in hub.cpp
+
const Configuration& GetConfig() const { return m_Config; }
#if ZEN_WITH_TESTS
@@ -181,9 +186,25 @@ private:
#if ZEN_PLATFORM_WINDOWS
JobObject m_JobObject;
#endif
- RwLock m_Lock;
+ mutable RwLock m_Lock;
std::unordered_map<std::string, size_t> m_InstanceLookup;
+ // Mirrors ProcessMetrics with atomic fields, enabling lock-free reads alongside watchdog writes.
+ struct AtomicProcessMetrics
+ {
+ std::atomic<uint64_t> MemoryBytes = 0;  // every counter below starts at zero; names presumably match the ProcessMetrics members one-to-one - confirm against ProcessMetrics
+ std::atomic<uint64_t> KernelTimeMs = 0;
+ std::atomic<uint64_t> UserTimeMs = 0;
+ std::atomic<uint64_t> WorkingSetSize = 0;
+ std::atomic<uint64_t> PeakWorkingSetSize = 0;
+ std::atomic<uint64_t> PagefileUsage = 0;
+ std::atomic<uint64_t> PeakPagefileUsage = 0;
+
+ ProcessMetrics Load() const;  // returns a plain ProcessMetrics by value; NOTE(review): presumably read field by field, so the result may mix values from two Store() calls - confirm in hub.cpp
+ void Store(const ProcessMetrics& Metrics);  // presumably copies each field of Metrics into the matching atomic - confirm in hub.cpp
+ void Reset();  // presumably sets every counter back to zero - confirm in hub.cpp
+ };
+
struct ActiveInstance
{
// Invariant: Instance == nullptr if and only if State == Unprovisioned.
@@ -192,11 +213,16 @@ private:
// without holding the hub lock.
std::unique_ptr<StorageServerInstance> Instance;
std::atomic<HubInstanceState> State = HubInstanceState::Unprovisioned;
- // TODO: We should move current metrics here (from StorageServerInstance)
- // Read and updated by WatchDog, updates to State triggers a reset of both
+ // Process metrics - written by WatchDog (inside instance shared lock), read lock-free.
+ AtomicProcessMetrics ProcessMetrics;
+
+ // Activity tracking - written by WatchDog, reset on every state transition.
std::atomic<uint64_t> LastKnownActivitySum = 0;
std::atomic<std::chrono::system_clock::time_point> LastActivityTime = std::chrono::system_clock::time_point::min();
+
+ // Set in UpdateInstanceStateLocked on every state transition; read lock-free by Find/EnumerateModules.
+ std::atomic<std::chrono::system_clock::time_point> StateChangeTime = std::chrono::system_clock::time_point::min();
};
// UpdateInstanceState is overloaded to accept a locked instance pointer (exclusive or shared) or the hub exclusive
@@ -226,21 +252,20 @@ private:
std::vector<ActiveInstance> m_ActiveInstances;
std::deque<size_t> m_FreeActiveInstanceIndexes;
- ResourceMetrics m_ResourceLimits;
- SystemMetrics m_HostMetrics;
+ SystemMetrics m_SystemMetrics;  // NOTE(review): machine-wide metrics; presumably refreshed by UpdateMachineMetrics() and handed out by GetMachineMetrics() - confirm which lock guards it
+ DiskSpace m_DiskSpace;  // NOTE(review): presumably refreshed and guarded together with m_SystemMetrics - confirm in hub.cpp
std::atomic<int> m_MaxInstanceCount = 0;
std::thread m_WatchDog;
Event m_WatchDogEvent;
void WatchDog();
+ void UpdateMachineMetrics();  // NOTE(review): presumably re-samples m_SystemMetrics and m_DiskSpace, likely from the WatchDog thread - confirm in hub.cpp
bool CheckInstanceStatus(HttpClient& ActivityHttpClient,
StorageServerInstance::SharedLockedPtr&& LockedInstance,
size_t ActiveInstanceIndex);
void AttemptRecoverInstance(std::string_view ModuleId);
- void UpdateStats();
- void UpdateCapacityMetrics();
- bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason);
+ bool CanProvisionInstanceLocked(std::string_view ModuleId, std::string& OutReason);  // NOTE(review): the "Locked" suffix suggests the caller must already hold m_Lock; OutReason presumably carries the refusal text when false is returned - confirm in hub.cpp
uint16_t GetInstanceIndexAssignedPort(size_t ActiveInstanceIndex) const;
Response InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveInstance& Instance)>&& DeprovisionGate);