diff options
| author | Dan Engelbrecht <[email protected]> | 2026-03-30 11:21:50 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-03-30 11:21:50 +0200 |
| commit | 10613ce78f4a01658ebacdd0ff8ce464b3d13c3f (patch) | |
| tree | fb45b1ff37e8c5582c82c56bfd2540511ce94abe /src/zenserver/hub/zenhubserver.cpp | |
| parent | reuse single MinIO instance across s3client integration test (#901) (diff) | |
| download | zen-10613ce78f4a01658ebacdd0ff8ce464b3d13c3f.tar.xz zen-10613ce78f4a01658ebacdd0ff8ce464b3d13c3f.zip | |
hub resource limits (#900)
- Feature: Hub dashboard now shows a Resources tile with disk and memory usage against configured limits
- Feature: Hub module listing now shows state-change timestamps and duration for each instance
- Improvement: Hub provisioning rejects new instances when disk or memory usage exceeds configurable thresholds; limits are disabled by default (0 = no limit)
  - `--hub-provision-disk-limit-bytes` - Reject provisioning when used disk exceeds this many bytes
  - `--hub-provision-disk-limit-percent` - Reject provisioning when used disk exceeds this percentage of total disk
  - `--hub-provision-memory-limit-bytes` - Reject provisioning when used memory exceeds this many bytes
  - `--hub-provision-memory-limit-percent` - Reject provisioning when used memory exceeds this percentage of total RAM
- Improvement: Hub process metrics are now tracked atomically per active instance slot, eliminating per-query process handle lookups
- Improvement: Hub, Build Store, and Workspaces service stats sections in the dashboard are now collapsible
- Bugfix: Hub watchdog loop did not check `m_ShutdownFlag`, causing it to spin indefinitely on shutdown
Diffstat (limited to 'src/zenserver/hub/zenhubserver.cpp')
| -rw-r--r-- | src/zenserver/hub/zenhubserver.cpp | 122 |
1 file changed, 103 insertions, 19 deletions
diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp index 314031246..2d0d5398b 100644 --- a/src/zenserver/hub/zenhubserver.cpp +++ b/src/zenserver/hub/zenhubserver.cpp @@ -7,12 +7,15 @@ #include "hub.h" #include <zencore/config.h> +#include <zencore/except.h> +#include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/memory/llm.h> #include <zencore/memory/memorytrace.h> #include <zencore/memory/tagtrace.h> #include <zencore/scopeguard.h> #include <zencore/sentryintegration.h> +#include <zencore/system.h> #include <zencore/windows.h> #include <zenhttp/httpapiservice.h> #include <zenutil/service.h> @@ -203,6 +206,34 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) "Request timeout in milliseconds for instance activity check requests", cxxopts::value<uint32_t>(m_ServerOptions.WatchdogConfig.ActivityCheckRequestTimeoutMs)->default_value("200"), "<ms>"); + + Options.add_option("hub", + "", + "hub-provision-disk-limit-bytes", + "Reject provisioning when used disk bytes exceed this value (0 = no limit).", + cxxopts::value<uint64_t>(m_ServerOptions.HubProvisionDiskLimitBytes), + "<bytes>"); + + Options.add_option("hub", + "", + "hub-provision-disk-limit-percent", + "Reject provisioning when used disk exceeds this percentage of total disk (0 = no limit).", + cxxopts::value<uint32_t>(m_ServerOptions.HubProvisionDiskLimitPercent), + "<percent>"); + + Options.add_option("hub", + "", + "hub-provision-memory-limit-bytes", + "Reject provisioning when used memory bytes exceed this value (0 = no limit).", + cxxopts::value<uint64_t>(m_ServerOptions.HubProvisionMemoryLimitBytes), + "<bytes>"); + + Options.add_option("hub", + "", + "hub-provision-memory-limit-percent", + "Reject provisioning when used memory exceeds this percentage of total RAM (0 = no limit).", + cxxopts::value<uint32_t>(m_ServerOptions.HubProvisionMemoryLimitPercent), + "<percent>"); } void @@ -226,6 +257,18 @@ 
ZenHubServerConfigurator::OnConfigFileParsed(LuaConfig::Options& LuaOptions) void ZenHubServerConfigurator::ValidateOptions() { + if (m_ServerOptions.HubProvisionDiskLimitPercent > 100) + { + throw OptionParseException( + fmt::format("'--hub-provision-disk-limit-percent' ({}) must be in range 0..100", m_ServerOptions.HubProvisionDiskLimitPercent), + {}); + } + if (m_ServerOptions.HubProvisionMemoryLimitPercent > 100) + { + throw OptionParseException(fmt::format("'--hub-provision-memory-limit-percent' ({}) must be in range 0..100", + m_ServerOptions.HubProvisionMemoryLimitPercent), + {}); + } } /////////////////////////////////////////////////////////////////////////// @@ -373,30 +416,71 @@ ZenHubServer::InitializeState(const ZenHubServerConfig& ServerConfig) ZEN_UNUSED(ServerConfig); } +ResourceMetrics +ZenHubServer::ResolveLimits(const ZenHubServerConfig& ServerConfig) +{ + uint64_t DiskTotal = 0; + uint64_t MemoryTotal = 0; + + if (ServerConfig.HubProvisionDiskLimitPercent > 0) + { + DiskSpace Disk; + if (DiskSpaceInfo(ServerConfig.DataDir, Disk)) + { + DiskTotal = Disk.Total; + } + else + { + ZEN_WARN("Failed to query disk space for '{}'; disk percent limit will not be applied", ServerConfig.DataDir); + } + } + if (ServerConfig.HubProvisionMemoryLimitPercent > 0) + { + MemoryTotal = GetSystemMetrics().SystemMemoryMiB * 1024 * 1024; + } + + auto Resolve = [](uint64_t Bytes, uint32_t Pct, uint64_t Total) -> uint64_t { + const uint64_t PctBytes = Pct > 0 ? (Total * Pct) / 100 : 0; + if (Bytes > 0 && PctBytes > 0) + { + return Min(Bytes, PctBytes); + } + return Bytes > 0 ? 
Bytes : PctBytes; + }; + + return { + .DiskUsageBytes = Resolve(ServerConfig.HubProvisionDiskLimitBytes, ServerConfig.HubProvisionDiskLimitPercent, DiskTotal), + .MemoryUsageBytes = Resolve(ServerConfig.HubProvisionMemoryLimitBytes, ServerConfig.HubProvisionMemoryLimitPercent, MemoryTotal), + }; +} + void ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) { ZEN_INFO("instantiating Hub"); + Hub::Configuration HubConfig{ + .UseJobObject = ServerConfig.HubUseJobObject, + .BasePortNumber = ServerConfig.HubBasePortNumber, + .InstanceLimit = ServerConfig.HubInstanceLimit, + .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount, + .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit, + .InstanceConfigPath = ServerConfig.HubInstanceConfigPath, + .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification, + .WatchDog = + { + .CycleInterval = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleIntervalMs), + .CycleProcessingBudget = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleProcessingBudgetMs), + .InstanceCheckThrottle = std::chrono::milliseconds(ServerConfig.WatchdogConfig.InstanceCheckThrottleMs), + .ProvisionedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.ProvisionedInactivityTimeoutSeconds), + .HibernatedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.HibernatedInactivityTimeoutSeconds), + .InactivityCheckMargin = std::chrono::seconds(ServerConfig.WatchdogConfig.InactivityCheckMarginSeconds), + .ActivityCheckConnectTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckConnectTimeoutMs), + .ActivityCheckRequestTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckRequestTimeoutMs), + }, + .ResourceLimits = ResolveLimits(ServerConfig)}; + m_Hub = std::make_unique<Hub>( - Hub::Configuration{ - .UseJobObject = ServerConfig.HubUseJobObject, - .BasePortNumber = ServerConfig.HubBasePortNumber, - .InstanceLimit = 
ServerConfig.HubInstanceLimit, - .InstanceHttpThreadCount = ServerConfig.HubInstanceHttpThreadCount, - .InstanceCoreLimit = ServerConfig.HubInstanceCoreLimit, - .InstanceConfigPath = ServerConfig.HubInstanceConfigPath, - .HydrationTargetSpecification = ServerConfig.HydrationTargetSpecification, - .WatchDog = - { - .CycleInterval = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleIntervalMs), - .CycleProcessingBudget = std::chrono::milliseconds(ServerConfig.WatchdogConfig.CycleProcessingBudgetMs), - .InstanceCheckThrottle = std::chrono::milliseconds(ServerConfig.WatchdogConfig.InstanceCheckThrottleMs), - .ProvisionedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.ProvisionedInactivityTimeoutSeconds), - .HibernatedInactivityTimeout = std::chrono::seconds(ServerConfig.WatchdogConfig.HibernatedInactivityTimeoutSeconds), - .InactivityCheckMargin = std::chrono::seconds(ServerConfig.WatchdogConfig.InactivityCheckMarginSeconds), - .ActivityCheckConnectTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckConnectTimeoutMs), - .ActivityCheckRequestTimeout = std::chrono::milliseconds(ServerConfig.WatchdogConfig.ActivityCheckRequestTimeoutMs), - }}, + std::move(HubConfig), ZenServerEnvironment(ZenServerEnvironment::Hub, ServerConfig.DataDir / "hub", ServerConfig.DataDir / "servers", |