diff options
| author | Stefan Boberg <[email protected]> | 2026-04-14 16:18:23 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2026-04-14 16:18:23 +0200 |
| commit | 053b7373357d2555bac111b94c6909bc148f24ac (patch) | |
| tree | 456a8ce2a1b38ff6aef342324f7fa4c17fdadd30 /src/zenserver/compute/computeserver.cpp | |
| parent | 5.8.4 (diff) | |
| download | zen-sb/compute-overmind.tar.xz zen-sb/compute-overmind.zip | |
Add Overmind provisioner alongside Horde and Nomad (branch: sb/compute-overmind)
Introduces the zenovermind module with an HTTP client targeting the
Overmind REST gateway (/v1/jobs) and a management-thread provisioner
that schedules, polls, and cancels jobs following the same pattern as
the existing Nomad provisioner. Wired into the compute server with
full CLI options (--overmind-*), lifecycle management, and maintenance
tick support behind the ZEN_WITH_OVERMIND compile flag.
Diffstat (limited to 'src/zenserver/compute/computeserver.cpp')
| -rw-r--r-- | src/zenserver/compute/computeserver.cpp | 172 |
1 files changed, 172 insertions, 0 deletions
diff --git a/src/zenserver/compute/computeserver.cpp b/src/zenserver/compute/computeserver.cpp index f35fe0f97..89886beee 100644 --- a/src/zenserver/compute/computeserver.cpp +++ b/src/zenserver/compute/computeserver.cpp @@ -29,6 +29,10 @@ # include <zennomad/nomadconfig.h> # include <zennomad/nomadprovisioner.h> # endif +# if ZEN_WITH_OVERMIND +# include <zenovermind/overmindconfig.h> +# include <zenovermind/overmindprovisioner.h> +# endif ZEN_THIRD_PARTY_INCLUDES_START # include <cxxopts.hpp> @@ -331,6 +335,107 @@ ZenComputeServerConfigurator::AddCliOptions(cxxopts::Options& Options) cxxopts::value<std::string>(m_ServerOptions.NomadConfig.JobPrefix)->default_value("zenserver-worker"), ""); # endif + +# if ZEN_WITH_OVERMIND + // Overmind provisioning options + Options.add_option("overmind", + "", + "overmind-enabled", + "Enable Overmind worker provisioning", + cxxopts::value<bool>(m_ServerOptions.OvermindConfig.Enabled)->default_value("false"), + ""); + + Options.add_option("overmind", + "", + "overmind-server", + "Overmind REST gateway URL", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.ServerUrl)->default_value(""), + ""); + + Options.add_option("overmind", + "", + "overmind-token", + "Overmind JWT bearer token", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.AuthToken)->default_value(""), + ""); + + Options.add_option("overmind", + "", + "overmind-namespace", + "Overmind namespace for job submission", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Namespace)->default_value(""), + ""); + + Options.add_option("overmind", + "", + "overmind-region", + "Overmind target region (e.g. 
REGION_US_EAST)", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Region)->default_value(""), + ""); + + Options.add_option("overmind", + "", + "overmind-command", + "Overmind command reference (namespace:name:version)", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.CommandRef)->default_value(""), + ""); + + Options.add_option("overmind", + "", + "overmind-os", + "Target operating system for Overmind jobs", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Os)->default_value("OPERATING_SYSTEM_LINUX"), + ""); + + Options.add_option("overmind", + "", + "overmind-arch", + "Target CPU architecture for Overmind jobs", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Arch)->default_value("CPU_ARCHITECTURE_X86_64"), + ""); + + Options.add_option("overmind", + "", + "overmind-memory", + "Memory per Overmind task (e.g. 4GiB)", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Memory)->default_value("4GiB"), + ""); + + Options.add_option("overmind", + "", + "overmind-cpu", + "CPU per Overmind task in millicores (e.g. 
2000m)", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Cpu)->default_value("2000m"), + ""); + + Options.add_option("overmind", + "", + "overmind-max-jobs", + "Maximum concurrent Overmind jobs", + cxxopts::value<int>(m_ServerOptions.OvermindConfig.MaxJobs)->default_value("64"), + ""); + + Options.add_option("overmind", + "", + "overmind-cores-per-job", + "Estimated cores per Overmind job (for scaling)", + cxxopts::value<int>(m_ServerOptions.OvermindConfig.CoresPerJob)->default_value("32"), + ""); + + Options.add_option("overmind", + "", + "overmind-max-cores", + "Maximum total cores to provision via Overmind", + cxxopts::value<int>(m_ServerOptions.OvermindConfig.MaxCores)->default_value("2048"), + ""); + + Options.add_option("overmind", + "", + "overmind-job-name", + "Name for generated Overmind jobs", + cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.JobName)->default_value("zenserver-worker"), + ""); +# endif } void @@ -467,6 +572,12 @@ ZenComputeServer::Cleanup() m_NomadProvisioner.reset(); # endif +# if ZEN_WITH_OVERMIND + // Shut down Overmind provisioner - stops the management thread and + // cancels all tracked jobs. 
+ m_OvermindProvisioner.reset(); +# endif + // Close the orchestrator WebSocket client before stopping the io_context m_WsReconnectTimer.cancel(); if (m_OrchestratorWsClient) @@ -630,6 +741,36 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig) } } # endif + +# if ZEN_WITH_OVERMIND + // Overmind provisioner + if (ServerConfig.OvermindConfig.Enabled && !ServerConfig.OvermindConfig.ServerUrl.empty()) + { + ZEN_INFO("instantiating Overmind provisioner (server: {})", ServerConfig.OvermindConfig.ServerUrl); + + const auto& OvermindCfg = ServerConfig.OvermindConfig; + + if (!OvermindCfg.Validate()) + { + ZEN_ERROR("invalid Overmind configuration"); + } + else + { + ExtendableStringBuilder<256> OrchestratorEndpoint; + OrchestratorEndpoint << m_Http->GetServiceUri(m_OrchestratorService.get()); + if (auto View = OrchestratorEndpoint.ToView(); !View.empty() && View.back() != '/') + { + OrchestratorEndpoint << '/'; + } + + m_OvermindProvisioner = std::make_unique<overmind::OvermindProvisioner>(OvermindCfg, + OrchestratorEndpoint, + m_OrchestratorService->GetSessionId().ToString(), + ServerConfig.ProvisionClean, + ServerConfig.ProvisionTraceHost); + } + } +# endif } void @@ -727,6 +868,11 @@ ZenComputeServer::BuildAnnounceBody() AnnounceBody << "provisioner" << "nomad"; } + else if (m_InstanceId.starts_with("overmind-")) + { + AnnounceBody << "provisioner" + << "overmind"; + } if (!m_CoordinatorSession.empty()) { @@ -901,6 +1047,18 @@ ZenComputeServer::ProvisionerMaintenanceTick() Stats.RunningJobCount); } # endif + +# if ZEN_WITH_OVERMIND + if (m_OvermindProvisioner) + { + m_OvermindProvisioner->SetTargetCoreCount(UINT32_MAX); + auto Stats = m_OvermindProvisioner->GetStats(); + ZEN_DEBUG("Overmind maintenance: target={}, estimated={}, running jobs={}", + Stats.TargetCoreCount, + Stats.EstimatedCoreCount, + Stats.RunningJobCount); + } +# endif } void @@ -913,6 +1071,9 @@ ZenComputeServer::EnqueueProvisionerMaintenanceTimer() # if ZEN_WITH_NOMAD 
HasProvisioner = HasProvisioner || (m_NomadProvisioner != nullptr); # endif +# if ZEN_WITH_OVERMIND + HasProvisioner = HasProvisioner || (m_OvermindProvisioner != nullptr); +# endif if (!HasProvisioner) { @@ -1011,6 +1172,17 @@ ZenComputeServer::Run() } # endif +# if ZEN_WITH_OVERMIND + // Start Overmind provisioning if configured - request maximum allowed cores. + // SetTargetCoreCount clamps to OvermindConfig::MaxCores internally. + if (m_OvermindProvisioner) + { + m_OvermindProvisioner->SetTargetCoreCount(UINT32_MAX); + auto Stats = m_OvermindProvisioner->GetStats(); + ZEN_INFO("Overmind provisioning started (target cores: {})", Stats.TargetCoreCount); + } +# endif + EnqueueProvisionerMaintenanceTimer(); m_Http->Run(IsInteractiveMode); |