aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/compute
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenserver/compute')
-rw-r--r--src/zenserver/compute/computeserver.cpp172
-rw-r--r--src/zenserver/compute/computeserver.h18
2 files changed, 190 insertions, 0 deletions
diff --git a/src/zenserver/compute/computeserver.cpp b/src/zenserver/compute/computeserver.cpp
index f35fe0f97..89886beee 100644
--- a/src/zenserver/compute/computeserver.cpp
+++ b/src/zenserver/compute/computeserver.cpp
@@ -29,6 +29,10 @@
# include <zennomad/nomadconfig.h>
# include <zennomad/nomadprovisioner.h>
# endif
+# if ZEN_WITH_OVERMIND
+# include <zenovermind/overmindconfig.h>
+# include <zenovermind/overmindprovisioner.h>
+# endif
ZEN_THIRD_PARTY_INCLUDES_START
# include <cxxopts.hpp>
@@ -331,6 +335,107 @@ ZenComputeServerConfigurator::AddCliOptions(cxxopts::Options& Options)
cxxopts::value<std::string>(m_ServerOptions.NomadConfig.JobPrefix)->default_value("zenserver-worker"),
"");
# endif
+
+# if ZEN_WITH_OVERMIND
+ // Overmind provisioning options
+ Options.add_option("overmind",
+ "",
+ "overmind-enabled",
+ "Enable Overmind worker provisioning",
+ cxxopts::value<bool>(m_ServerOptions.OvermindConfig.Enabled)->default_value("false"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-server",
+ "Overmind REST gateway URL",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.ServerUrl)->default_value(""),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-token",
+ "Overmind JWT bearer token",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.AuthToken)->default_value(""),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-namespace",
+ "Overmind namespace for job submission",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Namespace)->default_value(""),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-region",
+ "Overmind target region (e.g. REGION_US_EAST)",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Region)->default_value(""),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-command",
+ "Overmind command reference (namespace:name:version)",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.CommandRef)->default_value(""),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-os",
+ "Target operating system for Overmind jobs",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Os)->default_value("OPERATING_SYSTEM_LINUX"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-arch",
+ "Target CPU architecture for Overmind jobs",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Arch)->default_value("CPU_ARCHITECTURE_X86_64"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-memory",
+ "Memory per Overmind task (e.g. 4GiB)",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Memory)->default_value("4GiB"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-cpu",
+ "CPU per Overmind task in millicores (e.g. 2000m)",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.Cpu)->default_value("2000m"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-max-jobs",
+ "Maximum concurrent Overmind jobs",
+ cxxopts::value<int>(m_ServerOptions.OvermindConfig.MaxJobs)->default_value("64"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-cores-per-job",
+ "Estimated cores per Overmind job (for scaling)",
+ cxxopts::value<int>(m_ServerOptions.OvermindConfig.CoresPerJob)->default_value("32"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-max-cores",
+ "Maximum total cores to provision via Overmind",
+ cxxopts::value<int>(m_ServerOptions.OvermindConfig.MaxCores)->default_value("2048"),
+ "");
+
+ Options.add_option("overmind",
+ "",
+ "overmind-job-name",
+ "Name for generated Overmind jobs",
+ cxxopts::value<std::string>(m_ServerOptions.OvermindConfig.JobName)->default_value("zenserver-worker"),
+ "");
+# endif
}
void
@@ -467,6 +572,12 @@ ZenComputeServer::Cleanup()
m_NomadProvisioner.reset();
# endif
+# if ZEN_WITH_OVERMIND
+ // Shut down Overmind provisioner - stops the management thread and
+ // cancels all tracked jobs.
+ m_OvermindProvisioner.reset();
+# endif
+
// Close the orchestrator WebSocket client before stopping the io_context
m_WsReconnectTimer.cancel();
if (m_OrchestratorWsClient)
@@ -630,6 +741,36 @@ ZenComputeServer::InitializeServices(const ZenComputeServerConfig& ServerConfig)
}
}
# endif
+
+# if ZEN_WITH_OVERMIND
+ // Overmind provisioner - only created when enabled AND a server URL is set; otherwise skipped without logging
+ if (ServerConfig.OvermindConfig.Enabled && !ServerConfig.OvermindConfig.ServerUrl.empty())
+ {
+ ZEN_INFO("instantiating Overmind provisioner (server: {})", ServerConfig.OvermindConfig.ServerUrl);
+
+ const auto& OvermindCfg = ServerConfig.OvermindConfig;
+
+ if (!OvermindCfg.Validate())
+ {
+ ZEN_ERROR("invalid Overmind configuration");
+ }
+ else
+ {
+ ExtendableStringBuilder<256> OrchestratorEndpoint;
+ OrchestratorEndpoint << m_Http->GetServiceUri(m_OrchestratorService.get());
+ if (auto View = OrchestratorEndpoint.ToView(); !View.empty() && View.back() != '/')
+ {
+ OrchestratorEndpoint << '/';
+ }
+
+ m_OvermindProvisioner = std::make_unique<overmind::OvermindProvisioner>(OvermindCfg,
+ OrchestratorEndpoint,
+ m_OrchestratorService->GetSessionId().ToString(),
+ ServerConfig.ProvisionClean,
+ ServerConfig.ProvisionTraceHost);
+ }
+ }
+# endif
}
void
@@ -727,6 +868,11 @@ ZenComputeServer::BuildAnnounceBody()
AnnounceBody << "provisioner"
<< "nomad";
}
+ else if (m_InstanceId.starts_with("overmind-"))
+ {
+ AnnounceBody << "provisioner"
+ << "overmind";
+ }
if (!m_CoordinatorSession.empty())
{
@@ -901,6 +1047,18 @@ ZenComputeServer::ProvisionerMaintenanceTick()
Stats.RunningJobCount);
}
# endif
+
+# if ZEN_WITH_OVERMIND
+ if (m_OvermindProvisioner)
+ {
+ m_OvermindProvisioner->SetTargetCoreCount(UINT32_MAX);
+ auto Stats = m_OvermindProvisioner->GetStats();
+ ZEN_DEBUG("Overmind maintenance: target={}, estimated={}, running jobs={}",
+ Stats.TargetCoreCount,
+ Stats.EstimatedCoreCount,
+ Stats.RunningJobCount);
+ }
+# endif
}
void
@@ -913,6 +1071,9 @@ ZenComputeServer::EnqueueProvisionerMaintenanceTimer()
# if ZEN_WITH_NOMAD
HasProvisioner = HasProvisioner || (m_NomadProvisioner != nullptr);
# endif
+# if ZEN_WITH_OVERMIND
+ HasProvisioner = HasProvisioner || (m_OvermindProvisioner != nullptr);
+# endif
if (!HasProvisioner)
{
@@ -1011,6 +1172,17 @@ ZenComputeServer::Run()
}
# endif
+# if ZEN_WITH_OVERMIND
+ // Start Overmind provisioning if configured - request maximum allowed cores.
+ // SetTargetCoreCount clamps to OvermindConfig::MaxCores internally.
+ if (m_OvermindProvisioner)
+ {
+ m_OvermindProvisioner->SetTargetCoreCount(UINT32_MAX);
+ auto Stats = m_OvermindProvisioner->GetStats();
+ ZEN_INFO("Overmind provisioning started (target cores: {})", Stats.TargetCoreCount);
+ }
+# endif
+
EnqueueProvisionerMaintenanceTimer();
m_Http->Run(IsInteractiveMode);
diff --git a/src/zenserver/compute/computeserver.h b/src/zenserver/compute/computeserver.h
index aa9c1a5b3..38705d2e4 100644
--- a/src/zenserver/compute/computeserver.h
+++ b/src/zenserver/compute/computeserver.h
@@ -40,6 +40,13 @@ class NomadProvisioner;
} // namespace zen::nomad
# endif
+# if ZEN_WITH_OVERMIND
+# include <zenovermind/overmindconfig.h>
+namespace zen::overmind {
+class OvermindProvisioner;
+} // namespace zen::overmind
+# endif
+
namespace zen {
class HttpApiService;
@@ -64,6 +71,10 @@ struct ZenComputeServerConfig : public ZenServerConfig
# if ZEN_WITH_NOMAD
nomad::NomadConfig NomadConfig;
# endif
+
+# if ZEN_WITH_OVERMIND
+ overmind::OvermindConfig OvermindConfig;
+# endif
};
struct ZenComputeServerConfigurator : public ZenServerConfiguratorBase
@@ -95,6 +106,10 @@ private:
std::string m_NomadDriverStr = "raw_exec";
std::string m_NomadDistributionStr = "predeployed";
# endif
+
+# if ZEN_WITH_OVERMIND
+ // No string-to-enum options needed for Overmind yet
+# endif
};
class ZenComputeServerMain : public ZenServerMain
@@ -150,6 +165,9 @@ private:
# if ZEN_WITH_NOMAD
std::unique_ptr<zen::nomad::NomadProvisioner> m_NomadProvisioner;
# endif
+# if ZEN_WITH_OVERMIND
+ std::unique_ptr<zen::overmind::OvermindProvisioner> m_OvermindProvisioner;
+# endif
SystemMetricsTracker m_MetricsTracker;
std::string m_CoordinatorEndpoint;
std::string m_CoordinatorSession;