aboutsummaryrefslogtreecommitdiff
path: root/src/zenovermind/include
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-04-14 16:18:23 +0200
committerStefan Boberg <[email protected]>2026-04-14 16:18:23 +0200
commit053b7373357d2555bac111b94c6909bc148f24ac (patch)
tree456a8ce2a1b38ff6aef342324f7fa4c17fdadd30 /src/zenovermind/include
parent5.8.4 (diff)
downloadzen-sb/compute-overmind.tar.xz
zen-sb/compute-overmind.zip
Add Overmind provisioner alongside Horde and Nomad [sb/compute-overmind]
Introduces the zenovermind module with an HTTP client targeting the Overmind REST gateway (/v1/jobs) and a management-thread provisioner that schedules, polls, and cancels jobs following the same pattern as the existing Nomad provisioner. Wired into the compute server with full CLI options (--overmind-*), lifecycle management, and maintenance tick support behind the ZEN_WITH_OVERMIND compile flag.
Diffstat (limited to 'src/zenovermind/include')
-rw-r--r--src/zenovermind/include/zenovermind/overmindclient.h69
-rw-r--r--src/zenovermind/include/zenovermind/overmindconfig.h45
-rw-r--r--src/zenovermind/include/zenovermind/overmindprovisioner.h110
-rw-r--r--src/zenovermind/include/zenovermind/zenovermind.h9
4 files changed, 233 insertions, 0 deletions
diff --git a/src/zenovermind/include/zenovermind/overmindclient.h b/src/zenovermind/include/zenovermind/overmindclient.h
new file mode 100644
index 000000000..68348b4a6
--- /dev/null
+++ b/src/zenovermind/include/zenovermind/overmindclient.h
@@ -0,0 +1,69 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenovermind/overmindconfig.h>
+
+#include <zencore/logbase.h>
+
+#include <memory>
+#include <string>
+
+namespace zen {
+class HttpClient; // Forward declaration; this header only holds it through std::unique_ptr (OvermindClient::m_Http)
+}
+
+namespace zen::overmind {
+
+/** Summary of an Overmind job returned by the REST API; used as the output of OvermindClient::ScheduleJob and GetJobStatus. */
+struct OvermindJobInfo
+{
+ std::string Id; ///< Job identifier; presumably the {jobId} used in /v1/jobs/{jobId} requests (confirm in the implementation)
+ std::string Status; ///< Status string, e.g. "STATUS_PENDING", "STATUS_RUNNING", "STATUS_COMPLETE", "STATUS_ERROR", ...
+};
+
+/** HTTP client for the Overmind REST gateway (v1).
+ *
+ * Handles job scheduling, status polling, and job cancellation via the
+ * grpc-gateway REST endpoints (/v1/jobs) on port 2580.
+ *
+ * All calls are synchronous. Thread safety: individual methods are
+ * not thread-safe; callers must synchronize access to a shared instance.
+ */
+class OvermindClient
+{
+public:
+ explicit OvermindClient(const OvermindConfig& Config);
+ ~OvermindClient(); // Not defined inline: m_Http points at zen::HttpClient, which is only forward-declared in this header
+
+ OvermindClient(const OvermindClient&) = delete; // Non-copyable
+ OvermindClient& operator=(const OvermindClient&) = delete; // Non-copy-assignable
+
+ /** Initialize the underlying HTTP client. Must be called before other methods. Returns bool; presumably false on failure (confirm). */
+ bool Initialize();
+
+ /** Build the JSON body for a ScheduleJob request. Parameters after OrchestratorEndpoint are optional and default to empty/false. */
+ std::string BuildJobJson(const std::string& JobName,
+ const std::string& OrchestratorEndpoint,
+ const std::string& CoordinatorSession = {},
+ bool CleanStart = false,
+ const std::string& TraceHost = {}) const;
+
+ /** Schedule a job via POST /v1/jobs. JobJson is the request body (see BuildJobJson). On success, populates OutJob. */
+ bool ScheduleJob(const std::string& JobJson, OvermindJobInfo& OutJob);
+
+ /** Get the status of a job via GET /v1/jobs/{jobId}. OutJob is the output parameter; the bool result presumably signals success (confirm). */
+ bool GetJobStatus(const std::string& JobId, OvermindJobInfo& OutJob);
+
+ /** Cancel a job via DELETE /v1/jobs/{jobId}. The bool result presumably signals success (confirm). */
+ bool CancelJob(const std::string& JobId);
+
+ LoggerRef Log() { return m_Log; } ///< Accessor for the logger stored in m_Log
+
+private:
+ OvermindConfig m_Config; ///< Configuration held by value (presumably copied from the constructor argument)
+ std::unique_ptr<zen::HttpClient> m_Http; ///< Underlying HTTP client (see Initialize())
+ LoggerRef m_Log; ///< Logger returned by Log()
+};
+
+} // namespace zen::overmind
diff --git a/src/zenovermind/include/zenovermind/overmindconfig.h b/src/zenovermind/include/zenovermind/overmindconfig.h
new file mode 100644
index 000000000..a463e31ea
--- /dev/null
+++ b/src/zenovermind/include/zenovermind/overmindconfig.h
@@ -0,0 +1,45 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenovermind/zenovermind.h>
+
+#include <string>
+
+namespace zen::overmind {
+
+/** Configuration for Overmind worker provisioning.
+ *
+ * Specifies the Overmind REST gateway URL, authentication, namespace,
+ * region, target platform, and resource limits. Used by OvermindClient and OvermindProvisioner.
+ */
+struct OvermindConfig
+{
+ bool Enabled = false; ///< Whether Overmind provisioning is active (off by default)
+ std::string ServerUrl; ///< Overmind REST gateway URL (e.g. "http://localhost:2580")
+ std::string AuthToken; ///< JWT bearer token for authentication
+
+ std::string Namespace; ///< Overmind namespace for job submission
+ std::string Region; ///< Target region (e.g. "REGION_US_EAST")
+
+ /** Overmind command reference for the zenserver binary in
+ * "namespace:name:version" format (e.g. "infra:zenserver:v1.0.0"). */
+ std::string CommandRef;
+
+ std::string Os = "OPERATING_SYSTEM_LINUX"; ///< Target operating system
+ std::string Arch = "CPU_ARCHITECTURE_X86_64"; ///< Target CPU architecture
+
+ std::string Memory = "4GiB"; ///< Memory per task, as a quantity string ("4GiB" by default)
+ std::string Cpu = "2000m"; ///< CPU per task in millicores ("2000m" by default)
+
+ int MaxJobs = 64; ///< Maximum concurrent Overmind jobs
+ int CoresPerJob = 32; ///< Estimated cores per job (for scaling calculations)
+ int MaxCores = 2048; ///< Maximum total cores to provision
+
+ std::string JobName = "zenserver-worker"; ///< Name for generated Overmind jobs
+
+ /** Validate the configuration. Returns false if required fields are missing; which fields are required is decided in the implementation. */
+ bool Validate() const;
+};
+
+} // namespace zen::overmind
diff --git a/src/zenovermind/include/zenovermind/overmindprovisioner.h b/src/zenovermind/include/zenovermind/overmindprovisioner.h
new file mode 100644
index 000000000..cb0a84728
--- /dev/null
+++ b/src/zenovermind/include/zenovermind/overmindprovisioner.h
@@ -0,0 +1,110 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenovermind/overmindconfig.h>
+
+#include <zencore/logbase.h>
+
+#include <atomic>
+#include <condition_variable>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <vector>
+
+namespace zen::overmind {
+
+class OvermindClient;
+
+/** Snapshot of the current Overmind provisioning state, returned by OvermindProvisioner::GetStats(). All counters default to zero. */
+struct OvermindProvisioningStats
+{
+ uint32_t TargetCoreCount = 0; ///< Requested number of cores (clamped to OvermindConfig::MaxCores)
+ uint32_t EstimatedCoreCount = 0; ///< Cores expected from submitted jobs
+ uint32_t ActiveJobCount = 0; ///< Number of currently tracked Overmind jobs
+ uint32_t RunningJobCount = 0; ///< Number of tracked jobs in running status
+};
+
+/** Job lifecycle manager for Overmind worker provisioning.
+ *
+ * Provisions remote compute workers by scheduling jobs via the Overmind
+ * REST gateway. Each job runs zenserver in compute mode, which
+ * announces itself back to the orchestrator.
+ *
+ * Uses a single management thread that periodically:
+ * 1. Submits new jobs when estimated cores < target cores
+ * 2. Polls existing jobs for status changes
+ * 3. Cleans up completed/failed jobs and adjusts counters
+ *
+ * Thread safety: SetTargetCoreCount and GetStats may be called from any thread.
+ */
+class OvermindProvisioner
+{
+public:
+ /** Construct a provisioner. CoordinatorSession, CleanStart and TraceHost are optional (default empty/false).
+ * @param Config Overmind connection and job configuration.
+ * @param OrchestratorEndpoint URL of the orchestrator that remote workers announce to. */
+ OvermindProvisioner(const OvermindConfig& Config,
+ std::string_view OrchestratorEndpoint, // NOTE(review): std::string_view is used here but <string_view> is not included directly by this header
+ std::string_view CoordinatorSession = {},
+ bool CleanStart = false,
+ std::string_view TraceHost = {});
+
+ /** Signals the management thread to exit and cancels all tracked jobs. */
+ ~OvermindProvisioner();
+
+ OvermindProvisioner(const OvermindProvisioner&) = delete; // Non-copyable
+ OvermindProvisioner& operator=(const OvermindProvisioner&) = delete; // Non-copy-assignable
+
+ /** Set the target number of cores to provision.
+ * Clamped to OvermindConfig::MaxCores. The management thread will
+ * schedule new jobs to approach this target. */
+ void SetTargetCoreCount(uint32_t Count);
+
+ /** Return a snapshot of the current provisioning counters (see OvermindProvisioningStats). */
+ OvermindProvisioningStats GetStats() const;
+
+private:
+ LoggerRef Log() { return m_Log; } ///< Accessor for the logger stored in m_Log
+
+ struct TrackedJob
+ {
+ std::string Id; ///< Job identifier (presumably as reported by Overmind; confirm)
+ std::string Status; ///< Overmind status string
+ int Cores = 0; ///< Core count associated with this job; presumably feeds the estimated-core accounting (confirm)
+ };
+
+ void ManagementThread(); ///< Presumably the entry point run on m_Thread (confirm in the implementation)
+ void SubmitNewJobs(); ///< Presumably step 1 of the management loop described on the class (confirm)
+ void PollExistingJobs(); ///< Presumably step 2 of the management loop (confirm)
+ void CleanupFinishedJobs(); ///< Presumably step 3 of the management loop (confirm)
+ void CancelAllJobs(); ///< Presumably used by the destructor, which is documented to cancel all tracked jobs (confirm)
+
+ OvermindConfig m_Config; ///< Configuration held by value
+ std::string m_OrchestratorEndpoint; ///< Owned string counterpart of the OrchestratorEndpoint constructor argument
+ std::string m_CoordinatorSession; ///< Owned string counterpart of the CoordinatorSession constructor argument
+ bool m_CleanStart = false; ///< Counterpart of the CleanStart constructor argument
+ std::string m_TraceHost; ///< Owned string counterpart of the TraceHost constructor argument
+
+ std::unique_ptr<OvermindClient> m_Client; ///< Client for the Overmind REST gateway (OvermindClient is only forward-declared here)
+
+ mutable std::mutex m_JobsLock; ///< Presumably guards m_Jobs; mutable so const methods such as GetStats() can lock it (confirm)
+ std::vector<TrackedJob> m_Jobs; ///< Jobs currently being tracked
+ std::atomic<uint32_t> m_JobIndex{0}; ///< Job counter; its exact use is in the implementation (not visible here)
+
+ std::atomic<uint32_t> m_TargetCoreCount{0}; ///< Presumably the value set by SetTargetCoreCount() (confirm)
+ std::atomic<uint32_t> m_EstimatedCoreCount{0}; ///< Presumably backs OvermindProvisioningStats::EstimatedCoreCount (confirm)
+ std::atomic<uint32_t> m_RunningJobCount{0}; ///< Presumably backs OvermindProvisioningStats::RunningJobCount (confirm)
+
+ std::thread m_Thread; ///< The single management thread described on the class
+ std::mutex m_WakeMutex; ///< Presumably paired with m_WakeCV (confirm)
+ std::condition_variable m_WakeCV; ///< Presumably used to wake the management thread early (confirm)
+ std::atomic<bool> m_ShouldExit{false}; ///< Exit flag; presumably what the destructor sets to stop the management thread (confirm)
+
+ LoggerRef m_Log; ///< Logger returned by Log()
+};
+
+} // namespace zen::overmind
diff --git a/src/zenovermind/include/zenovermind/zenovermind.h b/src/zenovermind/include/zenovermind/zenovermind.h
new file mode 100644
index 000000000..b7f451a16
--- /dev/null
+++ b/src/zenovermind/include/zenovermind/zenovermind.h
@@ -0,0 +1,9 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/zencore.h>
+
+#if !defined(ZEN_WITH_OVERMIND) // Only supply a default when the flag has not been defined already
+# define ZEN_WITH_OVERMIND 1 // Default: Overmind support enabled
+#endif // !defined(ZEN_WITH_OVERMIND)