From 795345e5fd7974a1f5227d507a58bb3ed75eafd5 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 13 Apr 2026 16:38:16 +0200 Subject: Compute OIDC auth, async Horde agents, and orchestrator improvements (#913) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework of the Horde agent subsystem from synchronous per-thread I/O to an async ASIO-driven architecture, plus provisioner scale-down with graceful draining, OIDC authentication, scheduler improvements, and dashboard UI for provisioner control. ### Async Horde Agent Rewrite - Replace synchronous `HordeAgent` (one thread per agent, blocking I/O) with `AsyncHordeAgent` — an ASIO state machine running on a shared `io_context` thread pool - Replace `TcpComputeTransport`/`AesComputeTransport` with `AsyncTcpComputeTransport`/`AsyncAesComputeTransport` - Replace `AgentMessageChannel` with `AsyncAgentMessageChannel` using frame queuing and ASIO timers - Delete `ComputeBuffer` and `ComputeChannel` ring-buffer classes (no longer needed) ### Provisioner Drain / Scale-Down - `HordeProvisioner` can now drain agents when target core count is lowered: queries each agent's `/compute/session/status` for workload, selects candidates by largest-fit/lowest-workload, and sends `/compute/session/drain` - Configurable `--horde-drain-grace-period` (default 300s) before force-kill - Implement `IProvisionerStateProvider` interface to expose provisioner state to the orchestrator HTTP layer - Forward `--coordinator-session`, `--provision-clean`, and `--provision-tracehost` through both Horde and Nomad provisioners to spawned workers ### OIDC Authentication - `HordeClient` accepts an `AccessTokenProvider` (refreshable token function) as alternative to static `--horde-token` - Wire up `OidcToken.exe` auto-discovery via `httpclientauth::CreateFromOidcTokenExecutable` with `--HordeUrl` mode - New `--horde-oidctoken-exe-path` CLI option for explicit path override ### Orchestrator & Scheduler - Orchestrator generates a session ID at startup; workers include `coordinator_session` in announcements so the orchestrator can reject stale-session workers - New `Rejected` action state — when a remote runner declines at capacity, the action is rescheduled without retry count increment - Reduce scheduler lock contention: snapshot pending actions under shared lock, sort/trim outside the lock - Parallelize remote action submission across runners via `WorkerThreadPool` with slow-submit warnings - New action field `FailureReason` populated by all runner types (exit codes, sandbox failures, exceptions) - New endpoints: `session/drain`, `session/status`, `session/sunset`, `provisioner/status`, `provisioner/target` ### Remote Execution - Eager-attach mode for `RemoteHttpRunner` — bundles all attachments upfront in a `CbPackage` for single-roundtrip submits - Track in-flight submissions to prevent over-queuing - Show remote runner hostname in `GetDisplayName()` - `--announce-url` to override the endpoint announced to the coordinator (e.g. relay-visible address) ### Frontend Dashboard - Delete standalone `compute.html` (925 lines) and `orchestrator.html` (669 lines), consolidated into JS page modules - Add provisioner panel to orchestrator dashboard: target/active/estimated core counts, draining agent count - Editable target-cores input with debounced POST to `/orch/provisioner/target` - Per-agent provisioning status badges (active / draining / deallocated) in the agents table - Active vs total CPU counts in agents summary row ### CLI - New `zen compute record-start` / `record-stop` subcommands - `zen exec` progress bar with submit and completion phases, atomic work counters, `--progress` mode (Pretty/Plain/Quiet) ### Other - `DataDir` supports environment variable expansion - Worker manifest validation checks for `worker.zcb` marker to detect incomplete cached directories - Linux/Mac runners `nice(5)` child processes to avoid starving the main server - `ComputeService::SetShutdownCallback` wired to `RequestExit` via `session/sunset` - Curl HTTP client logs effective URL on failure - `MachineInfo` carries `Pool` and `Mode` from Horde response - Horde bundle creation includes `.pdb` on Windows --- src/zen/cmds/exec_cmd.cpp | 80 +++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 27 deletions(-) (limited to 'src/zen/cmds/exec_cmd.cpp') diff --git a/src/zen/cmds/exec_cmd.cpp b/src/zen/cmds/exec_cmd.cpp index 9719fce77..89bbf3638 100644 --- a/src/zen/cmds/exec_cmd.cpp +++ b/src/zen/cmds/exec_cmd.cpp @@ -23,6 +23,8 @@ #include #include +#include "../progressbar.h" + #include #include #include @@ -124,13 +126,14 @@ struct ExecSessionConfig std::vector& FunctionList; // mutable for EmitFunctionListOnce std::string_view OrchestratorUrl; const std::filesystem::path& OutputPath; - int Offset = 0; - int Stride = 1; - int Limit = 0; - bool Verbose = false; - bool Quiet = false; - bool DumpActions = false; - bool Binary = false; + int Offset = 0; + int Stride = 1; + int Limit = 0; + bool Verbose = false; + bool Quiet = false; + bool DumpActions = false; + bool Binary = false; + ProgressBar::Mode ProgressMode = ProgressBar::Mode::PrettyScroll; }; ////////////////////////////////////////////////////////////////////////// @@ -345,8 +348,6 @@ ExecSessionRunner::DrainCompletedJobs() } m_PendingJobs.Remove(CompleteLsn); - - ZEN_CONSOLE("completed: LSN {} ({} still pending)", CompleteLsn, m_PendingJobs.GetSize()); } } } @@ -897,17 +898,20 @@ ExecSessionRunner::Run() // Then submit work items - int FailedWorkCounter = 0; - size_t RemainingWorkItems = m_Config.RecordingReader.GetActionCount(); - int SubmittedWorkItems = 0; + std::atomic FailedWorkCounter{0}; + std::atomic RemainingWorkItems{m_Config.RecordingReader.GetActionCount()}; + std::atomic SubmittedWorkItems{0}; + size_t TotalWorkItems = RemainingWorkItems.load(); - ZEN_CONSOLE("submitting {} work items", RemainingWorkItems); + ProgressBar SubmitProgress(m_Config.ProgressMode, "Submit"); + SubmitProgress.UpdateState({.Task = "Submitting work items", .TotalCount = TotalWorkItems, .RemainingCount = RemainingWorkItems.load()}, + false); int OffsetCounter = m_Config.Offset; int StrideCounter = m_Config.Stride; auto ShouldSchedule = [&]() -> bool { - if (m_Config.Limit && SubmittedWorkItems >= m_Config.Limit) + if (m_Config.Limit && SubmittedWorkItems.load() >= m_Config.Limit) { // Limit reached, ignore @@ -1005,17 +1009,14 @@ ExecSessionRunner::Run() { const int32_t LsnField = EnqueueResult.Lsn; - --RemainingWorkItems; - ++SubmittedWorkItems; + size_t Remaining = --RemainingWorkItems; + int Submitted = ++SubmittedWorkItems; - if (!m_Config.Quiet) - { - ZEN_CONSOLE("submitted work item #{} - LSN {} - {}. {} remaining", - SubmittedWorkItems, - LsnField, - NiceTimeSpanMs(SubmitTimer.GetElapsedTimeMs()), - RemainingWorkItems); - } + SubmitProgress.UpdateState({.Task = "Submitting work items", + .Details = fmt::format("#{} LSN {}", Submitted, LsnField), + .TotalCount = TotalWorkItems, + .RemainingCount = Remaining}, + false); if (!m_Config.OutputPath.empty()) { @@ -1055,22 +1056,36 @@ ExecSessionRunner::Run() }, TargetParallelism); + SubmitProgress.Finish(); + // Wait until all pending work is complete + size_t TotalPendingJobs = m_PendingJobs.GetSize(); + + ProgressBar CompletionProgress(m_Config.ProgressMode, "Execute"); + while (!m_PendingJobs.IsEmpty()) { - // TODO: improve this logic - zen::Sleep(500); + size_t PendingCount = m_PendingJobs.GetSize(); + CompletionProgress.UpdateState({.Task = "Executing work items", + .Details = fmt::format("{} completed, {} remaining", TotalPendingJobs - PendingCount, PendingCount), + .TotalCount = TotalPendingJobs, + .RemainingCount = PendingCount}, + false); + + zen::Sleep(GetUpdateDelayMS(m_Config.ProgressMode)); DrainCompletedJobs(); SendOrchestratorHeartbeat(); } + CompletionProgress.Finish(); + // Write summary files WriteSummaryFiles(); - if (FailedWorkCounter) + if (FailedWorkCounter.load()) { return 1; } @@ -1423,6 +1438,16 @@ ExecCommand::OnParentOptionsParsed(const ZenCliOptions& GlobalOptions) int ExecCommand::RunSession(zen::compute::ComputeServiceSession& ComputeSession, std::string_view OrchestratorUrl) { + ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; + if (m_VerboseLogging) + { + ProgressMode = ProgressBar::Mode::Plain; + } + else if (m_QuietLogging) + { + ProgressMode = ProgressBar::Mode::Quiet; + } + ExecSessionConfig Config{ .Resolver = *m_ChunkResolver, .RecordingReader = *m_RecordingReader, @@ -1437,6 +1462,7 @@ ExecCommand::RunSession(zen::compute::ComputeServiceSession& ComputeSession, std .Quiet = m_QuietLogging, .DumpActions = m_DumpActions, .Binary = m_Binary, + .ProgressMode = ProgressMode, }; ExecSessionRunner Runner(ComputeSession, Config); -- cgit v1.2.3 From 3d59b5d7036c35fe484d052ff32dbdc9d0a75cf7 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 13 Apr 2026 19:17:09 +0200 Subject: fix utf characters in source code (#953) --- src/zen/cmds/exec_cmd.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/zen/cmds/exec_cmd.cpp') diff --git a/src/zen/cmds/exec_cmd.cpp b/src/zen/cmds/exec_cmd.cpp index 89bbf3638..93bf24caa 100644 --- a/src/zen/cmds/exec_cmd.cpp +++ b/src/zen/cmds/exec_cmd.cpp @@ -116,7 +116,7 @@ namespace { } // namespace ////////////////////////////////////////////////////////////////////////// -// ExecSessionConfig — read-only configuration for a session run +// ExecSessionConfig - read-only configuration for a session run struct ExecSessionConfig { @@ -137,7 +137,7 @@ struct ExecSessionConfig }; ////////////////////////////////////////////////////////////////////////// -// ExecSessionRunner — owns per-run state, drives the session lifecycle +// ExecSessionRunner - owns per-run state, drives the session lifecycle class ExecSessionRunner { -- cgit v1.2.3 From f07e04aa501b26b96e345f2e8ac42d231a015e55 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 17 Apr 2026 15:00:01 +0200 Subject: replace pretty progress with prettyscroll implementation (#970) --- src/zen/cmds/exec_cmd.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/zen/cmds/exec_cmd.cpp') diff --git a/src/zen/cmds/exec_cmd.cpp b/src/zen/cmds/exec_cmd.cpp index 93bf24caa..dab53f13c 100644 --- a/src/zen/cmds/exec_cmd.cpp +++ b/src/zen/cmds/exec_cmd.cpp @@ -133,7 +133,7 @@ struct ExecSessionConfig bool Quiet = false; bool DumpActions = false; bool Binary = false; - ProgressBar::Mode ProgressMode = ProgressBar::Mode::PrettyScroll; + ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; }; ////////////////////////////////////////////////////////////////////////// @@ -1439,11 +1439,7 @@ int ExecCommand::RunSession(zen::compute::ComputeServiceSession& ComputeSession, std::string_view OrchestratorUrl) { ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; - if (m_VerboseLogging) - { - ProgressMode = ProgressBar::Mode::Plain; - } - else if (m_QuietLogging) + if (m_QuietLogging) { ProgressMode = ProgressBar::Mode::Quiet; } -- cgit v1.2.3 From c7c59cdc5a70bfd6e5f66f3b032ea3f8f6b4d12a Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 20 Apr 2026 07:27:35 +0200 Subject: builds cmd refactor (#975) - Bugfix: `builds download` partial-block fetch decisions now account for build storage host latency - Bugfix: Transfer rate displays in `builds` commands now smooth correctly - Split `buildstorageoperations.cpp` (8.5k lines) into per-operation TUs: buildinspect, buildprimecache, buildstorageresolve, buildupdatefolder, builduploadfolder, buildvalidatebuildpart; stats moved to buildstoragestats.h. - FilteredRate extracted to zenutil. - BuildsCommand shared state consolidated into a BuildsConfiguration struct; subcommands inherit from BuildsSubCmdBase holding a `const BuildsConfiguration&` instead of a `BuildsCommand&`. - `ProgressBar` renamed to `ConsoleProgressBar`; mode enum (`ConsoleProgressMode`) lifted to namespace scope; `PushLogOperation`/`PopLogOperation`/`ForceLinebreak` promoted to virtuals on `ProgressBase`. - Free-function wrappers (`UploadFolder`, `DownloadFolder`, `ValidateBuildPart`) added around the existing operation classes so callers stop reimplementing setup + stats logging. --- src/zen/cmds/exec_cmd.cpp | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'src/zen/cmds/exec_cmd.cpp') diff --git a/src/zen/cmds/exec_cmd.cpp b/src/zen/cmds/exec_cmd.cpp index dab53f13c..6cebfa430 100644 --- a/src/zen/cmds/exec_cmd.cpp +++ b/src/zen/cmds/exec_cmd.cpp @@ -23,7 +23,7 @@ #include #include -#include "../progressbar.h" +#include "consoleprogress.h" #include #include @@ -133,7 +133,7 @@ struct ExecSessionConfig bool Quiet = false; bool DumpActions = false; bool Binary = false; - ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; + ConsoleProgressMode ProgressMode = ConsoleProgressMode::Pretty; }; ////////////////////////////////////////////////////////////////////////// @@ -903,9 +903,11 @@ ExecSessionRunner::Run() std::atomic SubmittedWorkItems{0}; size_t TotalWorkItems = RemainingWorkItems.load(); - ProgressBar SubmitProgress(m_Config.ProgressMode, "Submit"); - SubmitProgress.UpdateState({.Task = "Submitting work items", .TotalCount = TotalWorkItems, .RemainingCount = RemainingWorkItems.load()}, - false); + std::unique_ptr ProgressOwner(CreateConsoleProgress(m_Config.ProgressMode)); + std::unique_ptr SubmitProgress = ProgressOwner->CreateProgressBar("Submit"); + SubmitProgress->UpdateState( + {.Task = "Submitting work items", .TotalCount = TotalWorkItems, .RemainingCount = RemainingWorkItems.load()}, + false); int OffsetCounter = m_Config.Offset; int StrideCounter = m_Config.Stride; @@ -1012,11 +1014,11 @@ ExecSessionRunner::Run() size_t Remaining = --RemainingWorkItems; int Submitted = ++SubmittedWorkItems; - SubmitProgress.UpdateState({.Task = "Submitting work items", - .Details = fmt::format("#{} LSN {}", Submitted, LsnField), - .TotalCount = TotalWorkItems, - .RemainingCount = Remaining}, - false); + SubmitProgress->UpdateState({.Task = "Submitting work items", + .Details = fmt::format("#{} LSN {}", Submitted, LsnField), + .TotalCount = TotalWorkItems, + .RemainingCount = Remaining}, + false); if (!m_Config.OutputPath.empty()) { @@ -1056,30 +1058,31 @@ ExecSessionRunner::Run() }, TargetParallelism); - SubmitProgress.Finish(); + SubmitProgress->Finish(); // Wait until all pending work is complete size_t TotalPendingJobs = m_PendingJobs.GetSize(); - ProgressBar CompletionProgress(m_Config.ProgressMode, "Execute"); + std::unique_ptr CompletionProgress = ProgressOwner->CreateProgressBar("Execute"); while (!m_PendingJobs.IsEmpty()) { size_t PendingCount = m_PendingJobs.GetSize(); - CompletionProgress.UpdateState({.Task = "Executing work items", - .Details = fmt::format("{} completed, {} remaining", TotalPendingJobs - PendingCount, PendingCount), - .TotalCount = TotalPendingJobs, - .RemainingCount = PendingCount}, - false); + CompletionProgress->UpdateState( + {.Task = "Executing work items", + .Details = fmt::format("{} completed, {} remaining", TotalPendingJobs - PendingCount, PendingCount), + .TotalCount = TotalPendingJobs, + .RemainingCount = PendingCount}, + false); - zen::Sleep(GetUpdateDelayMS(m_Config.ProgressMode)); + zen::Sleep(ProgressOwner->GetProgressUpdateDelayMS()); DrainCompletedJobs(); SendOrchestratorHeartbeat(); } - CompletionProgress.Finish(); + CompletionProgress->Finish(); // Write summary files @@ -1438,10 +1441,10 @@ ExecCommand::OnParentOptionsParsed(const ZenCliOptions& GlobalOptions) int ExecCommand::RunSession(zen::compute::ComputeServiceSession& ComputeSession, std::string_view OrchestratorUrl) { - ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; + ConsoleProgressMode ProgressMode = ConsoleProgressMode::Pretty; if (m_QuietLogging) { - ProgressMode = ProgressBar::Mode::Quiet; + ProgressMode = ConsoleProgressMode::Quiet; } ExecSessionConfig Config{ -- cgit v1.2.3 From 27d72af24a8de9a81500e68a0874f1430297b3bc Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 20 Apr 2026 23:52:38 +0200 Subject: Zen CLI common server interface (#920) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a common `ZenServiceClient` RAII wrapper for zen CLI commands that interact with a zenserver instance. CLI operations (admin, builds, cache, exec, hub, info, projectstore, trace, ui, version, vfs, workspaces) automatically register sessions so they become visible in the server's session list, and forward log output to the server's session log endpoint. All session HTTP I/O (announce, remove, log batches) runs on a single background worker thread, so CLI startup and shutdown never block on server availability. ### Key changes - **`ZenServiceClient`** — new RAII class that wraps host resolution, HTTP client creation, and session lifecycle (register on connect, remove on exit). Replaces ad-hoc boilerplate across all command files that talk to a server, including the new `trace` subcommands (`start`, `stop`, `status`). - **Async session I/O** — `SessionsServiceClient` now owns a single worker thread and command queue. `Announce()`, `Remove()`, and `UpdateMetadata()` enqueue commands and return immediately. The worker creates one `HttpClient` with a 5-second total timeout, bounding any individual request. Eliminates main-thread stalls when the server is unreachable. - **Session log forwarding** — `SessionLogSink` is a thin enqueuer that posts log messages to the same worker queue (no separate thread or HTTP client). Log levels are serialized as integers; the server-side ingest handles both string and integer formats for backwards compatibility, with bounds checking on integer values. - **Build & projectstore session registration** — Long-running `builds` and projectstore cache (oplog-download) connections register sessions too, making them visible alongside regular CLI command sessions. ### Cleanup - Extract `SetupCacheSession` helper on `StorageInstance` to reduce duplication. - Remove unused `HttpClient` reference in ui command. --- src/zen/cmds/exec_cmd.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/zen/cmds/exec_cmd.cpp') diff --git a/src/zen/cmds/exec_cmd.cpp b/src/zen/cmds/exec_cmd.cpp index 6cebfa430..60968b521 100644 --- a/src/zen/cmds/exec_cmd.cpp +++ b/src/zen/cmds/exec_cmd.cpp @@ -2,6 +2,8 @@ #include "exec_cmd.h" +#include "zenserviceclient.h" + #include #include #include @@ -1107,7 +1109,7 @@ ExecHttpSubCmd::ExecHttpSubCmd(ExecCommand& Parent) : ZenSubCmdBase("http", "For void ExecHttpSubCmd::Run(const ZenCliOptions& /*GlobalOptions*/) { - m_HostName = ZenCmdBase::ResolveTargetHostSpec(m_HostName); + ZenServiceClient Service({.HostSpec = m_HostName, .CommandName = ExecCommand::Name}); ZEN_ASSERT(m_Parent.m_ChunkResolver); ChunkResolver& Resolver = *m_Parent.m_ChunkResolver; @@ -1115,7 +1117,7 @@ ExecHttpSubCmd::Run(const ZenCliOptions& /*GlobalOptions*/) std::filesystem::path TempPath = std::filesystem::absolute(".zen_temp"); zen::compute::ComputeServiceSession ComputeSession(Resolver); - ComputeSession.AddRemoteRunner(Resolver, TempPath, m_HostName); + ComputeSession.AddRemoteRunner(Resolver, TempPath, Service.HostSpec()); Stopwatch ExecTimer; int ReturnValue = m_Parent.RunSession(ComputeSession); -- cgit v1.2.3