From 9e6999f53c91ec44d04ef6685dd97800e1a66306 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 10 Apr 2026 16:59:16 +0200 Subject: reduce test runtime (#933) * reduce zenserver spawns in tests * fix filesystemutils wrong test suite name * tweak tests for faster runtime * reduce more test runtime * more wall time improvements * fast http and processmanager tests --- src/zenserver/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/zenserver/main.cpp') diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index 00b7a67d7..c5f8724ca 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -250,7 +250,7 @@ test_main(int argc, char** argv) zen::MaximizeOpenFileCount(); zen::testing::TestRunner Runner; - Runner.ApplyCommandLine(argc, argv); + Runner.ApplyCommandLine(argc, argv, "server.*"); return Runner.Run(); } #endif -- cgit v1.2.3 From 795345e5fd7974a1f5227d507a58bb3ed75eafd5 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 13 Apr 2026 16:38:16 +0200 Subject: Compute OIDC auth, async Horde agents, and orchestrator improvements (#913) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework of the Horde agent subsystem from synchronous per-thread I/O to an async ASIO-driven architecture, plus provisioner scale-down with graceful draining, OIDC authentication, scheduler improvements, and dashboard UI for provisioner control. ### Async Horde Agent Rewrite - Replace synchronous `HordeAgent` (one thread per agent, blocking I/O) with `AsyncHordeAgent` — an ASIO state machine running on a shared `io_context` thread pool - Replace `TcpComputeTransport`/`AesComputeTransport` with `AsyncTcpComputeTransport`/`AsyncAesComputeTransport` - Replace `AgentMessageChannel` with `AsyncAgentMessageChannel` using frame queuing and ASIO timers - Delete `ComputeBuffer` and `ComputeChannel` ring-buffer classes (no longer needed) ### Provisioner Drain / Scale-Down - `HordeProvisioner` can now drain agents when target core count is lowered: queries each agent's `/compute/session/status` for workload, selects candidates by largest-fit/lowest-workload, and sends `/compute/session/drain` - Configurable `--horde-drain-grace-period` (default 300s) before force-kill - Implement `IProvisionerStateProvider` interface to expose provisioner state to the orchestrator HTTP layer - Forward `--coordinator-session`, `--provision-clean`, and `--provision-tracehost` through both Horde and Nomad provisioners to spawned workers ### OIDC Authentication - `HordeClient` accepts an `AccessTokenProvider` (refreshable token function) as alternative to static `--horde-token` - Wire up `OidcToken.exe` auto-discovery via `httpclientauth::CreateFromOidcTokenExecutable` with `--HordeUrl` mode - New `--horde-oidctoken-exe-path` CLI option for explicit path override ### Orchestrator & Scheduler - Orchestrator generates a session ID at startup; workers include `coordinator_session` in announcements so the orchestrator can reject stale-session workers - New `Rejected` action state — when a remote runner declines at capacity, the action is rescheduled without retry count increment - Reduce scheduler lock contention: snapshot pending actions under shared lock, sort/trim outside the lock - Parallelize remote action submission across runners via `WorkerThreadPool` with slow-submit warnings - New action field `FailureReason` populated by all runner types (exit codes, sandbox failures, exceptions) - New endpoints: `session/drain`, `session/status`, `session/sunset`, `provisioner/status`, `provisioner/target` ### Remote Execution - Eager-attach mode for `RemoteHttpRunner` — bundles all attachments upfront in a `CbPackage` for single-roundtrip submits - Track in-flight submissions to prevent over-queuing - Show remote runner hostname in `GetDisplayName()` - `--announce-url` to override the endpoint announced to the coordinator (e.g. relay-visible address) ### Frontend Dashboard - Delete standalone `compute.html` (925 lines) and `orchestrator.html` (669 lines), consolidated into JS page modules - Add provisioner panel to orchestrator dashboard: target/active/estimated core counts, draining agent count - Editable target-cores input with debounced POST to `/orch/provisioner/target` - Per-agent provisioning status badges (active / draining / deallocated) in the agents table - Active vs total CPU counts in agents summary row ### CLI - New `zen compute record-start` / `record-stop` subcommands - `zen exec` progress bar with submit and completion phases, atomic work counters, `--progress` mode (Pretty/Plain/Quiet) ### Other - `DataDir` supports environment variable expansion - Worker manifest validation checks for `worker.zcb` marker to detect incomplete cached directories - Linux/Mac runners `nice(5)` child processes to avoid starving the main server - `ComputeService::SetShutdownCallback` wired to `RequestExit` via `session/sunset` - Curl HTTP client logs effective URL on failure - `MachineInfo` carries `Pool` and `Mode` from Horde response - Horde bundle creation includes `.pdb` on Windows --- src/zenserver/main.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/zenserver/main.cpp') diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index c5f8724ca..108685eb9 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -169,7 +168,12 @@ AppMain(int argc, char* argv[]) if (IsDir(ServerOptions.DataDir)) { ZEN_CONSOLE_INFO("Deleting files from '{}' ({})", ServerOptions.DataDir, DeleteReason); - DeleteDirectories(ServerOptions.DataDir); + std::error_code Ec; + DeleteDirectories(ServerOptions.DataDir, Ec); + if (Ec) + { + ZEN_WARN("could not fully clean '{}': {} (continuing anyway)", ServerOptions.DataDir, Ec.message()); + } } } -- cgit v1.2.3 From 28a61b12d302e9e0d37d52bf1aa5d19069f3411b Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 20 Apr 2026 15:53:22 +0200 Subject: zen history command (#987) - Feature: Per-user invocation history for `zen` and `zenserver`; each startup appends a record to a JSONL file capped at the most recent 100 entries. Location: `%LOCALAPPDATA%\Epic\Zen\History\invocations.jsonl` on Windows, `~/.zen/History/invocations.jsonl` on POSIX - `zen history` opens an interactive picker; selecting a zen row re-runs it inline and forwards the exit code, selecting a zenserver row spawns it detached - `zen history --list` (`-l`) prints the table to stdout instead of showing the picker - `zen history --filter zen|zenserver` restricts the listing to one executable - `zen history --print` prints the reconstructed command line of the selected row instead of launching it - `--enable-execution-history` global option on both binaries (default `true`) to opt out per invocation - The history file is attached to Sentry crash reports (alongside the existing zenserver log) --- src/zenserver/main.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'src/zenserver/main.cpp') diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index 108685eb9..d74d3956c 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "diag/logging.h" @@ -264,6 +265,41 @@ main(int argc, char* argv[]) { zen::InstallCrashHandler(); + { + std::string_view ModeStr = "store"; + if (argc >= 2 && argv[1] != nullptr) + { + std::string_view A1 = argv[1]; + if (!A1.empty() && A1[0] != '-') + { + if (A1 == "hub") + { + ModeStr = "hub"; + } + else if (A1 == "store" || A1 == "storage") + { + ModeStr = "store"; + } + else if (A1 == "compute") + { + ModeStr = "compute"; + } + else if (A1 == "proxy") + { + ModeStr = "proxy"; + } + else if (A1 == "test") + { + ModeStr = "test"; + } + } + } + if (ModeStr != "test") + { + zen::LogInvocation("zenserver", ModeStr, argc, argv); + } + } + #if ZEN_PLATFORM_WINDOWS setlocale(LC_ALL, "en_us.UTF8"); #endif // ZEN_PLATFORM_WINDOWS -- cgit v1.2.3