aboutsummaryrefslogtreecommitdiff
path: root/src/zenhttp
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2026-04-13 16:38:16 +0200
committer: GitHub Enterprise <[email protected]> 2026-04-13 16:38:16 +0200
commit: 795345e5fd7974a1f5227d507a58bb3ed75eafd5 (patch)
tree: 7a0f142bf562c3590400586c82b0e7a1b5ad6493 /src/zenhttp
parent: 5.8.4-pre2 (diff)
download: zen-795345e5fd7974a1f5227d507a58bb3ed75eafd5.tar.xz
zen-795345e5fd7974a1f5227d507a58bb3ed75eafd5.zip
Compute OIDC auth, async Horde agents, and orchestrator improvements (#913)
Rework of the Horde agent subsystem from synchronous per-thread I/O to an async ASIO-driven architecture, plus provisioner scale-down with graceful draining, OIDC authentication, scheduler improvements, and dashboard UI for provisioner control.

### Async Horde Agent Rewrite
- Replace synchronous `HordeAgent` (one thread per agent, blocking I/O) with `AsyncHordeAgent` — an ASIO state machine running on a shared `io_context` thread pool
- Replace `TcpComputeTransport`/`AesComputeTransport` with `AsyncTcpComputeTransport`/`AsyncAesComputeTransport`
- Replace `AgentMessageChannel` with `AsyncAgentMessageChannel` using frame queuing and ASIO timers
- Delete `ComputeBuffer` and `ComputeChannel` ring-buffer classes (no longer needed)

### Provisioner Drain / Scale-Down
- `HordeProvisioner` can now drain agents when target core count is lowered: queries each agent's `/compute/session/status` for workload, selects candidates by largest-fit/lowest-workload, and sends `/compute/session/drain`
- Configurable `--horde-drain-grace-period` (default 300s) before force-kill
- Implement `IProvisionerStateProvider` interface to expose provisioner state to the orchestrator HTTP layer
- Forward `--coordinator-session`, `--provision-clean`, and `--provision-tracehost` through both Horde and Nomad provisioners to spawned workers

### OIDC Authentication
- `HordeClient` accepts an `AccessTokenProvider` (refreshable token function) as alternative to static `--horde-token`
- Wire up `OidcToken.exe` auto-discovery via `httpclientauth::CreateFromOidcTokenExecutable` with `--HordeUrl` mode
- New `--horde-oidctoken-exe-path` CLI option for explicit path override

### Orchestrator & Scheduler
- Orchestrator generates a session ID at startup; workers include `coordinator_session` in announcements so the orchestrator can reject stale-session workers
- New `Rejected` action state — when a remote runner declines at capacity, the action is rescheduled without retry count increment
- Reduce scheduler lock contention: snapshot pending actions under shared lock, sort/trim outside the lock
- Parallelize remote action submission across runners via `WorkerThreadPool` with slow-submit warnings
- New action field `FailureReason` populated by all runner types (exit codes, sandbox failures, exceptions)
- New endpoints: `session/drain`, `session/status`, `session/sunset`, `provisioner/status`, `provisioner/target`

### Remote Execution
- Eager-attach mode for `RemoteHttpRunner` — bundles all attachments upfront in a `CbPackage` for single-roundtrip submits
- Track in-flight submissions to prevent over-queuing
- Show remote runner hostname in `GetDisplayName()`
- `--announce-url` to override the endpoint announced to the coordinator (e.g. relay-visible address)

### Frontend Dashboard
- Delete standalone `compute.html` (925 lines) and `orchestrator.html` (669 lines), consolidated into JS page modules
- Add provisioner panel to orchestrator dashboard: target/active/estimated core counts, draining agent count
- Editable target-cores input with debounced POST to `/orch/provisioner/target`
- Per-agent provisioning status badges (active / draining / deallocated) in the agents table
- Active vs total CPU counts in agents summary row

### CLI
- New `zen compute record-start` / `record-stop` subcommands
- `zen exec` progress bar with submit and completion phases, atomic work counters, `--progress` mode (Pretty/Plain/Quiet)

### Other
- `DataDir` supports environment variable expansion
- Worker manifest validation checks for `worker.zcb` marker to detect incomplete cached directories
- Linux/Mac runners `nice(5)` child processes to avoid starving the main server
- `ComputeService::SetShutdownCallback` wired to `RequestExit` via `session/sunset`
- Curl HTTP client logs effective URL on failure
- `MachineInfo` carries `Pool` and `Mode` from Horde response
- Horde bundle creation includes `.pdb` on Windows
Diffstat (limited to 'src/zenhttp')
-rw-r--r-- src/zenhttp/clients/httpclientcurl.cpp 10
-rw-r--r-- src/zenhttp/clients/httpclientcurl.h 1
-rw-r--r-- src/zenhttp/httpclientauth.cpp 18
-rw-r--r-- src/zenhttp/include/zenhttp/httpclientauth.h 3
4 files changed, 23 insertions, 9 deletions
diff --git a/src/zenhttp/clients/httpclientcurl.cpp b/src/zenhttp/clients/httpclientcurl.cpp
index 446dd80be..56b9c39c5 100644
--- a/src/zenhttp/clients/httpclientcurl.cpp
+++ b/src/zenhttp/clients/httpclientcurl.cpp
@@ -228,6 +228,13 @@ CurlHttpClient::Session::Perform()
curl_easy_getinfo(Handle, CURLINFO_SIZE_DOWNLOAD_T, &DownBytes);
Result.DownloadedBytes = static_cast<int64_t>(DownBytes);
+ char* EffectiveUrl = nullptr;
+ curl_easy_getinfo(Handle, CURLINFO_EFFECTIVE_URL, &EffectiveUrl);
+ if (EffectiveUrl)
+ {
+ Result.Url = EffectiveUrl;
+ }
+
return Result;
}
@@ -294,8 +301,9 @@ CurlHttpClient::CommonResponse(std::string_view SessionId,
if (Result.ErrorCode != CURLE_OPERATION_TIMEDOUT && Result.ErrorCode != CURLE_COULDNT_CONNECT &&
Result.ErrorCode != CURLE_ABORTED_BY_CALLBACK)
{
- ZEN_WARN("HttpClient client failure (session: {}): ({}) '{}'",
+ ZEN_WARN("HttpClient client failure (session: {}, url: {}): ({}) '{}'",
SessionId,
+ Result.Url,
static_cast<int>(Result.ErrorCode),
Result.ErrorMessage);
}
diff --git a/src/zenhttp/clients/httpclientcurl.h b/src/zenhttp/clients/httpclientcurl.h
index bdeb46633..ea9193e65 100644
--- a/src/zenhttp/clients/httpclientcurl.h
+++ b/src/zenhttp/clients/httpclientcurl.h
@@ -73,6 +73,7 @@ private:
int64_t DownloadedBytes = 0;
CURLcode ErrorCode = CURLE_OK;
std::string ErrorMessage;
+ std::string Url;
};
struct Session
diff --git a/src/zenhttp/httpclientauth.cpp b/src/zenhttp/httpclientauth.cpp
index c42841922..0432e50ef 100644
--- a/src/zenhttp/httpclientauth.cpp
+++ b/src/zenhttp/httpclientauth.cpp
@@ -94,7 +94,8 @@ namespace zen { namespace httpclientauth {
std::string_view CloudHost,
bool Unattended,
bool Quiet,
- bool Hidden)
+ bool Hidden,
+ bool IsHordeUrl)
{
Stopwatch Timer;
@@ -117,8 +118,9 @@ namespace zen { namespace httpclientauth {
}
});
- const std::string ProcArgs = fmt::format("{} --AuthConfigUrl {} --OutFile {} --Unattended={}",
+ const std::string ProcArgs = fmt::format("{} {} {} --OutFile {} --Unattended={}",
OidcExecutablePath,
+ IsHordeUrl ? "--HordeUrl" : "--AuthConfigUrl",
CloudHost,
AuthTokenPath,
Unattended ? "true"sv : "false"sv);
@@ -193,7 +195,7 @@ namespace zen { namespace httpclientauth {
}
else
{
- ZEN_WARN("Failed running {} to get auth token, error code {}", OidcExecutablePath, ExitCode);
+ ZEN_WARN("Failed running '{}' to get auth token, error code {}", ProcArgs, ExitCode);
}
return HttpClientAccessToken{};
}
@@ -202,9 +204,10 @@ namespace zen { namespace httpclientauth {
std::string_view CloudHost,
bool Quiet,
bool Unattended,
- bool Hidden)
+ bool Hidden,
+ bool IsHordeUrl)
{
- HttpClientAccessToken InitialToken = GetOidcTokenFromExe(OidcExecutablePath, CloudHost, Unattended, Quiet, Hidden);
+ HttpClientAccessToken InitialToken = GetOidcTokenFromExe(OidcExecutablePath, CloudHost, Unattended, Quiet, Hidden, IsHordeUrl);
if (InitialToken.IsValid())
{
return [OidcExecutablePath = std::filesystem::path(OidcExecutablePath),
@@ -212,12 +215,13 @@ namespace zen { namespace httpclientauth {
Token = InitialToken,
Quiet,
Unattended,
- Hidden]() mutable {
+ Hidden,
+ IsHordeUrl]() mutable {
if (!Token.NeedsRefresh())
{
return std::move(Token);
}
- return GetOidcTokenFromExe(OidcExecutablePath, CloudHost, Unattended, Quiet, Hidden);
+ return GetOidcTokenFromExe(OidcExecutablePath, CloudHost, Unattended, Quiet, Hidden, IsHordeUrl);
};
}
return {};
diff --git a/src/zenhttp/include/zenhttp/httpclientauth.h b/src/zenhttp/include/zenhttp/httpclientauth.h
index ce646ebd7..9220a50b6 100644
--- a/src/zenhttp/include/zenhttp/httpclientauth.h
+++ b/src/zenhttp/include/zenhttp/httpclientauth.h
@@ -33,7 +33,8 @@ namespace httpclientauth {
std::string_view CloudHost,
bool Quiet,
bool Unattended,
- bool Hidden);
+ bool Hidden,
+ bool IsHordeUrl = false);
} // namespace httpclientauth
} // namespace zen