diff options
| author | Stefan Boberg <[email protected]> | 2026-03-09 17:43:08 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-03-09 17:43:08 +0100 |
| commit | b37b34ea6ad906f54e8104526e77ba66aed997da (patch) | |
| tree | e80ce17d666aff6d2f0d73d4977128ffb4055476 /src/zenserver/diag | |
| parent | add fallback for zencache multirange (#816) (diff) | |
| download | zen-b37b34ea6ad906f54e8104526e77ba66aed997da.tar.xz zen-b37b34ea6ad906f54e8104526e77ba66aed997da.zip | |
Dashboard overhaul, compute integration (#814)
- **Frontend dashboard overhaul**: Unified compute/main dashboards into a single shared UI. Added new pages for cache, projects, metrics, sessions, info (build/runtime config, system stats). Added live-update via WebSockets with pause control, sortable detail tables, themed styling. Refactored compute/hub/orchestrator pages into modular JS.
- **HTTP server fixes and stats**: Fixed http.sys local-only fallback when default port is in use, implemented root endpoint redirect for http.sys, fixed Linux/Mac port reuse. Added /stats endpoint exposing HTTP server metrics (bytes transferred, request rates). Added WebSocket stats tracking.
- **OTEL/diagnostics hardening**: Improved OTLP HTTP exporter with better error handling and resilience. Extended diagnostics services configuration.
- **Session management**: Added new sessions service with HTTP endpoints for registering, updating, querying, and removing sessions. Includes session log file support. This is still WIP.
- **CLI subcommand support**: Added support for commands with subcommands in the zen CLI tool, with improved command dispatch.
- **Misc**: Exposed CPU usage/hostname to frontend, fixed JS compact binary float32/float64 decoding, limited projects displayed on front page to 25 sorted by last access, added vscode:// link support.
Also contains some fixes from TSAN analysis.
Diffstat (limited to 'src/zenserver/diag')
| -rw-r--r-- | src/zenserver/diag/diagsvcs.cpp | 31 | ||||
| -rw-r--r-- | src/zenserver/diag/diagsvcs.h | 15 | ||||
| -rw-r--r-- | src/zenserver/diag/otlphttp.cpp | 59 | ||||
| -rw-r--r-- | src/zenserver/diag/otlphttp.h | 13 |
4 files changed, 99 insertions, 19 deletions
diff --git a/src/zenserver/diag/diagsvcs.cpp b/src/zenserver/diag/diagsvcs.cpp index 5fa81ff9f..dd4b8956c 100644 --- a/src/zenserver/diag/diagsvcs.cpp +++ b/src/zenserver/diag/diagsvcs.cpp @@ -9,6 +9,7 @@ #include <zencore/logging.h> #include <zencore/memory/llm.h> #include <zencore/string.h> +#include <zencore/system.h> #include <fstream> #include <sstream> @@ -51,6 +52,36 @@ HttpHealthService::HttpHealthService() Writer << "AbsLogPath"sv << m_HealthInfo.AbsLogPath.string(); Writer << "BuildVersion"sv << m_HealthInfo.BuildVersion; Writer << "HttpServerClass"sv << m_HealthInfo.HttpServerClass; + Writer << "Port"sv << m_HealthInfo.Port; + Writer << "Pid"sv << m_HealthInfo.Pid; + Writer << "IsDedicated"sv << m_HealthInfo.IsDedicated; + Writer << "StartTimeMs"sv << m_HealthInfo.StartTimeMs; + } + + Writer.BeginObject("RuntimeConfig"sv); + for (const auto& Opt : m_HealthInfo.RuntimeConfig) + { + Writer << Opt.first << Opt.second; + } + Writer.EndObject(); + + Writer.BeginObject("BuildConfig"sv); + for (const auto& Opt : m_HealthInfo.BuildOptions) + { + Writer << Opt.first << Opt.second; + } + Writer.EndObject(); + + Writer << "Hostname"sv << GetMachineName(); + Writer << "Platform"sv << GetRuntimePlatformName(); + Writer << "Arch"sv << GetCpuName(); + Writer << "OS"sv << GetOperatingSystemVersion(); + + { + auto Metrics = GetSystemMetrics(); + Writer.BeginObject("System"sv); + Describe(Metrics, Writer); + Writer.EndObject(); } HttpReq.WriteResponse(HttpResponseCode::OK, Writer.Save()); diff --git a/src/zenserver/diag/diagsvcs.h b/src/zenserver/diag/diagsvcs.h index 8cc869c83..87ce80b3c 100644 --- a/src/zenserver/diag/diagsvcs.h +++ b/src/zenserver/diag/diagsvcs.h @@ -6,6 +6,7 @@ #include <zenhttp/httpserver.h> #include <filesystem> +#include <vector> ////////////////////////////////////////////////////////////////////////// @@ -89,10 +90,16 @@ private: struct HealthServiceInfo { - std::filesystem::path DataRoot; - std::filesystem::path AbsLogPath; - std::string HttpServerClass; - std::string BuildVersion; + std::filesystem::path DataRoot; + std::filesystem::path AbsLogPath; + std::string HttpServerClass; + std::string BuildVersion; + int Port = 0; + int Pid = 0; + bool IsDedicated = false; + int64_t StartTimeMs = 0; + std::vector<std::pair<std::string_view, bool>> BuildOptions; + std::vector<std::pair<std::string_view, std::string>> RuntimeConfig; }; /** Health monitoring endpoint diff --git a/src/zenserver/diag/otlphttp.cpp b/src/zenserver/diag/otlphttp.cpp index 1434c9331..d6e24cbe3 100644 --- a/src/zenserver/diag/otlphttp.cpp +++ b/src/zenserver/diag/otlphttp.cpp @@ -10,11 +10,18 @@ #include <protozero/buffer_string.hpp> #include <protozero/pbf_builder.hpp> +#include <cstdio> + #if ZEN_WITH_OTEL namespace zen::logging { ////////////////////////////////////////////////////////////////////////// +// +// Important note: in general we cannot use ZEN_WARN/ZEN_ERROR etc in this +// file as it could cause recursive logging calls when we attempt to log +// errors from the OTLP HTTP client itself. +// OtelHttpProtobufSink::OtelHttpProtobufSink(const std::string_view& Uri) : m_OtelHttp(Uri) { @@ -36,14 +43,44 @@ OtelHttpProtobufSink::~OtelHttpProtobufSink() } void +OtelHttpProtobufSink::CheckPostResult(const HttpClient::Response& Result, const char* Endpoint) noexcept +{ + if (!Result.IsSuccess()) + { + uint32_t PrevFailures = m_ConsecutivePostFailures.fetch_add(1); + if (PrevFailures < kMaxReportedFailures) + { + fprintf(stderr, "OtelHttpProtobufSink: %s\n", Result.ErrorMessage(Endpoint).c_str()); + if (PrevFailures + 1 == kMaxReportedFailures) + { + fprintf(stderr, "OtelHttpProtobufSink: suppressing further export errors\n"); + } + } + } + else + { + m_ConsecutivePostFailures.store(0); + } +} + +void OtelHttpProtobufSink::RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) { - std::string Data = m_Encoder.FormatOtelTrace(Trace, Spans); + try + { + std::string Data = m_Encoder.FormatOtelTrace(Trace, Spans); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); - IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; - Payload.SetContentType(ZenContentType::kProtobuf); + HttpClient::Response Result = m_OtelHttp.Post("/v1/traces", Payload); - auto Result = m_OtelHttp.Post("/v1/traces", Payload); + CheckPostResult(Result, "POST /v1/traces"); + } + catch (const std::exception& Ex) + { + fprintf(stderr, "OtelHttpProtobufSink: exception exporting traces: %s\n", Ex.what()); + } } void @@ -55,22 +92,20 @@ OtelHttpProtobufSink::TraceRecorder::RecordSpans(zen::otel::TraceId Trace, std:: void OtelHttpProtobufSink::Log(const LogMessage& Msg) { + try { std::string Data = m_Encoder.FormatOtelProtobuf(Msg); IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; Payload.SetContentType(ZenContentType::kProtobuf); - auto Result = m_OtelHttp.Post("/v1/logs", Payload); - } + HttpClient::Response Result = m_OtelHttp.Post("/v1/logs", Payload); + CheckPostResult(Result, "POST /v1/logs"); + } + catch (const std::exception& Ex) { - std::string Data = m_Encoder.FormatOtelMetrics(); - - IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; - Payload.SetContentType(ZenContentType::kProtobuf); - - auto Result = m_OtelHttp.Post("/v1/metrics", Payload); + fprintf(stderr, "OtelHttpProtobufSink: exception exporting logs: %s\n", Ex.what()); } } void diff --git a/src/zenserver/diag/otlphttp.h b/src/zenserver/diag/otlphttp.h index 8254af04d..64b3dbc87 100644 --- a/src/zenserver/diag/otlphttp.h +++ b/src/zenserver/diag/otlphttp.h @@ -9,6 +9,8 @@ #include <zentelemetry/otlpencoder.h> #include <zentelemetry/otlptrace.h> +#include <atomic> + #if ZEN_WITH_OTEL namespace zen::logging { @@ -36,6 +38,7 @@ private: virtual void SetFormatter(std::unique_ptr<Formatter>) override {} void RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans); + void CheckPostResult(const HttpClient::Response& Result, const char* Endpoint) noexcept; // This is just a thin wrapper to call back into the sink while participating in // reference counting from the OTEL trace back-end @@ -53,9 +56,13 @@ private: OtelHttpProtobufSink* m_Sink; }; - HttpClient m_OtelHttp; - OtlpEncoder m_Encoder; - Ref<TraceRecorder> m_TraceRecorder; + static constexpr uint32_t kMaxReportedFailures = 5; + + RwLock m_Lock; + std::atomic<uint32_t> m_ConsecutivePostFailures{0}; + HttpClient m_OtelHttp; + OtlpEncoder m_Encoder; + Ref<TraceRecorder> m_TraceRecorder; }; } // namespace zen::logging |