aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/diag
diff options
context:
space:
mode:
author Stefan Boberg <[email protected]> 2026-03-09 17:43:08 +0100
committer GitHub Enterprise <[email protected]> 2026-03-09 17:43:08 +0100
commit b37b34ea6ad906f54e8104526e77ba66aed997da (patch)
tree e80ce17d666aff6d2f0d73d4977128ffb4055476 /src/zenserver/diag
parent add fallback for zencache multirange (#816) (diff)
download zen-b37b34ea6ad906f54e8104526e77ba66aed997da.tar.xz
zen-b37b34ea6ad906f54e8104526e77ba66aed997da.zip
Dashboard overhaul, compute integration (#814)
- **Frontend dashboard overhaul**: Unified compute/main dashboards into a single shared UI. Added new pages for cache, projects, metrics, sessions, info (build/runtime config, system stats). Added live-update via WebSockets with pause control, sortable detail tables, themed styling. Refactored compute/hub/orchestrator pages into modular JS.
- **HTTP server fixes and stats**: Fixed http.sys local-only fallback when default port is in use, implemented root endpoint redirect for http.sys, fixed Linux/Mac port reuse. Added /stats endpoint exposing HTTP server metrics (bytes transferred, request rates). Added WebSocket stats tracking.
- **OTEL/diagnostics hardening**: Improved OTLP HTTP exporter with better error handling and resilience. Extended diagnostics services configuration.
- **Session management**: Added new sessions service with HTTP endpoints for registering, updating, querying, and removing sessions. Includes session log file support. This is still WIP.
- **CLI subcommand support**: Added support for commands with subcommands in the zen CLI tool, with improved command dispatch.
- **Misc**: Exposed CPU usage/hostname to frontend, fixed JS compact binary float32/float64 decoding, limited projects displayed on front page to 25 sorted by last access, added vscode:// link support. Also contains some fixes from TSAN analysis.
Diffstat (limited to 'src/zenserver/diag')
-rw-r--r-- src/zenserver/diag/diagsvcs.cpp 31
-rw-r--r-- src/zenserver/diag/diagsvcs.h 15
-rw-r--r-- src/zenserver/diag/otlphttp.cpp 59
-rw-r--r-- src/zenserver/diag/otlphttp.h 13
4 files changed, 99 insertions, 19 deletions
diff --git a/src/zenserver/diag/diagsvcs.cpp b/src/zenserver/diag/diagsvcs.cpp
index 5fa81ff9f..dd4b8956c 100644
--- a/src/zenserver/diag/diagsvcs.cpp
+++ b/src/zenserver/diag/diagsvcs.cpp
@@ -9,6 +9,7 @@
#include <zencore/logging.h>
#include <zencore/memory/llm.h>
#include <zencore/string.h>
+#include <zencore/system.h>
#include <fstream>
#include <sstream>
@@ -51,6 +52,36 @@ HttpHealthService::HttpHealthService()
Writer << "AbsLogPath"sv << m_HealthInfo.AbsLogPath.string();
Writer << "BuildVersion"sv << m_HealthInfo.BuildVersion;
Writer << "HttpServerClass"sv << m_HealthInfo.HttpServerClass;
+ Writer << "Port"sv << m_HealthInfo.Port;
+ Writer << "Pid"sv << m_HealthInfo.Pid;
+ Writer << "IsDedicated"sv << m_HealthInfo.IsDedicated;
+ Writer << "StartTimeMs"sv << m_HealthInfo.StartTimeMs;
+ }
+
+ Writer.BeginObject("RuntimeConfig"sv);
+ for (const auto& Opt : m_HealthInfo.RuntimeConfig)
+ {
+ Writer << Opt.first << Opt.second;
+ }
+ Writer.EndObject();
+
+ Writer.BeginObject("BuildConfig"sv);
+ for (const auto& Opt : m_HealthInfo.BuildOptions)
+ {
+ Writer << Opt.first << Opt.second;
+ }
+ Writer.EndObject();
+
+ Writer << "Hostname"sv << GetMachineName();
+ Writer << "Platform"sv << GetRuntimePlatformName();
+ Writer << "Arch"sv << GetCpuName();
+ Writer << "OS"sv << GetOperatingSystemVersion();
+
+ {
+ auto Metrics = GetSystemMetrics();
+ Writer.BeginObject("System"sv);
+ Describe(Metrics, Writer);
+ Writer.EndObject();
}
HttpReq.WriteResponse(HttpResponseCode::OK, Writer.Save());
diff --git a/src/zenserver/diag/diagsvcs.h b/src/zenserver/diag/diagsvcs.h
index 8cc869c83..87ce80b3c 100644
--- a/src/zenserver/diag/diagsvcs.h
+++ b/src/zenserver/diag/diagsvcs.h
@@ -6,6 +6,7 @@
#include <zenhttp/httpserver.h>
#include <filesystem>
+#include <vector>
//////////////////////////////////////////////////////////////////////////
@@ -89,10 +90,16 @@ private:
struct HealthServiceInfo
{
- std::filesystem::path DataRoot;
- std::filesystem::path AbsLogPath;
- std::string HttpServerClass;
- std::string BuildVersion;
+ std::filesystem::path DataRoot;
+ std::filesystem::path AbsLogPath;
+ std::string HttpServerClass;
+ std::string BuildVersion;
+ int Port = 0;  // written to the health info response under the "Port" key (diagsvcs.cpp)
+ int Pid = 0;  // written to the health info response under the "Pid" key
+ bool IsDedicated = false;  // written to the health info response under the "IsDedicated" key
+ int64_t StartTimeMs = 0;  // written under "StartTimeMs"; presumably milliseconds, clock/epoch not visible here — TODO confirm
+ std::vector<std::pair<std::string_view, bool>> BuildOptions;  // each (name, flag) pair is emitted inside the "BuildConfig" object; NOTE(review): string_view keys do not own their text — confirm the referenced storage outlives this struct
+ std::vector<std::pair<std::string_view, std::string>> RuntimeConfig;  // each (name, value) pair is emitted inside the "RuntimeConfig" object; same key-lifetime caveat as BuildOptions
};
/** Health monitoring endpoint
diff --git a/src/zenserver/diag/otlphttp.cpp b/src/zenserver/diag/otlphttp.cpp
index 1434c9331..d6e24cbe3 100644
--- a/src/zenserver/diag/otlphttp.cpp
+++ b/src/zenserver/diag/otlphttp.cpp
@@ -10,11 +10,18 @@
#include <protozero/buffer_string.hpp>
#include <protozero/pbf_builder.hpp>
+#include <cstdio>
+
#if ZEN_WITH_OTEL
namespace zen::logging {
//////////////////////////////////////////////////////////////////////////
+//
+// Important note: in general we cannot use ZEN_WARN/ZEN_ERROR etc in this
+// file as it could cause recursive logging calls when we attempt to log
+// errors from the OTLP HTTP client itself.
+//
OtelHttpProtobufSink::OtelHttpProtobufSink(const std::string_view& Uri) : m_OtelHttp(Uri)
{
@@ -36,14 +43,44 @@ OtelHttpProtobufSink::~OtelHttpProtobufSink()
}
void
+OtelHttpProtobufSink::CheckPostResult(const HttpClient::Response& Result, const char* Endpoint) noexcept  // records the outcome of one OTLP POST; Endpoint is passed to ErrorMessage() for the error text
+{
+ if (!Result.IsSuccess())
+ {
+ uint32_t PrevFailures = m_ConsecutivePostFailures.fetch_add(1);  // fetch_add returns the count from before this failure
+ if (PrevFailures < kMaxReportedFailures)  // print only the first kMaxReportedFailures failures of a streak
+ {
+ fprintf(stderr, "OtelHttpProtobufSink: %s\n", Result.ErrorMessage(Endpoint).c_str());  // stderr rather than ZEN_WARN/ZEN_ERROR, per the file note about recursive logging. NOTE(review): if ErrorMessage() can throw (e.g. on allocation), noexcept turns that into std::terminate — confirm
+ if (PrevFailures + 1 == kMaxReportedFailures)  // this was the last failure that will be printed for this streak
+ {
+ fprintf(stderr, "OtelHttpProtobufSink: suppressing further export errors\n");
+ }
+ }
+ }
+ else
+ {
+ m_ConsecutivePostFailures.store(0);  // a success ends the streak, so later failures are printed again
+ }
+}
+
+void
OtelHttpProtobufSink::RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans)
{
- std::string Data = m_Encoder.FormatOtelTrace(Trace, Spans);
+ try  // a std::exception thrown while encoding or posting is caught below and printed to stderr
+ {
+ std::string Data = m_Encoder.FormatOtelTrace(Trace, Spans);  // encode the spans into the request body
+
+ IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()};  // presumably a non-owning view over Data, so Data must stay alive through Post() — TODO confirm IoBuffer::Wrap semantics
+ Payload.SetContentType(ZenContentType::kProtobuf);
- IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()};
- Payload.SetContentType(ZenContentType::kProtobuf);
+ HttpClient::Response Result = m_OtelHttp.Post("/v1/traces", Payload);
- auto Result = m_OtelHttp.Post("/v1/traces", Payload);
+ CheckPostResult(Result, "POST /v1/traces");  // updates the consecutive-failure counter and prints failures (rate-limited)
+ }
+ catch (const std::exception& Ex)  // only std::exception-derived errors are handled here; anything else still propagates
+ {
+ fprintf(stderr, "OtelHttpProtobufSink: exception exporting traces: %s\n", Ex.what());  // stderr, not the logger, per the file note about recursive logging
+ }
}
void
@@ -55,22 +92,20 @@ OtelHttpProtobufSink::TraceRecorder::RecordSpans(zen::otel::TraceId Trace, std::
void
OtelHttpProtobufSink::Log(const LogMessage& Msg)
{
+ try  // a std::exception thrown while encoding or posting the log record is caught below and printed to stderr
{
std::string Data = m_Encoder.FormatOtelProtobuf(Msg);
IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()};
Payload.SetContentType(ZenContentType::kProtobuf);
- auto Result = m_OtelHttp.Post("/v1/logs", Payload);
- }
+ HttpClient::Response Result = m_OtelHttp.Post("/v1/logs", Payload);
+ CheckPostResult(Result, "POST /v1/logs");  // updates the consecutive-failure counter and prints failures (rate-limited)
+ }
+ catch (const std::exception& Ex)  // only std::exception-derived errors are handled here; anything else still propagates
{
- std::string Data = m_Encoder.FormatOtelMetrics();
-
- IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()};
- Payload.SetContentType(ZenContentType::kProtobuf);
-
- auto Result = m_OtelHttp.Post("/v1/metrics", Payload);
+ fprintf(stderr, "OtelHttpProtobufSink: exception exporting logs: %s\n", Ex.what());  // stderr, not the logger, per the file note about recursive logging
}
}
void
diff --git a/src/zenserver/diag/otlphttp.h b/src/zenserver/diag/otlphttp.h
index 8254af04d..64b3dbc87 100644
--- a/src/zenserver/diag/otlphttp.h
+++ b/src/zenserver/diag/otlphttp.h
@@ -9,6 +9,8 @@
#include <zentelemetry/otlpencoder.h>
#include <zentelemetry/otlptrace.h>
+#include <atomic>
+
#if ZEN_WITH_OTEL
namespace zen::logging {
@@ -36,6 +38,7 @@ private:
virtual void SetFormatter(std::unique_ptr<Formatter>) override {}
void RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans);
+ void CheckPostResult(const HttpClient::Response& Result, const char* Endpoint) noexcept;
// This is just a thin wrapper to call back into the sink while participating in
// reference counting from the OTEL trace back-end
@@ -53,9 +56,13 @@ private:
OtelHttpProtobufSink* m_Sink;
};
- HttpClient m_OtelHttp;
- OtlpEncoder m_Encoder;
- Ref<TraceRecorder> m_TraceRecorder;
+ static constexpr uint32_t kMaxReportedFailures = 5;
+
+ RwLock m_Lock;
+ std::atomic<uint32_t> m_ConsecutivePostFailures{0};
+ HttpClient m_OtelHttp;
+ OtlpEncoder m_Encoder;
+ Ref<TraceRecorder> m_TraceRecorder;
};
} // namespace zen::logging