diff options
| author | Liam Mitchell <[email protected]> | 2026-03-09 19:06:36 -0700 |
|---|---|---|
| committer | Liam Mitchell <[email protected]> | 2026-03-09 19:06:36 -0700 |
| commit | d1abc50ee9d4fb72efc646e17decafea741caa34 (patch) | |
| tree | e4288e00f2f7ca0391b83d986efcb69d3ba66a83 /src/zentelemetry | |
| parent | Allow requests with invalid content-types unless specified in command line or... (diff) | |
| parent | updated chunk–block analyser (#818) (diff) | |
| download | zen-d1abc50ee9d4fb72efc646e17decafea741caa34.tar.xz zen-d1abc50ee9d4fb72efc646e17decafea741caa34.zip | |
Merge branch 'main' into lm/restrict-content-type
Diffstat (limited to 'src/zentelemetry')
| -rw-r--r-- | src/zentelemetry/include/zentelemetry/otlpencoder.h | 8 | ||||
| -rw-r--r-- | src/zentelemetry/include/zentelemetry/otlptrace.h | 9 | ||||
| -rw-r--r-- | src/zentelemetry/include/zentelemetry/stats.h | 203 | ||||
| -rw-r--r-- | src/zentelemetry/otlpencoder.cpp | 44 | ||||
| -rw-r--r-- | src/zentelemetry/otlptrace.cpp | 4 | ||||
| -rw-r--r-- | src/zentelemetry/stats.cpp | 6 | ||||
| -rw-r--r-- | src/zentelemetry/xmake.lua | 2 |
7 files changed, 192 insertions, 84 deletions
diff --git a/src/zentelemetry/include/zentelemetry/otlpencoder.h b/src/zentelemetry/include/zentelemetry/otlpencoder.h index ed6665781..f280aa9ec 100644 --- a/src/zentelemetry/include/zentelemetry/otlpencoder.h +++ b/src/zentelemetry/include/zentelemetry/otlpencoder.h @@ -13,9 +13,9 @@ # include <protozero/pbf_builder.hpp> # include <protozero/types.hpp> -namespace spdlog { namespace details { - struct log_msg; -}} // namespace spdlog::details +namespace zen::logging { +struct LogMessage; +} // namespace zen::logging namespace zen::otel { enum class Resource : protozero::pbf_tag_type; @@ -46,7 +46,7 @@ public: void AddResourceAttribute(const std::string_view& Key, const std::string_view& Value); void AddResourceAttribute(const std::string_view& Key, int64_t Value); - std::string FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const; + std::string FormatOtelProtobuf(const logging::LogMessage& Msg) const; std::string FormatOtelMetrics() const; std::string FormatOtelTrace(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) const; diff --git a/src/zentelemetry/include/zentelemetry/otlptrace.h b/src/zentelemetry/include/zentelemetry/otlptrace.h index 49dd90358..95718af55 100644 --- a/src/zentelemetry/include/zentelemetry/otlptrace.h +++ b/src/zentelemetry/include/zentelemetry/otlptrace.h @@ -317,6 +317,7 @@ public: ExtendableStringBuilder<128> NameBuilder; NamingFunction(NameBuilder); + Initialize(NameBuilder); } /** Construct a new span with a naming function AND initializer function @@ -350,7 +351,13 @@ public: // Execute a function with the span pointer if valid. This can // be used to add attributes or events to the span after creation - inline void WithSpan(auto Func) const { Func(*m_Span); } + inline void WithSpan(auto Func) const + { + if (m_Span) + { + Func(*m_Span); + } + } private: void Initialize(std::string_view Name); diff --git a/src/zentelemetry/include/zentelemetry/stats.h b/src/zentelemetry/include/zentelemetry/stats.h index 3e67bac1c..260b0fcfb 100644 --- a/src/zentelemetry/include/zentelemetry/stats.h +++ b/src/zentelemetry/include/zentelemetry/stats.h @@ -16,11 +16,17 @@ class CbObjectWriter; namespace zen::metrics { +/** A single atomic value that can be set and read at any time. + * + * Useful for point-in-time readings such as queue depth, active connection count, + * or any value where only the current state matters rather than history. + */ template<typename T> class Gauge { public: Gauge() : m_Value{0} {} + explicit Gauge(T InitialValue) : m_Value{InitialValue} {} T Value() const { return m_Value; } void SetValue(T Value) { m_Value = Value; } @@ -29,12 +35,12 @@ private: std::atomic<T> m_Value; }; -/** Stats counter +/** Monotonically increasing (or decreasing) counter. * - * A counter is modified by adding or subtracting a value from a current value. - * This would typically be used to track number of requests in flight, number - * of active jobs etc + * Suitable for tracking quantities that go up and down over time, such as + * requests in flight or active jobs. All operations are lock-free via atomics. * + * Unlike a Meter, a Counter does not track rates — it only records a running total. */ class Counter { @@ -50,34 +56,56 @@ private: std::atomic<uint64_t> m_count{0}; }; -/** Exponential Weighted Moving Average - - This is very raw, to use as little state as possible. If we - want to use this more broadly in user code we should perhaps - add a more user-friendly wrapper +/** Low-level exponential weighted moving average. + * + * Tracks a smoothed rate using the standard EWMA recurrence: + * + * rate = rate + alpha * (instantRate - rate) + * + * where instantRate = Count / Interval. The alpha value controls how quickly + * the average responds to changes — higher alpha means more weight on recent + * samples. Typical alphas are derived from a decay half-life (e.g. 1, 5, 15 + * minutes) and a fixed tick interval. + * + * This class is intentionally minimal to keep per-instance state to a single + * atomic double. See Meter for a more convenient wrapper. */ - class RawEWMA { public: - /// <summary> - /// Update EWMA with new measure - /// </summary> - /// <param name="Alpha">Smoothing factor (between 0 and 1)</param> - /// <param name="Interval">Elapsed time since last</param> - /// <param name="Count">Value</param> - /// <param name="IsInitialUpdate">Whether this is the first update or not</param> - void Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate); + /** Update the EWMA with a new observation. + * + * @param Alpha Smoothing factor in (0, 1). Smaller values give a + * slower-moving average; larger values track recent + * changes more aggressively. + * @param Interval Elapsed hi-freq timer ticks since the last Tick call. + * Used to compute the instantaneous rate as Count/Interval. + * @param Count Number of events observed during this interval. + * @param IsInitialUpdate True on the very first call: seeds the rate directly + * from the instantaneous rate rather than blending it in. + */ + void Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate); + + /** Returns the current smoothed rate in events per second. */ double Rate() const; private: std::atomic<double> m_Rate = 0; }; -/// <summary> -/// Tracks rate of events over time (i.e requests/sec), using -/// exponential moving averages -/// </summary> +/** Tracks the rate of events over time using exponential moving averages. + * + * Maintains three EWMA windows (1, 5, 15 minutes) in addition to a simple + * mean rate computed from the total count and elapsed wall time since + * construction. This mirrors the load-average conventions familiar from Unix. + * + * Rate updates are batched: Mark() accumulates a pending count and the EWMA + * is only advanced every ~5 seconds (controlled by kTickIntervalInSeconds), + * keeping contention low even under heavy call rates. Rates are returned in + * events per second. + * + * All operations are thread-safe via lock-free atomics. + */ class Meter { public: @@ -85,18 +113,18 @@ public: ~Meter(); inline uint64_t Count() const { return m_TotalCount; } - double Rate1(); // One-minute rate - double Rate5(); // Five-minute rate - double Rate15(); // Fifteen-minute rate - double MeanRate() const; // Mean rate since instantiation of this meter + double Rate1(); // One-minute EWMA rate (events/sec) + double Rate5(); // Five-minute EWMA rate (events/sec) + double Rate15(); // Fifteen-minute EWMA rate (events/sec) + double MeanRate() const; // Mean rate since instantiation (events/sec) void Mark(uint64_t Count = 1); // Register one or more events private: std::atomic<uint64_t> m_TotalCount{0}; // Accumulator counting number of marks since beginning - std::atomic<uint64_t> m_PendingCount{0}; // Pending EWMA update accumulator - std::atomic<uint64_t> m_StartTick{0}; // Time this was instantiated (for mean) - std::atomic<uint64_t> m_LastTick{0}; // Timestamp of last EWMA tick - std::atomic<int64_t> m_Remainder{0}; // Tracks the "modulo" of tick time + std::atomic<uint64_t> m_PendingCount{0}; // Pending EWMA update accumulator; drained on each tick + std::atomic<uint64_t> m_StartTick{0}; // Hi-freq timer value at construction (for MeanRate) + std::atomic<uint64_t> m_LastTick{0}; // Hi-freq timer value of the last EWMA tick + std::atomic<int64_t> m_Remainder{0}; // Accumulated ticks not yet consumed by EWMA updates bool m_IsFirstTick = true; RawEWMA m_RateM1; RawEWMA m_RateM5; @@ -106,7 +134,14 @@ private: void Tick(); }; -/** Moment-in-time snapshot of a distribution +/** Immutable sorted snapshot of a reservoir sample. + * + * Constructed from a vector of sampled values which are sorted on construction. + * Percentiles are computed on demand via linear interpolation between adjacent + * sorted values, following the standard R-7 quantile method. + * + * Because this is a copy of the reservoir at a point in time, it can be held + * and queried without holding any locks on the source UniformSample. */ class SampleSnapshot { @@ -128,12 +163,19 @@ private: std::vector<double> m_Values; }; -/** Randomly selects samples from a stream. Uses Vitter's - Algorithm R to produce a statistically representative sample. - - http://www.cs.umd.edu/~samir/498/vitter.pdf - Random Sampling with a Reservoir +/** Reservoir sampler for probabilistic distribution tracking. + * + * Maintains a fixed-size reservoir of samples drawn uniformly from the full + * history of values using Vitter's Algorithm R. This gives an unbiased + * statistical representation of the value distribution regardless of how many + * total values have been observed, at the cost of O(ReservoirSize) memory. + * + * A larger reservoir improves accuracy of tail percentiles (P99, P999) but + * increases memory and snapshot cost. The default of 1028 gives good accuracy + * for most telemetry uses. + * + * http://www.cs.umd.edu/~samir/498/vitter.pdf - Random Sampling with a Reservoir */ - class UniformSample { public: @@ -159,7 +201,14 @@ private: std::vector<std::atomic<int64_t>> m_Values; }; -/** Track (probabilistic) sample distribution along with min/max +/** Tracks the statistical distribution of a stream of values. + * + * Records exact min, max, count and mean across all values ever seen, plus a + * reservoir sample (via UniformSample) used to compute percentiles. Percentiles + * are therefore probabilistic — they reflect the distribution of a representative + * sample rather than the full history. + * + * All operations are thread-safe via lock-free atomics. */ class Histogram { @@ -183,11 +232,28 @@ private: std::atomic<int64_t> m_Count{0}; }; -/** Track timing and frequency of some operation - - Example usage would be to track frequency and duration of network - requests, or function calls. - +/** Combines a Histogram and a Meter to track both the distribution and rate + * of a recurring operation. + * + * Duration values are stored in hi-freq timer ticks. Use GetHifreqTimerToSeconds() + * when converting for display. + * + * Typical usage via the RAII Scope helper: + * + * OperationTiming MyTiming; + * + * { + * OperationTiming::Scope Scope(MyTiming); + * DoWork(); + * // Scope destructor calls Stop() automatically + * } + * + * // Or cancel if the operation should not be counted: + * { + * OperationTiming::Scope Scope(MyTiming); + * if (CacheHit) { Scope.Cancel(); return; } + * DoExpensiveWork(); + * } */ class OperationTiming { @@ -207,13 +273,19 @@ public: double Rate15() { return m_Meter.Rate15(); } double MeanRate() const { return m_Meter.MeanRate(); } + /** RAII helper that records duration from construction to Stop() or destruction. + * + * Call Cancel() to discard the measurement (e.g. for cache hits that should + * not skew latency statistics). After Stop() or Cancel() the destructor is a + * no-op. + */ struct Scope { Scope(OperationTiming& Outer); ~Scope(); - void Stop(); - void Cancel(); + void Stop(); // Record elapsed time and mark the meter + void Cancel(); // Discard this measurement; destructor becomes a no-op private: OperationTiming& m_Outer; @@ -225,6 +297,7 @@ private: Histogram m_Histogram; }; +/** Immutable snapshot of a Meter's state at a point in time. */ struct MeterSnapshot { uint64_t Count; @@ -234,6 +307,12 @@ struct MeterSnapshot double Rate15; }; +/** Immutable snapshot of a Histogram's state at a point in time. + * + * Count and all statistical values have been scaled by the ConversionFactor + * supplied when the snapshot was taken (e.g. GetHifreqTimerToSeconds() to + * convert timer ticks to seconds). + */ struct HistogramSnapshot { double Count; @@ -246,24 +325,29 @@ struct HistogramSnapshot double P999; }; +/** Combined snapshot of a Meter and Histogram pair. */ struct StatsSnapshot { MeterSnapshot Meter; HistogramSnapshot Histogram; }; +/** Combined snapshot of request timing and byte transfer statistics. */ struct RequestStatsSnapshot { StatsSnapshot Requests; StatsSnapshot Bytes; }; -/** Metrics for network requests - - Aggregates tracking of duration, payload sizes into a single - class - - */ +/** Tracks both the timing and payload size of network requests. + * + * Maintains two independent histogram+meter pairs: one for request duration + * (in hi-freq timer ticks) and one for transferred bytes. Both dimensions + * share the same request count — a single Update() call advances both. + * + * Duration accessors return values in hi-freq timer ticks. Multiply by + * GetHifreqTimerToSeconds() to convert to seconds. + */ class RequestStats { public: @@ -275,9 +359,9 @@ public: // Timing - int64_t MaxDuration() const { return m_BytesHistogram.Max(); } - int64_t MinDuration() const { return m_BytesHistogram.Min(); } - double MeanDuration() const { return m_BytesHistogram.Mean(); } + int64_t MaxDuration() const { return m_RequestTimeHistogram.Max(); } + int64_t MinDuration() const { return m_RequestTimeHistogram.Min(); } + double MeanDuration() const { return m_RequestTimeHistogram.Mean(); } SampleSnapshot DurationSnapshot() const { return m_RequestTimeHistogram.Snapshot(); } double Rate1() { return m_RequestMeter.Rate1(); } double Rate5() { return m_RequestMeter.Rate5(); } @@ -295,14 +379,23 @@ public: double ByteRate15() { return m_BytesMeter.Rate15(); } double ByteMeanRate() const { return m_BytesMeter.MeanRate(); } + /** RAII helper that records duration and byte count from construction to Stop() + * or destruction. + * + * The byte count can be supplied at construction or updated at any point via + * SetBytes() before the scope ends — useful when the response size is not + * known until the operation completes. + * + * Call Cancel() to discard the measurement entirely. + */ struct Scope { Scope(RequestStats& Outer, int64_t Bytes); ~Scope(); void SetBytes(int64_t Bytes) { m_Bytes = Bytes; } - void Stop(); - void Cancel(); + void Stop(); // Record elapsed time and byte count + void Cancel(); // Discard this measurement; destructor becomes a no-op private: RequestStats& m_Outer; diff --git a/src/zentelemetry/otlpencoder.cpp b/src/zentelemetry/otlpencoder.cpp index 677545066..5477c5381 100644 --- a/src/zentelemetry/otlpencoder.cpp +++ b/src/zentelemetry/otlpencoder.cpp @@ -3,9 +3,9 @@ #include "zentelemetry/otlpencoder.h" #include <zenbase/zenbase.h> +#include <zencore/logging/logmsg.h> #include <zentelemetry/otlptrace.h> -#include <spdlog/sinks/sink.h> #include <zencore/testing.h> #include <protozero/buffer_string.hpp> @@ -29,49 +29,49 @@ OtlpEncoder::~OtlpEncoder() } static int -MapSeverity(const spdlog::level::level_enum Level) +MapSeverity(const logging::LogLevel Level) { switch (Level) { - case spdlog::level::critical: + case logging::Critical: return otel::SEVERITY_NUMBER_FATAL; - case spdlog::level::err: + case logging::Err: return otel::SEVERITY_NUMBER_ERROR; - case spdlog::level::warn: + case logging::Warn: return otel::SEVERITY_NUMBER_WARN; - case spdlog::level::info: + case logging::Info: return otel::SEVERITY_NUMBER_INFO; - case spdlog::level::debug: + case logging::Debug: return otel::SEVERITY_NUMBER_DEBUG; default: - case spdlog::level::trace: + case logging::Trace: return otel::SEVERITY_NUMBER_TRACE; } } static const char* -MapSeverityText(const spdlog::level::level_enum Level) +MapSeverityText(const logging::LogLevel Level) { switch (Level) { - case spdlog::level::critical: + case logging::Critical: return "fatal"; - case spdlog::level::err: + case logging::Err: return "error"; - case spdlog::level::warn: + case logging::Warn: return "warn"; - case spdlog::level::info: + case logging::Info: return "info"; - case spdlog::level::debug: + case logging::Debug: return "debug"; default: - case spdlog::level::trace: + case logging::Trace: return "trace"; } } std::string -OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const +OtlpEncoder::FormatOtelProtobuf(const logging::LogMessage& Msg) const { std::string Data; @@ -98,7 +98,7 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const protozero::pbf_builder<otel::InstrumentationScope> IsBuilder{SlBuilder, otel::ScopeLogs::required_InstrumentationScope_scope}; - IsBuilder.add_string(otel::InstrumentationScope::string_name, Msg.logger_name.data(), Msg.logger_name.size()); + IsBuilder.add_string(otel::InstrumentationScope::string_name, Msg.GetLoggerName().data(), Msg.GetLoggerName().size()); } // LogRecord log_records @@ -106,13 +106,13 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const protozero::pbf_builder<otel::LogRecord> LrBuilder{SlBuilder, otel::ScopeLogs::required_repeated_LogRecord_log_records}; LrBuilder.add_fixed64(otel::LogRecord::required_fixed64_time_unix_nano, - std::chrono::duration_cast<std::chrono::nanoseconds>(Msg.time.time_since_epoch()).count()); + std::chrono::duration_cast<std::chrono::nanoseconds>(Msg.GetTime().time_since_epoch()).count()); - const int Severity = MapSeverity(Msg.level); + const int Severity = MapSeverity(Msg.GetLevel()); LrBuilder.add_enum(otel::LogRecord::optional_SeverityNumber_severity_number, Severity); - LrBuilder.add_string(otel::LogRecord::optional_string_severity_text, MapSeverityText(Msg.level)); + LrBuilder.add_string(otel::LogRecord::optional_string_severity_text, MapSeverityText(Msg.GetLevel())); otel::TraceId TraceId; const otel::SpanId SpanId = otel::Span::GetCurrentSpanId(TraceId); @@ -127,7 +127,7 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const { protozero::pbf_builder<otel::AnyValue> BodyBuilder{LrBuilder, otel::LogRecord::optional_anyvalue_body}; - BodyBuilder.add_string(otel::AnyValue::string_string_value, Msg.payload.data(), Msg.payload.size()); + BodyBuilder.add_string(otel::AnyValue::string_string_value, Msg.GetPayload().data(), Msg.GetPayload().size()); } // attributes @@ -139,7 +139,7 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const { protozero::pbf_builder<otel::AnyValue> AvBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; - AvBuilder.add_int64(otel::AnyValue::int64_int_value, Msg.thread_id); + AvBuilder.add_int64(otel::AnyValue::int64_int_value, Msg.GetThreadId()); } } } diff --git a/src/zentelemetry/otlptrace.cpp b/src/zentelemetry/otlptrace.cpp index 6a095cfeb..3888717d5 100644 --- a/src/zentelemetry/otlptrace.cpp +++ b/src/zentelemetry/otlptrace.cpp @@ -385,6 +385,8 @@ otlptrace_forcelink() # if ZEN_WITH_TESTS +TEST_SUITE_BEGIN("telemetry.otlptrace"); + TEST_CASE("otlp.trace") { // Enable OTLP tracing for the duration of this test @@ -409,6 +411,8 @@ TEST_CASE("otlp.trace") } } +TEST_SUITE_END(); + # endif } // namespace zen::otel diff --git a/src/zentelemetry/stats.cpp b/src/zentelemetry/stats.cpp index c67fa3c66..a417bb52c 100644 --- a/src/zentelemetry/stats.cpp +++ b/src/zentelemetry/stats.cpp @@ -631,7 +631,7 @@ EmitSnapshot(const HistogramSnapshot& Snapshot, CbObjectWriter& Cbo) { Cbo << "t_count" << Snapshot.Count << "t_avg" << Snapshot.Avg; Cbo << "t_min" << Snapshot.Min << "t_max" << Snapshot.Max; - Cbo << "t_p75" << Snapshot.P75 << "t_p95" << Snapshot.P95 << "t_p99" << Snapshot.P999; + Cbo << "t_p75" << Snapshot.P75 << "t_p95" << Snapshot.P95 << "t_p99" << Snapshot.P99 << "t_p999" << Snapshot.P999; } void @@ -660,6 +660,8 @@ EmitSnapshot(std::string_view Tag, const RequestStatsSnapshot& Snapshot, CbObjec #if ZEN_WITH_TESTS +TEST_SUITE_BEGIN("telemetry.stats"); + TEST_CASE("Core.Stats.Histogram") { Histogram Histo{258}; @@ -819,6 +821,8 @@ TEST_CASE("Meter") # endif } +TEST_SUITE_END(); + namespace zen { void diff --git a/src/zentelemetry/xmake.lua b/src/zentelemetry/xmake.lua index 7739c0a08..cd9a18ec4 100644 --- a/src/zentelemetry/xmake.lua +++ b/src/zentelemetry/xmake.lua @@ -6,5 +6,5 @@ target('zentelemetry') add_headerfiles("**.h") add_files("**.cpp") add_includedirs("include", {public=true}) - add_deps("zencore", "protozero", "spdlog") + add_deps("zencore", "protozero") add_deps("robin-map") |