aboutsummaryrefslogtreecommitdiff
path: root/src/zentelemetry
diff options
context:
space:
mode:
authorLiam Mitchell <[email protected]>2026-03-09 19:06:36 -0700
committerLiam Mitchell <[email protected]>2026-03-09 19:06:36 -0700
commitd1abc50ee9d4fb72efc646e17decafea741caa34 (patch)
treee4288e00f2f7ca0391b83d986efcb69d3ba66a83 /src/zentelemetry
parentAllow requests with invalid content-types unless specified in command line or... (diff)
parentupdated chunk–block analyser (#818) (diff)
downloadzen-d1abc50ee9d4fb72efc646e17decafea741caa34.tar.xz
zen-d1abc50ee9d4fb72efc646e17decafea741caa34.zip
Merge branch 'main' into lm/restrict-content-type
Diffstat (limited to 'src/zentelemetry')
-rw-r--r--src/zentelemetry/include/zentelemetry/otlpencoder.h8
-rw-r--r--src/zentelemetry/include/zentelemetry/otlptrace.h9
-rw-r--r--src/zentelemetry/include/zentelemetry/stats.h203
-rw-r--r--src/zentelemetry/otlpencoder.cpp44
-rw-r--r--src/zentelemetry/otlptrace.cpp4
-rw-r--r--src/zentelemetry/stats.cpp6
-rw-r--r--src/zentelemetry/xmake.lua2
7 files changed, 192 insertions, 84 deletions
diff --git a/src/zentelemetry/include/zentelemetry/otlpencoder.h b/src/zentelemetry/include/zentelemetry/otlpencoder.h
index ed6665781..f280aa9ec 100644
--- a/src/zentelemetry/include/zentelemetry/otlpencoder.h
+++ b/src/zentelemetry/include/zentelemetry/otlpencoder.h
@@ -13,9 +13,9 @@
# include <protozero/pbf_builder.hpp>
# include <protozero/types.hpp>
-namespace spdlog { namespace details {
- struct log_msg;
-}} // namespace spdlog::details
+namespace zen::logging {
+struct LogMessage;
+} // namespace zen::logging
namespace zen::otel {
enum class Resource : protozero::pbf_tag_type;
@@ -46,7 +46,7 @@ public:
void AddResourceAttribute(const std::string_view& Key, const std::string_view& Value);
void AddResourceAttribute(const std::string_view& Key, int64_t Value);
- std::string FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const;
+ std::string FormatOtelProtobuf(const logging::LogMessage& Msg) const;
std::string FormatOtelMetrics() const;
std::string FormatOtelTrace(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) const;
diff --git a/src/zentelemetry/include/zentelemetry/otlptrace.h b/src/zentelemetry/include/zentelemetry/otlptrace.h
index 49dd90358..95718af55 100644
--- a/src/zentelemetry/include/zentelemetry/otlptrace.h
+++ b/src/zentelemetry/include/zentelemetry/otlptrace.h
@@ -317,6 +317,7 @@ public:
ExtendableStringBuilder<128> NameBuilder;
NamingFunction(NameBuilder);
+ Initialize(NameBuilder);
}
/** Construct a new span with a naming function AND initializer function
@@ -350,7 +351,13 @@ public:
// Execute a function with the span pointer if valid. This can
// be used to add attributes or events to the span after creation
- inline void WithSpan(auto Func) const { Func(*m_Span); }
+ inline void WithSpan(auto Func) const
+ {
+ if (m_Span)
+ {
+ Func(*m_Span);
+ }
+ }
private:
void Initialize(std::string_view Name);
diff --git a/src/zentelemetry/include/zentelemetry/stats.h b/src/zentelemetry/include/zentelemetry/stats.h
index 3e67bac1c..260b0fcfb 100644
--- a/src/zentelemetry/include/zentelemetry/stats.h
+++ b/src/zentelemetry/include/zentelemetry/stats.h
@@ -16,11 +16,17 @@ class CbObjectWriter;
namespace zen::metrics {
+/** A single atomic value that can be set and read at any time.
+ *
+ * Useful for point-in-time readings such as queue depth, active connection count,
+ * or any value where only the current state matters rather than history.
+ */
template<typename T>
class Gauge
{
public:
Gauge() : m_Value{0} {}
+ explicit Gauge(T InitialValue) : m_Value{InitialValue} {}
T Value() const { return m_Value; }
void SetValue(T Value) { m_Value = Value; }
@@ -29,12 +35,12 @@ private:
std::atomic<T> m_Value;
};
-/** Stats counter
+/** Monotonically increasing (or decreasing) counter.
*
- * A counter is modified by adding or subtracting a value from a current value.
- * This would typically be used to track number of requests in flight, number
- * of active jobs etc
+ * Suitable for tracking quantities that go up and down over time, such as
+ * requests in flight or active jobs. All operations are lock-free via atomics.
*
+ * Unlike a Meter, a Counter does not track rates — it only records a running total.
*/
class Counter
{
@@ -50,34 +56,56 @@ private:
std::atomic<uint64_t> m_count{0};
};
-/** Exponential Weighted Moving Average
-
- This is very raw, to use as little state as possible. If we
- want to use this more broadly in user code we should perhaps
- add a more user-friendly wrapper
+/** Low-level exponential weighted moving average.
+ *
+ * Tracks a smoothed rate using the standard EWMA recurrence:
+ *
+ * rate = rate + alpha * (instantRate - rate)
+ *
+ * where instantRate = Count / Interval. The alpha value controls how quickly
+ * the average responds to changes — higher alpha means more weight on recent
+ * samples. Typical alphas are derived from a decay half-life (e.g. 1, 5, 15
+ * minutes) and a fixed tick interval.
+ *
+ * This class is intentionally minimal to keep per-instance state to a single
+ * atomic double. See Meter for a more convenient wrapper.
*/
-
class RawEWMA
{
public:
- /// <summary>
- /// Update EWMA with new measure
- /// </summary>
- /// <param name="Alpha">Smoothing factor (between 0 and 1)</param>
- /// <param name="Interval">Elapsed time since last</param>
- /// <param name="Count">Value</param>
- /// <param name="IsInitialUpdate">Whether this is the first update or not</param>
- void Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate);
+ /** Update the EWMA with a new observation.
+ *
+ * @param Alpha Smoothing factor in (0, 1). Smaller values give a
+ * slower-moving average; larger values track recent
+ * changes more aggressively.
+ * @param Interval Elapsed hi-freq timer ticks since the last Tick call.
+ * Used to compute the instantaneous rate as Count/Interval.
+ * @param Count Number of events observed during this interval.
+ * @param IsInitialUpdate True on the very first call: seeds the rate directly
+ * from the instantaneous rate rather than blending it in.
+ */
+ void Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate);
+
+ /** Returns the current smoothed rate in events per second. */
double Rate() const;
private:
std::atomic<double> m_Rate = 0;
};
-/// <summary>
-/// Tracks rate of events over time (i.e requests/sec), using
-/// exponential moving averages
-/// </summary>
+/** Tracks the rate of events over time using exponential moving averages.
+ *
+ * Maintains three EWMA windows (1, 5, 15 minutes) in addition to a simple
+ * mean rate computed from the total count and elapsed wall time since
+ * construction. This mirrors the load-average conventions familiar from Unix.
+ *
+ * Rate updates are batched: Mark() accumulates a pending count and the EWMA
+ * is only advanced every ~5 seconds (controlled by kTickIntervalInSeconds),
+ * keeping contention low even under heavy call rates. Rates are returned in
+ * events per second.
+ *
+ * All operations are thread-safe via lock-free atomics.
+ */
class Meter
{
public:
@@ -85,18 +113,18 @@ public:
~Meter();
inline uint64_t Count() const { return m_TotalCount; }
- double Rate1(); // One-minute rate
- double Rate5(); // Five-minute rate
- double Rate15(); // Fifteen-minute rate
- double MeanRate() const; // Mean rate since instantiation of this meter
+ double Rate1(); // One-minute EWMA rate (events/sec)
+ double Rate5(); // Five-minute EWMA rate (events/sec)
+ double Rate15(); // Fifteen-minute EWMA rate (events/sec)
+ double MeanRate() const; // Mean rate since instantiation (events/sec)
void Mark(uint64_t Count = 1); // Register one or more events
private:
std::atomic<uint64_t> m_TotalCount{0}; // Accumulator counting number of marks since beginning
- std::atomic<uint64_t> m_PendingCount{0}; // Pending EWMA update accumulator
- std::atomic<uint64_t> m_StartTick{0}; // Time this was instantiated (for mean)
- std::atomic<uint64_t> m_LastTick{0}; // Timestamp of last EWMA tick
- std::atomic<int64_t> m_Remainder{0}; // Tracks the "modulo" of tick time
+ std::atomic<uint64_t> m_PendingCount{0}; // Pending EWMA update accumulator; drained on each tick
+ std::atomic<uint64_t> m_StartTick{0}; // Hi-freq timer value at construction (for MeanRate)
+ std::atomic<uint64_t> m_LastTick{0}; // Hi-freq timer value of the last EWMA tick
+ std::atomic<int64_t> m_Remainder{0}; // Accumulated ticks not yet consumed by EWMA updates
bool m_IsFirstTick = true;
RawEWMA m_RateM1;
RawEWMA m_RateM5;
@@ -106,7 +134,14 @@ private:
void Tick();
};
-/** Moment-in-time snapshot of a distribution
+/** Immutable sorted snapshot of a reservoir sample.
+ *
+ * Constructed from a vector of sampled values which are sorted on construction.
+ * Percentiles are computed on demand via linear interpolation between adjacent
+ * sorted values, following the standard R-7 quantile method.
+ *
+ * Because this is a copy of the reservoir at a point in time, it can be held
+ * and queried without holding any locks on the source UniformSample.
*/
class SampleSnapshot
{
@@ -128,12 +163,19 @@ private:
std::vector<double> m_Values;
};
-/** Randomly selects samples from a stream. Uses Vitter's
- Algorithm R to produce a statistically representative sample.
-
- http://www.cs.umd.edu/~samir/498/vitter.pdf - Random Sampling with a Reservoir
+/** Reservoir sampler for probabilistic distribution tracking.
+ *
+ * Maintains a fixed-size reservoir of samples drawn uniformly from the full
+ * history of values using Vitter's Algorithm R. This gives an unbiased
+ * statistical representation of the value distribution regardless of how many
+ * total values have been observed, at the cost of O(ReservoirSize) memory.
+ *
+ * A larger reservoir improves accuracy of tail percentiles (P99, P999) but
+ * increases memory and snapshot cost. The default of 1028 gives good accuracy
+ * for most telemetry uses.
+ *
+ * http://www.cs.umd.edu/~samir/498/vitter.pdf - Random Sampling with a Reservoir
*/
-
class UniformSample
{
public:
@@ -159,7 +201,14 @@ private:
std::vector<std::atomic<int64_t>> m_Values;
};
-/** Track (probabilistic) sample distribution along with min/max
+/** Tracks the statistical distribution of a stream of values.
+ *
+ * Records exact min, max, count and mean across all values ever seen, plus a
+ * reservoir sample (via UniformSample) used to compute percentiles. Percentiles
+ * are therefore probabilistic — they reflect the distribution of a representative
+ * sample rather than the full history.
+ *
+ * All operations are thread-safe via lock-free atomics.
*/
class Histogram
{
@@ -183,11 +232,28 @@ private:
std::atomic<int64_t> m_Count{0};
};
-/** Track timing and frequency of some operation
-
- Example usage would be to track frequency and duration of network
- requests, or function calls.
-
+/** Combines a Histogram and a Meter to track both the distribution and rate
+ * of a recurring operation.
+ *
+ * Duration values are stored in hi-freq timer ticks. Use GetHifreqTimerToSeconds()
+ * when converting for display.
+ *
+ * Typical usage via the RAII Scope helper:
+ *
+ * OperationTiming MyTiming;
+ *
+ * {
+ * OperationTiming::Scope Scope(MyTiming);
+ * DoWork();
+ * // Scope destructor calls Stop() automatically
+ * }
+ *
+ * // Or cancel if the operation should not be counted:
+ * {
+ * OperationTiming::Scope Scope(MyTiming);
+ * if (CacheHit) { Scope.Cancel(); return; }
+ * DoExpensiveWork();
+ * }
*/
class OperationTiming
{
@@ -207,13 +273,19 @@ public:
double Rate15() { return m_Meter.Rate15(); }
double MeanRate() const { return m_Meter.MeanRate(); }
+ /** RAII helper that records duration from construction to Stop() or destruction.
+ *
+ * Call Cancel() to discard the measurement (e.g. for cache hits that should
+ * not skew latency statistics). After Stop() or Cancel() the destructor is a
+ * no-op.
+ */
struct Scope
{
Scope(OperationTiming& Outer);
~Scope();
- void Stop();
- void Cancel();
+ void Stop(); // Record elapsed time and mark the meter
+ void Cancel(); // Discard this measurement; destructor becomes a no-op
private:
OperationTiming& m_Outer;
@@ -225,6 +297,7 @@ private:
Histogram m_Histogram;
};
+/** Immutable snapshot of a Meter's state at a point in time. */
struct MeterSnapshot
{
uint64_t Count;
@@ -234,6 +307,12 @@ struct MeterSnapshot
double Rate15;
};
+/** Immutable snapshot of a Histogram's state at a point in time.
+ *
+ * Count and all statistical values have been scaled by the ConversionFactor
+ * supplied when the snapshot was taken (e.g. GetHifreqTimerToSeconds() to
+ * convert timer ticks to seconds).
+ */
struct HistogramSnapshot
{
double Count;
@@ -246,24 +325,29 @@ struct HistogramSnapshot
double P999;
};
+/** Combined snapshot of a Meter and Histogram pair. */
struct StatsSnapshot
{
MeterSnapshot Meter;
HistogramSnapshot Histogram;
};
+/** Combined snapshot of request timing and byte transfer statistics. */
struct RequestStatsSnapshot
{
StatsSnapshot Requests;
StatsSnapshot Bytes;
};
-/** Metrics for network requests
-
- Aggregates tracking of duration, payload sizes into a single
- class
-
- */
+/** Tracks both the timing and payload size of network requests.
+ *
+ * Maintains two independent histogram+meter pairs: one for request duration
+ * (in hi-freq timer ticks) and one for transferred bytes. Both dimensions
+ * share the same request count — a single Update() call advances both.
+ *
+ * Duration accessors return values in hi-freq timer ticks. Multiply by
+ * GetHifreqTimerToSeconds() to convert to seconds.
+ */
class RequestStats
{
public:
@@ -275,9 +359,9 @@ public:
// Timing
- int64_t MaxDuration() const { return m_BytesHistogram.Max(); }
- int64_t MinDuration() const { return m_BytesHistogram.Min(); }
- double MeanDuration() const { return m_BytesHistogram.Mean(); }
+ int64_t MaxDuration() const { return m_RequestTimeHistogram.Max(); }
+ int64_t MinDuration() const { return m_RequestTimeHistogram.Min(); }
+ double MeanDuration() const { return m_RequestTimeHistogram.Mean(); }
SampleSnapshot DurationSnapshot() const { return m_RequestTimeHistogram.Snapshot(); }
double Rate1() { return m_RequestMeter.Rate1(); }
double Rate5() { return m_RequestMeter.Rate5(); }
@@ -295,14 +379,23 @@ public:
double ByteRate15() { return m_BytesMeter.Rate15(); }
double ByteMeanRate() const { return m_BytesMeter.MeanRate(); }
+ /** RAII helper that records duration and byte count from construction to Stop()
+ * or destruction.
+ *
+ * The byte count can be supplied at construction or updated at any point via
+ * SetBytes() before the scope ends — useful when the response size is not
+ * known until the operation completes.
+ *
+ * Call Cancel() to discard the measurement entirely.
+ */
struct Scope
{
Scope(RequestStats& Outer, int64_t Bytes);
~Scope();
void SetBytes(int64_t Bytes) { m_Bytes = Bytes; }
- void Stop();
- void Cancel();
+ void Stop(); // Record elapsed time and byte count
+ void Cancel(); // Discard this measurement; destructor becomes a no-op
private:
RequestStats& m_Outer;
diff --git a/src/zentelemetry/otlpencoder.cpp b/src/zentelemetry/otlpencoder.cpp
index 677545066..5477c5381 100644
--- a/src/zentelemetry/otlpencoder.cpp
+++ b/src/zentelemetry/otlpencoder.cpp
@@ -3,9 +3,9 @@
#include "zentelemetry/otlpencoder.h"
#include <zenbase/zenbase.h>
+#include <zencore/logging/logmsg.h>
#include <zentelemetry/otlptrace.h>
-#include <spdlog/sinks/sink.h>
#include <zencore/testing.h>
#include <protozero/buffer_string.hpp>
@@ -29,49 +29,49 @@ OtlpEncoder::~OtlpEncoder()
}
static int
-MapSeverity(const spdlog::level::level_enum Level)
+MapSeverity(const logging::LogLevel Level)
{
switch (Level)
{
- case spdlog::level::critical:
+ case logging::Critical:
return otel::SEVERITY_NUMBER_FATAL;
- case spdlog::level::err:
+ case logging::Err:
return otel::SEVERITY_NUMBER_ERROR;
- case spdlog::level::warn:
+ case logging::Warn:
return otel::SEVERITY_NUMBER_WARN;
- case spdlog::level::info:
+ case logging::Info:
return otel::SEVERITY_NUMBER_INFO;
- case spdlog::level::debug:
+ case logging::Debug:
return otel::SEVERITY_NUMBER_DEBUG;
default:
- case spdlog::level::trace:
+ case logging::Trace:
return otel::SEVERITY_NUMBER_TRACE;
}
}
static const char*
-MapSeverityText(const spdlog::level::level_enum Level)
+MapSeverityText(const logging::LogLevel Level)
{
switch (Level)
{
- case spdlog::level::critical:
+ case logging::Critical:
return "fatal";
- case spdlog::level::err:
+ case logging::Err:
return "error";
- case spdlog::level::warn:
+ case logging::Warn:
return "warn";
- case spdlog::level::info:
+ case logging::Info:
return "info";
- case spdlog::level::debug:
+ case logging::Debug:
return "debug";
default:
- case spdlog::level::trace:
+ case logging::Trace:
return "trace";
}
}
std::string
-OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const
+OtlpEncoder::FormatOtelProtobuf(const logging::LogMessage& Msg) const
{
std::string Data;
@@ -98,7 +98,7 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const
protozero::pbf_builder<otel::InstrumentationScope> IsBuilder{SlBuilder,
otel::ScopeLogs::required_InstrumentationScope_scope};
- IsBuilder.add_string(otel::InstrumentationScope::string_name, Msg.logger_name.data(), Msg.logger_name.size());
+ IsBuilder.add_string(otel::InstrumentationScope::string_name, Msg.GetLoggerName().data(), Msg.GetLoggerName().size());
}
// LogRecord log_records
@@ -106,13 +106,13 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const
protozero::pbf_builder<otel::LogRecord> LrBuilder{SlBuilder, otel::ScopeLogs::required_repeated_LogRecord_log_records};
LrBuilder.add_fixed64(otel::LogRecord::required_fixed64_time_unix_nano,
- std::chrono::duration_cast<std::chrono::nanoseconds>(Msg.time.time_since_epoch()).count());
+ std::chrono::duration_cast<std::chrono::nanoseconds>(Msg.GetTime().time_since_epoch()).count());
- const int Severity = MapSeverity(Msg.level);
+ const int Severity = MapSeverity(Msg.GetLevel());
LrBuilder.add_enum(otel::LogRecord::optional_SeverityNumber_severity_number, Severity);
- LrBuilder.add_string(otel::LogRecord::optional_string_severity_text, MapSeverityText(Msg.level));
+ LrBuilder.add_string(otel::LogRecord::optional_string_severity_text, MapSeverityText(Msg.GetLevel()));
otel::TraceId TraceId;
const otel::SpanId SpanId = otel::Span::GetCurrentSpanId(TraceId);
@@ -127,7 +127,7 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const
{
protozero::pbf_builder<otel::AnyValue> BodyBuilder{LrBuilder, otel::LogRecord::optional_anyvalue_body};
- BodyBuilder.add_string(otel::AnyValue::string_string_value, Msg.payload.data(), Msg.payload.size());
+ BodyBuilder.add_string(otel::AnyValue::string_string_value, Msg.GetPayload().data(), Msg.GetPayload().size());
}
// attributes
@@ -139,7 +139,7 @@ OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const
{
protozero::pbf_builder<otel::AnyValue> AvBuilder{KvBuilder, otel::KeyValue::AnyValue_value};
- AvBuilder.add_int64(otel::AnyValue::int64_int_value, Msg.thread_id);
+ AvBuilder.add_int64(otel::AnyValue::int64_int_value, Msg.GetThreadId());
}
}
}
diff --git a/src/zentelemetry/otlptrace.cpp b/src/zentelemetry/otlptrace.cpp
index 6a095cfeb..3888717d5 100644
--- a/src/zentelemetry/otlptrace.cpp
+++ b/src/zentelemetry/otlptrace.cpp
@@ -385,6 +385,8 @@ otlptrace_forcelink()
# if ZEN_WITH_TESTS
+TEST_SUITE_BEGIN("telemetry.otlptrace");
+
TEST_CASE("otlp.trace")
{
// Enable OTLP tracing for the duration of this test
@@ -409,6 +411,8 @@ TEST_CASE("otlp.trace")
}
}
+TEST_SUITE_END();
+
# endif
} // namespace zen::otel
diff --git a/src/zentelemetry/stats.cpp b/src/zentelemetry/stats.cpp
index c67fa3c66..a417bb52c 100644
--- a/src/zentelemetry/stats.cpp
+++ b/src/zentelemetry/stats.cpp
@@ -631,7 +631,7 @@ EmitSnapshot(const HistogramSnapshot& Snapshot, CbObjectWriter& Cbo)
{
Cbo << "t_count" << Snapshot.Count << "t_avg" << Snapshot.Avg;
Cbo << "t_min" << Snapshot.Min << "t_max" << Snapshot.Max;
- Cbo << "t_p75" << Snapshot.P75 << "t_p95" << Snapshot.P95 << "t_p99" << Snapshot.P999;
+ Cbo << "t_p75" << Snapshot.P75 << "t_p95" << Snapshot.P95 << "t_p99" << Snapshot.P99 << "t_p999" << Snapshot.P999;
}
void
@@ -660,6 +660,8 @@ EmitSnapshot(std::string_view Tag, const RequestStatsSnapshot& Snapshot, CbObjec
#if ZEN_WITH_TESTS
+TEST_SUITE_BEGIN("telemetry.stats");
+
TEST_CASE("Core.Stats.Histogram")
{
Histogram Histo{258};
@@ -819,6 +821,8 @@ TEST_CASE("Meter")
# endif
}
+TEST_SUITE_END();
+
namespace zen {
void
diff --git a/src/zentelemetry/xmake.lua b/src/zentelemetry/xmake.lua
index 7739c0a08..cd9a18ec4 100644
--- a/src/zentelemetry/xmake.lua
+++ b/src/zentelemetry/xmake.lua
@@ -6,5 +6,5 @@ target('zentelemetry')
add_headerfiles("**.h")
add_files("**.cpp")
add_includedirs("include", {public=true})
- add_deps("zencore", "protozero", "spdlog")
+ add_deps("zencore", "protozero")
add_deps("robin-map")