From 5c139e2d8a260544bc5e730de0440edbab4b0f03 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 22 Oct 2025 17:57:29 +0200 Subject: add support for OTLP logging/tracing (#599) - adds `zentelemetry` project which houses new functionality for serializing logs and traces in OpenTelemetry Protocol format (OTLP) - moved existing stats functionality from `zencore` to `zentelemetry` - adds `TRefCounted` for vtable-less refcounting - adds `MemoryArena` class which allows for linear allocation of memory from chunks - adds `protozero` which is used to encode OTLP protobuf messages --- src/zenbase/include/zenbase/refcount.h | 51 +- src/zencore/include/zencore/iobuffer.h | 3 + src/zencore/include/zencore/memory/memoryarena.h | 104 +++ src/zencore/include/zencore/stats.h | 356 -------- src/zencore/memory/memoryarena.cpp | 126 +++ src/zencore/stats.cpp | 831 ------------------ src/zencore/trace.cpp | 1 + src/zencore/zencore.cpp | 2 - src/zenhttp/httpserver.cpp | 5 + src/zenhttp/include/zenhttp/httptest.h | 2 +- src/zenhttp/xmake.lua | 2 +- src/zenserver/diag/logging.cpp | 12 + src/zenserver/diag/otlphttp.cpp | 83 ++ src/zenserver/diag/otlphttp.h | 64 ++ src/zenserver/storage/buildstore/httpbuildstore.h | 2 +- src/zenserver/storage/cache/httpstructuredcache.h | 2 +- .../storage/projectstore/httpprojectstore.h | 2 +- src/zenserver/storage/upstream/upstreamcache.cpp | 2 +- src/zenserver/storage/upstream/upstreamcache.h | 2 +- src/zenserver/storage/workspaces/httpworkspaces.h | 2 +- src/zenserver/xmake.lua | 3 + src/zenstore/cidstore.cpp | 2 +- .../include/zenstore/cache/cachedisklayer.h | 2 +- .../include/zenstore/cache/structuredcachestore.h | 2 +- .../include/zenstore/cache/upstreamcacheclient.h | 2 +- src/zenstore/include/zenstore/cidstore.h | 2 +- src/zentelemetry-test/xmake.lua | 9 + src/zentelemetry-test/zentelemetry-test.cpp | 42 + .../include/zentelemetry/otlpencoder.h | 66 ++ src/zentelemetry/include/zentelemetry/otlptrace.h | 268 ++++++ src/zentelemetry/include/zentelemetry/stats.h | 356 ++++++++ .../include/zentelemetry/zentelemetry.h | 9 + src/zentelemetry/otellogprotozero.h | 287 +++++++ src/zentelemetry/otelmetricsprotozero.h | 953 +++++++++++++++++++++ src/zentelemetry/otelprotozero.h | 166 ++++ src/zentelemetry/oteltraceprotozero.h | 474 ++++++++++ src/zentelemetry/otlpencoder.cpp | 478 +++++++++++ src/zentelemetry/otlptrace.cpp | 398 +++++++++ src/zentelemetry/stats.cpp | 831 ++++++++++++++++++ src/zentelemetry/xmake.lua | 10 + src/zentelemetry/zentelemetry.cpp | 19 + 41 files changed, 4828 insertions(+), 1205 deletions(-) create mode 100644 src/zencore/include/zencore/memory/memoryarena.h delete mode 100644 src/zencore/include/zencore/stats.h create mode 100644 src/zencore/memory/memoryarena.cpp delete mode 100644 src/zencore/stats.cpp create mode 100644 src/zenserver/diag/otlphttp.cpp create mode 100644 src/zenserver/diag/otlphttp.h create mode 100644 src/zentelemetry-test/xmake.lua create mode 100644 src/zentelemetry-test/zentelemetry-test.cpp create mode 100644 src/zentelemetry/include/zentelemetry/otlpencoder.h create mode 100644 src/zentelemetry/include/zentelemetry/otlptrace.h create mode 100644 src/zentelemetry/include/zentelemetry/stats.h create mode 100644 src/zentelemetry/include/zentelemetry/zentelemetry.h create mode 100644 src/zentelemetry/otellogprotozero.h create mode 100644 src/zentelemetry/otelmetricsprotozero.h create mode 100644 src/zentelemetry/otelprotozero.h create mode 100644 src/zentelemetry/oteltraceprotozero.h create mode 100644 src/zentelemetry/otlpencoder.cpp create mode 100644 src/zentelemetry/otlptrace.cpp create mode 100644 src/zentelemetry/stats.cpp create mode 100644 src/zentelemetry/xmake.lua create mode 100644 src/zentelemetry/zentelemetry.cpp (limited to 'src') diff --git a/src/zenbase/include/zenbase/refcount.h b/src/zenbase/include/zenbase/refcount.h index 6ad49cba2..40ad7bca5 100644 --- a/src/zenbase/include/zenbase/refcount.h +++ b/src/zenbase/include/zenbase/refcount.h @@ -10,6 +10,9 @@ namespace zen { /** * Helper base class for reference counted objects using intrusive reference counting + * + * When the reference count reaches zero, the object deletes itself. This class relies + * on having a virtual destructor to ensure proper cleanup of derived classes. */ class RefCounted { @@ -17,7 +20,7 @@ public: RefCounted() = default; virtual ~RefCounted() = default; - inline uint32_t AddRef() const { return AtomicIncrement(const_cast(this)->m_RefCount); } + inline uint32_t AddRef() const noexcept { return AtomicIncrement(const_cast(this)->m_RefCount); } inline uint32_t Release() const { const uint32_t RefCount = AtomicDecrement(const_cast(this)->m_RefCount); @@ -42,6 +45,48 @@ private: uint32_t m_RefCount = 0; }; +/** + * Template helper base class for reference counted objects using intrusive reference counting. + * + * NOTE: Unlike RefCounted, this class deletes the derived type when the reference count reaches zero. + * It has no virtual destructor, so it's important that you either don't derive from it further, + * or ensure that the derived class has a virtual destructor. + */ + +template +class TRefCounted +{ +public: + TRefCounted() = default; + ~TRefCounted() = default; + + inline uint32_t AddRef() const noexcept { return AtomicIncrement(const_cast*>(this)->m_RefCount); } + inline uint32_t Release() const + { + const uint32_t RefCount = AtomicDecrement(const_cast*>(this)->m_RefCount); + if (RefCount == 0) + { + const_cast(static_cast(this))->DeleteThis(); + } + return RefCount; + } + + // Copying reference counted objects doesn't make a lot of sense generally, so let's prevent it + + TRefCounted(const TRefCounted&) = delete; + TRefCounted(TRefCounted&&) = delete; + TRefCounted& operator=(const TRefCounted&) = delete; + TRefCounted& operator=(TRefCounted&&) = delete; + +protected: + inline uint32_t RefCount() const { return m_RefCount; } + + void DeleteThis() const noexcept { delete static_cast(this); } + +private: + uint32_t m_RefCount = 0; +}; + /** * Smart pointer for classes derived from RefCounted */ @@ -127,7 +172,8 @@ class Ref { public: inline Ref() = default; - inline Ref(const Ref& Rhs) : m_Ref(Rhs.m_Ref) { m_Ref && m_Ref->AddRef(); } + inline Ref(Ref&& Rhs) noexcept : m_Ref(Rhs.m_Ref) { Rhs.m_Ref = nullptr; } + inline Ref(const Ref& Rhs) noexcept : m_Ref(Rhs.m_Ref) { m_Ref && m_Ref->AddRef(); } inline explicit Ref(T* Ptr) : m_Ref(Ptr) { m_Ref && m_Ref->AddRef(); } inline ~Ref() { m_Ref && m_Ref->Release(); } @@ -170,7 +216,6 @@ public: } return *this; } - inline Ref(Ref&& Rhs) noexcept : m_Ref(Rhs.m_Ref) { Rhs.m_Ref = nullptr; } private: T* m_Ref = nullptr; diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h index 63779407e..1b2d382ee 100644 --- a/src/zencore/include/zencore/iobuffer.h +++ b/src/zencore/include/zencore/iobuffer.h @@ -32,6 +32,7 @@ enum class ZenContentType : uint8_t kPNG = 12, kIcon = 13, kXML = 14, + kProtobuf = 15, kCOUNT }; @@ -73,6 +74,8 @@ ToString(ZenContentType ContentType) return "icon"sv; case ZenContentType::kXML: return "xml"sv; + case ZenContentType::kProtobuf: + return "protobuf"sv; } } diff --git a/src/zencore/include/zencore/memory/memoryarena.h b/src/zencore/include/zencore/memory/memoryarena.h new file mode 100644 index 000000000..551415aac --- /dev/null +++ b/src/zencore/include/zencore/memory/memoryarena.h @@ -0,0 +1,104 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { + +/** + * Chunked linear memory allocator + * + * Supports fast allocation of many small objects with minimal overhead. + * All allocations are freed when the arena is destroyed, therefore there + * is no support for individual deallocation. + * + * For convenience, we include operator new/delete overloads below which + * take a MemoryArena reference as a placement argument. + * + * This allocator is thread-safe. + */ +class MemoryArena +{ +public: + MemoryArena() = default; + ~MemoryArena(); + + void* AllocateAligned(size_t ByteCount, size_t align); + void* AllocateAlignedWithOffset(size_t ByteCount, size_t align, size_t offset); + void* Allocate(size_t Size); + const char* DuplicateString(std::string_view Str); + + MemoryArena(const MemoryArena&) = delete; + MemoryArena& operator=(const MemoryArena&) = delete; + +private: + static constexpr size_t ChunkSize = 16 * 1024; + // TODO: should just chain the memory blocks together and avoid this + // vector altogether, saving us a memory allocation + std::vector m_Chunks; + uint8_t* m_CurrentChunk = nullptr; + size_t m_CurrentOffset = 0; + RwLock m_Lock; +}; + +// Allocator suitable for use with EASTL + +struct ArenaAlloc +{ + ArenaAlloc(const char* name_opt = nullptr) = delete; + ArenaAlloc(MemoryArena& Arena) : m_Arena(&Arena) {} + + inline void* allocate(size_t bytes, int flags = 0) + { + ZEN_UNUSED(flags); + return m_Arena->Allocate(bytes); + } + + inline void* allocate(size_t bytes, size_t align, size_t offset, int flags = 0) + { + ZEN_UNUSED(flags); + if (offset == 0) + { + return m_Arena->AllocateAligned(bytes, align); + } + else + { + return m_Arena->AllocateAlignedWithOffset(bytes, align, offset); + } + } + + void deallocate(void* p, size_t n) { ZEN_UNUSED(p, n); } + +private: + MemoryArena* m_Arena = nullptr; +}; + +} // namespace zen + +inline void* +operator new(size_t Size, zen::MemoryArena& Arena) +{ + return Arena.Allocate(Size); +} + +inline void +operator delete(void* Ptr, zen::MemoryArena& Arena) +{ + // Arena will clean up all allocations when it's destroyed + ZEN_UNUSED(Ptr, Arena); +} + +inline void* +operator new[](size_t Size, zen::MemoryArena& Arena) +{ + return Arena.Allocate(Size); +} + +inline void +operator delete[](void* Ptr, zen::MemoryArena& Arena) +{ + // Arena will clean up all allocations when it's destroyed + ZEN_UNUSED(Ptr, Arena); +} diff --git a/src/zencore/include/zencore/stats.h b/src/zencore/include/zencore/stats.h deleted file mode 100644 index f232cf2f4..000000000 --- a/src/zencore/include/zencore/stats.h +++ /dev/null @@ -1,356 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include "zencore.h" - -#include - -#include -#include -#include - -namespace zen { -class CbObjectWriter; -} - -namespace zen::metrics { - -template -class Gauge -{ -public: - Gauge() : m_Value{0} {} - - T Value() const { return m_Value; } - void SetValue(T Value) { m_Value = Value; } - -private: - std::atomic m_Value; -}; - -/** Stats counter - * - * A counter is modified by adding or subtracting a value from a current value. - * This would typically be used to track number of requests in flight, number - * of active jobs etc - * - */ -class Counter -{ -public: - inline void SetValue(uint64_t Value) { m_count = Value; } - inline uint64_t Value() const { return m_count; } - - inline void Increment(int64_t AddValue) { m_count.fetch_add(AddValue); } - inline void Decrement(int64_t SubValue) { m_count.fetch_sub(SubValue); } - inline void Clear() { m_count.store(0, std::memory_order_release); } - -private: - std::atomic m_count{0}; -}; - -/** Exponential Weighted Moving Average - - This is very raw, to use as little state as possible. If we - want to use this more broadly in user code we should perhaps - add a more user-friendly wrapper - */ - -class RawEWMA -{ -public: - /// - /// Update EWMA with new measure - /// - /// Smoothing factor (between 0 and 1) - /// Elapsed time since last - /// Value - /// Whether this is the first update or not - void Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate); - double Rate() const; - -private: - std::atomic m_Rate = 0; -}; - -/// -/// Tracks rate of events over time (i.e requests/sec), using -/// exponential moving averages -/// -class Meter -{ -public: - Meter(); - ~Meter(); - - inline uint64_t Count() const { return m_TotalCount; } - double Rate1(); // One-minute rate - double Rate5(); // Five-minute rate - double Rate15(); // Fifteen-minute rate - double MeanRate() const; // Mean rate since instantiation of this meter - void Mark(uint64_t Count = 1); // Register one or more events - -private: - std::atomic m_TotalCount{0}; // Accumulator counting number of marks since beginning - std::atomic m_PendingCount{0}; // Pending EWMA update accumulator - std::atomic m_StartTick{0}; // Time this was instantiated (for mean) - std::atomic m_LastTick{0}; // Timestamp of last EWMA tick - std::atomic m_Remainder{0}; // Tracks the "modulo" of tick time - bool m_IsFirstTick = true; - RawEWMA m_RateM1; - RawEWMA m_RateM5; - RawEWMA m_RateM15; - - void TickIfNecessary(); - void Tick(); -}; - -/** Moment-in-time snapshot of a distribution - */ -class SampleSnapshot -{ -public: - SampleSnapshot(std::vector&& Values); - ~SampleSnapshot(); - - uint32_t Size() const { return (uint32_t)m_Values.size(); } - double GetQuantileValue(double Quantile); - double GetMedian() { return GetQuantileValue(0.5); } - double Get75Percentile() { return GetQuantileValue(0.75); } - double Get95Percentile() { return GetQuantileValue(0.95); } - double Get98Percentile() { return GetQuantileValue(0.98); } - double Get99Percentile() { return GetQuantileValue(0.99); } - double Get999Percentile() { return GetQuantileValue(0.999); } - const std::vector& GetValues() const; - -private: - std::vector m_Values; -}; - -/** Randomly selects samples from a stream. Uses Vitter's - Algorithm R to produce a statistically representative sample. - - http://www.cs.umd.edu/~samir/498/vitter.pdf - Random Sampling with a Reservoir - */ - -class UniformSample -{ -public: - UniformSample(uint32_t ReservoirSize); - ~UniformSample(); - - void Clear(); - uint32_t Size() const; - void Update(int64_t Value); - SampleSnapshot Snapshot() const; - - template T> - void IterateValues(T Callback) const - { - for (const auto& Value : m_Values) - { - Callback(Value); - } - } - -private: - std::atomic m_SampleCounter{0}; - std::vector> m_Values; -}; - -/** Track (probabilistic) sample distribution along with min/max - */ -class Histogram -{ -public: - Histogram(int32_t SampleCount = 1028); - ~Histogram(); - - void Clear(); - void Update(int64_t Value); - int64_t Max() const; - int64_t Min() const; - double Mean() const; - uint64_t Count() const; - SampleSnapshot Snapshot() const { return m_Sample.Snapshot(); } - -private: - UniformSample m_Sample; - std::atomic m_Min{0}; - std::atomic m_Max{0}; - std::atomic m_Sum{0}; - std::atomic m_Count{0}; -}; - -/** Track timing and frequency of some operation - - Example usage would be to track frequency and duration of network - requests, or function calls. - - */ -class OperationTiming -{ -public: - OperationTiming(int32_t SampleCount = 514); - ~OperationTiming(); - - void Update(int64_t Duration); - int64_t Max() const; - int64_t Min() const; - double Mean() const; - uint64_t Count() const; - SampleSnapshot Snapshot() const { return m_Histogram.Snapshot(); } - - double Rate1() { return m_Meter.Rate1(); } - double Rate5() { return m_Meter.Rate5(); } - double Rate15() { return m_Meter.Rate15(); } - double MeanRate() const { return m_Meter.MeanRate(); } - - struct Scope - { - Scope(OperationTiming& Outer); - ~Scope(); - - void Stop(); - void Cancel(); - - private: - OperationTiming& m_Outer; - uint64_t m_StartTick; - }; - -private: - Meter m_Meter; - Histogram m_Histogram; -}; - -struct MeterSnapshot -{ - uint64_t Count; - double MeanRate; - double Rate1; - double Rate5; - double Rate15; -}; - -struct HistogramSnapshot -{ - double Count; - double Avg; - double Min; - double Max; - double P75; - double P95; - double P99; - double P999; -}; - -struct StatsSnapshot -{ - MeterSnapshot Meter; - HistogramSnapshot Histogram; -}; - -struct RequestStatsSnapshot -{ - StatsSnapshot Requests; - StatsSnapshot Bytes; -}; - -/** Metrics for network requests - - Aggregates tracking of duration, payload sizes into a single - class - - */ -class RequestStats -{ -public: - RequestStats(int32_t SampleCount = 514); - ~RequestStats(); - - void Update(int64_t Duration, int64_t Bytes); - uint64_t Count() const; - - // Timing - - int64_t MaxDuration() const { return m_BytesHistogram.Max(); } - int64_t MinDuration() const { return m_BytesHistogram.Min(); } - double MeanDuration() const { return m_BytesHistogram.Mean(); } - SampleSnapshot DurationSnapshot() const { return m_RequestTimeHistogram.Snapshot(); } - double Rate1() { return m_RequestMeter.Rate1(); } - double Rate5() { return m_RequestMeter.Rate5(); } - double Rate15() { return m_RequestMeter.Rate15(); } - double MeanRate() const { return m_RequestMeter.MeanRate(); } - - // Bytes - - int64_t MaxBytes() const { return m_BytesHistogram.Max(); } - int64_t MinBytes() const { return m_BytesHistogram.Min(); } - double MeanBytes() const { return m_BytesHistogram.Mean(); } - SampleSnapshot BytesSnapshot() const { return m_BytesHistogram.Snapshot(); } - double ByteRate1() { return m_BytesMeter.Rate1(); } - double ByteRate5() { return m_BytesMeter.Rate5(); } - double ByteRate15() { return m_BytesMeter.Rate15(); } - double ByteMeanRate() const { return m_BytesMeter.MeanRate(); } - - struct Scope - { - Scope(RequestStats& Outer, int64_t Bytes); - ~Scope(); - - void SetBytes(int64_t Bytes) { m_Bytes = Bytes; } - void Stop(); - void Cancel(); - - private: - RequestStats& m_Outer; - uint64_t m_StartTick; - int64_t m_Bytes; - }; - - void EmitSnapshot(std::string_view Tag, CbObjectWriter& Cbo); - - RequestStatsSnapshot Snapshot(); - -private: - static StatsSnapshot GetSnapshot(Meter& M, Histogram& H, double ConversionFactor) - { - SampleSnapshot Snap = H.Snapshot(); - return StatsSnapshot{ - .Meter = {.Count = M.Count(), .MeanRate = M.MeanRate(), .Rate1 = M.Rate1(), .Rate5 = M.Rate5(), .Rate15 = M.Rate15()}, - .Histogram = {.Count = H.Count() * ConversionFactor, - .Avg = H.Mean() * ConversionFactor, - .Min = H.Min() * ConversionFactor, - .Max = H.Max() * ConversionFactor, - .P75 = Snap.Get75Percentile() * ConversionFactor, - .P95 = Snap.Get95Percentile() * ConversionFactor, - .P99 = Snap.Get99Percentile() * ConversionFactor, - .P999 = Snap.Get999Percentile() * ConversionFactor}}; - } - - Meter m_RequestMeter; - Meter m_BytesMeter; - Histogram m_RequestTimeHistogram; - Histogram m_BytesHistogram; -}; - -void EmitSnapshot(std::string_view Tag, OperationTiming& Stat, CbObjectWriter& Cbo); -void EmitSnapshot(std::string_view Tag, const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor); -void EmitSnapshot(std::string_view Tag, Meter& Stat, CbObjectWriter& Cbo); - -void EmitSnapshot(const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor); - -void EmitSnapshot(std::string_view Tag, const MeterSnapshot& Snapshot, CbObjectWriter& Cbo); -void EmitSnapshot(std::string_view Tag, const HistogramSnapshot& Snapshot, CbObjectWriter& Cbo); -void EmitSnapshot(std::string_view Tag, const StatsSnapshot& Snapshot, CbObjectWriter& Cbo); -void EmitSnapshot(std::string_view Tag, const RequestStatsSnapshot& Snapshot, CbObjectWriter& Cbo); - -} // namespace zen::metrics - -namespace zen { - -extern void stats_forcelink(); - -} // namespace zen diff --git a/src/zencore/memory/memoryarena.cpp b/src/zencore/memory/memoryarena.cpp new file mode 100644 index 000000000..9c907a66d --- /dev/null +++ b/src/zencore/memory/memoryarena.cpp @@ -0,0 +1,126 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +namespace zen { + +MemoryArena::~MemoryArena() +{ + for (auto Chunk : m_Chunks) + delete[] Chunk; +} + +void* +MemoryArena::AllocateAligned(size_t ByteCount, size_t align) +{ + if (ByteCount == 0) + { + return nullptr; + } + + void* Ptr = nullptr; + + m_Lock.WithExclusiveLock([&] { + size_t AlignedOffset = (m_CurrentOffset + (align - 1)) & ~(align - 1); + + if (m_CurrentChunk == nullptr || AlignedOffset + ByteCount > ChunkSize) + { + uint8_t* NewChunk = new uint8_t[ChunkSize]; + if (!NewChunk) + { + return; + } + + m_Chunks.push_back(NewChunk); + m_CurrentChunk = NewChunk; + AlignedOffset = 0; + } + + Ptr = m_CurrentChunk + AlignedOffset; + m_CurrentOffset = AlignedOffset + ByteCount; + }); + + return Ptr; +} + +void* +MemoryArena::AllocateAlignedWithOffset(size_t ByteCount, size_t align, size_t offset) +{ + if (ByteCount == 0) + { + return nullptr; + } + + void* Ptr = nullptr; + + m_Lock.WithExclusiveLock([&] { + size_t AlignedOffset = (m_CurrentOffset + (align - 1) + offset) & ~(align - 1); + + if (m_CurrentChunk == nullptr || AlignedOffset + ByteCount > ChunkSize) + { + uint8_t* NewChunk = new uint8_t[ChunkSize]; + if (!NewChunk) + { + return; + } + + m_Chunks.push_back(NewChunk); + m_CurrentChunk = NewChunk; + AlignedOffset = offset; + } + + Ptr = m_CurrentChunk + AlignedOffset; + m_CurrentOffset = AlignedOffset + ByteCount; + }); + + return Ptr; +} + +void* +MemoryArena::Allocate(size_t Size) +{ + if (Size == 0) + { + return nullptr; + } + + void* Ptr = nullptr; + constexpr size_t Alignment = alignof(std::max_align_t); + + m_Lock.WithExclusiveLock([&] { + size_t AlignedOffset = (m_CurrentOffset + Alignment - 1) & ~(Alignment - 1); + + if (m_CurrentChunk == nullptr || AlignedOffset + Size > ChunkSize) + { + uint8_t* NewChunk = new uint8_t[ChunkSize]; + if (!NewChunk) + { + return; + } + + m_Chunks.push_back(NewChunk); + m_CurrentChunk = NewChunk; + AlignedOffset = 0; + } + + Ptr = m_CurrentChunk + AlignedOffset; + m_CurrentOffset = AlignedOffset + Size; + }); + + return Ptr; +} + +const char* +MemoryArena::DuplicateString(std::string_view Str) +{ + const size_t Len = Str.size(); + char* NewStr = static_cast(Allocate(Len + 1)); + if (NewStr) + { + memcpy(NewStr, Str.data(), Len); + NewStr[Len] = '\0'; + } + return NewStr; +} + +} // namespace zen diff --git a/src/zencore/stats.cpp b/src/zencore/stats.cpp deleted file mode 100644 index 8a424c5ad..000000000 --- a/src/zencore/stats.cpp +++ /dev/null @@ -1,831 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include "zencore/stats.h" - -#include -#include -#include -#include -#include -#include - -#include -#include - -#if ZEN_WITH_TESTS -# include -#endif - -// -// Derived from https://github.com/dln/medida/blob/master/src/medida/stats/ewma.cc -// - -namespace zen::metrics { - -static constinit int kTickIntervalInSeconds = 5; -static constinit double kSecondsPerMinute = 60.0; -static constinit int kOneMinute = 1; -static constinit int kFiveMinutes = 5; -static constinit int kFifteenMinutes = 15; - -static const double kM1_ALPHA = 1.0 - std::exp(-kTickIntervalInSeconds / kSecondsPerMinute / kOneMinute); -static const double kM5_ALPHA = 1.0 - std::exp(-kTickIntervalInSeconds / kSecondsPerMinute / kFiveMinutes); -static const double kM15_ALPHA = 1.0 - std::exp(-kTickIntervalInSeconds / kSecondsPerMinute / kFifteenMinutes); - -static const uint64_t CountPerTick = GetHifreqTimerFrequencySafe() * kTickIntervalInSeconds; -static const uint64_t CountPerSecond = GetHifreqTimerFrequencySafe(); - -////////////////////////////////////////////////////////////////////////// - -void -RawEWMA::Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate) -{ - const double InstantRate = double(Count) / Interval; - - if (IsInitialUpdate) - { - m_Rate.store(InstantRate, std::memory_order_release); - } - else - { - double Delta = Alpha * (InstantRate - m_Rate); - -#if defined(__cpp_lib_atomic_float) - m_Rate.fetch_add(Delta); -#else - double Value = m_Rate.load(std::memory_order_acquire); - double Next; - do - { - Next = Value + Delta; - } while (!m_Rate.compare_exchange_weak(Value, Next, std::memory_order_relaxed)); -#endif - } -} - -double -RawEWMA::Rate() const -{ - return m_Rate.load(std::memory_order_relaxed) * CountPerSecond; -} - -////////////////////////////////////////////////////////////////////////// - -Meter::Meter() : m_StartTick{GetHifreqTimerValue()}, m_LastTick(m_StartTick.load()) -{ -} - -Meter::~Meter() -{ -} - -void -Meter::TickIfNecessary() -{ - uint64_t OldTick = m_LastTick.load(); - const uint64_t NewTick = GetHifreqTimerValue(); - const uint64_t Age = NewTick - OldTick; - - if (Age > CountPerTick) - { - // Ensure only one thread at a time updates the time. This - // works because our tick interval should be sufficiently - // long to ensure two threads don't end up inside this block - - if (m_LastTick.compare_exchange_strong(OldTick, NewTick)) - { - m_Remainder.fetch_add(Age); - - do - { - int64_t Remain = m_Remainder.load(std::memory_order_relaxed); - - if (Remain < 0) - { - return; - } - - if (m_Remainder.compare_exchange_strong(Remain, Remain - CountPerTick)) - { - Tick(); - } - } while (true); - } - } -} - -void -Meter::Tick() -{ - const uint64_t PendingCount = m_PendingCount.exchange(0); - const bool IsFirstTick = m_IsFirstTick; - - if (IsFirstTick) - { - m_IsFirstTick = false; - } - - m_RateM1.Tick(kM1_ALPHA, CountPerTick, PendingCount, IsFirstTick); - m_RateM5.Tick(kM5_ALPHA, CountPerTick, PendingCount, IsFirstTick); - m_RateM15.Tick(kM15_ALPHA, CountPerTick, PendingCount, IsFirstTick); -} - -double -Meter::Rate1() -{ - TickIfNecessary(); - - return m_RateM1.Rate(); -} - -double -Meter::Rate5() -{ - TickIfNecessary(); - - return m_RateM5.Rate(); -} - -double -Meter::Rate15() -{ - TickIfNecessary(); - - return m_RateM15.Rate(); -} - -double -Meter::MeanRate() const -{ - const uint64_t Count = m_TotalCount.load(std::memory_order_relaxed); - - if (Count == 0) - { - return 0.0; - } - - const uint64_t Elapsed = GetHifreqTimerValue() - m_StartTick; - - return (double(Count) * GetHifreqTimerFrequency()) / Elapsed; -} - -void -Meter::Mark(uint64_t Count) -{ - TickIfNecessary(); - - m_TotalCount.fetch_add(Count); - m_PendingCount.fetch_add(Count); -} - -////////////////////////////////////////////////////////////////////////// - -uint64_t -rol64(uint64_t x, int k) -{ - return (x << k) | (x >> (64 - k)); -} - -struct xoshiro256ss_state -{ - uint64_t s[4]; -}; - -uint64_t -xoshiro256ss(struct xoshiro256ss_state* state) -{ - uint64_t* s = state->s; - uint64_t const result = rol64(s[1] * 5, 7) * 9; - uint64_t const t = s[1] << 17; - - s[2] ^= s[0]; - s[3] ^= s[1]; - s[1] ^= s[2]; - s[0] ^= s[3]; - - s[2] ^= t; - s[3] = rol64(s[3], 45); - - return result; -} - -class xoshiro256 -{ -public: - uint64_t operator()() { return xoshiro256ss(&m_State); } - static constexpr uint64_t min() { return 0; } - static constexpr uint64_t max() { return ~(0ull); } - -private: - xoshiro256ss_state m_State{0xf0fefaf9, 0xbeeb5238, 0x48472397, 0x58858558}; -}; - -thread_local xoshiro256 ThreadLocalRng; - -////////////////////////////////////////////////////////////////////////// - -UniformSample::UniformSample(uint32_t ReservoirSize) -{ - ZEN_MEMSCOPE(ELLMTag::Metrics); - m_Values = std::vector>(ReservoirSize); -} - -UniformSample::~UniformSample() -{ -} - -void -UniformSample::Clear() -{ - for (auto& Value : m_Values) - { - Value.store(0); - } - m_SampleCounter = 0; -} - -uint32_t -UniformSample::Size() const -{ - return gsl::narrow_cast(Min(m_SampleCounter.load(), m_Values.size())); -} - -void -UniformSample::Update(int64_t Value) -{ - const uint64_t Count = m_SampleCounter++; - const uint64_t Size = m_Values.size(); - - if (Count < Size) - { - m_Values[Count] = Value; - } - else - { - // Randomly choose an old entry to potentially replace (the probability - // of replacing an entry diminishes with time) - - const uint64_t SampleIndex = ThreadLocalRng() % Count; - - if (SampleIndex < Size) - { - m_Values[SampleIndex].store(Value, std::memory_order_release); - } - } -} - -SampleSnapshot -UniformSample::Snapshot() const -{ - ZEN_MEMSCOPE(ELLMTag::Metrics); - - uint64_t ValuesSize = Size(); - std::vector Values(ValuesSize); - - for (int i = 0, n = int(ValuesSize); i < n; ++i) - { - Values[i] = double(m_Values[i]); - } - - return SampleSnapshot(std::move(Values)); -} - -////////////////////////////////////////////////////////////////////////// - -Histogram::Histogram(int32_t SampleCount) : m_Sample(SampleCount) -{ -} - -Histogram::~Histogram() -{ -} - -void -Histogram::Clear() -{ - m_Min = m_Max = m_Sum = m_Count = 0; - m_Sample.Clear(); -} - -void -Histogram::Update(int64_t Value) -{ - m_Sample.Update(Value); - - if (m_Count == 0) - { - m_Min = m_Max = Value; - } - else - { - int64_t CurrentMax = m_Max.load(std::memory_order_relaxed); - - while ((CurrentMax < Value) && !m_Max.compare_exchange_weak(CurrentMax, Value)) - { - } - - int64_t CurrentMin = m_Min.load(std::memory_order_relaxed); - - while ((CurrentMin > Value) && !m_Min.compare_exchange_weak(CurrentMin, Value)) - { - } - } - - m_Sum += Value; - ++m_Count; -} - -int64_t -Histogram::Max() const -{ - return m_Max.load(std::memory_order_relaxed); -} - -int64_t -Histogram::Min() const -{ - return m_Min.load(std::memory_order_relaxed); -} - -double -Histogram::Mean() const -{ - if (m_Count) - { - return double(m_Sum.load(std::memory_order_relaxed)) / m_Count; - } - else - { - return 0.0; - } -} - -uint64_t -Histogram::Count() const -{ - return m_Count.load(std::memory_order_relaxed); -} - -////////////////////////////////////////////////////////////////////////// - -SampleSnapshot::SampleSnapshot(std::vector&& Values) : m_Values(std::move(Values)) -{ - std::sort(begin(m_Values), end(m_Values)); -} - -SampleSnapshot::~SampleSnapshot() -{ -} - -double -SampleSnapshot::GetQuantileValue(double Quantile) -{ - ZEN_ASSERT((Quantile >= 0.0) && (Quantile <= 1.0)); - - if (m_Values.empty()) - { - return 0.0; - } - - const double Pos = Quantile * (m_Values.size() + 1); - - if (Pos < 1) - { - return m_Values.front(); - } - - if (Pos >= m_Values.size()) - { - return m_Values.back(); - } - - const int32_t Index = (int32_t)Pos; - const double Lower = m_Values[Index - 1]; - const double Upper = m_Values[Index]; - - // Lerp - return Lower + (Pos - std::floor(Pos)) * (Upper - Lower); -} - -const std::vector& -SampleSnapshot::GetValues() const -{ - return m_Values; -} - -////////////////////////////////////////////////////////////////////////// - -OperationTiming::OperationTiming(int32_t SampleCount) : m_Histogram{SampleCount} -{ -} - -OperationTiming::~OperationTiming() -{ -} - -void -OperationTiming::Update(int64_t Duration) -{ - m_Meter.Mark(1); - m_Histogram.Update(Duration); -} - -int64_t -OperationTiming::Max() const -{ - return m_Histogram.Max(); -} - -int64_t -OperationTiming::Min() const -{ - return m_Histogram.Min(); -} - -double -OperationTiming::Mean() const -{ - return m_Histogram.Mean(); -} - -uint64_t -OperationTiming::Count() const -{ - return m_Meter.Count(); -} - -OperationTiming::Scope::Scope(OperationTiming& Outer) : m_Outer(Outer), m_StartTick(GetHifreqTimerValue()) -{ -} - -OperationTiming::Scope::~Scope() -{ - Stop(); -} - -void -OperationTiming::Scope::Stop() -{ - if (m_StartTick != 0) - { - m_Outer.Update(GetHifreqTimerValue() - m_StartTick); - m_StartTick = 0; - } -} - -void -OperationTiming::Scope::Cancel() -{ - m_StartTick = 0; -} - -////////////////////////////////////////////////////////////////////////// - -RequestStats::RequestStats(int32_t SampleCount) : m_RequestTimeHistogram{SampleCount}, m_BytesHistogram{SampleCount} -{ -} - -RequestStats::~RequestStats() -{ -} - -void -RequestStats::Update(int64_t Duration, int64_t Bytes) -{ - m_RequestMeter.Mark(1); - m_RequestTimeHistogram.Update(Duration); - - m_BytesMeter.Mark(Bytes); - m_BytesHistogram.Update(Bytes); -} - -uint64_t -RequestStats::Count() const -{ - return m_RequestMeter.Count(); -} - -RequestStats::Scope::Scope(RequestStats& Outer, int64_t Bytes) : m_Outer(Outer), m_StartTick(GetHifreqTimerValue()), m_Bytes(Bytes) -{ -} - -RequestStats::Scope::~Scope() -{ - Stop(); -} - -void -RequestStats::Scope::Stop() -{ - if (m_StartTick != 0) - { - m_Outer.Update(GetHifreqTimerValue() - m_StartTick, m_Bytes); - m_StartTick = 0; - } -} - -void -RequestStats::Scope::Cancel() -{ - m_StartTick = 0; -} - -////////////////////////////////////////////////////////////////////////// - -void -EmitSnapshot(Meter& Stat, CbObjectWriter& Cbo) -{ - Cbo << "count" << Stat.Count(); - Cbo << "rate_mean" << Stat.MeanRate(); - Cbo << "rate_1" << Stat.Rate1() << "rate_5" << Stat.Rate5() << "rate_15" << Stat.Rate15(); -} - -void -RequestStats::EmitSnapshot(std::string_view Tag, CbObjectWriter& Cbo) -{ - Cbo.BeginObject(Tag); - - Cbo.BeginObject("requests"); - metrics::EmitSnapshot(m_RequestMeter, Cbo); - metrics::EmitSnapshot(m_RequestTimeHistogram, Cbo, GetHifreqTimerToSeconds()); - Cbo.EndObject(); - - Cbo.BeginObject("bytes"); - metrics::EmitSnapshot(m_BytesMeter, Cbo); - metrics::EmitSnapshot(m_BytesHistogram, Cbo, 1.0); - Cbo.EndObject(); - - Cbo.EndObject(); -} - -RequestStatsSnapshot -RequestStats::Snapshot() -{ - const double ToSeconds = GetHifreqTimerToSeconds(); - - return RequestStatsSnapshot{.Requests = GetSnapshot(m_RequestMeter, m_RequestTimeHistogram, ToSeconds), - .Bytes = GetSnapshot(m_BytesMeter, m_BytesHistogram, 1.0)}; -} - -void -EmitSnapshot(std::string_view Tag, OperationTiming& Stat, CbObjectWriter& Cbo) -{ - Cbo.BeginObject(Tag); - - SampleSnapshot Snap = Stat.Snapshot(); - - Cbo << "count" << Stat.Count(); - Cbo << "rate_mean" << Stat.MeanRate(); - Cbo << "rate_1" << Stat.Rate1() << "rate_5" << Stat.Rate5() << "rate_15" << Stat.Rate15(); - - const double ToSeconds = GetHifreqTimerToSeconds(); - - Cbo << "t_avg" << Stat.Mean() * ToSeconds; - Cbo << "t_min" << Stat.Min() * ToSeconds << "t_max" << Stat.Max() * ToSeconds; - Cbo << "t_p75" << Snap.Get75Percentile() * ToSeconds << "t_p95" << Snap.Get95Percentile() * ToSeconds << "t_p99" - << Snap.Get99Percentile() * ToSeconds << "t_p999" << Snap.Get999Percentile() * ToSeconds; - - Cbo.EndObject(); -} - -void -EmitSnapshot(std::string_view Tag, const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor) -{ - Cbo.BeginObject(Tag); - EmitSnapshot(Stat, Cbo, ConversionFactor); - Cbo.EndObject(); -} - -void -EmitSnapshot(const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor) -{ - SampleSnapshot Snap = Stat.Snapshot(); - - Cbo << "count" << Stat.Count() * ConversionFactor << "avg" << Stat.Mean() * ConversionFactor; - Cbo << "min" << Stat.Min() * ConversionFactor << "max" << Stat.Max() * ConversionFactor; - Cbo << "p75" << Snap.Get75Percentile() * ConversionFactor << "p95" << Snap.Get95Percentile() * ConversionFactor << "p99" - << Snap.Get99Percentile() * ConversionFactor << "p999" << Snap.Get999Percentile() * ConversionFactor; -} - -void -EmitSnapshot(std::string_view Tag, Meter& Stat, CbObjectWriter& Cbo) -{ - Cbo.BeginObject(Tag); - - Cbo << "count" << Stat.Count() << "rate_mean" << Stat.MeanRate(); - Cbo << "rate_1" << Stat.Rate1() << "rate_5" << Stat.Rate5() << "rate_15" << Stat.Rate15(); - - Cbo.EndObject(); -} - -void -EmitSnapshot(const MeterSnapshot& Snapshot, CbObjectWriter& Cbo) -{ - Cbo << "count" << Snapshot.Count; - Cbo << "rate_mean" << Snapshot.MeanRate; - Cbo << "rate_1" << Snapshot.Rate1 << "rate_5" << Snapshot.Rate5 << "rate_15" << Snapshot.Rate15; -} - -void -EmitSnapshot(const HistogramSnapshot& Snapshot, CbObjectWriter& Cbo) -{ - Cbo << "t_count" << Snapshot.Count << "t_avg" << Snapshot.Avg; - Cbo << "t_min" << Snapshot.Min << "t_max" << Snapshot.Max; - Cbo << "t_p75" << Snapshot.P75 << "t_p95" << Snapshot.P95 << "t_p99" << Snapshot.P999; -} - -void -EmitSnapshot(std::string_view Tag, const StatsSnapshot& Snapshot, CbObjectWriter& Cbo) -{ - Cbo.BeginObject(Tag); - EmitSnapshot(Snapshot.Meter, Cbo); - EmitSnapshot(Snapshot.Histogram, Cbo); - Cbo.EndObject(); -} - -void -EmitSnapshot(std::string_view Tag, const RequestStatsSnapshot& Snapshot, CbObjectWriter& Cbo) -{ - if (Snapshot.Requests.Meter.Count == 0) - { - return; - } - Cbo.BeginObject(Tag); - EmitSnapshot("request", Snapshot.Requests, Cbo); - EmitSnapshot("bytes", Snapshot.Bytes, Cbo); - Cbo.EndObject(); -} - -////////////////////////////////////////////////////////////////////////// - -#if ZEN_WITH_TESTS - -TEST_CASE("Core.Stats.Histogram") -{ - Histogram Histo{258}; - - SampleSnapshot Snap1 = Histo.Snapshot(); - CHECK_EQ(Snap1.Size(), 0); - CHECK_EQ(Snap1.GetMedian(), 0); - - Histo.Update(1); - CHECK_EQ(Histo.Min(), 1); - CHECK_EQ(Histo.Max(), 1); - - SampleSnapshot Snap2 = Histo.Snapshot(); - CHECK_EQ(Snap2.Size(), 1); - - Histo.Update(2); - CHECK_EQ(Histo.Min(), 1); - CHECK_EQ(Histo.Max(), 2); - - SampleSnapshot Snap3 = Histo.Snapshot(); - CHECK_EQ(Snap3.Size(), 2); - - Histo.Update(-2); - CHECK_EQ(Histo.Min(), -2); - CHECK_EQ(Histo.Max(), 2); - CHECK_EQ(Histo.Mean(), 1 / 3.0); - - SampleSnapshot Snap4 = Histo.Snapshot(); - CHECK_EQ(Snap4.Size(), 3); - CHECK_EQ(Snap4.GetMedian(), 1); - CHECK_EQ(Snap4.Get999Percentile(), 2); - CHECK_EQ(Snap4.GetQuantileValue(0), -2); -} - -TEST_CASE("Core.Stats.UniformSample") -{ - UniformSample Sample1{100}; - - for (int i = 0; i < 100; ++i) - { - for (int j = 1; j <= 100; ++j) - { - Sample1.Update(j); - } - } - - int64_t Sum = 0; - int64_t Count = 0; - - Sample1.IterateValues([&](int64_t Value) { - ++Count; - Sum += Value; - }); - - double Average = double(Sum) / Count; - - CHECK(fabs(Average - 50) < 10); // What's the right test here? The result could vary massively and still be technically correct -} - -TEST_CASE("Core.Stats.EWMA") -{ - SUBCASE("Simple_1") - { - RawEWMA Ewma1; - Ewma1.Tick(kM1_ALPHA, CountPerSecond, 5, true); - - CHECK(fabs(Ewma1.Rate() - 5) < 0.1); - - for (int i = 0; i < 60; ++i) - { - Ewma1.Tick(kM1_ALPHA, CountPerSecond, 10, false); - } - - CHECK(fabs(Ewma1.Rate() - 10) < 0.1); - - for (int i = 0; i < 60; ++i) - { - Ewma1.Tick(kM1_ALPHA, CountPerSecond, 20, false); - } - - CHECK(fabs(Ewma1.Rate() - 20) < 0.1); - } - - SUBCASE("Simple_10") - { - RawEWMA Ewma1; - RawEWMA Ewma5; - RawEWMA Ewma15; - Ewma1.Tick(kM1_ALPHA, CountPerSecond, 5, true); - Ewma5.Tick(kM5_ALPHA, CountPerSecond, 5, true); - Ewma15.Tick(kM15_ALPHA, CountPerSecond, 5, true); - - CHECK(fabs(Ewma1.Rate() - 5) < 0.1); - CHECK(fabs(Ewma5.Rate() - 5) < 0.1); - CHECK(fabs(Ewma15.Rate() - 5) < 0.1); - - auto Tick1 = [&Ewma1](auto Value) { Ewma1.Tick(kM1_ALPHA, CountPerSecond, Value, false); }; - auto Tick5 = [&Ewma5](auto Value) { Ewma5.Tick(kM5_ALPHA, CountPerSecond, Value, false); }; - auto Tick15 = [&Ewma15](auto Value) { Ewma15.Tick(kM15_ALPHA, CountPerSecond, Value, false); }; - - for (int i = 0; i < 60; ++i) - { - Tick1(10); - Tick5(10); - Tick15(10); - } - - CHECK(fabs(Ewma1.Rate() - 10) < 0.1); - - for (int i = 0; i < 5 * 60; ++i) - { - Tick1(20); - Tick5(20); - Tick15(20); - } - - CHECK(fabs(Ewma1.Rate() - 20) < 0.1); - CHECK(fabs(Ewma5.Rate() - 20) < 0.1); - - for (int i = 0; i < 16 * 60; ++i) - { - Tick1(100); - Tick5(100); - Tick15(100); - } - - CHECK(fabs(Ewma1.Rate() - 100) < 0.1); - CHECK(fabs(Ewma5.Rate() - 100) < 0.1); - CHECK(fabs(Ewma15.Rate() - 100) < 0.5); - } -} - -# if 0 // This is not really a unit test, but mildly useful to exercise some code -TEST_CASE("Meter") -{ - Meter Meter1; - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - Meter1.Mark(1); - Sleep(1000); - [[maybe_unused]] double Rate = Meter1.MeanRate(); -} -# endif -} - -namespace zen { - -void -stats_forcelink() -{ -} - -#endif - -} // namespace zen::metrics diff --git a/src/zencore/trace.cpp b/src/zencore/trace.cpp index fe8fb9a5d..87035554f 100644 --- a/src/zencore/trace.cpp +++ b/src/zencore/trace.cpp @@ -9,6 +9,7 @@ # include # define TRACE_IMPLEMENT 1 +# undef _WINSOCK_DEPRECATED_NO_WARNINGS # include # include diff --git a/src/zencore/zencore.cpp b/src/zencore/zencore.cpp index b78991918..4ff79edc7 100644 --- a/src/zencore/zencore.cpp +++ b/src/zencore/zencore.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -267,7 +266,6 @@ zencore_forcelinktests() zen::process_forcelink(); zen::refcount_forcelink(); zen::sha1_forcelink(); - zen::stats_forcelink(); zen::stream_forcelink(); zen::string_forcelink(); zen::thread_forcelink(); diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index 2c063d646..f48c22367 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -86,6 +86,9 @@ MapContentTypeToString(HttpContentType ContentType) case HttpContentType::kXML: return "application/xml"sv; + + case HttpContentType::kProtobuf: + return "application/x-protobuf"sv; } } @@ -119,6 +122,7 @@ static constinit uint32_t HashImagePng = HashStringDjb2("image/png"sv); static constinit uint32_t HashIcon = HashStringDjb2("ico"sv); static constinit uint32_t HashImageIcon = HashStringDjb2("image/x-icon"sv); static constinit uint32_t HashXml = HashStringDjb2("application/xml"sv); +static constinit uint32_t HashProtobuf = HashStringDjb2("application/x-protobuf"sv); std::once_flag InitContentTypeLookup; @@ -153,6 +157,7 @@ struct HashedTypeEntry {HashIcon, HttpContentType::kIcon}, {HashImageIcon, HttpContentType::kIcon}, {HashXml, HttpContentType::kXML}, + {HashProtobuf, HttpContentType::kProtobuf}, // clang-format on }; diff --git a/src/zenhttp/include/zenhttp/httptest.h b/src/zenhttp/include/zenhttp/httptest.h index afe71fbce..608462809 100644 --- a/src/zenhttp/include/zenhttp/httptest.h +++ b/src/zenhttp/include/zenhttp/httptest.h @@ -3,8 +3,8 @@ #pragma once #include -#include #include +#include #include #include diff --git a/src/zenhttp/xmake.lua b/src/zenhttp/xmake.lua index b6ffbe467..af4064012 100644 --- a/src/zenhttp/xmake.lua +++ b/src/zenhttp/xmake.lua @@ -7,7 +7,7 @@ target('zenhttp') add_files("**.cpp") add_files("servers/httpsys.cpp", {unity_ignored=true}) add_includedirs("include", {public=true}) - add_deps("zencore", "transport-sdk") + add_deps("zencore", "zentelemetry", "transport-sdk") add_packages( "vcpkg::asio", "vcpkg::cpr", diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp index 50cf62274..90af79651 100644 --- a/src/zenserver/diag/logging.cpp +++ b/src/zenserver/diag/logging.cpp @@ -12,6 +12,8 @@ #include #include +#include "otlphttp.h" + ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END @@ -73,6 +75,16 @@ InitializeServerLogging(const ZenServerConfig& InOptions) spdlog::apply_logger_env_levels(ZenClientLogger); spdlog::register_logger(ZenClientLogger); + // + +#if ZEN_WITH_OTEL + if (false) + { + auto OtelSink = std::make_shared("http://signoz.localdomain:4318"); + zen::logging::Default().SpdLogger->sinks().push_back(std::move(OtelSink)); + } +#endif + FinishInitializeLogging(LogOptions); const zen::Oid ServerSessionId = zen::GetSessionId(); diff --git a/src/zenserver/diag/otlphttp.cpp b/src/zenserver/diag/otlphttp.cpp new file mode 100644 index 000000000..d62ccccb6 --- /dev/null +++ b/src/zenserver/diag/otlphttp.cpp @@ -0,0 +1,83 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "otlphttp.h" + +#include +#include +#include +#include +#include +#include +#include + +#if ZEN_WITH_OTEL + +namespace zen::logging { + +////////////////////////////////////////////////////////////////////////// + +OtelHttpProtobufSink::OtelHttpProtobufSink(const std::string_view& Uri) : m_OtelHttp(Uri) +{ + m_Encoder.AddResourceAttribute("service.name", "zenserver"); + m_Encoder.AddResourceAttribute("service.instance.id", GetSessionIdString()); + m_Encoder.AddResourceAttribute("service.namespace", "zen"); + m_Encoder.AddResourceAttribute("service.version", ZEN_CFG_VERSION); + m_Encoder.AddResourceAttribute("host.name", GetMachineName()); + m_Encoder.AddResourceAttribute("session.id", GetSessionIdString()); + m_Encoder.AddResourceAttribute("process.id", zen::GetCurrentProcessId()); + + m_TraceRecorder = new TraceRecorder(this); + otel::SetTraceRecorder(m_TraceRecorder); +} + +OtelHttpProtobufSink::~OtelHttpProtobufSink() +{ + otel::SetTraceRecorder({}); +} + +void +OtelHttpProtobufSink::RecordSpans(zen::otel::TraceId Trace, std::span Spans) +{ + std::string Data = m_Encoder.FormatOtelTrace(Trace, Spans); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); + + auto Result = m_OtelHttp.Post("/v1/traces", Payload); +} + +void +OtelHttpProtobufSink::TraceRecorder::RecordSpans(zen::otel::TraceId Trace, std::span Spans) +{ + m_Sink->RecordSpans(Trace, Spans); +} + +void +OtelHttpProtobufSink::log(const spdlog::details::log_msg& Msg) +{ + { + std::string Data = m_Encoder.FormatOtelProtobuf(Msg); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); + + auto Result = m_OtelHttp.Post("/v1/logs", Payload); + } + + { + std::string Data = m_Encoder.FormatOtelMetrics(); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); + + auto Result = m_OtelHttp.Post("/v1/metrics", Payload); + } +} +void +OtelHttpProtobufSink::flush() +{ +} + +} // namespace zen::logging + +#endif diff --git a/src/zenserver/diag/otlphttp.h b/src/zenserver/diag/otlphttp.h new file mode 100644 index 000000000..2281bdcc0 --- /dev/null +++ b/src/zenserver/diag/otlphttp.h @@ -0,0 +1,64 @@ + +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include +#include +#include +#include + +#if ZEN_WITH_OTEL + +namespace zen::logging { + +/** + * OTLP/HTTP sink for spdlog + * + * Sends log messages and traces to an OpenTelemetry collector via OTLP over HTTP + */ + +class OtelHttpProtobufSink : public spdlog::sinks::sink +{ +public: + // Note that this URI should be the base URI of the OTLP HTTP endpoint, e.g. + // "http://otel-collector:4318" + OtelHttpProtobufSink(const std::string_view& Uri); + ~OtelHttpProtobufSink(); + + OtelHttpProtobufSink(const OtelHttpProtobufSink&) = delete; + OtelHttpProtobufSink& operator=(const OtelHttpProtobufSink&) = delete; + +private: + virtual void log(const spdlog::details::log_msg& Msg) override; + virtual void flush() override; + virtual void set_pattern(const std::string& pattern) override { ZEN_UNUSED(pattern); } + virtual void set_formatter(std::unique_ptr sink_formatter) override { ZEN_UNUSED(sink_formatter); } + + void RecordSpans(zen::otel::TraceId Trace, std::span Spans); + + // This is just a thin wrapper to call back into the sink while participating in + // reference counting from the OTEL trace back-end + class TraceRecorder : public zen::otel::TraceRecorder + { + public: + TraceRecorder(OtelHttpProtobufSink* InSink) : m_Sink(InSink) {} + + private: + TraceRecorder(const TraceRecorder&) = delete; + TraceRecorder& operator=(const TraceRecorder&) = delete; + + virtual void RecordSpans(zen::otel::TraceId Trace, std::span Spans) override; + + OtelHttpProtobufSink* m_Sink; + }; + + HttpClient m_OtelHttp; + OtlpEncoder m_Encoder; + Ref m_TraceRecorder; +}; + +} // namespace zen::logging + +#endif \ No newline at end of file diff --git a/src/zenserver/storage/buildstore/httpbuildstore.h b/src/zenserver/storage/buildstore/httpbuildstore.h index 50cb5db12..e10986411 100644 --- a/src/zenserver/storage/buildstore/httpbuildstore.h +++ b/src/zenserver/storage/buildstore/httpbuildstore.h @@ -2,10 +2,10 @@ #pragma once -#include #include #include #include +#include #include diff --git a/src/zenserver/storage/cache/httpstructuredcache.h b/src/zenserver/storage/cache/httpstructuredcache.h index a157148c9..5a795c215 100644 --- a/src/zenserver/storage/cache/httpstructuredcache.h +++ b/src/zenserver/storage/cache/httpstructuredcache.h @@ -2,12 +2,12 @@ #pragma once -#include #include #include #include #include #include +#include #include #include diff --git a/src/zenserver/storage/projectstore/httpprojectstore.h b/src/zenserver/storage/projectstore/httpprojectstore.h index f0a0bcfa1..f6fe63614 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.h +++ b/src/zenserver/storage/projectstore/httpprojectstore.h @@ -2,11 +2,11 @@ #pragma once -#include #include #include #include #include +#include namespace zen { diff --git a/src/zenserver/storage/upstream/upstreamcache.cpp b/src/zenserver/storage/upstream/upstreamcache.cpp index f7ae5f973..6c489c5d3 100644 --- a/src/zenserver/storage/upstream/upstreamcache.cpp +++ b/src/zenserver/storage/upstream/upstreamcache.cpp @@ -9,10 +9,10 @@ #include #include #include -#include #include #include #include +#include #include #include diff --git a/src/zenserver/storage/upstream/upstreamcache.h b/src/zenserver/storage/upstream/upstreamcache.h index d5d61c8d9..c0c8a7ff9 100644 --- a/src/zenserver/storage/upstream/upstreamcache.h +++ b/src/zenserver/storage/upstream/upstreamcache.h @@ -6,10 +6,10 @@ #include #include #include -#include #include #include #include +#include #include #include diff --git a/src/zenserver/storage/workspaces/httpworkspaces.h b/src/zenserver/storage/workspaces/httpworkspaces.h index 89a8e8bdc..888a34b4d 100644 --- a/src/zenserver/storage/workspaces/httpworkspaces.h +++ b/src/zenserver/storage/workspaces/httpworkspaces.h @@ -2,10 +2,10 @@ #pragma once -#include #include #include #include +#include namespace zen { diff --git a/src/zenserver/xmake.lua b/src/zenserver/xmake.lua index 57105045d..483bfd5aa 100644 --- a/src/zenserver/xmake.lua +++ b/src/zenserver/xmake.lua @@ -7,6 +7,7 @@ target("zenserver") "zennet", "zenremotestore", "zenstore", + "zentelemetry", "zenutil", "zenvfs") add_headerfiles("**.h") @@ -17,6 +18,8 @@ target("zenserver") add_includedirs(".") set_symbols("debug") + add_deps("protozero") + if is_mode("release") then set_optimize("fastest") end diff --git a/src/zenstore/cidstore.cpp b/src/zenstore/cidstore.cpp index ae1b59dc0..52d5df061 100644 --- a/src/zenstore/cidstore.cpp +++ b/src/zenstore/cidstore.cpp @@ -6,9 +6,9 @@ #include #include #include -#include #include #include +#include #include "cas.h" diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index 10c61681b..1b501e9ae 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -5,10 +5,10 @@ #include "cacheshared.h" #include -#include #include #include #include +#include ZEN_THIRD_PARTY_INCLUDES_START #include diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 1ba469431..75692cfcd 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -4,10 +4,10 @@ #include #include -#include #include #include #include +#include #include #include diff --git a/src/zenstore/include/zenstore/cache/upstreamcacheclient.h b/src/zenstore/include/zenstore/cache/upstreamcacheclient.h index 2f3b6b0d7..ff4a8c3f7 100644 --- a/src/zenstore/include/zenstore/cache/upstreamcacheclient.h +++ b/src/zenstore/include/zenstore/cache/upstreamcacheclient.h @@ -6,9 +6,9 @@ #include #include #include -#include #include #include +#include #include #include diff --git a/src/zenstore/include/zenstore/cidstore.h b/src/zenstore/include/zenstore/cidstore.h index 8918b119f..d54062476 100644 --- a/src/zenstore/include/zenstore/cidstore.h +++ b/src/zenstore/include/zenstore/cidstore.h @@ -5,8 +5,8 @@ #include "zenstore.h" #include -#include #include +#include #include #include diff --git a/src/zentelemetry-test/xmake.lua b/src/zentelemetry-test/xmake.lua new file mode 100644 index 000000000..bdc60cee8 --- /dev/null +++ b/src/zentelemetry-test/xmake.lua @@ -0,0 +1,9 @@ +-- Copyright Epic Games, Inc. All Rights Reserved. + +target("zentelemetry-test") + set_kind("binary") + set_group("tests") + add_headerfiles("**.h") + add_files("*.cpp") + add_deps("zentelemetry") + add_packages("vcpkg::doctest") diff --git a/src/zentelemetry-test/zentelemetry-test.cpp b/src/zentelemetry-test/zentelemetry-test.cpp new file mode 100644 index 000000000..c8b067226 --- /dev/null +++ b/src/zentelemetry-test/zentelemetry-test.cpp @@ -0,0 +1,42 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include +#include +#include +#include + +#include + +#if ZEN_WITH_TESTS +# define ZEN_TEST_WITH_RUNNER 1 +# include +# include +#endif + +int +main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) +{ +#if ZEN_WITH_TESTS + zen::zentelemetry_forcelinktests(); + +# if ZEN_PLATFORM_LINUX + zen::IgnoreChildSignals(); +# endif + +# if ZEN_WITH_TRACE + zen::TraceInit("zenstore-test"); + zen::TraceOptions TraceCommandlineOptions; + if (GetTraceOptionsFromCommandline(TraceCommandlineOptions)) + { + TraceConfigure(TraceCommandlineOptions); + } +# endif // ZEN_WITH_TRACE + + zen::logging::InitializeLogging(); + zen::MaximizeOpenFileCount(); + + return ZEN_RUN_TESTS(argc, argv); +#else + return 0; +#endif +} diff --git a/src/zentelemetry/include/zentelemetry/otlpencoder.h b/src/zentelemetry/include/zentelemetry/otlpencoder.h new file mode 100644 index 000000000..ed6665781 --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/otlpencoder.h @@ -0,0 +1,66 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include +#include +#include +#include + +#if ZEN_WITH_OTEL + +# include +# include + +namespace spdlog { namespace details { + struct log_msg; +}} // namespace spdlog::details + +namespace zen::otel { +enum class Resource : protozero::pbf_tag_type; +} + +namespace zen { + +/** + * Encoder for OpenTelemetry OTLP Protobuf format + * + * Designed to encode log messages and metrics into the OTLP Protobuf format + * for transmission to an OpenTelemetry collector or compatible backend. + * + * Currently, only log messages and basic resource attributes are supported. + * + * Some initial support for metrics encoding is also included. + * + * Transmission is expected to be handled externally, e.g. via HTTP client. + * + */ + +class OtlpEncoder +{ +public: + OtlpEncoder(); + ~OtlpEncoder(); + + void AddResourceAttribute(const std::string_view& Key, const std::string_view& Value); + void AddResourceAttribute(const std::string_view& Key, int64_t Value); + + std::string FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const; + std::string FormatOtelMetrics() const; + std::string FormatOtelTrace(zen::otel::TraceId Trace, std::span Spans) const; + + OtlpEncoder& operator=(const OtlpEncoder&) = delete; + OtlpEncoder(const OtlpEncoder&) = delete; + +private: + mutable RwLock m_ResourceLock; + std::unordered_map m_ResourceAttributes; + std::unordered_map m_ResourceIntAttributes; + + void AppendResourceAttributes(protozero::pbf_builder& Res) const; +}; + +} // namespace zen + +#endif diff --git a/src/zentelemetry/include/zentelemetry/otlptrace.h b/src/zentelemetry/include/zentelemetry/otlptrace.h new file mode 100644 index 000000000..ebecff91a --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/otlptrace.h @@ -0,0 +1,268 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +#include +#include + +#define ZEN_WITH_OTEL 1 + +#if ZEN_WITH_OTEL +# define ZEN_CONCAT(a, b) a##b +# define IMPL_ZEN_OTEL_SPAN(Name, Line) ::zen::otel::ScopedSpan ZEN_CONCAT(Span$, Line)(Name); +# define ZEN_OTEL_SPAN(Name) IMPL_ZEN_OTEL_SPAN(Name, __LINE__) +#else +# define ZEN_OTEL_SPAN(Name) +#endif + +namespace zen { +class MemoryArena; +} + +#if ZEN_WITH_OTEL + +namespace zen::otel { + +using AttributeList = std::span>; + +class Tracer; + +Tracer& GetTracer(); + +// OLTP Span ID + +struct SpanId +{ + constexpr static size_t kSize = 8; + + inline SpanId() : Id{0} {} + explicit SpanId(const std::span Bytes) noexcept { std::copy(Bytes.begin(), Bytes.end(), Id); } + explicit SpanId(const Oid& InId) noexcept { memcpy(Id, reinterpret_cast(InId.OidBits), sizeof Id); } + + auto operator<=>(const SpanId& Rhs) const = default; + inline operator bool() const { return *this != SpanId(); } + + static SpanId NewSpanId(); + + const char* GetData() const { return reinterpret_cast(Id); } + +private: + uint8_t Id[kSize]; +}; + +// OLTP Trace ID + +struct TraceId +{ + constexpr static size_t kSize = 16; + + std::span GetBytes() const { return std::span(Id, kSize); } + + inline TraceId() noexcept : Id{0} {} + explicit TraceId(const std::span Bytes) noexcept { std::copy(Bytes.begin(), Bytes.end(), Id); } + + auto operator<=>(const TraceId& Rhs) const = default; + inline operator bool() const { return *this != TraceId(); } + + static TraceId NewTraceId(); + + inline const char* GetData() const { return reinterpret_cast(Id); } + +private: + uint8_t Id[kSize]; +}; + +struct AttributePair +{ + const char* Key = nullptr; + union + { + const char* StringValue; + uint64_t NumericValue; + } Value; + + uint64_t Flags = 0; + + enum + { + kFlags_IsNumeric = 1 << 0, + kFlags_IsString = 1 << 1 + }; + + bool IsNumeric() const { return (Flags & kFlags_IsNumeric) != 0; } + uint64_t GetNumericValue() const { return Value.NumericValue; } + void SetNumericValue(uint64_t InValue) + { + Value.NumericValue = InValue; + Flags |= kFlags_IsNumeric; + } + + bool IsString() const { return (Flags & kFlags_IsString) != 0; } + const char* GetStringValue() const { return Value.StringValue; } + void SetStringValue(const char* InValue) + { + Value.StringValue = InValue; + Flags |= kFlags_IsString; + } + + AttributePair* Next = nullptr; +}; + +struct Event +{ + Event* NextEvent = nullptr; + uint64_t Timestamp = 0; + const char* Name = nullptr; + AttributePair* Attributes = nullptr; +}; + +/** Span within a trace + * + * A span represents a single operation within a trace. Spans can be nested + * to form a trace tree. + * + */ + +struct Span final : public TRefCounted +{ + Span& operator=(const Span&) = delete; + Span(const Span&) = delete; + + void AddEvent(std::string_view Name); + void AddEvent(std::string_view Name, uint64_t Timestamp); + void AddEvent(std::string_view Name, const AttributeList& Attributes); + + void AddAttribute(std::string_view Key, std::string_view Value); + void AddAttribute(std::string_view Key, uint64_t Value); + void AddAttributes(const AttributeList& Attributes); + + void End(); + + enum class Kind : uint8_t // This must match otel::SpanKind + { + kInternal = 1, + kServer = 2, + kClient = 3, + kProducer = 4, + kConsumer = 5 + }; + + Kind GetKind() const { return m_Kind; } + void SetKind(Kind InKind) { m_Kind = InKind; } + + SpanId GetSpanId() const { return m_SpanId; } + const char* GetName() const { return m_Name; } + uint64_t GetStartTime() const { return m_StartTime; } + uint64_t GetEndTime() const { return m_EndTime; } + const Span* GetParentSpan() const { return m_ParentSpan; } + const AttributePair* GetAttributes() const { return m_Attributes; } + const Event* GetEvents() const { return m_Events; } + + // This is used to get the current span/trace ID for logging purposes + static SpanId GetCurrentSpanId(TraceId& OutTraceId); + static Span* GetCurrentSpan(); + +protected: + void* operator new(size_t Size) = delete; + void* operator new(size_t Size, MemoryArena& Arena); + void operator delete(void* Ptr) = delete; + void operator delete(void* Ptr, MemoryArena& Arena); + + friend class ScopedSpan; + friend class Tracer; + friend class TRefCounted; + + void DeleteThis() noexcept { End(); } + +private: + // Note: for now there's no locking on the span. To support spans across + // threads, we'll need to add locking around event addition and ending + // and linking of child spans etc + + MemoryArena& m_Arena; + Span* m_NextSibling = nullptr; + Span* m_FirstChild = nullptr; + Span* m_ParentSpan = nullptr; + Span* m_SpanChain = nullptr; + Event* m_Events = nullptr; + AttributePair* m_Attributes = nullptr; + SpanId m_SpanId; + const char* m_Name = nullptr; + uint64_t m_StartTime = 0; + uint64_t m_EndTime = 0; + Kind m_Kind = Kind::kInternal; + bool m_Ended = false; + + Span(MemoryArena& Arena, std::string_view Name, Span* SpanChain); + ~Span(); +}; + +/** Scoped span helper + * + * Automatically ends the span when it goes out of scope + */ + +class ScopedSpan final +{ +public: + ScopedSpan(std::string_view Name); + ScopedSpan() = delete; + ~ScopedSpan(); + + ScopedSpan& operator=(ScopedSpan&& Rhs) = default; + ScopedSpan& operator=(const ScopedSpan& Rhs) = default; + ScopedSpan(ScopedSpan&& Rhs) = default; + ScopedSpan(const ScopedSpan& Rhs) = default; + + Span* operator->() const { return m_Span.Get(); } + +private: + ScopedSpan(Span* InSpan, Tracer* InTracer); + + Ref m_Tracer; // This needs to precede the span ref to ensure proper destruction order + Ref m_Span; + + friend class Tracer; +}; + +/** + * Tracer for creating spans and managing trace state. + * + * This represents a single trace context identified by a trace ID, + * and is at the root of all spans created within that trace. + * + */ + +class Tracer final : public RefCounted +{ +public: + static ScopedSpan CreateSpan(std::string_view Name); + +private: + struct Impl; + Impl* m_Impl; + + Tracer(); + ~Tracer(); + + static Tracer* GetTracer(); + + friend class ScopedSpan; +}; + +class TraceRecorder : public RefCounted +{ +public: + virtual void RecordSpans(TraceId Trace, std::span Spans) = 0; +}; + +void SetTraceRecorder(Ref Recorder); +bool IsRecording(); + +void otlptrace_forcelink(); + +} // namespace zen::otel +#endif diff --git a/src/zentelemetry/include/zentelemetry/stats.h b/src/zentelemetry/include/zentelemetry/stats.h new file mode 100644 index 000000000..3e67bac1c --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/stats.h @@ -0,0 +1,356 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "zentelemetry.h" + +#include + +#include +#include +#include + +namespace zen { +class CbObjectWriter; +} + +namespace zen::metrics { + +template +class Gauge +{ +public: + Gauge() : m_Value{0} {} + + T Value() const { return m_Value; } + void SetValue(T Value) { m_Value = Value; } + +private: + std::atomic m_Value; +}; + +/** Stats counter + * + * A counter is modified by adding or subtracting a value from a current value. + * This would typically be used to track number of requests in flight, number + * of active jobs etc + * + */ +class Counter +{ +public: + inline void SetValue(uint64_t Value) { m_count = Value; } + inline uint64_t Value() const { return m_count; } + + inline void Increment(int64_t AddValue) { m_count.fetch_add(AddValue); } + inline void Decrement(int64_t SubValue) { m_count.fetch_sub(SubValue); } + inline void Clear() { m_count.store(0, std::memory_order_release); } + +private: + std::atomic m_count{0}; +}; + +/** Exponential Weighted Moving Average + + This is very raw, to use as little state as possible. If we + want to use this more broadly in user code we should perhaps + add a more user-friendly wrapper + */ + +class RawEWMA +{ +public: + /// + /// Update EWMA with new measure + /// + /// Smoothing factor (between 0 and 1) + /// Elapsed time since last + /// Value + /// Whether this is the first update or not + void Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate); + double Rate() const; + +private: + std::atomic m_Rate = 0; +}; + +/// +/// Tracks rate of events over time (i.e requests/sec), using +/// exponential moving averages +/// +class Meter +{ +public: + Meter(); + ~Meter(); + + inline uint64_t Count() const { return m_TotalCount; } + double Rate1(); // One-minute rate + double Rate5(); // Five-minute rate + double Rate15(); // Fifteen-minute rate + double MeanRate() const; // Mean rate since instantiation of this meter + void Mark(uint64_t Count = 1); // Register one or more events + +private: + std::atomic m_TotalCount{0}; // Accumulator counting number of marks since beginning + std::atomic m_PendingCount{0}; // Pending EWMA update accumulator + std::atomic m_StartTick{0}; // Time this was instantiated (for mean) + std::atomic m_LastTick{0}; // Timestamp of last EWMA tick + std::atomic m_Remainder{0}; // Tracks the "modulo" of tick time + bool m_IsFirstTick = true; + RawEWMA m_RateM1; + RawEWMA m_RateM5; + RawEWMA m_RateM15; + + void TickIfNecessary(); + void Tick(); +}; + +/** Moment-in-time snapshot of a distribution + */ +class SampleSnapshot +{ +public: + SampleSnapshot(std::vector&& Values); + ~SampleSnapshot(); + + uint32_t Size() const { return (uint32_t)m_Values.size(); } + double GetQuantileValue(double Quantile); + double GetMedian() { return GetQuantileValue(0.5); } + double Get75Percentile() { return GetQuantileValue(0.75); } + double Get95Percentile() { return GetQuantileValue(0.95); } + double Get98Percentile() { return GetQuantileValue(0.98); } + double Get99Percentile() { return GetQuantileValue(0.99); } + double Get999Percentile() { return GetQuantileValue(0.999); } + const std::vector& GetValues() const; + +private: + std::vector m_Values; +}; + +/** Randomly selects samples from a stream. Uses Vitter's + Algorithm R to produce a statistically representative sample. + + http://www.cs.umd.edu/~samir/498/vitter.pdf - Random Sampling with a Reservoir + */ + +class UniformSample +{ +public: + UniformSample(uint32_t ReservoirSize); + ~UniformSample(); + + void Clear(); + uint32_t Size() const; + void Update(int64_t Value); + SampleSnapshot Snapshot() const; + + template T> + void IterateValues(T Callback) const + { + for (const auto& Value : m_Values) + { + Callback(Value); + } + } + +private: + std::atomic m_SampleCounter{0}; + std::vector> m_Values; +}; + +/** Track (probabilistic) sample distribution along with min/max + */ +class Histogram +{ +public: + Histogram(int32_t SampleCount = 1028); + ~Histogram(); + + void Clear(); + void Update(int64_t Value); + int64_t Max() const; + int64_t Min() const; + double Mean() const; + uint64_t Count() const; + SampleSnapshot Snapshot() const { return m_Sample.Snapshot(); } + +private: + UniformSample m_Sample; + std::atomic m_Min{0}; + std::atomic m_Max{0}; + std::atomic m_Sum{0}; + std::atomic m_Count{0}; +}; + +/** Track timing and frequency of some operation + + Example usage would be to track frequency and duration of network + requests, or function calls. + + */ +class OperationTiming +{ +public: + OperationTiming(int32_t SampleCount = 514); + ~OperationTiming(); + + void Update(int64_t Duration); + int64_t Max() const; + int64_t Min() const; + double Mean() const; + uint64_t Count() const; + SampleSnapshot Snapshot() const { return m_Histogram.Snapshot(); } + + double Rate1() { return m_Meter.Rate1(); } + double Rate5() { return m_Meter.Rate5(); } + double Rate15() { return m_Meter.Rate15(); } + double MeanRate() const { return m_Meter.MeanRate(); } + + struct Scope + { + Scope(OperationTiming& Outer); + ~Scope(); + + void Stop(); + void Cancel(); + + private: + OperationTiming& m_Outer; + uint64_t m_StartTick; + }; + +private: + Meter m_Meter; + Histogram m_Histogram; +}; + +struct MeterSnapshot +{ + uint64_t Count; + double MeanRate; + double Rate1; + double Rate5; + double Rate15; +}; + +struct HistogramSnapshot +{ + double Count; + double Avg; + double Min; + double Max; + double P75; + double P95; + double P99; + double P999; +}; + +struct StatsSnapshot +{ + MeterSnapshot Meter; + HistogramSnapshot Histogram; +}; + +struct RequestStatsSnapshot +{ + StatsSnapshot Requests; + StatsSnapshot Bytes; +}; + +/** Metrics for network requests + + Aggregates tracking of duration, payload sizes into a single + class + + */ +class RequestStats +{ +public: + RequestStats(int32_t SampleCount = 514); + ~RequestStats(); + + void Update(int64_t Duration, int64_t Bytes); + uint64_t Count() const; + + // Timing + + int64_t MaxDuration() const { return m_BytesHistogram.Max(); } + int64_t MinDuration() const { return m_BytesHistogram.Min(); } + double MeanDuration() const { return m_BytesHistogram.Mean(); } + SampleSnapshot DurationSnapshot() const { return m_RequestTimeHistogram.Snapshot(); } + double Rate1() { return m_RequestMeter.Rate1(); } + double Rate5() { return m_RequestMeter.Rate5(); } + double Rate15() { return m_RequestMeter.Rate15(); } + double MeanRate() const { return m_RequestMeter.MeanRate(); } + + // Bytes + + int64_t MaxBytes() const { return m_BytesHistogram.Max(); } + int64_t MinBytes() const { return m_BytesHistogram.Min(); } + double MeanBytes() const { return m_BytesHistogram.Mean(); } + SampleSnapshot BytesSnapshot() const { return m_BytesHistogram.Snapshot(); } + double ByteRate1() { return m_BytesMeter.Rate1(); } + double ByteRate5() { return m_BytesMeter.Rate5(); } + double ByteRate15() { return m_BytesMeter.Rate15(); } + double ByteMeanRate() const { return m_BytesMeter.MeanRate(); } + + struct Scope + { + Scope(RequestStats& Outer, int64_t Bytes); + ~Scope(); + + void SetBytes(int64_t Bytes) { m_Bytes = Bytes; } + void Stop(); + void Cancel(); + + private: + RequestStats& m_Outer; + uint64_t m_StartTick; + int64_t m_Bytes; + }; + + void EmitSnapshot(std::string_view Tag, CbObjectWriter& Cbo); + + RequestStatsSnapshot Snapshot(); + +private: + static StatsSnapshot GetSnapshot(Meter& M, Histogram& H, double ConversionFactor) + { + SampleSnapshot Snap = H.Snapshot(); + return StatsSnapshot{ + .Meter = {.Count = M.Count(), .MeanRate = M.MeanRate(), .Rate1 = M.Rate1(), .Rate5 = M.Rate5(), .Rate15 = M.Rate15()}, + .Histogram = {.Count = H.Count() * ConversionFactor, + .Avg = H.Mean() * ConversionFactor, + .Min = H.Min() * ConversionFactor, + .Max = H.Max() * ConversionFactor, + .P75 = Snap.Get75Percentile() * ConversionFactor, + .P95 = Snap.Get95Percentile() * ConversionFactor, + .P99 = Snap.Get99Percentile() * ConversionFactor, + .P999 = Snap.Get999Percentile() * ConversionFactor}}; + } + + Meter m_RequestMeter; + Meter m_BytesMeter; + Histogram m_RequestTimeHistogram; + Histogram m_BytesHistogram; +}; + +void EmitSnapshot(std::string_view Tag, OperationTiming& Stat, CbObjectWriter& Cbo); +void EmitSnapshot(std::string_view Tag, const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor); +void EmitSnapshot(std::string_view Tag, Meter& Stat, CbObjectWriter& Cbo); + +void EmitSnapshot(const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor); + +void EmitSnapshot(std::string_view Tag, const MeterSnapshot& Snapshot, CbObjectWriter& Cbo); +void EmitSnapshot(std::string_view Tag, const HistogramSnapshot& Snapshot, CbObjectWriter& Cbo); +void EmitSnapshot(std::string_view Tag, const StatsSnapshot& Snapshot, CbObjectWriter& Cbo); +void EmitSnapshot(std::string_view Tag, const RequestStatsSnapshot& Snapshot, CbObjectWriter& Cbo); + +} // namespace zen::metrics + +namespace zen { + +extern void stats_forcelink(); + +} // namespace zen diff --git a/src/zentelemetry/include/zentelemetry/zentelemetry.h b/src/zentelemetry/include/zentelemetry/zentelemetry.h new file mode 100644 index 000000000..fc811974b --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/zentelemetry.h @@ -0,0 +1,9 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +namespace zen { + +void zentelemetry_forcelinktests(); + +} diff --git a/src/zentelemetry/otellogprotozero.h b/src/zentelemetry/otellogprotozero.h new file mode 100644 index 000000000..1e2e85d2d --- /dev/null +++ b/src/zentelemetry/otellogprotozero.h @@ -0,0 +1,287 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "otelprotozero.h" + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +///////////////////////////////////////////////////////////////////////////////////// +// logs/v1 + +// LogsData represents the logs data that can be stored in a persistent storage, +// OR can be embedded by other protocols that transfer OTLP logs data but do not +// implement the OTLP protocol. +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message LogsData { + // An array of ResourceLogs. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceLogs resource_logs = 1; +} + +// A collection of ScopeLogs from a Resource. +message ResourceLogs { + reserved 1000; + + // The resource for the logs in this message. + // If this field is not set then resource info is unknown. + opentelemetry.proto.resource.v1.Resource resource = 1; + + // A list of ScopeLogs that originate from a resource. + repeated ScopeLogs scope_logs = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_logs" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Logs produced by a Scope. +message ScopeLogs { + // The instrumentation scope information for the logs in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of log records. + repeated LogRecord log_records = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the log data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all logs in the "logs" field. + string schema_url = 3; +} + +// Possible values for LogRecord.SeverityNumber. +enum SeverityNumber { + // UNSPECIFIED is the default SeverityNumber, it MUST NOT be used. + SEVERITY_NUMBER_UNSPECIFIED = 0; + SEVERITY_NUMBER_TRACE = 1; + SEVERITY_NUMBER_TRACE2 = 2; + SEVERITY_NUMBER_TRACE3 = 3; + SEVERITY_NUMBER_TRACE4 = 4; + SEVERITY_NUMBER_DEBUG = 5; + SEVERITY_NUMBER_DEBUG2 = 6; + SEVERITY_NUMBER_DEBUG3 = 7; + SEVERITY_NUMBER_DEBUG4 = 8; + SEVERITY_NUMBER_INFO = 9; + SEVERITY_NUMBER_INFO2 = 10; + SEVERITY_NUMBER_INFO3 = 11; + SEVERITY_NUMBER_INFO4 = 12; + SEVERITY_NUMBER_WARN = 13; + SEVERITY_NUMBER_WARN2 = 14; + SEVERITY_NUMBER_WARN3 = 15; + SEVERITY_NUMBER_WARN4 = 16; + SEVERITY_NUMBER_ERROR = 17; + SEVERITY_NUMBER_ERROR2 = 18; + SEVERITY_NUMBER_ERROR3 = 19; + SEVERITY_NUMBER_ERROR4 = 20; + SEVERITY_NUMBER_FATAL = 21; + SEVERITY_NUMBER_FATAL2 = 22; + SEVERITY_NUMBER_FATAL3 = 23; + SEVERITY_NUMBER_FATAL4 = 24; +} + +// LogRecordFlags represents constants used to interpret the +// LogRecord.flags field, which is protobuf 'fixed32' type and is to +// be used as bit-fields. Each non-zero value defined in this enum is +// a bit-mask. To extract the bit-field, for example, use an +// expression like: +// +// (logRecord.flags & LOG_RECORD_FLAGS_TRACE_FLAGS_MASK) +// +enum LogRecordFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + LOG_RECORD_FLAGS_DO_NOT_USE = 0; + + // Bits 0-7 are used for trace flags. + LOG_RECORD_FLAGS_TRACE_FLAGS_MASK = 0x000000FF; + + // Bits 8-31 are reserved for future use. +} + +// A log record according to OpenTelemetry Log Data Model: +// https://github.com/open-telemetry/oteps/blob/main/text/logs/0097-log-data-model.md +message LogRecord { + reserved 4; + + // time_unix_nano is the time when the event occurred. + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // Value of 0 indicates unknown or missing timestamp. + fixed64 time_unix_nano = 1; + + // Time when the event was observed by the collection system. + // For events that originate in OpenTelemetry (e.g. using OpenTelemetry Logging SDK) + // this timestamp is typically set at the generation time and is equal to Timestamp. + // For events originating externally and collected by OpenTelemetry (e.g. using + // Collector) this is the time when OpenTelemetry's code observed the event measured + // by the clock of the OpenTelemetry code. This field MUST be set once the event is + // observed by OpenTelemetry. + // + // For converting OpenTelemetry log data to formats that support only one timestamp or + // when receiving OpenTelemetry log data by recipients that support only one timestamp + // internally the following logic is recommended: + // - Use time_unix_nano if it is present, otherwise use observed_time_unix_nano. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // Value of 0 indicates unknown or missing timestamp. + fixed64 observed_time_unix_nano = 11; + + // Numerical value of the severity, normalized to values described in Log Data Model. + // [Optional]. + SeverityNumber severity_number = 2; + + // The severity text (also known as log level). The original string representation as + // it is known at the source. [Optional]. + string severity_text = 3; + + // A value containing the body of the log record. Can be for example a human-readable + // string message (including multi-line) describing the event in a free form or it can + // be a structured data composed of arrays and maps of other values. [Optional]. + opentelemetry.proto.common.v1.AnyValue body = 5; + + // Additional attributes that describe the specific event occurrence. [Optional]. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated opentelemetry.proto.common.v1.KeyValue attributes = 6; + uint32 dropped_attributes_count = 7; + + // Flags, a bit field. 8 least significant bits are the trace flags as + // defined in W3C Trace Context specification. 24 most significant bits are reserved + // and must be set to 0. Readers must not assume that 24 most significant bits + // will be zero and must correctly mask the bits when reading 8-bit trace flag (use + // flags & LOG_RECORD_FLAGS_TRACE_FLAGS_MASK). [Optional]. + fixed32 flags = 8; + + // A unique identifier for a trace. All logs from the same trace share + // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes OR + // of length other than 16 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is optional. + // + // The receivers SHOULD assume that the log record is not associated with a + // trace if any of the following is true: + // - the field is not present, + // - the field contains an invalid value. + bytes trace_id = 9; + + // A unique identifier for a span within a trace, assigned when the span + // is created. The ID is an 8-byte array. An ID with all zeroes OR of length + // other than 8 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is optional. If the sender specifies a valid span_id then it SHOULD also + // specify a valid trace_id. + // + // The receivers SHOULD assume that the log record is not associated with a + // span if any of the following is true: + // - the field is not present, + // - the field contains an invalid value. + bytes span_id = 10; + + // A unique identifier of event category/type. + // All events with the same event_name are expected to conform to the same + // schema for both their attributes and their body. + // + // Recommended to be fully qualified and short (no longer than 256 characters). + // + // Presence of event_name on the log record identifies this record + // as an event. + // + // [Optional]. + string event_name = 12; +} +// clang-format on +#endif + +namespace zen::otel { + +// Possible values for LogRecord.SeverityNumber. +enum SeverityNumber +{ + // UNSPECIFIED is the default SeverityNumber, it MUST NOT be used. + SEVERITY_NUMBER_UNSPECIFIED = 0, + SEVERITY_NUMBER_TRACE = 1, + SEVERITY_NUMBER_TRACE2 = 2, + SEVERITY_NUMBER_TRACE3 = 3, + SEVERITY_NUMBER_TRACE4 = 4, + SEVERITY_NUMBER_DEBUG = 5, + SEVERITY_NUMBER_DEBUG2 = 6, + SEVERITY_NUMBER_DEBUG3 = 7, + SEVERITY_NUMBER_DEBUG4 = 8, + SEVERITY_NUMBER_INFO = 9, + SEVERITY_NUMBER_INFO2 = 10, + SEVERITY_NUMBER_INFO3 = 11, + SEVERITY_NUMBER_INFO4 = 12, + SEVERITY_NUMBER_WARN = 13, + SEVERITY_NUMBER_WARN2 = 14, + SEVERITY_NUMBER_WARN3 = 15, + SEVERITY_NUMBER_WARN4 = 16, + SEVERITY_NUMBER_ERROR = 17, + SEVERITY_NUMBER_ERROR2 = 18, + SEVERITY_NUMBER_ERROR3 = 19, + SEVERITY_NUMBER_ERROR4 = 20, + SEVERITY_NUMBER_FATAL = 21, + SEVERITY_NUMBER_FATAL2 = 22, + SEVERITY_NUMBER_FATAL3 = 23, + SEVERITY_NUMBER_FATAL4 = 24 +}; + +enum class LogRecord : protozero::pbf_tag_type +{ + required_fixed64_time_unix_nano = 1, + optional_SeverityNumber_severity_number = 2, + optional_string_severity_text = 3, + optional_anyvalue_body = 5, + optional_repeated_kv_attributes = 6, + optional_uint32_dropped_attributes_count = 7, + optional_fixed32_flags = 8, + optional_bytes_trace_id = 9, + optional_bytes_span_id = 10, + optional_fixed64_observed_time_unix_nano = 11, + optional_event_name = 12, +}; + +enum class ScopeLogs : protozero::pbf_tag_type +{ + required_InstrumentationScope_scope = 1, + required_repeated_LogRecord_log_records = 2, + optional_string_schema_url = 3 +}; + +enum class ResourceLogs : protozero::pbf_tag_type +{ + optional_Resource_resource = 1, + required_repeated_ScopeLogs_scope_logs = 2, + optional_string_schema_url = 3 +}; + +enum class LogsData : protozero::pbf_tag_type +{ + required_repeated_ResourceLogs_resource_logs = 1 +}; + +} // namespace zen::otel diff --git a/src/zentelemetry/otelmetricsprotozero.h b/src/zentelemetry/otelmetricsprotozero.h new file mode 100644 index 000000000..12bae0d75 --- /dev/null +++ b/src/zentelemetry/otelmetricsprotozero.h @@ -0,0 +1,953 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "otelprotozero.h" + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +////////////////////////////////////////////////////////////////////////// +// metrics/v1/metrics.proto +// + +// Copyright 2019, OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package opentelemetry.proto.metrics.v1; + +import "opentelemetry/proto/common/v1/common.proto"; +import "opentelemetry/proto/resource/v1/resource.proto"; + +option csharp_namespace = "OpenTelemetry.Proto.Metrics.V1"; +option java_multiple_files = true; +option java_package = "io.opentelemetry.proto.metrics.v1"; +option java_outer_classname = "MetricsProto"; +option go_package = "go.opentelemetry.io/proto/otlp/metrics/v1"; + +// MetricsData represents the metrics data that can be stored in a persistent +// storage, OR can be embedded by other protocols that transfer OTLP metrics +// data but do not implement the OTLP protocol. +// +// MetricsData +// └─── ResourceMetrics +// ├── Resource +// ├── SchemaURL +// └── ScopeMetrics +// ├── Scope +// ├── SchemaURL +// └── Metric +// ├── Name +// ├── Description +// ├── Unit +// └── data +// ├── Gauge +// ├── Sum +// ├── Histogram +// ├── ExponentialHistogram +// └── Summary +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message MetricsData { + // An array of ResourceMetrics. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceMetrics resource_metrics = 1; +} + +// A collection of ScopeMetrics from a Resource. +message ResourceMetrics { + reserved 1000; + + // The resource for the metrics in this message. + // If this field is not set then no resource info is known. + opentelemetry.proto.resource.v1.Resource resource = 1; + + // A list of metrics that originate from a resource. + repeated ScopeMetrics scope_metrics = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_metrics" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Metrics produced by an Scope. +message ScopeMetrics { + // The instrumentation scope information for the metrics in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of metrics that originate from an instrumentation library. + repeated Metric metrics = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the metric data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all metrics in the "metrics" field. + string schema_url = 3; +} + +// Defines a Metric which has one or more timeseries. The following is a +// brief summary of the Metric data model. For more details, see: +// +// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md +// +// The data model and relation between entities is shown in the +// diagram below. Here, "DataPoint" is the term used to refer to any +// one of the specific data point value types, and "points" is the term used +// to refer to any one of the lists of points contained in the Metric. +// +// - Metric is composed of a metadata and data. +// - Metadata part contains a name, description, unit. +// - Data is one of the possible types (Sum, Gauge, Histogram, Summary). +// - DataPoint contains timestamps, attributes, and one of the possible value type +// fields. +// +// Metric +// +------------+ +// |name | +// |description | +// |unit | +------------------------------------+ +// |data |---> |Gauge, Sum, Histogram, Summary, ... | +// +------------+ +------------------------------------+ +// +// Data [One of Gauge, Sum, Histogram, Summary, ...] +// +-----------+ +// |... | // Metadata about the Data. +// |points |--+ +// +-----------+ | +// | +---------------------------+ +// | |DataPoint 1 | +// v |+------+------+ +------+ | +// +-----+ ||label |label |...|label | | +// | 1 |-->||value1|value2|...|valueN| | +// +-----+ |+------+------+ +------+ | +// | . | |+-----+ | +// | . | ||value| | +// | . | |+-----+ | +// | . | +---------------------------+ +// | . | . +// | . | . +// | . | . +// | . | +---------------------------+ +// | . | |DataPoint M | +// +-----+ |+------+------+ +------+ | +// | M |-->||label |label |...|label | | +// +-----+ ||value1|value2|...|valueN| | +// |+------+------+ +------+ | +// |+-----+ | +// ||value| | +// |+-----+ | +// +---------------------------+ +// +// Each distinct type of DataPoint represents the output of a specific +// aggregation function, the result of applying the DataPoint's +// associated function of to one or more measurements. +// +// All DataPoint types have three common fields: +// - Attributes includes key-value pairs associated with the data point +// - TimeUnixNano is required, set to the end time of the aggregation +// - StartTimeUnixNano is optional, but strongly encouraged for DataPoints +// having an AggregationTemporality field, as discussed below. +// +// Both TimeUnixNano and StartTimeUnixNano values are expressed as +// UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. +// +// # TimeUnixNano +// +// This field is required, having consistent interpretation across +// DataPoint types. TimeUnixNano is the moment corresponding to when +// the data point's aggregate value was captured. +// +// Data points with the 0 value for TimeUnixNano SHOULD be rejected +// by consumers. +// +// # StartTimeUnixNano +// +// StartTimeUnixNano in general allows detecting when a sequence of +// observations is unbroken. This field indicates to consumers the +// start time for points with cumulative and delta +// AggregationTemporality, and it should be included whenever possible +// to support correct rate calculation. Although it may be omitted +// when the start time is truly unknown, setting StartTimeUnixNano is +// strongly encouraged. +message Metric { + reserved 4, 6, 8; + + // The name of the metric. + string name = 1; + + // A description of the metric, which can be used in documentation. + string description = 2; + + // The unit in which the metric value is reported. Follows the format + // described by https://unitsofmeasure.org/ucum.html. + string unit = 3; + + // Data determines the aggregation type (if any) of the metric, what is the + // reported value type for the data points, as well as the relatationship to + // the time interval over which they are reported. + oneof data { + Gauge gauge = 5; + Sum sum = 7; + Histogram histogram = 9; + ExponentialHistogram exponential_histogram = 10; + Summary summary = 11; + } + + // Additional metadata attributes that describe the metric. [Optional]. + // Attributes are non-identifying. + // Consumers SHOULD NOT need to be aware of these attributes. + // These attributes MAY be used to encode information allowing + // for lossless roundtrip translation to / from another data model. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated opentelemetry.proto.common.v1.KeyValue metadata = 12; +} + +// Gauge represents the type of a scalar metric that always exports the +// "current value" for every data point. It should be used for an "unknown" +// aggregation. +// +// A Gauge does not support different aggregation temporalities. Given the +// aggregation is unknown, points cannot be combined using the same +// aggregation, regardless of aggregation temporalities. Therefore, +// AggregationTemporality is not included. Consequently, this also means +// "StartTimeUnixNano" is ignored for all data points. +message Gauge { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated NumberDataPoint data_points = 1; +} + +// Sum represents the type of a scalar metric that is calculated as a sum of all +// reported measurements over a time interval. +message Sum { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated NumberDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; + + // Represents whether the sum is monotonic. + bool is_monotonic = 3; +} + +// Histogram represents the type of a metric that is calculated by aggregating +// as a Histogram of all reported measurements over a time interval. +message Histogram { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated HistogramDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; +} + +// ExponentialHistogram represents the type of a metric that is calculated by aggregating +// as a ExponentialHistogram of all reported double measurements over a time interval. +message ExponentialHistogram { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated ExponentialHistogramDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; +} + +// Summary metric data are used to convey quantile summaries, +// a Prometheus (see: https://prometheus.io/docs/concepts/metric_types/#summary) +// and OpenMetrics (see: https://github.com/prometheus/OpenMetrics/blob/4dbf6075567ab43296eed941037c12951faafb92/protos/prometheus.proto#L45) +// data type. These data points cannot always be merged in a meaningful way. +// While they can be useful in some applications, histogram data points are +// recommended for new applications. +// Summary metrics do not have an aggregation temporality field. This is +// because the count and sum fields of a SummaryDataPoint are assumed to be +// cumulative values. +message Summary { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated SummaryDataPoint data_points = 1; +} + +// AggregationTemporality defines how a metric aggregator reports aggregated +// values. It describes how those values relate to the time interval over +// which they are aggregated. +enum AggregationTemporality { + // UNSPECIFIED is the default AggregationTemporality, it MUST not be used. + AGGREGATION_TEMPORALITY_UNSPECIFIED = 0; + + // DELTA is an AggregationTemporality for a metric aggregator which reports + // changes since last report time. Successive metrics contain aggregation of + // values from continuous and non-overlapping intervals. + // + // The values for a DELTA metric are based only on the time interval + // associated with one measurement cycle. There is no dependency on + // previous measurements like is the case for CUMULATIVE metrics. + // + // For example, consider a system measuring the number of requests that + // it receives and reports the sum of these requests every second as a + // DELTA metric: + // + // 1. The system starts receiving at time=t_0. + // 2. A request is received, the system measures 1 request. + // 3. A request is received, the system measures 1 request. + // 4. A request is received, the system measures 1 request. + // 5. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+1 with a value of 3. + // 6. A request is received, the system measures 1 request. + // 7. A request is received, the system measures 1 request. + // 8. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0+1 to + // t_0+2 with a value of 2. + AGGREGATION_TEMPORALITY_DELTA = 1; + + // CUMULATIVE is an AggregationTemporality for a metric aggregator which + // reports changes since a fixed start time. This means that current values + // of a CUMULATIVE metric depend on all previous measurements since the + // start time. Because of this, the sender is required to retain this state + // in some form. If this state is lost or invalidated, the CUMULATIVE metric + // values MUST be reset and a new fixed start time following the last + // reported measurement time sent MUST be used. + // + // For example, consider a system measuring the number of requests that + // it receives and reports the sum of these requests every second as a + // CUMULATIVE metric: + // + // 1. The system starts receiving at time=t_0. + // 2. A request is received, the system measures 1 request. + // 3. A request is received, the system measures 1 request. + // 4. A request is received, the system measures 1 request. + // 5. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+1 with a value of 3. + // 6. A request is received, the system measures 1 request. + // 7. A request is received, the system measures 1 request. + // 8. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+2 with a value of 5. + // 9. The system experiences a fault and loses state. + // 10. The system recovers and resumes receiving at time=t_1. + // 11. A request is received, the system measures 1 request. + // 12. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_1 to + // t_0+1 with a value of 1. + // + // Note: Even though, when reporting changes since last report time, using + // CUMULATIVE is valid, it is not recommended. This may cause problems for + // systems that do not use start_time to determine when the aggregation + // value was reset (e.g. Prometheus). + AGGREGATION_TEMPORALITY_CUMULATIVE = 2; +} + +// DataPointFlags is defined as a protobuf 'uint32' type and is to be used as a +// bit-field representing 32 distinct boolean flags. Each flag defined in this +// enum is a bit-mask. To test the presence of a single flag in the flags of +// a data point, for example, use an expression like: +// +// (point.flags & DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK) == DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK +// +enum DataPointFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + DATA_POINT_FLAGS_DO_NOT_USE = 0; + + // This DataPoint is valid but has no recorded value. This value + // SHOULD be used to reflect explicitly missing data in a series, as + // for an equivalent to the Prometheus "staleness marker". + DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK = 1; + + // Bits 2-31 are reserved for future use. +} + +// NumberDataPoint is a single data point in a timeseries that describes the +// time-varying scalar value of a metric. +message NumberDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 7; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // The value itself. A point is considered invalid when one of the recognized + // value fields is not present inside this oneof. + oneof value { + double as_double = 4; + sfixed64 as_int = 6; + } + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 5; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 8; +} + +// HistogramDataPoint is a single data point in a timeseries that describes the +// time-varying values of a Histogram. A Histogram contains summary statistics +// for a population of values, it may optionally contain the distribution of +// those values across a set of buckets. +// +// If the histogram contains the distribution of values, then both +// "explicit_bounds" and "bucket counts" fields must be defined. +// If the histogram does not contain the distribution of values, then both +// "explicit_bounds" and "bucket_counts" must be omitted and only "count" and +// "sum" are known. +message HistogramDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 9; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be non-negative. This + // value must be equal to the sum of the "count" fields in buckets if a + // histogram is provided. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#histogram + optional double sum = 5; + + // bucket_counts is an optional field contains the count values of histogram + // for each bucket. + // + // The sum of the bucket_counts must equal the value in the count field. + // + // The number of elements in bucket_counts array must be by one greater than + // the number of elements in explicit_bounds array. The exception to this rule + // is when the length of bucket_counts is 0, then the length of explicit_bounds + // must also be 0. + repeated fixed64 bucket_counts = 6; + + // explicit_bounds specifies buckets with explicitly defined bounds for values. + // + // The boundaries for bucket at index i are: + // + // (-infinity, explicit_bounds[i]] for i == 0 + // (explicit_bounds[i-1], explicit_bounds[i]] for 0 < i < size(explicit_bounds) + // (explicit_bounds[i-1], +infinity) for i == size(explicit_bounds) + // + // The values in the explicit_bounds array must be strictly increasing. + // + // Histogram buckets are inclusive of their upper boundary, except the last + // bucket where the boundary is at infinity. This format is intentionally + // compatible with the OpenMetrics histogram definition. + // + // If bucket_counts length is 0 then explicit_bounds length must also be 0, + // otherwise the data point is invalid. + repeated double explicit_bounds = 7; + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 8; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 10; + + // min is the minimum value over (start_time, end_time]. + optional double min = 11; + + // max is the maximum value over (start_time, end_time]. + optional double max = 12; +} + +// ExponentialHistogramDataPoint is a single data point in a timeseries that describes the +// time-varying values of a ExponentialHistogram of double values. A ExponentialHistogram contains +// summary statistics for a population of values, it may optionally contain the +// distribution of those values across a set of buckets. +// +message ExponentialHistogramDataPoint { + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 1; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // The number of values in the population. Must be + // non-negative. This value must be equal to the sum of the "bucket_counts" + // values in the positive and negative Buckets plus the "zero_count" field. + fixed64 count = 4; + + // The sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#histogram + optional double sum = 5; + + // scale describes the resolution of the histogram. Boundaries are + // located at powers of the base, where: + // + // base = (2^(2^-scale)) + // + // The histogram bucket identified by `index`, a signed integer, + // contains values that are greater than (base^index) and + // less than or equal to (base^(index+1)). + // + // The positive and negative ranges of the histogram are expressed + // separately. Negative values are mapped by their absolute value + // into the negative range using the same scale as the positive range. + // + // scale is not restricted by the protocol, as the permissible + // values depend on the range of the data. + sint32 scale = 6; + + // The count of values that are either exactly zero or + // within the region considered zero by the instrumentation at the + // tolerated degree of precision. This bucket stores values that + // cannot be expressed using the standard exponential formula as + // well as values that have been rounded to zero. + // + // Implementations MAY consider the zero bucket to have probability + // mass equal to (zero_count / count). + fixed64 zero_count = 7; + + // positive carries the positive range of exponential bucket counts. + Buckets positive = 8; + + // negative carries the negative range of exponential bucket counts. + Buckets negative = 9; + + // Buckets are a set of bucket counts, encoded in a contiguous array + // of counts. + message Buckets { + // The bucket index of the first entry in the bucket_counts array. + // + // Note: This uses a varint encoding as a simple form of compression. + sint32 offset = 1; + + // An array of count values, where bucket_counts[i] carries + // the count of the bucket at index (offset+i). bucket_counts[i] is the count + // of values greater than base^(offset+i) and less than or equal to + // base^(offset+i+1). + // + // Note: By contrast, the explicit HistogramDataPoint uses + // fixed64. This field is expected to have many buckets, + // especially zeros, so uint64 has been selected to ensure + // varint encoding. + repeated uint64 bucket_counts = 2; + } + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 10; + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 11; + + // The minimum value over (start_time, end_time]. + optional double min = 12; + + // The maximum value over (start_time, end_time]. + optional double max = 13; + + // ZeroThreshold may be optionally set to convey the width of the zero + // region. Where the zero region is defined as the closed interval + // [-ZeroThreshold, ZeroThreshold]. + // When ZeroThreshold is 0, zero count bucket stores values that cannot be + // expressed using the standard exponential formula as well as values that + // have been rounded to zero. + double zero_threshold = 14; +} + +// SummaryDataPoint is a single data point in a timeseries that describes the +// time-varying values of a Summary metric. The count and sum fields represent +// cumulative values. +message SummaryDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 7; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be non-negative. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#summary + double sum = 5; + + // Represents the value at a given quantile of a distribution. + // + // To record Min and Max values following conventions are used: + // - The 1.0 quantile is equivalent to the maximum value observed. + // - The 0.0 quantile is equivalent to the minimum value observed. + // + // See the following issue for more context: + // https://github.com/open-telemetry/opentelemetry-proto/issues/125 + message ValueAtQuantile { + // The quantile of a distribution. Must be in the interval + // [0.0, 1.0]. + double quantile = 1; + + // The value at the given quantile of a distribution. + // + // Quantile values must NOT be negative. + double value = 2; + } + + // (Optional) list of values at different quantiles of the distribution calculated + // from the current snapshot. The quantiles must be strictly increasing. + repeated ValueAtQuantile quantile_values = 6; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 8; +} + +// A representation of an exemplar, which is a sample input measurement. +// Exemplars also hold information about the environment when the measurement +// was recorded, for example the span and trace ID of the active span when the +// exemplar was recorded. +message Exemplar { + reserved 1; + + // The set of key/value pairs that were filtered out by the aggregator, but + // recorded alongside the original measurement. Only key/value pairs that were + // filtered out by the aggregator should be included + repeated opentelemetry.proto.common.v1.KeyValue filtered_attributes = 7; + + // time_unix_nano is the exact time when this exemplar was recorded + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 2; + + // The value of the measurement that was recorded. An exemplar is + // considered invalid when one of the recognized value fields is not present + // inside this oneof. + oneof value { + double as_double = 3; + sfixed64 as_int = 6; + } + + // (Optional) Span ID of the exemplar trace. + // span_id may be missing if the measurement is not recorded inside a trace + // or if the trace is not sampled. + bytes span_id = 4; + + // (Optional) Trace ID of the exemplar trace. + // trace_id may be missing if the measurement is not recorded inside a trace + // or if the trace is not sampled. + bytes trace_id = 5; +} + +// clang-format on +#endif + +namespace zen::otel { + +// Metrics + +enum class MetricsData : protozero::pbf_tag_type +{ + repeated_ResourceMetrics_resource_metrics = 1 +}; + +enum class ResourceMetrics : protozero::pbf_tag_type +{ + Resource_resource = 1, + repeated_ScopeMetrics_scope_metrics = 2, + string_schema_url = 3 +}; + +// A collection of Metrics produced by an Scope. +enum class ScopeMetrics : protozero::pbf_tag_type +{ + InstrumentationScope_scope = 1, + repeated_Metric_metrics = 2, + string_schema_url = 3 +}; + +enum class Metric : protozero::pbf_tag_type +{ + string_name = 1, + string_description = 2, + string_unit = 3, + + oneof_data_Gauge_gauge = 5, + oneof_data_Sum_sum = 7, + oneof_data_Histogram_histogram = 9, + oneof_data_ExponentialHistogram_exponential_histogram = 10, + oneof_data_Summary_summary = 11, + + repeated_KeyValue_metadata = 12 +}; + +enum class Gauge : protozero::pbf_tag_type +{ + repeated_NumberDataPoint_data_points = 1 +}; + +enum class Sum : protozero::pbf_tag_type +{ + repeated_NumberDataPoint_data_points = 1, + AggregationTemporality_aggregation_temporality = 2, + bool_is_monotonic = 3 +}; + +enum class Histogram : protozero::pbf_tag_type +{ + repeated_HistogramDataPoint_data_points = 1, + AggregationTemporality_aggregation_temporality = 2 +}; + +enum class ExponentialHistogram : protozero::pbf_tag_type +{ + repeated_ExponentialHistogramDataPoint_data_points = 1, + AggregationTemporality_aggregation_temporality = 2 +}; + +enum class Summary : protozero::pbf_tag_type +{ + repeated_SummaryDataPoint_data_points = 1 +}; + +enum AggregationTemporality +{ + AGGREGATION_TEMPORALITY_UNSPECIFIED = 0, + AGGREGATION_TEMPORALITY_DELTA = 1, + AGGREGATION_TEMPORALITY_CUMULATIVE = 2 +}; + +enum DataPointFlags +{ + DATA_POINT_FLAGS_DO_NOT_USE = 0, + DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK = 1, +}; + +enum class NumberDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 7, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + + oneof_value_double_as_double = 4, + oneof_value_sfixed64_as_int = 6, + + repeated_Exemplar_exemplars = 5, + uint32_flags = 8 +}; + +enum class HistogramDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 9, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + fixed64_count = 4, + optional_double_sum = 5, + repeated_fixed64_bucket_counts = 6, + repeated_double_explicit_bounds = 7, + repeated_Exemplar_exemplars = 8, + uint32_flags = 10, + optional_double_min = 11, + optional_double_max = 12 +}; + +enum class Buckets : protozero::pbf_tag_type +{ + sint32_offset = 1, + repeated_uint64_bucket_counts = 2 +}; + +enum class ExponentialHistogramDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 1, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + fixed64_count = 4, + optional_double_sum = 5, + sint32_scale = 6, + fixed64_zero_count = 7, + Buckets_positive = 8, + Buckets_negative = 9, + uint32_flags = 10, + repeated_Exemplar_exemplars = 11, + optional_double_min = 12, + optional_double_max = 13, + double_zero_threshold = 14 +}; + +enum class ValueAtQuantile : protozero::pbf_tag_type +{ + double_quantile = 1, + double_value = 2 +}; + +enum class SummaryDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 7, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + fixed64_count = 4, + double_sum = 5, + repeated_ValueAtQuantile_quantile_values = 6, + uint32_flags = 8 +}; + +enum class Exemplar : protozero::pbf_tag_type +{ + repeated_KeyValue_filtered_attributes = 7, + fixed64_time_unix_nano = 2, + oneof_value_double_as_double = 3, + oneof_value_sfixed64_as_int = 6, + bytes_span_id = 4, + bytes_trace_id = 5 +}; + +} // namespace zen::otel \ No newline at end of file diff --git a/src/zentelemetry/otelprotozero.h b/src/zentelemetry/otelprotozero.h new file mode 100644 index 000000000..8ad69e766 --- /dev/null +++ b/src/zentelemetry/otelprotozero.h @@ -0,0 +1,166 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +//////////////////////////////////////////////////////////////////////// +// resource/v1/resource.proto +// + +// Resource information. +message Resource { + // Set of attributes that describe the resource. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 1; + + // The number of dropped attributes. If the value is 0, then + // no attributes were dropped. + uint32 dropped_attributes_count = 2; + + // Set of entities that participate in this Resource. + // + // Note: keys in the references MUST exist in attributes of this message. + // + // Status: [Development] + repeated opentelemetry.proto.common.v1.EntityRef entity_refs = 3; +} + +///////////////////////////////////////////////////////////////////////// +// common/v1/commmon.proto +// + +// Represents any type of attribute value. AnyValue may contain a +// primitive value such as a string or integer or it may contain an arbitrary nested +// object containing arrays, key-value lists and primitives. +message AnyValue { + // The value is one of the listed fields. It is valid for all values to be unspecified + // in which case this AnyValue is considered to be "empty". + oneof value { + string string_value = 1; + bool bool_value = 2; + int64 int_value = 3; + double double_value = 4; + ArrayValue array_value = 5; + KeyValueList kvlist_value = 6; + bytes bytes_value = 7; + } +} + +// ArrayValue is a list of AnyValue messages. We need ArrayValue as a message +// since oneof in AnyValue does not allow repeated fields. +message ArrayValue { + // Array of values. The array may be empty (contain 0 elements). + repeated AnyValue values = 1; +} + +// KeyValueList is a list of KeyValue messages. We need KeyValueList as a message +// since `oneof` in AnyValue does not allow repeated fields. Everywhere else where we need +// a list of KeyValue messages (e.g. in Span) we use `repeated KeyValue` directly to +// avoid unnecessary extra wrapping (which slows down the protocol). The 2 approaches +// are semantically equivalent. +message KeyValueList { + // A collection of key/value pairs of key-value pairs. The list may be empty (may + // contain 0 elements). + // The keys MUST be unique (it is not allowed to have more than one + // value with the same key). + repeated KeyValue values = 1; +} + +// Represents a key-value pair that is used to store Span attributes, Link +// attributes, etc. +message KeyValue { + // The key name of the pair. + string key = 1; + + // The value of the pair. + AnyValue value = 2; +} + +// InstrumentationScope is a message representing the instrumentation scope information +// such as the fully qualified name and version. +message InstrumentationScope { + // A name denoting the Instrumentation scope. + // An empty instrumentation scope name means the name is unknown. + string name = 1; + + // Defines the version of the instrumentation scope. + // An empty instrumentation scope version means the version is unknown. + string version = 2; + + // Additional attributes that describe the scope. [Optional]. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated KeyValue attributes = 3; + + // The number of attributes that were discarded. Attributes + // can be discarded because their keys are too long or because there are too many + // attributes. If this value is 0, then no attributes were dropped. + uint32 dropped_attributes_count = 4; +} + +// clang-format on +#endif + +////////////////////////////////////////////////////////////////////////// + +namespace zen::otel { + +enum class KeyValueList : protozero::pbf_tag_type +{ + repeated_KeyValue_values = 1 +}; + +enum class KeyValue : protozero::pbf_tag_type +{ + string_key = 1, + AnyValue_value = 2 +}; + +enum class Resource : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 1, + uint32_dropped_attributes_count = 2, + repeated_EntityRef_entity_refs = 3 +}; + +enum class AnyValue : protozero::pbf_tag_type +{ + string_string_value = 1, + bool_bool_value = 2, + int64_int_value = 3, + double_double_value = 4, + ArrayValue_array_value = 5, + KeyValueList_kvlist_value = 6, + bytes_bytes_value = 7 +}; + +enum class InstrumentationScope : protozero::pbf_tag_type +{ + string_name = 1, + string_version = 2, + repeated_KeyValue_attributes = 3, + uint32_dropped_attributes_count = 4 +}; + +} // namespace zen::otel diff --git a/src/zentelemetry/oteltraceprotozero.h b/src/zentelemetry/oteltraceprotozero.h new file mode 100644 index 000000000..8b80c8f7f --- /dev/null +++ b/src/zentelemetry/oteltraceprotozero.h @@ -0,0 +1,474 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "otelprotozero.h" + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +///////////////////////////////////////////////////////////////////////// +// trace/v1/trace.proto +// + +// TracesData represents the traces data that can be stored in a persistent storage, +// OR can be embedded by other protocols that transfer OTLP traces data but do +// not implement the OTLP protocol. +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message TracesData { + // An array of ResourceSpans. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceSpans resource_spans = 1; +} + +// A collection of ScopeSpans from a Resource. +message ResourceSpans { + reserved 1000; + + // The resource for the spans in this message. + // If this field is not set then no resource info is known. + opentelemetry.proto.resource.v1.Resource resource = 1; + + // A list of ScopeSpans that originate from a resource. + repeated ScopeSpans scope_spans = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_spans" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Spans produced by an InstrumentationScope. +message ScopeSpans { + // The instrumentation scope information for the spans in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of Spans that originate from an instrumentation scope. + repeated Span spans = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the span data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all spans and span events in the "spans" field. + string schema_url = 3; +} + +// A Span represents a single operation performed by a single component of the system. +// +// The next available field id is 17. +message Span { + // A unique identifier for a trace. All spans from the same trace share + // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes OR + // of length other than 16 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is required. + bytes trace_id = 1; + + // A unique identifier for a span within a trace, assigned when the span + // is created. The ID is an 8-byte array. An ID with all zeroes OR of length + // other than 8 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is required. + bytes span_id = 2; + + // trace_state conveys information about request position in multiple distributed tracing graphs. + // It is a trace_state in w3c-trace-context format: https://www.w3.org/TR/trace-context/#tracestate-header + // See also https://github.com/w3c/distributed-tracing for more details about this field. + string trace_state = 3; + + // The `span_id` of this span's parent span. If this is a root span, then this + // field must be empty. The ID is an 8-byte array. + bytes parent_span_id = 4; + + // Flags, a bit field. + // + // Bits 0-7 (8 least significant bits) are the trace flags as defined in W3C Trace + // Context specification. To read the 8-bit W3C trace flag, use + // `flags & SPAN_FLAGS_TRACE_FLAGS_MASK`. + // + // See https://www.w3.org/TR/trace-context-2/#trace-flags for the flag definitions. + // + // Bits 8 and 9 represent the 3 states of whether a span's parent + // is remote. The states are (unknown, is not remote, is remote). + // To read whether the value is known, use `(flags & SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK) != 0`. + // To read whether the span is remote, use `(flags & SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK) != 0`. + // + // When creating span messages, if the message is logically forwarded from another source + // with an equivalent flags fields (i.e., usually another OTLP span message), the field SHOULD + // be copied as-is. If creating from a source that does not have an equivalent flags field + // (such as a runtime representation of an OpenTelemetry span), the high 22 bits MUST + // be set to zero. + // Readers MUST NOT assume that bits 10-31 (22 most significant bits) will be zero. + // + // [Optional]. + fixed32 flags = 16; + + // A description of the span's operation. + // + // For example, the name can be a qualified method name or a file name + // and a line number where the operation is called. A best practice is to use + // the same display name at the same call point in an application. + // This makes it easier to correlate spans in different traces. + // + // This field is semantically required to be set to non-empty string. + // Empty value is equivalent to an unknown span name. + // + // This field is required. + string name = 5; + + // SpanKind is the type of span. Can be used to specify additional relationships between spans + // in addition to a parent/child relationship. + enum SpanKind { + // Unspecified. Do NOT use as default. + // Implementations MAY assume SpanKind to be INTERNAL when receiving UNSPECIFIED. + SPAN_KIND_UNSPECIFIED = 0; + + // Indicates that the span represents an internal operation within an application, + // as opposed to an operation happening at the boundaries. Default value. + SPAN_KIND_INTERNAL = 1; + + // Indicates that the span covers server-side handling of an RPC or other + // remote network request. + SPAN_KIND_SERVER = 2; + + // Indicates that the span describes a request to some remote service. + SPAN_KIND_CLIENT = 3; + + // Indicates that the span describes a producer sending a message to a broker. + // Unlike CLIENT and SERVER, there is often no direct critical path latency relationship + // between producer and consumer spans. A PRODUCER span ends when the message was accepted + // by the broker while the logical processing of the message might span a much longer time. + SPAN_KIND_PRODUCER = 4; + + // Indicates that the span describes consumer receiving a message from a broker. + // Like the PRODUCER kind, there is often no direct critical path latency relationship + // between producer and consumer spans. + SPAN_KIND_CONSUMER = 5; + } + + // Distinguishes between spans generated in a particular context. For example, + // two spans with the same name may be distinguished using `CLIENT` (caller) + // and `SERVER` (callee) to identify queueing latency associated with the span. + SpanKind kind = 6; + + // The start time of the span. On the client side, this is the time + // kept by the local machine where the span execution starts. On the server side, this + // is the time when the server's application handler starts running. + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // + // This field is semantically required and it is expected that end_time >= start_time. + fixed64 start_time_unix_nano = 7; + + // The end time of the span. On the client side, this is the time + // kept by the local machine where the span execution ends. On the server side, this + // is the time when the server application handler stops running. + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // + // This field is semantically required and it is expected that end_time >= start_time. + fixed64 end_time_unix_nano = 8; + + // A collection of key/value pairs. Note, global attributes + // like server name can be set using the resource API. Examples of attributes: + // + // "/http/user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" + // "/http/server_latency": 300 + // "example.com/myattribute": true + // "example.com/score": 10.239 + // + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 9; + + // The number of attributes that were discarded. Attributes + // can be discarded because their keys are too long or because there are too many + // attributes. If this value is 0, then no attributes were dropped. + uint32 dropped_attributes_count = 10; + + // Event is a time-stamped annotation of the span, consisting of user-supplied + // text description and key-value pairs. + message Event { + // The time the event occurred. + fixed64 time_unix_nano = 1; + + // The name of the event. + // This field is semantically required to be set to non-empty string. + string name = 2; + + // A collection of attribute key/value pairs on the event. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 3; + + // The number of dropped attributes. If the value is 0, + // then no attributes were dropped. + uint32 dropped_attributes_count = 4; + } + + // A collection of Event items. + repeated Event events = 11; + + // The number of dropped events. If the value is 0, then no + // events were dropped. + uint32 dropped_events_count = 12; + + // A pointer from the current span to another span in the same trace or in a + // different trace. For example, this can be used in batching operations, + // where a single batch handler processes multiple requests from different + // traces or when the handler receives a request from a different project. + message Link { + // A unique identifier of a trace that this linked span is part of. The ID is a + // 16-byte array. + bytes trace_id = 1; + + // A unique identifier for the linked span. The ID is an 8-byte array. + bytes span_id = 2; + + // The trace_state associated with the link. + string trace_state = 3; + + // A collection of attribute key/value pairs on the link. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 4; + + // The number of dropped attributes. If the value is 0, + // then no attributes were dropped. + uint32 dropped_attributes_count = 5; + + // Flags, a bit field. + // + // Bits 0-7 (8 least significant bits) are the trace flags as defined in W3C Trace + // Context specification. To read the 8-bit W3C trace flag, use + // `flags & SPAN_FLAGS_TRACE_FLAGS_MASK`. + // + // See https://www.w3.org/TR/trace-context-2/#trace-flags for the flag definitions. + // + // Bits 8 and 9 represent the 3 states of whether the link is remote. + // The states are (unknown, is not remote, is remote). + // To read whether the value is known, use `(flags & SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK) != 0`. + // To read whether the link is remote, use `(flags & SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK) != 0`. + // + // Readers MUST NOT assume that bits 10-31 (22 most significant bits) will be zero. + // When creating new spans, bits 10-31 (most-significant 22-bits) MUST be zero. + // + // [Optional]. + fixed32 flags = 6; + } + + // A collection of Links, which are references from this span to a span + // in the same or different trace. + repeated Link links = 13; + + // The number of dropped links after the maximum size was + // enforced. If this value is 0, then no links were dropped. + uint32 dropped_links_count = 14; + + // An optional final status for this span. Semantically when Status isn't set, it means + // span's status code is unset, i.e. assume STATUS_CODE_UNSET (code = 0). + Status status = 15; +} + +// The Status type defines a logical error model that is suitable for different +// programming environments, including REST APIs and RPC APIs. +message Status { + reserved 1; + + // A developer-facing human readable error message. + string message = 2; + + // For the semantics of status codes see + // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#set-status + enum StatusCode { + // The default status. + STATUS_CODE_UNSET = 0; + // The Span has been validated by an Application developer or Operator to + // have completed successfully. + STATUS_CODE_OK = 1; + // The Span contains an error. + STATUS_CODE_ERROR = 2; + }; + + // The status code. + StatusCode code = 3; +} + +// SpanFlags represents constants used to interpret the +// Span.flags field, which is protobuf 'fixed32' type and is to +// be used as bit-fields. Each non-zero value defined in this enum is +// a bit-mask. To extract the bit-field, for example, use an +// expression like: +// +// (span.flags & SPAN_FLAGS_TRACE_FLAGS_MASK) +// +// See https://www.w3.org/TR/trace-context-2/#trace-flags for the flag definitions. +// +// Note that Span flags were introduced in version 1.1 of the +// OpenTelemetry protocol. Older Span producers do not set this +// field, consequently consumers should not rely on the absence of a +// particular flag bit to indicate the presence of a particular feature. +enum SpanFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + SPAN_FLAGS_DO_NOT_USE = 0; + + // Bits 0-7 are used for trace flags. + SPAN_FLAGS_TRACE_FLAGS_MASK = 0x000000FF; + + // Bits 8 and 9 are used to indicate that the parent span or link span is remote. + // Bit 8 (`HAS_IS_REMOTE`) indicates whether the value is known. + // Bit 9 (`IS_REMOTE`) indicates whether the span or link is remote. + SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK = 0x00000100; + SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK = 0x00000200; + + // Bits 10-31 are reserved for future use. +} + +// clang-format on +#endif + +namespace zen::otel::pbf { + +// Traces + +enum class TracesData : protozero::pbf_tag_type +{ + repeated_ResourceSpans_resource_spans = 1 +}; + +enum class ResourceSpans : protozero::pbf_tag_type +{ + Resource_resource = 1, + repeated_ScopeSpans_scope_spans = 2, + string_schema_url = 3 +}; + +enum class ScopeSpans : protozero::pbf_tag_type +{ + InstrumentationScope_scope = 1, + repeated_Span_spans = 2, + string_schema_url = 3 +}; + +enum SpanFlags +{ + SPAN_FLAGS_DO_NOT_USE = 0, + SPAN_FLAGS_TRACE_FLAGS_MASK = 0x000000FF, + SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK = 0x00000100, + SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK = 0x00000200, +}; + +enum SpanKind +{ + SPAN_KIND_UNSPECIFIED = 0, + SPAN_KIND_INTERNAL = 1, + SPAN_KIND_SERVER = 2, + SPAN_KIND_CLIENT = 3, + SPAN_KIND_PRODUCER = 4, + SPAN_KIND_CONSUMER = 5 +}; + +enum class Span_Event : protozero::pbf_tag_type +{ + fixed64_time_unix_nano = 1, + string_name = 2, + repeated_KeyValue_attributes = 3, + uint32_dropped_attributes_count = 4 +}; + +enum class Span_Link : protozero::pbf_tag_type +{ + bytes_trace_id = 1, + bytes_span_id = 2, + string_trace_state = 3, + repeated_KeyValue_attributes = 4, + uint32_dropped_attributes_count = 5, + fixed32_flags = 6 +}; + +enum class Span : protozero::pbf_tag_type +{ + required_bytes_trace_id = 1, + required_bytes_span_id = 2, + string_trace_state = 3, + bytes_parent_span_id = 4, + fixed32_flags = 16, + required_string_name = 5, + SpanKind_kind = 6, + required_fixed64_start_time_unix_nano = 7, + required_fixed64_end_time_unix_nano = 8, + repeated_KeyValue_attributes = 9, + uint32_dropped_attributes_count = 10, + repeated_Event_events = 11, + uint32_dropped_events_count = 12, + repeated_Link_links = 13, + uint32_dropped_links_count = 14, + Status_status = 15 +}; + +enum StatusCode +{ + STATUS_CODE_UNSET = 0, + STATUS_CODE_OK = 1, + STATUS_CODE_ERROR = 2 +}; + +enum class Status : protozero::pbf_tag_type +{ + string_message = 2, + StatusCode_code = 3 +}; + +} // namespace zen::otel::pbf diff --git a/src/zentelemetry/otlpencoder.cpp b/src/zentelemetry/otlpencoder.cpp new file mode 100644 index 000000000..677545066 --- /dev/null +++ b/src/zentelemetry/otlpencoder.cpp @@ -0,0 +1,478 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/otlpencoder.h" + +#include +#include + +#include +#include + +#include +#include + +#include "otellogprotozero.h" +#include "otelmetricsprotozero.h" +#include "otelprotozero.h" +#include "oteltraceprotozero.h" + +#if ZEN_WITH_OTEL + +namespace zen { + +OtlpEncoder::OtlpEncoder() +{ +} + +OtlpEncoder::~OtlpEncoder() +{ +} + +static int +MapSeverity(const spdlog::level::level_enum Level) +{ + switch (Level) + { + case spdlog::level::critical: + return otel::SEVERITY_NUMBER_FATAL; + case spdlog::level::err: + return otel::SEVERITY_NUMBER_ERROR; + case spdlog::level::warn: + return otel::SEVERITY_NUMBER_WARN; + case spdlog::level::info: + return otel::SEVERITY_NUMBER_INFO; + case spdlog::level::debug: + return otel::SEVERITY_NUMBER_DEBUG; + default: + case spdlog::level::trace: + return otel::SEVERITY_NUMBER_TRACE; + } +} + +static const char* +MapSeverityText(const spdlog::level::level_enum Level) +{ + switch (Level) + { + case spdlog::level::critical: + return "fatal"; + case spdlog::level::err: + return "error"; + case spdlog::level::warn: + return "warn"; + case spdlog::level::info: + return "info"; + case spdlog::level::debug: + return "debug"; + default: + case spdlog::level::trace: + return "trace"; + } +} + +std::string +OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const +{ + std::string Data; + + // LogsData + { + protozero::pbf_builder Builder{Data}; + + // ResourceLogs + { + protozero::pbf_builder RlBuilder{Builder, otel::LogsData::required_repeated_ResourceLogs_resource_logs}; + + // ResourceLogs / Resource + { + protozero::pbf_builder Res{RlBuilder, otel::ResourceLogs::optional_Resource_resource}; + + AppendResourceAttributes(Res); + } + + // ScopeLogs scope_logs + { + protozero::pbf_builder SlBuilder{RlBuilder, otel::ResourceLogs::required_repeated_ScopeLogs_scope_logs}; + + { + protozero::pbf_builder IsBuilder{SlBuilder, + otel::ScopeLogs::required_InstrumentationScope_scope}; + + IsBuilder.add_string(otel::InstrumentationScope::string_name, Msg.logger_name.data(), Msg.logger_name.size()); + } + + // LogRecord log_records + { + protozero::pbf_builder LrBuilder{SlBuilder, otel::ScopeLogs::required_repeated_LogRecord_log_records}; + + LrBuilder.add_fixed64(otel::LogRecord::required_fixed64_time_unix_nano, + std::chrono::duration_cast(Msg.time.time_since_epoch()).count()); + + const int Severity = MapSeverity(Msg.level); + + LrBuilder.add_enum(otel::LogRecord::optional_SeverityNumber_severity_number, Severity); + + LrBuilder.add_string(otel::LogRecord::optional_string_severity_text, MapSeverityText(Msg.level)); + + otel::TraceId TraceId; + const otel::SpanId SpanId = otel::Span::GetCurrentSpanId(TraceId); + + if (SpanId && TraceId) + { + LrBuilder.add_bytes(otel::LogRecord::optional_bytes_trace_id, TraceId.GetData(), TraceId.kSize); + LrBuilder.add_bytes(otel::LogRecord::optional_bytes_span_id, SpanId.GetData(), SpanId.kSize); + } + + // body + { + protozero::pbf_builder BodyBuilder{LrBuilder, otel::LogRecord::optional_anyvalue_body}; + + BodyBuilder.add_string(otel::AnyValue::string_string_value, Msg.payload.data(), Msg.payload.size()); + } + + // attributes + + { + protozero::pbf_builder KvBuilder{LrBuilder, otel::LogRecord::optional_repeated_kv_attributes}; + KvBuilder.add_string(otel::KeyValue::string_key, "thread_id"); + + { + protozero::pbf_builder AvBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + + AvBuilder.add_int64(otel::AnyValue::int64_int_value, Msg.thread_id); + } + } + } + } + } + } + + return Data; +} + +std::string +OtlpEncoder::FormatOtelMetrics() const +{ + std::string Data; + +# if 0 + static int64_t LastNanos = 0; + + const int64_t NowNanos = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + + // MetricsData + { + protozero::pbf_builder Builder{Data}; + + // ResourceMetrics + protozero::pbf_builder Rm{Builder, otel::MetricsData::repeated_ResourceMetrics_resource_metrics}; + + { + protozero::pbf_builder Res{Rm, otel::ResourceMetrics::Resource_resource}; + + AppendResourceAttributes(Res); + } + + // ScopeMetrics + protozero::pbf_builder Sm{Rm, otel::ResourceMetrics::repeated_ScopeMetrics_scope_metrics}; + + { + // InstrumentationScope + protozero::pbf_builder Is{Sm, otel::ScopeMetrics::InstrumentationScope_scope}; + Is.add_string(otel::InstrumentationScope::string_name, "scope_name"); + } + + { + protozero::pbf_builder Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "metric_name"); + Metric.add_string(otel::Metric::string_unit, "KiB"); + + // Gauge + { + protozero::pbf_builder Gauge{Metric, otel::Metric::oneof_data_Gauge_gauge}; + + protozero::pbf_builder Dp{Gauge, otel::Gauge::repeated_NumberDataPoint_data_points}; + Dp.add_fixed64(otel::NumberDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::NumberDataPoint::fixed64_start_time_unix_nano, LastNanos); + Dp.add_sfixed64(otel::NumberDataPoint::oneof_value_sfixed64_as_int, rand() * 470 / RAND_MAX); + } + } + + const int RequestCount = rand() % 600; + + { + protozero::pbf_builder Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "request_count"); + Metric.add_string(otel::Metric::string_unit, "requests"); + + static int SumValue = 0; + SumValue += RequestCount; + + // Sum + { + protozero::pbf_builder Sum{Metric, otel::Metric::oneof_data_Sum_sum}; + Sum.add_enum(otel::Sum::AggregationTemporality_aggregation_temporality, otel::AGGREGATION_TEMPORALITY_CUMULATIVE); + Sum.add_bool(otel::Sum::bool_is_monotonic, true); + + protozero::pbf_builder Dp{Sum, otel::Sum::repeated_NumberDataPoint_data_points}; + Dp.add_fixed64(otel::NumberDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::NumberDataPoint::fixed64_start_time_unix_nano, LastNanos); + Dp.add_double(otel::NumberDataPoint::oneof_value_double_as_double, SumValue); + } + } + + { + protozero::pbf_builder Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "request_latency"); + Metric.add_string(otel::Metric::string_unit, "ms"); + + // Histogram + { + protozero::pbf_builder Histogram{Metric, otel::Metric::oneof_data_Histogram_histogram}; + Histogram.add_enum(otel::Histogram::AggregationTemporality_aggregation_temporality, + otel::AGGREGATION_TEMPORALITY_CUMULATIVE); + + protozero::pbf_builder Dp{Histogram, otel::Histogram::repeated_HistogramDataPoint_data_points}; + Dp.add_fixed64(otel::HistogramDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::HistogramDataPoint::fixed64_start_time_unix_nano, LastNanos); + + // Simulated latency value + + double Sum = 0, Min = 0, Max = 0; + int Buckets[6] = {0}; + + for (int i = 0; i < RequestCount; ++i) + { + const int Latency = rand() % 250; + + if (i == 0 || Latency < Min) + { + Min = Latency; + } + if (i == 0 || Latency > Max) + { + Max = Latency; + } + + Sum += Latency; + + if (Latency >= 0 && Latency < 10) + { + ++Buckets[0]; // [0,10) + } + else if (Latency >= 10 && Latency < 25) + { + ++Buckets[1]; // [10,25) + } + else if (Latency >= 25 && Latency < 50) + { + ++Buckets[2]; // [25,50) + } + else if (Latency >= 50 && Latency < 100) + { + ++Buckets[3]; // [50,100) + } + else if (Latency >= 100 && Latency < 250) + { + ++Buckets[4]; // [100,250) + } + else + { + ++Buckets[5]; // [250,+inf) + } + } + + Dp.add_fixed64(otel::HistogramDataPoint::fixed64_count, RequestCount); + Dp.add_double(otel::HistogramDataPoint::optional_double_sum, Sum); + Dp.add_double(otel::HistogramDataPoint::optional_double_min, Min); + Dp.add_double(otel::HistogramDataPoint::optional_double_max, Max); + + // Bucket bounds + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 10.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 25.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 50.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 100.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 250.0); + + // Buckets + for (int i = 0; i < 6; ++i) + { + Dp.add_fixed64(otel::HistogramDataPoint::repeated_fixed64_bucket_counts, Buckets[i]); + } + } + } + +# if 0 + { + protozero::pbf_builder Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "request_payload"); + Metric.add_string(otel::Metric::string_unit, "KiB"); + + // ExponentialHistogram + { + protozero::pbf_builder ExponentialHistogram{ + Metric, otel::Metric::oneof_data_ExponentialHistogram_exponential_histogram}; + ExponentialHistogram.add_enum(otel::ExponentialHistogram::AggregationTemporality_aggregation_temporality, + otel::AGGREGATION_TEMPORALITY_CUMULATIVE); + + protozero::pbf_builder Dp{ + ExponentialHistogram, otel::ExponentialHistogram::repeated_ExponentialHistogramDataPoint_data_points}; + Dp.add_fixed64(otel::ExponentialHistogramDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::ExponentialHistogramDataPoint::fixed64_start_time_unix_nano, LastNanos); + + // Simulated payload size values + + int64_t sum = 0; + int count = 0; + + for (int i = 0; i < RequestCount; ++i) + { + const int PayloadSize = rand() % 1'500'000; // [0,1500000) bytes + + sum += PayloadSize; + ++count; + } + + Dp.add_fixed64(otel::ExponentialHistogramDataPoint::fixed64_count, count); + Dp.add_double(otel::ExponentialHistogramDataPoint::optional_double_sum, sum); + Dp.add_sint32(otel::ExponentialHistogramDataPoint::sint32_scale, 4); + } + } +# endif + } + LastNanos = NowNanos; +# endif + return Data; +} + +void +OtlpEncoder::AddResourceAttribute(const std::string_view& Key, const std::string_view& Value) +{ + m_ResourceLock.WithExclusiveLock([&] { m_ResourceAttributes[std::string(Key)] = std::string(Value); }); +} + +void +OtlpEncoder::AddResourceAttribute(const std::string_view& Key, int64_t Value) +{ + m_ResourceLock.WithExclusiveLock([&] { m_ResourceIntAttributes[std::string(Key)] = Value; }); +} + +void +OtlpEncoder::AppendResourceAttributes(protozero::pbf_builder& Res) const +{ + using namespace otel; + + m_ResourceLock.WithSharedLock([&] { + for (auto const& [K, V] : m_ResourceAttributes) + { + protozero::pbf_builder KvBuilder{Res, otel::Resource::repeated_KeyValue_attributes}; + KvBuilder.add_string(otel::KeyValue::string_key, K.c_str()); + protozero::pbf_builder AnyBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + AnyBuilder.add_string(otel::AnyValue::string_string_value, V.c_str()); + } + + for (auto const& [K, V] : m_ResourceIntAttributes) + { + protozero::pbf_builder KvBuilder{Res, otel::Resource::repeated_KeyValue_attributes}; + KvBuilder.add_string(otel::KeyValue::string_key, K.c_str()); + protozero::pbf_builder AnyBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + AnyBuilder.add_int64(otel::AnyValue::int64_int_value, V); + } + }); +} + +template +static void +AppendAttributesToBuilder(ParentBuilder& Parent, auto KeyValueField, const otel::AttributePair* Attrs) +{ + for (const otel::AttributePair* Attr = Attrs; Attr != nullptr; Attr = Attr->Next) + { + protozero::pbf_builder KvBuilder{Parent, KeyValueField}; + KvBuilder.add_string(otel::KeyValue::string_key, Attr->Key); + + protozero::pbf_builder AnyBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + + if (Attr->IsNumeric()) + { + AnyBuilder.add_int64(otel::AnyValue::int64_int_value, Attr->GetNumericValue()); + } + else if (Attr->IsString()) + { + AnyBuilder.add_string(otel::AnyValue::string_string_value, Attr->GetStringValue()); + } + } +} + +std::string +OtlpEncoder::FormatOtelTrace(zen::otel::TraceId Id, std::span Spans) const +{ + std::string Data; + + using namespace otel; + + // TracesData + { + protozero::pbf_builder Builder{Data}; + + { + protozero::pbf_builder Rs{Builder, pbf::TracesData::repeated_ResourceSpans_resource_spans}; + + { + protozero::pbf_builder Res{Rs, pbf::ResourceSpans::Resource_resource}; + + AppendResourceAttributes(Res); + } + + for (const zen::otel::Span* SpanPtr : Spans) + { + protozero::pbf_builder Ss{Rs, pbf::ResourceSpans::repeated_ScopeSpans_scope_spans}; + + { + // InstrumentationScope + protozero::pbf_builder Is{Ss, pbf::ScopeSpans::InstrumentationScope_scope}; + Is.add_string(otel::InstrumentationScope::string_name, "scope_name"); + } + + const SpanId ThisSpanId = SpanPtr->GetSpanId(); + + { + protozero::pbf_builder Sb{Ss, pbf::ScopeSpans::repeated_Span_spans}; + + Sb.add_bytes(pbf::Span::required_bytes_trace_id, Id.GetData(), TraceId::kSize); + Sb.add_bytes(pbf::Span::required_bytes_span_id, ThisSpanId.GetData(), SpanId::kSize); + // Sb.add_string(pbf::Span::string_trace_state, "state-value"); + // + if (const otel::Span* ParentSpan = SpanPtr->GetParentSpan()) + { + const SpanId ParentSpanId = ParentSpan->GetSpanId(); + Sb.add_bytes(pbf::Span::bytes_parent_span_id, ParentSpanId.GetData(), SpanId::kSize); + } + + Sb.add_fixed32(pbf::Span::fixed32_flags, 0); + Sb.add_string(pbf::Span::required_string_name, SpanPtr->GetName()); + Sb.add_enum(pbf::Span::SpanKind_kind, (int)SpanPtr->GetKind()); + Sb.add_fixed64(pbf::Span::required_fixed64_start_time_unix_nano, SpanPtr->GetStartTime()); + Sb.add_fixed64(pbf::Span::required_fixed64_end_time_unix_nano, SpanPtr->GetEndTime()); + + AppendAttributesToBuilder(Sb, pbf::Span::repeated_KeyValue_attributes, SpanPtr->GetAttributes()); + + for (const otel::Event* Event = SpanPtr->GetEvents(); Event != nullptr; Event = Event->NextEvent) + { + protozero::pbf_builder EventBuilder{Sb, pbf::Span::repeated_Event_events}; + EventBuilder.add_fixed64(pbf::Span_Event::fixed64_time_unix_nano, Event->Timestamp); + EventBuilder.add_string(pbf::Span_Event::string_name, Event->Name); + + AppendAttributesToBuilder(EventBuilder, pbf::Span_Event::repeated_KeyValue_attributes, Event->Attributes); + } + } + } + } + } + + return Data; +} + +} // namespace zen + +#endif diff --git a/src/zentelemetry/otlptrace.cpp b/src/zentelemetry/otlptrace.cpp new file mode 100644 index 000000000..b634eacf6 --- /dev/null +++ b/src/zentelemetry/otlptrace.cpp @@ -0,0 +1,398 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/otlptrace.h" +#include "oteltraceprotozero.h" + +#if ZEN_WITH_OTEL + +# include +# include +# include +# include +# include + +# include +# include + +# include +# include + +namespace zen::otel { + +////////////////////////////////////////////////////////////////////////// + +uint64_t +NowInNanoseconds() +{ + const uint64_t NowNanos = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + return static_cast(NowNanos); +} + +////////////////////////////////////////////////////////////////////////// + +/** + * xorshiftr128+ random number generator + * + * https://en.wikipedia.org/wiki/Xorshift#xorshift+ + */ +class XorShiftr128p +{ +public: + XorShiftr128p() + { + std::seed_seq SeedSeq{1, 2, 3, 5}; + do + { + SeedSeq.generate(reinterpret_cast(m_State), reinterpret_cast(&m_State[2])); + } while (m_State[0] == 0 && m_State[1] == 0); + } + + uint64_t Next() + { + uint64_t x = m_State[0]; + uint64_t const y = m_State[1]; + m_State[0] = y; + x ^= x << 23; + x ^= x >> 17; + x ^= y; + m_State[1] = x + y; + return x; + } + +private: + uint64_t m_State[2]; +}; + +////////////////////////////////////////////////////////////////////////// + +struct TraceState +{ + Span* ActiveSpan = nullptr; + TraceId CurrentTraceId; +}; + +thread_local TraceState t_TraceState; + +////////////////////////////////////////////////////////////////////////// + +Span::Span(MemoryArena& Arena, std::string_view Name, Span* SpanChain) +: m_Arena(Arena) +, m_SpanChain(SpanChain) +, m_SpanId(SpanId::NewSpanId()) +, m_Name(m_Arena.DuplicateString(Name)) +{ + if (Span* ActiveSpan = t_TraceState.ActiveSpan) + { + m_ParentSpan = ActiveSpan; + m_NextSibling = ActiveSpan->m_FirstChild; + ActiveSpan->m_FirstChild = this; + } + else + { + m_ParentSpan = nullptr; + t_TraceState.CurrentTraceId = TraceId::NewTraceId(); + } + + t_TraceState.ActiveSpan = this; + m_StartTime = NowInNanoseconds(); +} + +Span::~Span() +{ + End(); +} + +void +Span::End() +{ + if (m_Ended) + { + return; + } + + ZEN_ASSERT(t_TraceState.ActiveSpan == this); + + m_Ended = true; + t_TraceState.ActiveSpan = m_ParentSpan; + m_EndTime = NowInNanoseconds(); +} + +Span* +Span::GetCurrentSpan() +{ + return t_TraceState.ActiveSpan; +} + +SpanId +Span::GetCurrentSpanId(TraceId& OutTraceId) +{ + if (t_TraceState.ActiveSpan) + { + OutTraceId = t_TraceState.CurrentTraceId; + return t_TraceState.ActiveSpan->m_SpanId; + } + else + { + OutTraceId = TraceId(); + return SpanId(); + } +} + +void +Span::AddEvent(std::string_view Name) +{ + Event* NewEvent = new (m_Arena) Event(); + NewEvent->Name = m_Arena.DuplicateString(Name); + NewEvent->Timestamp = NowInNanoseconds(); + NewEvent->NextEvent = m_Events; + m_Events = NewEvent; +} + +void +Span::AddEvent(std::string_view Name, uint64_t Timestamp) +{ + Event* NewEvent = new (m_Arena) Event(); + NewEvent->Name = m_Arena.DuplicateString(Name); + NewEvent->Timestamp = Timestamp; + NewEvent->NextEvent = m_Events; + m_Events = NewEvent; +} + +void +Span::AddEvent(std::string_view Name, const AttributeList& Attributes) +{ + Event* NewEvent = new (m_Arena) Event(); + NewEvent->Name = m_Arena.DuplicateString(Name); + NewEvent->Timestamp = NowInNanoseconds(); + NewEvent->NextEvent = m_Events; + m_Events = NewEvent; + + for (auto& [K, V] : Attributes) + { + AttributePair* NewAttr = new (m_Arena) AttributePair(); + NewAttr->Key = m_Arena.DuplicateString(K); + NewAttr->SetStringValue(m_Arena.DuplicateString(V)); + NewAttr->Next = NewEvent->Attributes; + NewEvent->Attributes = NewAttr; + } +} + +void +Span::AddAttribute(std::string_view Key, std::string_view Value) +{ + AttributePair* NewAttr = new (m_Arena) AttributePair(); + NewAttr->Key = m_Arena.DuplicateString(Key); + NewAttr->SetStringValue(m_Arena.DuplicateString(Value)); + NewAttr->Next = m_Attributes; + m_Attributes = NewAttr; +} + +void +Span::AddAttribute(std::string_view Key, uint64_t Value) +{ + AttributePair* NewAttr = new (m_Arena) AttributePair(); + NewAttr->Key = m_Arena.DuplicateString(Key); + NewAttr->SetNumericValue(Value); + NewAttr->Next = m_Attributes; + m_Attributes = NewAttr; +} + +void +Span::AddAttributes(const AttributeList& Attributes) +{ + for (const auto& [K, V] : Attributes) + { + AddAttribute(K, V); + } +} + +void* +Span::operator new(size_t Size, zen::MemoryArena& Arena) +{ + return Arena.Allocate(Size); +} + +void +Span::operator delete(void* Ptr, zen::MemoryArena& Arena) +{ + // This exists because operator new with placement arguments + // requires a matching operator delete, but we don't actually + // need to do anything here + ZEN_UNUSED(Ptr, Arena); +} + +////////////////////////////////////////////////////////////////////////// + +std::atomic g_TraceCounter; + +TraceId +TraceId::NewTraceId() +{ + uint8_t TraceIdBytes[kSize]; + + // We use our session ID as the basis for trace IDs + const Oid SessionId = GetSessionId(); + memcpy(TraceIdBytes, SessionId.OidBits, sizeof SessionId.OidBits); + + const uint32_t TraceCounterBe = ByteSwap(g_TraceCounter.fetch_add(1)); + + memcpy(&TraceIdBytes[12], &TraceCounterBe, 4); + + return TraceId(std::span(TraceIdBytes, kSize)); +} + +////////////////////////////////////////////////////////////////////////// + +SpanId +SpanId::NewSpanId() +{ + // Just use a new OID and take the first 8 bytes. We'll probably want + // a more native solution later + + return SpanId(Oid::NewOid()); +} + +////////////////////////////////////////////////////////////////////////// + +Ref g_TraceRecorder; + +void +SetTraceRecorder(Ref Recorder) +{ + g_TraceRecorder = std::move(Recorder); +} + +bool +IsRecording() +{ + return !!g_TraceRecorder; +} + +////////////////////////////////////////////////////////////////////////// + +thread_local Tracer* t_Tracer; + +struct Tracer::Impl +{ + Impl() : m_PrevTracer(t_Tracer) {} + ~Impl() { t_Tracer = m_PrevTracer; } + + MemoryArena m_Arena; + Tracer* m_PrevTracer; + Span* m_SpanChain = nullptr; + std::atomic m_SpanCount; +}; + +Tracer::Tracer() : m_Impl(new Impl()) +{ + t_Tracer = this; +} + +Tracer::~Tracer() +{ + if (Ref Recorder = g_TraceRecorder) + { + const uint64_t SpanCount = m_Impl->m_SpanCount.load(); + const Span** Spans = new (m_Impl->m_Arena) const Span*[SpanCount]; + + uint64_t Index = 0; + + for (Span* CurrentSpan = m_Impl->m_SpanChain; CurrentSpan != nullptr; CurrentSpan = CurrentSpan->m_SpanChain) + { + Spans[Index++] = CurrentSpan; + } + + Recorder->RecordSpans(t_TraceState.CurrentTraceId, std::span(Spans, SpanCount)); + } + + delete m_Impl; +} + +Tracer* +Tracer::GetTracer() +{ + Tracer* TracerPtr = t_Tracer; + + if (!TracerPtr) + { + TracerPtr = new Tracer(); + } + + return TracerPtr; +} + +ScopedSpan +Tracer::CreateSpan(std::string_view Name) +{ + Tracer* TracerPtr = GetTracer(); + + Impl* const ImplPtr = TracerPtr->m_Impl; + + Span* NewSpan = new (ImplPtr->m_Arena) Span(ImplPtr->m_Arena, Name, ImplPtr->m_SpanChain); + + ImplPtr->m_SpanChain = NewSpan; + ImplPtr->m_SpanCount.fetch_add(1); + + return ScopedSpan(NewSpan, TracerPtr); +} + +////////////////////////////////////////////////////////////////////////// + +ScopedSpan::ScopedSpan(std::string_view Name) +{ + Tracer* TracerPtr = Tracer::GetTracer(); + + Tracer::Impl* const ImplPtr = TracerPtr->m_Impl; + + Span* NewSpan = new (ImplPtr->m_Arena) Span(ImplPtr->m_Arena, Name, ImplPtr->m_SpanChain); + + ImplPtr->m_SpanChain = NewSpan; + ImplPtr->m_SpanCount.fetch_add(1); + + m_Tracer = TracerPtr; + m_Span = NewSpan; +} + +ScopedSpan::ScopedSpan(Span* InSpan, Tracer* InTracer) : m_Tracer(InTracer), m_Span(InSpan) +{ +} + +ScopedSpan::~ScopedSpan() +{ +} + +} // namespace zen::otel + +////////////////////////////////////////////////////////////////////////// + +namespace zen::otel { + +using namespace std::literals; + +void +otlptrace_forcelink() +{ +} + +# if ZEN_WITH_TESTS + +TEST_CASE("otlp.trace") +{ + { + ScopedSpan Span = Tracer::CreateSpan("span0"); + Span->AddEvent("TestEvent1"sv); + Span->AddEvent("TestEvent2"sv); + + { + ScopedSpan Span2 = Tracer::CreateSpan("span1"); + Span2->AddEvent("TestEvent3"sv); + Span2->AddEvent("TestEvent4"sv); + } + } +} + +# endif + +} // namespace zen::otel +#endif diff --git a/src/zentelemetry/stats.cpp b/src/zentelemetry/stats.cpp new file mode 100644 index 000000000..c67fa3c66 --- /dev/null +++ b/src/zentelemetry/stats.cpp @@ -0,0 +1,831 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/stats.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#if ZEN_WITH_TESTS +# include +#endif + +// +// Derived from https://github.com/dln/medida/blob/master/src/medida/stats/ewma.cc +// + +namespace zen::metrics { + +static constinit int kTickIntervalInSeconds = 5; +static constinit double kSecondsPerMinute = 60.0; +static constinit int kOneMinute = 1; +static constinit int kFiveMinutes = 5; +static constinit int kFifteenMinutes = 15; + +static const double kM1_ALPHA = 1.0 - std::exp(-kTickIntervalInSeconds / kSecondsPerMinute / kOneMinute); +static const double kM5_ALPHA = 1.0 - std::exp(-kTickIntervalInSeconds / kSecondsPerMinute / kFiveMinutes); +static const double kM15_ALPHA = 1.0 - std::exp(-kTickIntervalInSeconds / kSecondsPerMinute / kFifteenMinutes); + +static const uint64_t CountPerTick = GetHifreqTimerFrequencySafe() * kTickIntervalInSeconds; +static const uint64_t CountPerSecond = GetHifreqTimerFrequencySafe(); + +////////////////////////////////////////////////////////////////////////// + +void +RawEWMA::Tick(double Alpha, uint64_t Interval, uint64_t Count, bool IsInitialUpdate) +{ + const double InstantRate = double(Count) / Interval; + + if (IsInitialUpdate) + { + m_Rate.store(InstantRate, std::memory_order_release); + } + else + { + double Delta = Alpha * (InstantRate - m_Rate); + +#if defined(__cpp_lib_atomic_float) + m_Rate.fetch_add(Delta); +#else + double Value = m_Rate.load(std::memory_order_acquire); + double Next; + do + { + Next = Value + Delta; + } while (!m_Rate.compare_exchange_weak(Value, Next, std::memory_order_relaxed)); +#endif + } +} + +double +RawEWMA::Rate() const +{ + return m_Rate.load(std::memory_order_relaxed) * CountPerSecond; +} + +////////////////////////////////////////////////////////////////////////// + +Meter::Meter() : m_StartTick{GetHifreqTimerValue()}, m_LastTick(m_StartTick.load()) +{ +} + +Meter::~Meter() +{ +} + +void +Meter::TickIfNecessary() +{ + uint64_t OldTick = m_LastTick.load(); + const uint64_t NewTick = GetHifreqTimerValue(); + const uint64_t Age = NewTick - OldTick; + + if (Age > CountPerTick) + { + // Ensure only one thread at a time updates the time. This + // works because our tick interval should be sufficiently + // long to ensure two threads don't end up inside this block + + if (m_LastTick.compare_exchange_strong(OldTick, NewTick)) + { + m_Remainder.fetch_add(Age); + + do + { + int64_t Remain = m_Remainder.load(std::memory_order_relaxed); + + if (Remain < 0) + { + return; + } + + if (m_Remainder.compare_exchange_strong(Remain, Remain - CountPerTick)) + { + Tick(); + } + } while (true); + } + } +} + +void +Meter::Tick() +{ + const uint64_t PendingCount = m_PendingCount.exchange(0); + const bool IsFirstTick = m_IsFirstTick; + + if (IsFirstTick) + { + m_IsFirstTick = false; + } + + m_RateM1.Tick(kM1_ALPHA, CountPerTick, PendingCount, IsFirstTick); + m_RateM5.Tick(kM5_ALPHA, CountPerTick, PendingCount, IsFirstTick); + m_RateM15.Tick(kM15_ALPHA, CountPerTick, PendingCount, IsFirstTick); +} + +double +Meter::Rate1() +{ + TickIfNecessary(); + + return m_RateM1.Rate(); +} + +double +Meter::Rate5() +{ + TickIfNecessary(); + + return m_RateM5.Rate(); +} + +double +Meter::Rate15() +{ + TickIfNecessary(); + + return m_RateM15.Rate(); +} + +double +Meter::MeanRate() const +{ + const uint64_t Count = m_TotalCount.load(std::memory_order_relaxed); + + if (Count == 0) + { + return 0.0; + } + + const uint64_t Elapsed = GetHifreqTimerValue() - m_StartTick; + + return (double(Count) * GetHifreqTimerFrequency()) / Elapsed; +} + +void +Meter::Mark(uint64_t Count) +{ + TickIfNecessary(); + + m_TotalCount.fetch_add(Count); + m_PendingCount.fetch_add(Count); +} + +////////////////////////////////////////////////////////////////////////// + +uint64_t +rol64(uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +struct xoshiro256ss_state +{ + uint64_t s[4]; +}; + +uint64_t +xoshiro256ss(struct xoshiro256ss_state* state) +{ + uint64_t* s = state->s; + uint64_t const result = rol64(s[1] * 5, 7) * 9; + uint64_t const t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + s[3] = rol64(s[3], 45); + + return result; +} + +class xoshiro256 +{ +public: + uint64_t operator()() { return xoshiro256ss(&m_State); } + static constexpr uint64_t min() { return 0; } + static constexpr uint64_t max() { return ~(0ull); } + +private: + xoshiro256ss_state m_State{0xf0fefaf9, 0xbeeb5238, 0x48472397, 0x58858558}; +}; + +thread_local xoshiro256 ThreadLocalRng; + +////////////////////////////////////////////////////////////////////////// + +UniformSample::UniformSample(uint32_t ReservoirSize) +{ + ZEN_MEMSCOPE(ELLMTag::Metrics); + m_Values = std::vector>(ReservoirSize); +} + +UniformSample::~UniformSample() +{ +} + +void +UniformSample::Clear() +{ + for (auto& Value : m_Values) + { + Value.store(0); + } + m_SampleCounter = 0; +} + +uint32_t +UniformSample::Size() const +{ + return gsl::narrow_cast(Min(m_SampleCounter.load(), m_Values.size())); +} + +void +UniformSample::Update(int64_t Value) +{ + const uint64_t Count = m_SampleCounter++; + const uint64_t Size = m_Values.size(); + + if (Count < Size) + { + m_Values[Count] = Value; + } + else + { + // Randomly choose an old entry to potentially replace (the probability + // of replacing an entry diminishes with time) + + const uint64_t SampleIndex = ThreadLocalRng() % Count; + + if (SampleIndex < Size) + { + m_Values[SampleIndex].store(Value, std::memory_order_release); + } + } +} + +SampleSnapshot +UniformSample::Snapshot() const +{ + ZEN_MEMSCOPE(ELLMTag::Metrics); + + uint64_t ValuesSize = Size(); + std::vector Values(ValuesSize); + + for (int i = 0, n = int(ValuesSize); i < n; ++i) + { + Values[i] = double(m_Values[i]); + } + + return SampleSnapshot(std::move(Values)); +} + +////////////////////////////////////////////////////////////////////////// + +Histogram::Histogram(int32_t SampleCount) : m_Sample(SampleCount) +{ +} + +Histogram::~Histogram() +{ +} + +void +Histogram::Clear() +{ + m_Min = m_Max = m_Sum = m_Count = 0; + m_Sample.Clear(); +} + +void +Histogram::Update(int64_t Value) +{ + m_Sample.Update(Value); + + if (m_Count == 0) + { + m_Min = m_Max = Value; + } + else + { + int64_t CurrentMax = m_Max.load(std::memory_order_relaxed); + + while ((CurrentMax < Value) && !m_Max.compare_exchange_weak(CurrentMax, Value)) + { + } + + int64_t CurrentMin = m_Min.load(std::memory_order_relaxed); + + while ((CurrentMin > Value) && !m_Min.compare_exchange_weak(CurrentMin, Value)) + { + } + } + + m_Sum += Value; + ++m_Count; +} + +int64_t +Histogram::Max() const +{ + return m_Max.load(std::memory_order_relaxed); +} + +int64_t +Histogram::Min() const +{ + return m_Min.load(std::memory_order_relaxed); +} + +double +Histogram::Mean() const +{ + if (m_Count) + { + return double(m_Sum.load(std::memory_order_relaxed)) / m_Count; + } + else + { + return 0.0; + } +} + +uint64_t +Histogram::Count() const +{ + return m_Count.load(std::memory_order_relaxed); +} + +////////////////////////////////////////////////////////////////////////// + +SampleSnapshot::SampleSnapshot(std::vector&& Values) : m_Values(std::move(Values)) +{ + std::sort(begin(m_Values), end(m_Values)); +} + +SampleSnapshot::~SampleSnapshot() +{ +} + +double +SampleSnapshot::GetQuantileValue(double Quantile) +{ + ZEN_ASSERT((Quantile >= 0.0) && (Quantile <= 1.0)); + + if (m_Values.empty()) + { + return 0.0; + } + + const double Pos = Quantile * (m_Values.size() + 1); + + if (Pos < 1) + { + return m_Values.front(); + } + + if (Pos >= m_Values.size()) + { + return m_Values.back(); + } + + const int32_t Index = (int32_t)Pos; + const double Lower = m_Values[Index - 1]; + const double Upper = m_Values[Index]; + + // Lerp + return Lower + (Pos - std::floor(Pos)) * (Upper - Lower); +} + +const std::vector& +SampleSnapshot::GetValues() const +{ + return m_Values; +} + +////////////////////////////////////////////////////////////////////////// + +OperationTiming::OperationTiming(int32_t SampleCount) : m_Histogram{SampleCount} +{ +} + +OperationTiming::~OperationTiming() +{ +} + +void +OperationTiming::Update(int64_t Duration) +{ + m_Meter.Mark(1); + m_Histogram.Update(Duration); +} + +int64_t +OperationTiming::Max() const +{ + return m_Histogram.Max(); +} + +int64_t +OperationTiming::Min() const +{ + return m_Histogram.Min(); +} + +double +OperationTiming::Mean() const +{ + return m_Histogram.Mean(); +} + +uint64_t +OperationTiming::Count() const +{ + return m_Meter.Count(); +} + +OperationTiming::Scope::Scope(OperationTiming& Outer) : m_Outer(Outer), m_StartTick(GetHifreqTimerValue()) +{ +} + +OperationTiming::Scope::~Scope() +{ + Stop(); +} + +void +OperationTiming::Scope::Stop() +{ + if (m_StartTick != 0) + { + m_Outer.Update(GetHifreqTimerValue() - m_StartTick); + m_StartTick = 0; + } +} + +void +OperationTiming::Scope::Cancel() +{ + m_StartTick = 0; +} + +////////////////////////////////////////////////////////////////////////// + +RequestStats::RequestStats(int32_t SampleCount) : m_RequestTimeHistogram{SampleCount}, m_BytesHistogram{SampleCount} +{ +} + +RequestStats::~RequestStats() +{ +} + +void +RequestStats::Update(int64_t Duration, int64_t Bytes) +{ + m_RequestMeter.Mark(1); + m_RequestTimeHistogram.Update(Duration); + + m_BytesMeter.Mark(Bytes); + m_BytesHistogram.Update(Bytes); +} + +uint64_t +RequestStats::Count() const +{ + return m_RequestMeter.Count(); +} + +RequestStats::Scope::Scope(RequestStats& Outer, int64_t Bytes) : m_Outer(Outer), m_StartTick(GetHifreqTimerValue()), m_Bytes(Bytes) +{ +} + +RequestStats::Scope::~Scope() +{ + Stop(); +} + +void +RequestStats::Scope::Stop() +{ + if (m_StartTick != 0) + { + m_Outer.Update(GetHifreqTimerValue() - m_StartTick, m_Bytes); + m_StartTick = 0; + } +} + +void +RequestStats::Scope::Cancel() +{ + m_StartTick = 0; +} + +////////////////////////////////////////////////////////////////////////// + +void +EmitSnapshot(Meter& Stat, CbObjectWriter& Cbo) +{ + Cbo << "count" << Stat.Count(); + Cbo << "rate_mean" << Stat.MeanRate(); + Cbo << "rate_1" << Stat.Rate1() << "rate_5" << Stat.Rate5() << "rate_15" << Stat.Rate15(); +} + +void +RequestStats::EmitSnapshot(std::string_view Tag, CbObjectWriter& Cbo) +{ + Cbo.BeginObject(Tag); + + Cbo.BeginObject("requests"); + metrics::EmitSnapshot(m_RequestMeter, Cbo); + metrics::EmitSnapshot(m_RequestTimeHistogram, Cbo, GetHifreqTimerToSeconds()); + Cbo.EndObject(); + + Cbo.BeginObject("bytes"); + metrics::EmitSnapshot(m_BytesMeter, Cbo); + metrics::EmitSnapshot(m_BytesHistogram, Cbo, 1.0); + Cbo.EndObject(); + + Cbo.EndObject(); +} + +RequestStatsSnapshot +RequestStats::Snapshot() +{ + const double ToSeconds = GetHifreqTimerToSeconds(); + + return RequestStatsSnapshot{.Requests = GetSnapshot(m_RequestMeter, m_RequestTimeHistogram, ToSeconds), + .Bytes = GetSnapshot(m_BytesMeter, m_BytesHistogram, 1.0)}; +} + +void +EmitSnapshot(std::string_view Tag, OperationTiming& Stat, CbObjectWriter& Cbo) +{ + Cbo.BeginObject(Tag); + + SampleSnapshot Snap = Stat.Snapshot(); + + Cbo << "count" << Stat.Count(); + Cbo << "rate_mean" << Stat.MeanRate(); + Cbo << "rate_1" << Stat.Rate1() << "rate_5" << Stat.Rate5() << "rate_15" << Stat.Rate15(); + + const double ToSeconds = GetHifreqTimerToSeconds(); + + Cbo << "t_avg" << Stat.Mean() * ToSeconds; + Cbo << "t_min" << Stat.Min() * ToSeconds << "t_max" << Stat.Max() * ToSeconds; + Cbo << "t_p75" << Snap.Get75Percentile() * ToSeconds << "t_p95" << Snap.Get95Percentile() * ToSeconds << "t_p99" + << Snap.Get99Percentile() * ToSeconds << "t_p999" << Snap.Get999Percentile() * ToSeconds; + + Cbo.EndObject(); +} + +void +EmitSnapshot(std::string_view Tag, const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor) +{ + Cbo.BeginObject(Tag); + EmitSnapshot(Stat, Cbo, ConversionFactor); + Cbo.EndObject(); +} + +void +EmitSnapshot(const Histogram& Stat, CbObjectWriter& Cbo, double ConversionFactor) +{ + SampleSnapshot Snap = Stat.Snapshot(); + + Cbo << "count" << Stat.Count() * ConversionFactor << "avg" << Stat.Mean() * ConversionFactor; + Cbo << "min" << Stat.Min() * ConversionFactor << "max" << Stat.Max() * ConversionFactor; + Cbo << "p75" << Snap.Get75Percentile() * ConversionFactor << "p95" << Snap.Get95Percentile() * ConversionFactor << "p99" + << Snap.Get99Percentile() * ConversionFactor << "p999" << Snap.Get999Percentile() * ConversionFactor; +} + +void +EmitSnapshot(std::string_view Tag, Meter& Stat, CbObjectWriter& Cbo) +{ + Cbo.BeginObject(Tag); + + Cbo << "count" << Stat.Count() << "rate_mean" << Stat.MeanRate(); + Cbo << "rate_1" << Stat.Rate1() << "rate_5" << Stat.Rate5() << "rate_15" << Stat.Rate15(); + + Cbo.EndObject(); +} + +void +EmitSnapshot(const MeterSnapshot& Snapshot, CbObjectWriter& Cbo) +{ + Cbo << "count" << Snapshot.Count; + Cbo << "rate_mean" << Snapshot.MeanRate; + Cbo << "rate_1" << Snapshot.Rate1 << "rate_5" << Snapshot.Rate5 << "rate_15" << Snapshot.Rate15; +} + +void +EmitSnapshot(const HistogramSnapshot& Snapshot, CbObjectWriter& Cbo) +{ + Cbo << "t_count" << Snapshot.Count << "t_avg" << Snapshot.Avg; + Cbo << "t_min" << Snapshot.Min << "t_max" << Snapshot.Max; + Cbo << "t_p75" << Snapshot.P75 << "t_p95" << Snapshot.P95 << "t_p99" << Snapshot.P999; +} + +void +EmitSnapshot(std::string_view Tag, const StatsSnapshot& Snapshot, CbObjectWriter& Cbo) +{ + Cbo.BeginObject(Tag); + EmitSnapshot(Snapshot.Meter, Cbo); + EmitSnapshot(Snapshot.Histogram, Cbo); + Cbo.EndObject(); +} + +void +EmitSnapshot(std::string_view Tag, const RequestStatsSnapshot& Snapshot, CbObjectWriter& Cbo) +{ + if (Snapshot.Requests.Meter.Count == 0) + { + return; + } + Cbo.BeginObject(Tag); + EmitSnapshot("request", Snapshot.Requests, Cbo); + EmitSnapshot("bytes", Snapshot.Bytes, Cbo); + Cbo.EndObject(); +} + +////////////////////////////////////////////////////////////////////////// + +#if ZEN_WITH_TESTS + +TEST_CASE("Core.Stats.Histogram") +{ + Histogram Histo{258}; + + SampleSnapshot Snap1 = Histo.Snapshot(); + CHECK_EQ(Snap1.Size(), 0); + CHECK_EQ(Snap1.GetMedian(), 0); + + Histo.Update(1); + CHECK_EQ(Histo.Min(), 1); + CHECK_EQ(Histo.Max(), 1); + + SampleSnapshot Snap2 = Histo.Snapshot(); + CHECK_EQ(Snap2.Size(), 1); + + Histo.Update(2); + CHECK_EQ(Histo.Min(), 1); + CHECK_EQ(Histo.Max(), 2); + + SampleSnapshot Snap3 = Histo.Snapshot(); + CHECK_EQ(Snap3.Size(), 2); + + Histo.Update(-2); + CHECK_EQ(Histo.Min(), -2); + CHECK_EQ(Histo.Max(), 2); + CHECK_EQ(Histo.Mean(), 1 / 3.0); + + SampleSnapshot Snap4 = Histo.Snapshot(); + CHECK_EQ(Snap4.Size(), 3); + CHECK_EQ(Snap4.GetMedian(), 1); + CHECK_EQ(Snap4.Get999Percentile(), 2); + CHECK_EQ(Snap4.GetQuantileValue(0), -2); +} + +TEST_CASE("Core.Stats.UniformSample") +{ + UniformSample Sample1{100}; + + for (int i = 0; i < 100; ++i) + { + for (int j = 1; j <= 100; ++j) + { + Sample1.Update(j); + } + } + + int64_t Sum = 0; + int64_t Count = 0; + + Sample1.IterateValues([&](int64_t Value) { + ++Count; + Sum += Value; + }); + + double Average = double(Sum) / Count; + + CHECK(fabs(Average - 50) < 10); // What's the right test here? The result could vary massively and still be technically correct +} + +TEST_CASE("Core.Stats.EWMA") +{ + SUBCASE("Simple_1") + { + RawEWMA Ewma1; + Ewma1.Tick(kM1_ALPHA, CountPerSecond, 5, true); + + CHECK(fabs(Ewma1.Rate() - 5) < 0.1); + + for (int i = 0; i < 60; ++i) + { + Ewma1.Tick(kM1_ALPHA, CountPerSecond, 10, false); + } + + CHECK(fabs(Ewma1.Rate() - 10) < 0.1); + + for (int i = 0; i < 60; ++i) + { + Ewma1.Tick(kM1_ALPHA, CountPerSecond, 20, false); + } + + CHECK(fabs(Ewma1.Rate() - 20) < 0.1); + } + + SUBCASE("Simple_10") + { + RawEWMA Ewma1; + RawEWMA Ewma5; + RawEWMA Ewma15; + Ewma1.Tick(kM1_ALPHA, CountPerSecond, 5, true); + Ewma5.Tick(kM5_ALPHA, CountPerSecond, 5, true); + Ewma15.Tick(kM15_ALPHA, CountPerSecond, 5, true); + + CHECK(fabs(Ewma1.Rate() - 5) < 0.1); + CHECK(fabs(Ewma5.Rate() - 5) < 0.1); + CHECK(fabs(Ewma15.Rate() - 5) < 0.1); + + auto Tick1 = [&Ewma1](auto Value) { Ewma1.Tick(kM1_ALPHA, CountPerSecond, Value, false); }; + auto Tick5 = [&Ewma5](auto Value) { Ewma5.Tick(kM5_ALPHA, CountPerSecond, Value, false); }; + auto Tick15 = [&Ewma15](auto Value) { Ewma15.Tick(kM15_ALPHA, CountPerSecond, Value, false); }; + + for (int i = 0; i < 60; ++i) + { + Tick1(10); + Tick5(10); + Tick15(10); + } + + CHECK(fabs(Ewma1.Rate() - 10) < 0.1); + + for (int i = 0; i < 5 * 60; ++i) + { + Tick1(20); + Tick5(20); + Tick15(20); + } + + CHECK(fabs(Ewma1.Rate() - 20) < 0.1); + CHECK(fabs(Ewma5.Rate() - 20) < 0.1); + + for (int i = 0; i < 16 * 60; ++i) + { + Tick1(100); + Tick5(100); + Tick15(100); + } + + CHECK(fabs(Ewma1.Rate() - 100) < 0.1); + CHECK(fabs(Ewma5.Rate() - 100) < 0.1); + CHECK(fabs(Ewma15.Rate() - 100) < 0.5); + } +} + +# if 0 // This is not really a unit test, but mildly useful to exercise some code +TEST_CASE("Meter") +{ + Meter Meter1; + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + Meter1.Mark(1); + Sleep(1000); + [[maybe_unused]] double Rate = Meter1.MeanRate(); +} +# endif +} + +namespace zen { + +void +stats_forcelink() +{ +} + +#endif + +} // namespace zen::metrics diff --git a/src/zentelemetry/xmake.lua b/src/zentelemetry/xmake.lua new file mode 100644 index 000000000..2f90d7b90 --- /dev/null +++ b/src/zentelemetry/xmake.lua @@ -0,0 +1,10 @@ +-- Copyright Epic Games, Inc. All Rights Reserved. + +target('zentelemetry') + set_kind("static") + set_group("libs") + add_headerfiles("**.h") + add_files("**.cpp") + add_includedirs("include", {public=true}) + add_deps("zencore", "protozero") + add_packages("vcpkg::robin-map", "vcpkg::spdlog") diff --git a/src/zentelemetry/zentelemetry.cpp b/src/zentelemetry/zentelemetry.cpp new file mode 100644 index 000000000..ed6ad13b9 --- /dev/null +++ b/src/zentelemetry/zentelemetry.cpp @@ -0,0 +1,19 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/zentelemetry.h" + +#include "zentelemetry/otlptrace.h" +#include "zentelemetry/stats.h" + +namespace zen { + +void +zentelemetry_forcelinktests() +{ + zen::stats_forcelink(); +#if ZEN_WITH_OTEL + zen::otel::otlptrace_forcelink(); +#endif +} + +} // namespace zen -- cgit v1.2.3