diff options
| author | Stefan Boberg <[email protected]> | 2025-10-22 17:57:29 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-10-22 17:57:29 +0200 |
| commit | 5c139e2d8a260544bc5e730de0440edbab4b0f03 (patch) | |
| tree | b477208925fe3b373d4833460b90d61a8051cf05 /src | |
| parent | 5.7.7-pre3 (diff) | |
| download | zen-5c139e2d8a260544bc5e730de0440edbab4b0f03.tar.xz zen-5c139e2d8a260544bc5e730de0440edbab4b0f03.zip | |
add support for OTLP logging/tracing (#599)
- adds `zentelemetry` project which houses new functionality for serializing logs and traces in OpenTelemetry Protocol format (OTLP)
- moved existing stats functionality from `zencore` to `zentelemetry`
- adds `TRefCounted<T>` for vtable-less refcounting
- adds `MemoryArena` class which allows for linear allocation of memory from chunks
- adds `protozero` which is used to encode OTLP protobuf messages
Diffstat (limited to 'src')
39 files changed, 3643 insertions, 20 deletions
diff --git a/src/zenbase/include/zenbase/refcount.h b/src/zenbase/include/zenbase/refcount.h index 6ad49cba2..40ad7bca5 100644 --- a/src/zenbase/include/zenbase/refcount.h +++ b/src/zenbase/include/zenbase/refcount.h @@ -10,6 +10,9 @@ namespace zen { /** * Helper base class for reference counted objects using intrusive reference counting + * + * When the reference count reaches zero, the object deletes itself. This class relies + * on having a virtual destructor to ensure proper cleanup of derived classes. */ class RefCounted { @@ -17,7 +20,7 @@ public: RefCounted() = default; virtual ~RefCounted() = default; - inline uint32_t AddRef() const { return AtomicIncrement(const_cast<RefCounted*>(this)->m_RefCount); } + inline uint32_t AddRef() const noexcept { return AtomicIncrement(const_cast<RefCounted*>(this)->m_RefCount); } inline uint32_t Release() const { const uint32_t RefCount = AtomicDecrement(const_cast<RefCounted*>(this)->m_RefCount); @@ -43,6 +46,48 @@ private: }; /** + * Template helper base class for reference counted objects using intrusive reference counting. + * + * NOTE: Unlike RefCounted, this class deletes the derived type when the reference count reaches zero. + * It has no virtual destructor, so it's important that you either don't derive from it further, + * or ensure that the derived class has a virtual destructor. + */ + +template<typename T> +class TRefCounted +{ +public: + TRefCounted() = default; + ~TRefCounted() = default; + + inline uint32_t AddRef() const noexcept { return AtomicIncrement(const_cast<TRefCounted<T>*>(this)->m_RefCount); } + inline uint32_t Release() const + { + const uint32_t RefCount = AtomicDecrement(const_cast<TRefCounted<T>*>(this)->m_RefCount); + if (RefCount == 0) + { + const_cast<T*>(static_cast<const T*>(this))->DeleteThis(); + } + return RefCount; + } + + // Copying reference counted objects doesn't make a lot of sense generally, so let's prevent it + + TRefCounted(const TRefCounted&) = delete; + TRefCounted(TRefCounted&&) = delete; + TRefCounted& operator=(const TRefCounted&) = delete; + TRefCounted& operator=(TRefCounted&&) = delete; + +protected: + inline uint32_t RefCount() const { return m_RefCount; } + + void DeleteThis() const noexcept { delete static_cast<const T*>(this); } + +private: + uint32_t m_RefCount = 0; +}; + +/** * Smart pointer for classes derived from RefCounted */ @@ -127,7 +172,8 @@ class Ref { public: inline Ref() = default; - inline Ref(const Ref& Rhs) : m_Ref(Rhs.m_Ref) { m_Ref && m_Ref->AddRef(); } + inline Ref(Ref&& Rhs) noexcept : m_Ref(Rhs.m_Ref) { Rhs.m_Ref = nullptr; } + inline Ref(const Ref& Rhs) noexcept : m_Ref(Rhs.m_Ref) { m_Ref && m_Ref->AddRef(); } inline explicit Ref(T* Ptr) : m_Ref(Ptr) { m_Ref && m_Ref->AddRef(); } inline ~Ref() { m_Ref && m_Ref->Release(); } @@ -170,7 +216,6 @@ public: } return *this; } - inline Ref(Ref&& Rhs) noexcept : m_Ref(Rhs.m_Ref) { Rhs.m_Ref = nullptr; } private: T* m_Ref = nullptr; diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h index 63779407e..1b2d382ee 100644 --- a/src/zencore/include/zencore/iobuffer.h +++ b/src/zencore/include/zencore/iobuffer.h @@ -32,6 +32,7 @@ enum class ZenContentType : uint8_t kPNG = 12, kIcon = 13, kXML = 14, + kProtobuf = 15, kCOUNT }; @@ -73,6 +74,8 @@ ToString(ZenContentType ContentType) return "icon"sv; case ZenContentType::kXML: return "xml"sv; + case ZenContentType::kProtobuf: + return "protobuf"sv; } } diff --git a/src/zencore/include/zencore/memory/memoryarena.h b/src/zencore/include/zencore/memory/memoryarena.h new file mode 100644 index 000000000..551415aac --- /dev/null +++ b/src/zencore/include/zencore/memory/memoryarena.h @@ -0,0 +1,104 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/thread.h> +#include <vector> + +namespace zen { + +/** + * Chunked linear memory allocator + * + * Supports fast allocation of many small objects with minimal overhead. + * All allocations are freed when the arena is destroyed, therefore there + * is no support for individual deallocation. + * + * For convenience, we include operator new/delete overloads below which + * take a MemoryArena reference as a placement argument. + * + * This allocator is thread-safe. + */ +class MemoryArena +{ +public: + MemoryArena() = default; + ~MemoryArena(); + + void* AllocateAligned(size_t ByteCount, size_t align); + void* AllocateAlignedWithOffset(size_t ByteCount, size_t align, size_t offset); + void* Allocate(size_t Size); + const char* DuplicateString(std::string_view Str); + + MemoryArena(const MemoryArena&) = delete; + MemoryArena& operator=(const MemoryArena&) = delete; + +private: + static constexpr size_t ChunkSize = 16 * 1024; + // TODO: should just chain the memory blocks together and avoid this + // vector altogether, saving us a memory allocation + std::vector<uint8_t*> m_Chunks; + uint8_t* m_CurrentChunk = nullptr; + size_t m_CurrentOffset = 0; + RwLock m_Lock; +}; + +// Allocator suitable for use with EASTL + +struct ArenaAlloc +{ + ArenaAlloc(const char* name_opt = nullptr) = delete; + ArenaAlloc(MemoryArena& Arena) : m_Arena(&Arena) {} + + inline void* allocate(size_t bytes, int flags = 0) + { + ZEN_UNUSED(flags); + return m_Arena->Allocate(bytes); + } + + inline void* allocate(size_t bytes, size_t align, size_t offset, int flags = 0) + { + ZEN_UNUSED(flags); + if (offset == 0) + { + return m_Arena->AllocateAligned(bytes, align); + } + else + { + return m_Arena->AllocateAlignedWithOffset(bytes, align, offset); + } + } + + void deallocate(void* p, size_t n) { ZEN_UNUSED(p, n); } + +private: + MemoryArena* m_Arena = nullptr; +}; + +} // namespace zen + +inline void* +operator new(size_t Size, zen::MemoryArena& Arena) +{ + return Arena.Allocate(Size); +} + +inline void +operator delete(void* Ptr, zen::MemoryArena& Arena) +{ + // Arena will clean up all allocations when it's destroyed + ZEN_UNUSED(Ptr, Arena); +} + +inline void* +operator new[](size_t Size, zen::MemoryArena& Arena) +{ + return Arena.Allocate(Size); +} + +inline void +operator delete[](void* Ptr, zen::MemoryArena& Arena) +{ + // Arena will clean up all allocations when it's destroyed + ZEN_UNUSED(Ptr, Arena); +} diff --git a/src/zencore/memory/memoryarena.cpp b/src/zencore/memory/memoryarena.cpp new file mode 100644 index 000000000..9c907a66d --- /dev/null +++ b/src/zencore/memory/memoryarena.cpp @@ -0,0 +1,126 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zencore/memory/memoryarena.h> + +namespace zen { + +MemoryArena::~MemoryArena() +{ + for (auto Chunk : m_Chunks) + delete[] Chunk; +} + +void* +MemoryArena::AllocateAligned(size_t ByteCount, size_t align) +{ + if (ByteCount == 0) + { + return nullptr; + } + + void* Ptr = nullptr; + + m_Lock.WithExclusiveLock([&] { + size_t AlignedOffset = (m_CurrentOffset + (align - 1)) & ~(align - 1); + + if (m_CurrentChunk == nullptr || AlignedOffset + ByteCount > ChunkSize) + { + uint8_t* NewChunk = new uint8_t[ChunkSize]; + if (!NewChunk) + { + return; + } + + m_Chunks.push_back(NewChunk); + m_CurrentChunk = NewChunk; + AlignedOffset = 0; + } + + Ptr = m_CurrentChunk + AlignedOffset; + m_CurrentOffset = AlignedOffset + ByteCount; + }); + + return Ptr; +} + +void* +MemoryArena::AllocateAlignedWithOffset(size_t ByteCount, size_t align, size_t offset) +{ + if (ByteCount == 0) + { + return nullptr; + } + + void* Ptr = nullptr; + + m_Lock.WithExclusiveLock([&] { + size_t AlignedOffset = (m_CurrentOffset + (align - 1) + offset) & ~(align - 1); + + if (m_CurrentChunk == nullptr || AlignedOffset + ByteCount > ChunkSize) + { + uint8_t* NewChunk = new uint8_t[ChunkSize]; + if (!NewChunk) + { + return; + } + + m_Chunks.push_back(NewChunk); + m_CurrentChunk = NewChunk; + AlignedOffset = offset; + } + + Ptr = m_CurrentChunk + AlignedOffset; + m_CurrentOffset = AlignedOffset + ByteCount; + }); + + return Ptr; +} + +void* +MemoryArena::Allocate(size_t Size) +{ + if (Size == 0) + { + return nullptr; + } + + void* Ptr = nullptr; + constexpr size_t Alignment = alignof(std::max_align_t); + + m_Lock.WithExclusiveLock([&] { + size_t AlignedOffset = (m_CurrentOffset + Alignment - 1) & ~(Alignment - 1); + + if (m_CurrentChunk == nullptr || AlignedOffset + Size > ChunkSize) + { + uint8_t* NewChunk = new uint8_t[ChunkSize]; + if (!NewChunk) + { + return; + } + + m_Chunks.push_back(NewChunk); + m_CurrentChunk = NewChunk; + AlignedOffset = 0; + } + + Ptr = m_CurrentChunk + AlignedOffset; + m_CurrentOffset = AlignedOffset + Size; + }); + + return Ptr; +} + +const char* +MemoryArena::DuplicateString(std::string_view Str) +{ + const size_t Len = Str.size(); + char* NewStr = static_cast<char*>(Allocate(Len + 1)); + if (NewStr) + { + memcpy(NewStr, Str.data(), Len); + NewStr[Len] = '\0'; + } + return NewStr; +} + +} // namespace zen diff --git a/src/zencore/trace.cpp b/src/zencore/trace.cpp index fe8fb9a5d..87035554f 100644 --- a/src/zencore/trace.cpp +++ b/src/zencore/trace.cpp @@ -9,6 +9,7 @@ # include <zencore/logging.h> # define TRACE_IMPLEMENT 1 +# undef _WINSOCK_DEPRECATED_NO_WARNINGS # include <zencore/trace.h> # include <zencore/memory/fmalloc.h> diff --git a/src/zencore/zencore.cpp b/src/zencore/zencore.cpp index b78991918..4ff79edc7 100644 --- a/src/zencore/zencore.cpp +++ b/src/zencore/zencore.cpp @@ -26,7 +26,6 @@ #include <zencore/parallelwork.h> #include <zencore/process.h> #include <zencore/sha1.h> -#include <zencore/stats.h> #include <zencore/stream.h> #include <zencore/string.h> #include <zencore/thread.h> @@ -267,7 +266,6 @@ zencore_forcelinktests() zen::process_forcelink(); zen::refcount_forcelink(); zen::sha1_forcelink(); - zen::stats_forcelink(); zen::stream_forcelink(); zen::string_forcelink(); zen::thread_forcelink(); diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index 2c063d646..f48c22367 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -86,6 +86,9 @@ MapContentTypeToString(HttpContentType ContentType) case HttpContentType::kXML: return "application/xml"sv; + + case HttpContentType::kProtobuf: + return "application/x-protobuf"sv; } } @@ -119,6 +122,7 @@ static constinit uint32_t HashImagePng = HashStringDjb2("image/png"sv); static constinit uint32_t HashIcon = HashStringDjb2("ico"sv); static constinit uint32_t HashImageIcon = HashStringDjb2("image/x-icon"sv); static constinit uint32_t HashXml = HashStringDjb2("application/xml"sv); +static constinit uint32_t HashProtobuf = HashStringDjb2("application/x-protobuf"sv); std::once_flag InitContentTypeLookup; @@ -153,6 +157,7 @@ struct HashedTypeEntry {HashIcon, HttpContentType::kIcon}, {HashImageIcon, HttpContentType::kIcon}, {HashXml, HttpContentType::kXML}, + {HashProtobuf, HttpContentType::kProtobuf}, // clang-format on }; diff --git a/src/zenhttp/include/zenhttp/httptest.h b/src/zenhttp/include/zenhttp/httptest.h index afe71fbce..608462809 100644 --- a/src/zenhttp/include/zenhttp/httptest.h +++ b/src/zenhttp/include/zenhttp/httptest.h @@ -3,8 +3,8 @@ #pragma once #include <zencore/logging.h> -#include <zencore/stats.h> #include <zenhttp/httpserver.h> +#include <zentelemetry/stats.h> #include <atomic> #include <unordered_map> diff --git a/src/zenhttp/xmake.lua b/src/zenhttp/xmake.lua index b6ffbe467..af4064012 100644 --- a/src/zenhttp/xmake.lua +++ b/src/zenhttp/xmake.lua @@ -7,7 +7,7 @@ target('zenhttp') add_files("**.cpp") add_files("servers/httpsys.cpp", {unity_ignored=true}) add_includedirs("include", {public=true}) - add_deps("zencore", "transport-sdk") + add_deps("zencore", "zentelemetry", "transport-sdk") add_packages( "vcpkg::asio", "vcpkg::cpr", diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp index 50cf62274..90af79651 100644 --- a/src/zenserver/diag/logging.cpp +++ b/src/zenserver/diag/logging.cpp @@ -12,6 +12,8 @@ #include <zenutil/logging.h> #include <zenutil/logging/rotatingfilesink.h> +#include "otlphttp.h" + ZEN_THIRD_PARTY_INCLUDES_START #include <spdlog/spdlog.h> ZEN_THIRD_PARTY_INCLUDES_END @@ -73,6 +75,16 @@ InitializeServerLogging(const ZenServerConfig& InOptions) spdlog::apply_logger_env_levels(ZenClientLogger); spdlog::register_logger(ZenClientLogger); + // + +#if ZEN_WITH_OTEL + if (false) + { + auto OtelSink = std::make_shared<zen::logging::OtelHttpProtobufSink>("http://signoz.localdomain:4318"); + zen::logging::Default().SpdLogger->sinks().push_back(std::move(OtelSink)); + } +#endif + FinishInitializeLogging(LogOptions); const zen::Oid ServerSessionId = zen::GetSessionId(); diff --git a/src/zenserver/diag/otlphttp.cpp b/src/zenserver/diag/otlphttp.cpp new file mode 100644 index 000000000..d62ccccb6 --- /dev/null +++ b/src/zenserver/diag/otlphttp.cpp @@ -0,0 +1,83 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "otlphttp.h" + +#include <zencore/config.h> +#include <zencore/process.h> +#include <zencore/session.h> +#include <zencore/system.h> +#include <zentelemetry/otlpencoder.h> +#include <protozero/buffer_string.hpp> +#include <protozero/pbf_builder.hpp> + +#if ZEN_WITH_OTEL + +namespace zen::logging { + +////////////////////////////////////////////////////////////////////////// + +OtelHttpProtobufSink::OtelHttpProtobufSink(const std::string_view& Uri) : m_OtelHttp(Uri) +{ + m_Encoder.AddResourceAttribute("service.name", "zenserver"); + m_Encoder.AddResourceAttribute("service.instance.id", GetSessionIdString()); + m_Encoder.AddResourceAttribute("service.namespace", "zen"); + m_Encoder.AddResourceAttribute("service.version", ZEN_CFG_VERSION); + m_Encoder.AddResourceAttribute("host.name", GetMachineName()); + m_Encoder.AddResourceAttribute("session.id", GetSessionIdString()); + m_Encoder.AddResourceAttribute("process.id", zen::GetCurrentProcessId()); + + m_TraceRecorder = new TraceRecorder(this); + otel::SetTraceRecorder(m_TraceRecorder); +} + +OtelHttpProtobufSink::~OtelHttpProtobufSink() +{ + otel::SetTraceRecorder({}); +} + +void +OtelHttpProtobufSink::RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) +{ + std::string Data = m_Encoder.FormatOtelTrace(Trace, Spans); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); + + auto Result = m_OtelHttp.Post("/v1/traces", Payload); +} + +void +OtelHttpProtobufSink::TraceRecorder::RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) +{ + m_Sink->RecordSpans(Trace, Spans); +} + +void +OtelHttpProtobufSink::log(const spdlog::details::log_msg& Msg) +{ + { + std::string Data = m_Encoder.FormatOtelProtobuf(Msg); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); + + auto Result = m_OtelHttp.Post("/v1/logs", Payload); + } + + { + std::string Data = m_Encoder.FormatOtelMetrics(); + + IoBuffer Payload{IoBuffer::Wrap, Data.data(), Data.size()}; + Payload.SetContentType(ZenContentType::kProtobuf); + + auto Result = m_OtelHttp.Post("/v1/metrics", Payload); + } +} +void +OtelHttpProtobufSink::flush() +{ +} + +} // namespace zen::logging + +#endif diff --git a/src/zenserver/diag/otlphttp.h b/src/zenserver/diag/otlphttp.h new file mode 100644 index 000000000..2281bdcc0 --- /dev/null +++ b/src/zenserver/diag/otlphttp.h @@ -0,0 +1,64 @@ + +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <spdlog/sinks/sink.h> +#include <zencore/zencore.h> +#include <zenhttp/httpclient.h> +#include <zentelemetry/otlpencoder.h> +#include <zentelemetry/otlptrace.h> + +#if ZEN_WITH_OTEL + +namespace zen::logging { + +/** + * OTLP/HTTP sink for spdlog + * + * Sends log messages and traces to an OpenTelemetry collector via OTLP over HTTP + */ + +class OtelHttpProtobufSink : public spdlog::sinks::sink +{ +public: + // Note that this URI should be the base URI of the OTLP HTTP endpoint, e.g. + // "http://otel-collector:4318" + OtelHttpProtobufSink(const std::string_view& Uri); + ~OtelHttpProtobufSink(); + + OtelHttpProtobufSink(const OtelHttpProtobufSink&) = delete; + OtelHttpProtobufSink& operator=(const OtelHttpProtobufSink&) = delete; + +private: + virtual void log(const spdlog::details::log_msg& Msg) override; + virtual void flush() override; + virtual void set_pattern(const std::string& pattern) override { ZEN_UNUSED(pattern); } + virtual void set_formatter(std::unique_ptr<spdlog::formatter> sink_formatter) override { ZEN_UNUSED(sink_formatter); } + + void RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans); + + // This is just a thin wrapper to call back into the sink while participating in + // reference counting from the OTEL trace back-end + class TraceRecorder : public zen::otel::TraceRecorder + { + public: + TraceRecorder(OtelHttpProtobufSink* InSink) : m_Sink(InSink) {} + + private: + TraceRecorder(const TraceRecorder&) = delete; + TraceRecorder& operator=(const TraceRecorder&) = delete; + + virtual void RecordSpans(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) override; + + OtelHttpProtobufSink* m_Sink; + }; + + HttpClient m_OtelHttp; + OtlpEncoder m_Encoder; + Ref<TraceRecorder> m_TraceRecorder; +}; + +} // namespace zen::logging + +#endif
\ No newline at end of file diff --git a/src/zenserver/storage/buildstore/httpbuildstore.h b/src/zenserver/storage/buildstore/httpbuildstore.h index 50cb5db12..e10986411 100644 --- a/src/zenserver/storage/buildstore/httpbuildstore.h +++ b/src/zenserver/storage/buildstore/httpbuildstore.h @@ -2,10 +2,10 @@ #pragma once -#include <zencore/stats.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> #include <zenhttp/httpstatus.h> +#include <zentelemetry/stats.h> #include <filesystem> diff --git a/src/zenserver/storage/cache/httpstructuredcache.h b/src/zenserver/storage/cache/httpstructuredcache.h index a157148c9..5a795c215 100644 --- a/src/zenserver/storage/cache/httpstructuredcache.h +++ b/src/zenserver/storage/cache/httpstructuredcache.h @@ -2,12 +2,12 @@ #pragma once -#include <zencore/stats.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> #include <zenhttp/httpstatus.h> #include <zenstore/cache/cache.h> #include <zenstore/cache/cacherpc.h> +#include <zentelemetry/stats.h> #include <zenutil/openprocesscache.h> #include <memory> diff --git a/src/zenserver/storage/projectstore/httpprojectstore.h b/src/zenserver/storage/projectstore/httpprojectstore.h index f0a0bcfa1..f6fe63614 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.h +++ b/src/zenserver/storage/projectstore/httpprojectstore.h @@ -2,11 +2,11 @@ #pragma once -#include <zencore/stats.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> #include <zenhttp/httpstatus.h> #include <zenstore/cidstore.h> +#include <zentelemetry/stats.h> namespace zen { diff --git a/src/zenserver/storage/upstream/upstreamcache.cpp b/src/zenserver/storage/upstream/upstreamcache.cpp index f7ae5f973..6c489c5d3 100644 --- a/src/zenserver/storage/upstream/upstreamcache.cpp +++ b/src/zenserver/storage/upstream/upstreamcache.cpp @@ -9,10 +9,10 @@ #include <zencore/compactbinarypackage.h> #include <zencore/compactbinaryvalidation.h> #include <zencore/fmtutils.h> -#include <zencore/stats.h> #include <zencore/stream.h> #include <zencore/timer.h> #include <zencore/trace.h> +#include <zentelemetry/stats.h> #include <zenhttp/httpclientauth.h> #include <zenhttp/packageformat.h> diff --git a/src/zenserver/storage/upstream/upstreamcache.h b/src/zenserver/storage/upstream/upstreamcache.h index d5d61c8d9..c0c8a7ff9 100644 --- a/src/zenserver/storage/upstream/upstreamcache.h +++ b/src/zenserver/storage/upstream/upstreamcache.h @@ -6,10 +6,10 @@ #include <zencore/compress.h> #include <zencore/iobuffer.h> #include <zencore/iohash.h> -#include <zencore/stats.h> #include <zencore/zencore.h> #include <zenstore/cache/cache.h> #include <zenstore/cache/upstreamcacheclient.h> +#include <zentelemetry/stats.h> #include <atomic> #include <chrono> diff --git a/src/zenserver/storage/workspaces/httpworkspaces.h b/src/zenserver/storage/workspaces/httpworkspaces.h index 89a8e8bdc..888a34b4d 100644 --- a/src/zenserver/storage/workspaces/httpworkspaces.h +++ b/src/zenserver/storage/workspaces/httpworkspaces.h @@ -2,10 +2,10 @@ #pragma once -#include <zencore/stats.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> #include <zenhttp/httpstatus.h> +#include <zentelemetry/stats.h> namespace zen { diff --git a/src/zenserver/xmake.lua b/src/zenserver/xmake.lua index 57105045d..483bfd5aa 100644 --- a/src/zenserver/xmake.lua +++ b/src/zenserver/xmake.lua @@ -7,6 +7,7 @@ target("zenserver") "zennet", "zenremotestore", "zenstore", + "zentelemetry", "zenutil", "zenvfs") add_headerfiles("**.h") @@ -17,6 +18,8 @@ target("zenserver") add_includedirs(".") set_symbols("debug") + add_deps("protozero") + if is_mode("release") then set_optimize("fastest") end diff --git a/src/zenstore/cidstore.cpp b/src/zenstore/cidstore.cpp index ae1b59dc0..52d5df061 100644 --- a/src/zenstore/cidstore.cpp +++ b/src/zenstore/cidstore.cpp @@ -6,9 +6,9 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> -#include <zencore/stats.h> #include <zencore/string.h> #include <zenstore/scrubcontext.h> +#include <zentelemetry/stats.h> #include "cas.h" diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index 10c61681b..1b501e9ae 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -5,10 +5,10 @@ #include "cacheshared.h" #include <zencore/compactbinary.h> -#include <zencore/stats.h> #include <zenstore/accesstime.h> #include <zenstore/blockstore.h> #include <zenstore/caslog.h> +#include <zentelemetry/stats.h> ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 1ba469431..75692cfcd 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -4,10 +4,10 @@ #include <zencore/compactbinary.h> #include <zencore/iohash.h> -#include <zencore/stats.h> #include <zenstore/cache/cache.h> #include <zenstore/cache/cachedisklayer.h> #include <zenstore/gc.h> +#include <zentelemetry/stats.h> #include <zenutil/statsreporter.h> #include <atomic> diff --git a/src/zenstore/include/zenstore/cache/upstreamcacheclient.h b/src/zenstore/include/zenstore/cache/upstreamcacheclient.h index 2f3b6b0d7..ff4a8c3f7 100644 --- a/src/zenstore/include/zenstore/cache/upstreamcacheclient.h +++ b/src/zenstore/include/zenstore/cache/upstreamcacheclient.h @@ -6,9 +6,9 @@ #include <zencore/compress.h> #include <zencore/iobuffer.h> #include <zencore/iohash.h> -#include <zencore/stats.h> #include <zencore/zencore.h> #include <zenstore/cache/cache.h> +#include <zentelemetry/stats.h> #include <functional> #include <memory> diff --git a/src/zenstore/include/zenstore/cidstore.h b/src/zenstore/include/zenstore/cidstore.h index 8918b119f..d54062476 100644 --- a/src/zenstore/include/zenstore/cidstore.h +++ b/src/zenstore/include/zenstore/cidstore.h @@ -5,8 +5,8 @@ #include "zenstore.h" #include <zencore/iohash.h> -#include <zencore/stats.h> #include <zenstore/hashkeyset.h> +#include <zentelemetry/stats.h> #include <zenutil/statsreporter.h> #include <filesystem> diff --git a/src/zentelemetry-test/xmake.lua b/src/zentelemetry-test/xmake.lua new file mode 100644 index 000000000..bdc60cee8 --- /dev/null +++ b/src/zentelemetry-test/xmake.lua @@ -0,0 +1,9 @@ +-- Copyright Epic Games, Inc. All Rights Reserved. + +target("zentelemetry-test") + set_kind("binary") + set_group("tests") + add_headerfiles("**.h") + add_files("*.cpp") + add_deps("zentelemetry") + add_packages("vcpkg::doctest") diff --git a/src/zentelemetry-test/zentelemetry-test.cpp b/src/zentelemetry-test/zentelemetry-test.cpp new file mode 100644 index 000000000..c8b067226 --- /dev/null +++ b/src/zentelemetry-test/zentelemetry-test.cpp @@ -0,0 +1,42 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zencore/filesystem.h> +#include <zencore/logging.h> +#include <zencore/trace.h> +#include <zentelemetry/zentelemetry.h> + +#include <zencore/memory/newdelete.h> + +#if ZEN_WITH_TESTS +# define ZEN_TEST_WITH_RUNNER 1 +# include <zencore/testing.h> +# include <zencore/process.h> +#endif + +int +main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) +{ +#if ZEN_WITH_TESTS + zen::zentelemetry_forcelinktests(); + +# if ZEN_PLATFORM_LINUX + zen::IgnoreChildSignals(); +# endif + +# if ZEN_WITH_TRACE + zen::TraceInit("zenstore-test"); + zen::TraceOptions TraceCommandlineOptions; + if (GetTraceOptionsFromCommandline(TraceCommandlineOptions)) + { + TraceConfigure(TraceCommandlineOptions); + } +# endif // ZEN_WITH_TRACE + + zen::logging::InitializeLogging(); + zen::MaximizeOpenFileCount(); + + return ZEN_RUN_TESTS(argc, argv); +#else + return 0; +#endif +} diff --git a/src/zentelemetry/include/zentelemetry/otlpencoder.h b/src/zentelemetry/include/zentelemetry/otlpencoder.h new file mode 100644 index 000000000..ed6665781 --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/otlpencoder.h @@ -0,0 +1,66 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/thread.h> +#include <zentelemetry/otlptrace.h> +#include <string> +#include <string_view> +#include <unordered_map> + +#if ZEN_WITH_OTEL + +# include <protozero/pbf_builder.hpp> +# include <protozero/types.hpp> + +namespace spdlog { namespace details { + struct log_msg; +}} // namespace spdlog::details + +namespace zen::otel { +enum class Resource : protozero::pbf_tag_type; +} + +namespace zen { + +/** + * Encoder for OpenTelemetry OTLP Protobuf format + * + * Designed to encode log messages and metrics into the OTLP Protobuf format + * for transmission to an OpenTelemetry collector or compatible backend. + * + * Currently, only log messages and basic resource attributes are supported. + * + * Some initial support for metrics encoding is also included. + * + * Transmission is expected to be handled externally, e.g. via HTTP client. + * + */ + +class OtlpEncoder +{ +public: + OtlpEncoder(); + ~OtlpEncoder(); + + void AddResourceAttribute(const std::string_view& Key, const std::string_view& Value); + void AddResourceAttribute(const std::string_view& Key, int64_t Value); + + std::string FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const; + std::string FormatOtelMetrics() const; + std::string FormatOtelTrace(zen::otel::TraceId Trace, std::span<const zen::otel::Span*> Spans) const; + + OtlpEncoder& operator=(const OtlpEncoder&) = delete; + OtlpEncoder(const OtlpEncoder&) = delete; + +private: + mutable RwLock m_ResourceLock; + std::unordered_map<std::string, std::string> m_ResourceAttributes; + std::unordered_map<std::string, int64_t> m_ResourceIntAttributes; + + void AppendResourceAttributes(protozero::pbf_builder<otel::Resource>& Res) const; +}; + +} // namespace zen + +#endif diff --git a/src/zentelemetry/include/zentelemetry/otlptrace.h b/src/zentelemetry/include/zentelemetry/otlptrace.h new file mode 100644 index 000000000..ebecff91a --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/otlptrace.h @@ -0,0 +1,268 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/refcount.h> +#include <zencore/uid.h> + +#include <span> +#include <string> + +#define ZEN_WITH_OTEL 1 + +#if ZEN_WITH_OTEL +# define ZEN_CONCAT(a, b) a##b +# define IMPL_ZEN_OTEL_SPAN(Name, Line) ::zen::otel::ScopedSpan ZEN_CONCAT(Span$, Line)(Name); +# define ZEN_OTEL_SPAN(Name) IMPL_ZEN_OTEL_SPAN(Name, __LINE__) +#else +# define ZEN_OTEL_SPAN(Name) +#endif + +namespace zen { +class MemoryArena; +} + +#if ZEN_WITH_OTEL + +namespace zen::otel { + +using AttributeList = std::span<std::pair<std::string, std::string>>; + +class Tracer; + +Tracer& GetTracer(); + +// OLTP Span ID + +struct SpanId +{ + constexpr static size_t kSize = 8; + + inline SpanId() : Id{0} {} + explicit SpanId(const std::span<const uint8_t, 8> Bytes) noexcept { std::copy(Bytes.begin(), Bytes.end(), Id); } + explicit SpanId(const Oid& InId) noexcept { memcpy(Id, reinterpret_cast<const uint8_t*>(InId.OidBits), sizeof Id); } + + auto operator<=>(const SpanId& Rhs) const = default; + inline operator bool() const { return *this != SpanId(); } + + static SpanId NewSpanId(); + + const char* GetData() const { return reinterpret_cast<const char*>(Id); } + +private: + uint8_t Id[kSize]; +}; + +// OLTP Trace ID + +struct TraceId +{ + constexpr static size_t kSize = 16; + + std::span<const uint8_t> GetBytes() const { return std::span<const uint8_t>(Id, kSize); } + + inline TraceId() noexcept : Id{0} {} + explicit TraceId(const std::span<const uint8_t, kSize> Bytes) noexcept { std::copy(Bytes.begin(), Bytes.end(), Id); } + + auto operator<=>(const TraceId& Rhs) const = default; + inline operator bool() const { return *this != TraceId(); } + + static TraceId NewTraceId(); + + inline const char* GetData() const { return reinterpret_cast<const char*>(Id); } + +private: + uint8_t Id[kSize]; +}; + +struct AttributePair +{ + const char* Key = nullptr; + union + { + const char* StringValue; + uint64_t NumericValue; + } Value; + + uint64_t Flags = 0; + + enum + { + kFlags_IsNumeric = 1 << 0, + kFlags_IsString = 1 << 1 + }; + + bool IsNumeric() const { return (Flags & kFlags_IsNumeric) != 0; } + uint64_t GetNumericValue() const { return Value.NumericValue; } + void SetNumericValue(uint64_t InValue) + { + Value.NumericValue = InValue; + Flags |= kFlags_IsNumeric; + } + + bool IsString() const { return (Flags & kFlags_IsString) != 0; } + const char* GetStringValue() const { return Value.StringValue; } + void SetStringValue(const char* InValue) + { + Value.StringValue = InValue; + Flags |= kFlags_IsString; + } + + AttributePair* Next = nullptr; +}; + +struct Event +{ + Event* NextEvent = nullptr; + uint64_t Timestamp = 0; + const char* Name = nullptr; + AttributePair* Attributes = nullptr; +}; + +/** Span within a trace + * + * A span represents a single operation within a trace. Spans can be nested + * to form a trace tree. + * + */ + +struct Span final : public TRefCounted<Span> +{ + Span& operator=(const Span&) = delete; + Span(const Span&) = delete; + + void AddEvent(std::string_view Name); + void AddEvent(std::string_view Name, uint64_t Timestamp); + void AddEvent(std::string_view Name, const AttributeList& Attributes); + + void AddAttribute(std::string_view Key, std::string_view Value); + void AddAttribute(std::string_view Key, uint64_t Value); + void AddAttributes(const AttributeList& Attributes); + + void End(); + + enum class Kind : uint8_t // This must match otel::SpanKind + { + kInternal = 1, + kServer = 2, + kClient = 3, + kProducer = 4, + kConsumer = 5 + }; + + Kind GetKind() const { return m_Kind; } + void SetKind(Kind InKind) { m_Kind = InKind; } + + SpanId GetSpanId() const { return m_SpanId; } + const char* GetName() const { return m_Name; } + uint64_t GetStartTime() const { return m_StartTime; } + uint64_t GetEndTime() const { return m_EndTime; } + const Span* GetParentSpan() const { return m_ParentSpan; } + const AttributePair* GetAttributes() const { return m_Attributes; } + const Event* GetEvents() const { return m_Events; } + + // This is used to get the current span/trace ID for logging purposes + static SpanId GetCurrentSpanId(TraceId& OutTraceId); + static Span* GetCurrentSpan(); + +protected: + void* operator new(size_t Size) = delete; + void* operator new(size_t Size, MemoryArena& Arena); + void operator delete(void* Ptr) = delete; + void operator delete(void* Ptr, MemoryArena& Arena); + + friend class ScopedSpan; + friend class Tracer; + friend class TRefCounted<Span>; + + void DeleteThis() noexcept { End(); } + +private: + // Note: for now there's no locking on the span. To support spans across + // threads, we'll need to add locking around event addition and ending + // and linking of child spans etc + + MemoryArena& m_Arena; + Span* m_NextSibling = nullptr; + Span* m_FirstChild = nullptr; + Span* m_ParentSpan = nullptr; + Span* m_SpanChain = nullptr; + Event* m_Events = nullptr; + AttributePair* m_Attributes = nullptr; + SpanId m_SpanId; + const char* m_Name = nullptr; + uint64_t m_StartTime = 0; + uint64_t m_EndTime = 0; + Kind m_Kind = Kind::kInternal; + bool m_Ended = false; + + Span(MemoryArena& Arena, std::string_view Name, Span* SpanChain); + ~Span(); +}; + +/** Scoped span helper + * + * Automatically ends the span when it goes out of scope + */ + +class ScopedSpan final +{ +public: + ScopedSpan(std::string_view Name); + ScopedSpan() = delete; + ~ScopedSpan(); + + ScopedSpan& operator=(ScopedSpan&& Rhs) = default; + ScopedSpan& operator=(const ScopedSpan& Rhs) = default; + ScopedSpan(ScopedSpan&& Rhs) = default; + ScopedSpan(const ScopedSpan& Rhs) = default; + + Span* operator->() const { return m_Span.Get(); } + +private: + ScopedSpan(Span* InSpan, Tracer* InTracer); + + Ref<Tracer> m_Tracer; // This needs to precede the span ref to ensure proper destruction order + Ref<Span> m_Span; + + friend class Tracer; +}; + +/** + * Tracer for creating spans and managing trace state. + * + * This represents a single trace context identified by a trace ID, + * and is at the root of all spans created within that trace. + * + */ + +class Tracer final : public RefCounted +{ +public: + static ScopedSpan CreateSpan(std::string_view Name); + +private: + struct Impl; + Impl* m_Impl; + + Tracer(); + ~Tracer(); + + static Tracer* GetTracer(); + + friend class ScopedSpan; +}; + +class TraceRecorder : public RefCounted +{ +public: + virtual void RecordSpans(TraceId Trace, std::span<const Span*> Spans) = 0; +}; + +void SetTraceRecorder(Ref<TraceRecorder> Recorder); +bool IsRecording(); + +void otlptrace_forcelink(); + +} // namespace zen::otel +#endif diff --git a/src/zencore/include/zencore/stats.h b/src/zentelemetry/include/zentelemetry/stats.h index f232cf2f4..3e67bac1c 100644 --- a/src/zencore/include/zencore/stats.h +++ b/src/zentelemetry/include/zentelemetry/stats.h @@ -2,7 +2,7 @@ #pragma once -#include "zencore.h" +#include "zentelemetry.h" #include <zenbase/concepts.h> diff --git a/src/zentelemetry/include/zentelemetry/zentelemetry.h b/src/zentelemetry/include/zentelemetry/zentelemetry.h new file mode 100644 index 000000000..fc811974b --- /dev/null +++ b/src/zentelemetry/include/zentelemetry/zentelemetry.h @@ -0,0 +1,9 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +namespace zen { + +void zentelemetry_forcelinktests(); + +} diff --git a/src/zentelemetry/otellogprotozero.h b/src/zentelemetry/otellogprotozero.h new file mode 100644 index 000000000..1e2e85d2d --- /dev/null +++ b/src/zentelemetry/otellogprotozero.h @@ -0,0 +1,287 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "otelprotozero.h" + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +///////////////////////////////////////////////////////////////////////////////////// +// logs/v1 + +// LogsData represents the logs data that can be stored in a persistent storage, +// OR can be embedded by other protocols that transfer OTLP logs data but do not +// implement the OTLP protocol. +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message LogsData { + // An array of ResourceLogs. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceLogs resource_logs = 1; +} + +// A collection of ScopeLogs from a Resource. +message ResourceLogs { + reserved 1000; + + // The resource for the logs in this message. + // If this field is not set then resource info is unknown. + opentelemetry.proto.resource.v1.Resource resource = 1; + + // A list of ScopeLogs that originate from a resource. + repeated ScopeLogs scope_logs = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/<version>. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_logs" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Logs produced by a Scope. +message ScopeLogs { + // The instrumentation scope information for the logs in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of log records. + repeated LogRecord log_records = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the log data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/<version>. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all logs in the "logs" field. + string schema_url = 3; +} + +// Possible values for LogRecord.SeverityNumber. +enum SeverityNumber { + // UNSPECIFIED is the default SeverityNumber, it MUST NOT be used. + SEVERITY_NUMBER_UNSPECIFIED = 0; + SEVERITY_NUMBER_TRACE = 1; + SEVERITY_NUMBER_TRACE2 = 2; + SEVERITY_NUMBER_TRACE3 = 3; + SEVERITY_NUMBER_TRACE4 = 4; + SEVERITY_NUMBER_DEBUG = 5; + SEVERITY_NUMBER_DEBUG2 = 6; + SEVERITY_NUMBER_DEBUG3 = 7; + SEVERITY_NUMBER_DEBUG4 = 8; + SEVERITY_NUMBER_INFO = 9; + SEVERITY_NUMBER_INFO2 = 10; + SEVERITY_NUMBER_INFO3 = 11; + SEVERITY_NUMBER_INFO4 = 12; + SEVERITY_NUMBER_WARN = 13; + SEVERITY_NUMBER_WARN2 = 14; + SEVERITY_NUMBER_WARN3 = 15; + SEVERITY_NUMBER_WARN4 = 16; + SEVERITY_NUMBER_ERROR = 17; + SEVERITY_NUMBER_ERROR2 = 18; + SEVERITY_NUMBER_ERROR3 = 19; + SEVERITY_NUMBER_ERROR4 = 20; + SEVERITY_NUMBER_FATAL = 21; + SEVERITY_NUMBER_FATAL2 = 22; + SEVERITY_NUMBER_FATAL3 = 23; + SEVERITY_NUMBER_FATAL4 = 24; +} + +// LogRecordFlags represents constants used to interpret the +// LogRecord.flags field, which is protobuf 'fixed32' type and is to +// be used as bit-fields. Each non-zero value defined in this enum is +// a bit-mask. To extract the bit-field, for example, use an +// expression like: +// +// (logRecord.flags & LOG_RECORD_FLAGS_TRACE_FLAGS_MASK) +// +enum LogRecordFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + LOG_RECORD_FLAGS_DO_NOT_USE = 0; + + // Bits 0-7 are used for trace flags. + LOG_RECORD_FLAGS_TRACE_FLAGS_MASK = 0x000000FF; + + // Bits 8-31 are reserved for future use. +} + +// A log record according to OpenTelemetry Log Data Model: +// https://github.com/open-telemetry/oteps/blob/main/text/logs/0097-log-data-model.md +message LogRecord { + reserved 4; + + // time_unix_nano is the time when the event occurred. + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // Value of 0 indicates unknown or missing timestamp. + fixed64 time_unix_nano = 1; + + // Time when the event was observed by the collection system. + // For events that originate in OpenTelemetry (e.g. using OpenTelemetry Logging SDK) + // this timestamp is typically set at the generation time and is equal to Timestamp. + // For events originating externally and collected by OpenTelemetry (e.g. using + // Collector) this is the time when OpenTelemetry's code observed the event measured + // by the clock of the OpenTelemetry code. This field MUST be set once the event is + // observed by OpenTelemetry. + // + // For converting OpenTelemetry log data to formats that support only one timestamp or + // when receiving OpenTelemetry log data by recipients that support only one timestamp + // internally the following logic is recommended: + // - Use time_unix_nano if it is present, otherwise use observed_time_unix_nano. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // Value of 0 indicates unknown or missing timestamp. + fixed64 observed_time_unix_nano = 11; + + // Numerical value of the severity, normalized to values described in Log Data Model. + // [Optional]. + SeverityNumber severity_number = 2; + + // The severity text (also known as log level). The original string representation as + // it is known at the source. [Optional]. + string severity_text = 3; + + // A value containing the body of the log record. Can be for example a human-readable + // string message (including multi-line) describing the event in a free form or it can + // be a structured data composed of arrays and maps of other values. [Optional]. + opentelemetry.proto.common.v1.AnyValue body = 5; + + // Additional attributes that describe the specific event occurrence. [Optional]. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated opentelemetry.proto.common.v1.KeyValue attributes = 6; + uint32 dropped_attributes_count = 7; + + // Flags, a bit field. 8 least significant bits are the trace flags as + // defined in W3C Trace Context specification. 24 most significant bits are reserved + // and must be set to 0. Readers must not assume that 24 most significant bits + // will be zero and must correctly mask the bits when reading 8-bit trace flag (use + // flags & LOG_RECORD_FLAGS_TRACE_FLAGS_MASK). [Optional]. + fixed32 flags = 8; + + // A unique identifier for a trace. All logs from the same trace share + // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes OR + // of length other than 16 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is optional. + // + // The receivers SHOULD assume that the log record is not associated with a + // trace if any of the following is true: + // - the field is not present, + // - the field contains an invalid value. + bytes trace_id = 9; + + // A unique identifier for a span within a trace, assigned when the span + // is created. The ID is an 8-byte array. An ID with all zeroes OR of length + // other than 8 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is optional. If the sender specifies a valid span_id then it SHOULD also + // specify a valid trace_id. + // + // The receivers SHOULD assume that the log record is not associated with a + // span if any of the following is true: + // - the field is not present, + // - the field contains an invalid value. + bytes span_id = 10; + + // A unique identifier of event category/type. + // All events with the same event_name are expected to conform to the same + // schema for both their attributes and their body. + // + // Recommended to be fully qualified and short (no longer than 256 characters). + // + // Presence of event_name on the log record identifies this record + // as an event. + // + // [Optional]. + string event_name = 12; +} +// clang-format on +#endif + +namespace zen::otel { + +// Possible values for LogRecord.SeverityNumber. +enum SeverityNumber +{ + // UNSPECIFIED is the default SeverityNumber, it MUST NOT be used. + SEVERITY_NUMBER_UNSPECIFIED = 0, + SEVERITY_NUMBER_TRACE = 1, + SEVERITY_NUMBER_TRACE2 = 2, + SEVERITY_NUMBER_TRACE3 = 3, + SEVERITY_NUMBER_TRACE4 = 4, + SEVERITY_NUMBER_DEBUG = 5, + SEVERITY_NUMBER_DEBUG2 = 6, + SEVERITY_NUMBER_DEBUG3 = 7, + SEVERITY_NUMBER_DEBUG4 = 8, + SEVERITY_NUMBER_INFO = 9, + SEVERITY_NUMBER_INFO2 = 10, + SEVERITY_NUMBER_INFO3 = 11, + SEVERITY_NUMBER_INFO4 = 12, + SEVERITY_NUMBER_WARN = 13, + SEVERITY_NUMBER_WARN2 = 14, + SEVERITY_NUMBER_WARN3 = 15, + SEVERITY_NUMBER_WARN4 = 16, + SEVERITY_NUMBER_ERROR = 17, + SEVERITY_NUMBER_ERROR2 = 18, + SEVERITY_NUMBER_ERROR3 = 19, + SEVERITY_NUMBER_ERROR4 = 20, + SEVERITY_NUMBER_FATAL = 21, + SEVERITY_NUMBER_FATAL2 = 22, + SEVERITY_NUMBER_FATAL3 = 23, + SEVERITY_NUMBER_FATAL4 = 24 +}; + +enum class LogRecord : protozero::pbf_tag_type +{ + required_fixed64_time_unix_nano = 1, + optional_SeverityNumber_severity_number = 2, + optional_string_severity_text = 3, + optional_anyvalue_body = 5, + optional_repeated_kv_attributes = 6, + optional_uint32_dropped_attributes_count = 7, + optional_fixed32_flags = 8, + optional_bytes_trace_id = 9, + optional_bytes_span_id = 10, + optional_fixed64_observed_time_unix_nano = 11, + optional_event_name = 12, +}; + +enum class ScopeLogs : protozero::pbf_tag_type +{ + required_InstrumentationScope_scope = 1, + required_repeated_LogRecord_log_records = 2, + optional_string_schema_url = 3 +}; + +enum class ResourceLogs : protozero::pbf_tag_type +{ + optional_Resource_resource = 1, + required_repeated_ScopeLogs_scope_logs = 2, + optional_string_schema_url = 3 +}; + +enum class LogsData : protozero::pbf_tag_type +{ + required_repeated_ResourceLogs_resource_logs = 1 +}; + +} // namespace zen::otel diff --git a/src/zentelemetry/otelmetricsprotozero.h b/src/zentelemetry/otelmetricsprotozero.h new file mode 100644 index 000000000..12bae0d75 --- /dev/null +++ b/src/zentelemetry/otelmetricsprotozero.h @@ -0,0 +1,953 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "otelprotozero.h" + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +////////////////////////////////////////////////////////////////////////// +// metrics/v1/metrics.proto +// + +// Copyright 2019, OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package opentelemetry.proto.metrics.v1; + +import "opentelemetry/proto/common/v1/common.proto"; +import "opentelemetry/proto/resource/v1/resource.proto"; + +option csharp_namespace = "OpenTelemetry.Proto.Metrics.V1"; +option java_multiple_files = true; +option java_package = "io.opentelemetry.proto.metrics.v1"; +option java_outer_classname = "MetricsProto"; +option go_package = "go.opentelemetry.io/proto/otlp/metrics/v1"; + +// MetricsData represents the metrics data that can be stored in a persistent +// storage, OR can be embedded by other protocols that transfer OTLP metrics +// data but do not implement the OTLP protocol. +// +// MetricsData +// └─── ResourceMetrics +// ├── Resource +// ├── SchemaURL +// └── ScopeMetrics +// ├── Scope +// ├── SchemaURL +// └── Metric +// ├── Name +// ├── Description +// ├── Unit +// └── data +// ├── Gauge +// ├── Sum +// ├── Histogram +// ├── ExponentialHistogram +// └── Summary +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message MetricsData { + // An array of ResourceMetrics. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceMetrics resource_metrics = 1; +} + +// A collection of ScopeMetrics from a Resource. +message ResourceMetrics { + reserved 1000; + + // The resource for the metrics in this message. + // If this field is not set then no resource info is known. + opentelemetry.proto.resource.v1.Resource resource = 1; + + // A list of metrics that originate from a resource. + repeated ScopeMetrics scope_metrics = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/<version>. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_metrics" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Metrics produced by an Scope. +message ScopeMetrics { + // The instrumentation scope information for the metrics in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of metrics that originate from an instrumentation library. + repeated Metric metrics = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the metric data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/<version>. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all metrics in the "metrics" field. + string schema_url = 3; +} + +// Defines a Metric which has one or more timeseries. The following is a +// brief summary of the Metric data model. For more details, see: +// +// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md +// +// The data model and relation between entities is shown in the +// diagram below. Here, "DataPoint" is the term used to refer to any +// one of the specific data point value types, and "points" is the term used +// to refer to any one of the lists of points contained in the Metric. +// +// - Metric is composed of a metadata and data. +// - Metadata part contains a name, description, unit. +// - Data is one of the possible types (Sum, Gauge, Histogram, Summary). +// - DataPoint contains timestamps, attributes, and one of the possible value type +// fields. +// +// Metric +// +------------+ +// |name | +// |description | +// |unit | +------------------------------------+ +// |data |---> |Gauge, Sum, Histogram, Summary, ... | +// +------------+ +------------------------------------+ +// +// Data [One of Gauge, Sum, Histogram, Summary, ...] +// +-----------+ +// |... | // Metadata about the Data. +// |points |--+ +// +-----------+ | +// | +---------------------------+ +// | |DataPoint 1 | +// v |+------+------+ +------+ | +// +-----+ ||label |label |...|label | | +// | 1 |-->||value1|value2|...|valueN| | +// +-----+ |+------+------+ +------+ | +// | . | |+-----+ | +// | . | ||value| | +// | . | |+-----+ | +// | . | +---------------------------+ +// | . | . +// | . | . +// | . | . +// | . | +---------------------------+ +// | . | |DataPoint M | +// +-----+ |+------+------+ +------+ | +// | M |-->||label |label |...|label | | +// +-----+ ||value1|value2|...|valueN| | +// |+------+------+ +------+ | +// |+-----+ | +// ||value| | +// |+-----+ | +// +---------------------------+ +// +// Each distinct type of DataPoint represents the output of a specific +// aggregation function, the result of applying the DataPoint's +// associated function of to one or more measurements. +// +// All DataPoint types have three common fields: +// - Attributes includes key-value pairs associated with the data point +// - TimeUnixNano is required, set to the end time of the aggregation +// - StartTimeUnixNano is optional, but strongly encouraged for DataPoints +// having an AggregationTemporality field, as discussed below. +// +// Both TimeUnixNano and StartTimeUnixNano values are expressed as +// UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. +// +// # TimeUnixNano +// +// This field is required, having consistent interpretation across +// DataPoint types. TimeUnixNano is the moment corresponding to when +// the data point's aggregate value was captured. +// +// Data points with the 0 value for TimeUnixNano SHOULD be rejected +// by consumers. +// +// # StartTimeUnixNano +// +// StartTimeUnixNano in general allows detecting when a sequence of +// observations is unbroken. This field indicates to consumers the +// start time for points with cumulative and delta +// AggregationTemporality, and it should be included whenever possible +// to support correct rate calculation. Although it may be omitted +// when the start time is truly unknown, setting StartTimeUnixNano is +// strongly encouraged. +message Metric { + reserved 4, 6, 8; + + // The name of the metric. + string name = 1; + + // A description of the metric, which can be used in documentation. + string description = 2; + + // The unit in which the metric value is reported. Follows the format + // described by https://unitsofmeasure.org/ucum.html. + string unit = 3; + + // Data determines the aggregation type (if any) of the metric, what is the + // reported value type for the data points, as well as the relatationship to + // the time interval over which they are reported. + oneof data { + Gauge gauge = 5; + Sum sum = 7; + Histogram histogram = 9; + ExponentialHistogram exponential_histogram = 10; + Summary summary = 11; + } + + // Additional metadata attributes that describe the metric. [Optional]. + // Attributes are non-identifying. + // Consumers SHOULD NOT need to be aware of these attributes. + // These attributes MAY be used to encode information allowing + // for lossless roundtrip translation to / from another data model. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated opentelemetry.proto.common.v1.KeyValue metadata = 12; +} + +// Gauge represents the type of a scalar metric that always exports the +// "current value" for every data point. It should be used for an "unknown" +// aggregation. +// +// A Gauge does not support different aggregation temporalities. Given the +// aggregation is unknown, points cannot be combined using the same +// aggregation, regardless of aggregation temporalities. Therefore, +// AggregationTemporality is not included. Consequently, this also means +// "StartTimeUnixNano" is ignored for all data points. +message Gauge { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated NumberDataPoint data_points = 1; +} + +// Sum represents the type of a scalar metric that is calculated as a sum of all +// reported measurements over a time interval. +message Sum { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated NumberDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; + + // Represents whether the sum is monotonic. + bool is_monotonic = 3; +} + +// Histogram represents the type of a metric that is calculated by aggregating +// as a Histogram of all reported measurements over a time interval. +message Histogram { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated HistogramDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; +} + +// ExponentialHistogram represents the type of a metric that is calculated by aggregating +// as a ExponentialHistogram of all reported double measurements over a time interval. +message ExponentialHistogram { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated ExponentialHistogramDataPoint data_points = 1; + + // aggregation_temporality describes if the aggregator reports delta changes + // since last report time, or cumulative changes since a fixed start time. + AggregationTemporality aggregation_temporality = 2; +} + +// Summary metric data are used to convey quantile summaries, +// a Prometheus (see: https://prometheus.io/docs/concepts/metric_types/#summary) +// and OpenMetrics (see: https://github.com/prometheus/OpenMetrics/blob/4dbf6075567ab43296eed941037c12951faafb92/protos/prometheus.proto#L45) +// data type. These data points cannot always be merged in a meaningful way. +// While they can be useful in some applications, histogram data points are +// recommended for new applications. +// Summary metrics do not have an aggregation temporality field. This is +// because the count and sum fields of a SummaryDataPoint are assumed to be +// cumulative values. +message Summary { + // The time series data points. + // Note: Multiple time series may be included (same timestamp, different attributes). + repeated SummaryDataPoint data_points = 1; +} + +// AggregationTemporality defines how a metric aggregator reports aggregated +// values. It describes how those values relate to the time interval over +// which they are aggregated. +enum AggregationTemporality { + // UNSPECIFIED is the default AggregationTemporality, it MUST not be used. + AGGREGATION_TEMPORALITY_UNSPECIFIED = 0; + + // DELTA is an AggregationTemporality for a metric aggregator which reports + // changes since last report time. Successive metrics contain aggregation of + // values from continuous and non-overlapping intervals. + // + // The values for a DELTA metric are based only on the time interval + // associated with one measurement cycle. There is no dependency on + // previous measurements like is the case for CUMULATIVE metrics. + // + // For example, consider a system measuring the number of requests that + // it receives and reports the sum of these requests every second as a + // DELTA metric: + // + // 1. The system starts receiving at time=t_0. + // 2. A request is received, the system measures 1 request. + // 3. A request is received, the system measures 1 request. + // 4. A request is received, the system measures 1 request. + // 5. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+1 with a value of 3. + // 6. A request is received, the system measures 1 request. + // 7. A request is received, the system measures 1 request. + // 8. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0+1 to + // t_0+2 with a value of 2. + AGGREGATION_TEMPORALITY_DELTA = 1; + + // CUMULATIVE is an AggregationTemporality for a metric aggregator which + // reports changes since a fixed start time. This means that current values + // of a CUMULATIVE metric depend on all previous measurements since the + // start time. Because of this, the sender is required to retain this state + // in some form. If this state is lost or invalidated, the CUMULATIVE metric + // values MUST be reset and a new fixed start time following the last + // reported measurement time sent MUST be used. + // + // For example, consider a system measuring the number of requests that + // it receives and reports the sum of these requests every second as a + // CUMULATIVE metric: + // + // 1. The system starts receiving at time=t_0. + // 2. A request is received, the system measures 1 request. + // 3. A request is received, the system measures 1 request. + // 4. A request is received, the system measures 1 request. + // 5. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+1 with a value of 3. + // 6. A request is received, the system measures 1 request. + // 7. A request is received, the system measures 1 request. + // 8. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_0 to + // t_0+2 with a value of 5. + // 9. The system experiences a fault and loses state. + // 10. The system recovers and resumes receiving at time=t_1. + // 11. A request is received, the system measures 1 request. + // 12. The 1 second collection cycle ends. A metric is exported for the + // number of requests received over the interval of time t_1 to + // t_0+1 with a value of 1. + // + // Note: Even though, when reporting changes since last report time, using + // CUMULATIVE is valid, it is not recommended. This may cause problems for + // systems that do not use start_time to determine when the aggregation + // value was reset (e.g. Prometheus). + AGGREGATION_TEMPORALITY_CUMULATIVE = 2; +} + +// DataPointFlags is defined as a protobuf 'uint32' type and is to be used as a +// bit-field representing 32 distinct boolean flags. Each flag defined in this +// enum is a bit-mask. To test the presence of a single flag in the flags of +// a data point, for example, use an expression like: +// +// (point.flags & DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK) == DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK +// +enum DataPointFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + DATA_POINT_FLAGS_DO_NOT_USE = 0; + + // This DataPoint is valid but has no recorded value. This value + // SHOULD be used to reflect explicitly missing data in a series, as + // for an equivalent to the Prometheus "staleness marker". + DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK = 1; + + // Bits 2-31 are reserved for future use. +} + +// NumberDataPoint is a single data point in a timeseries that describes the +// time-varying scalar value of a metric. +message NumberDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 7; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // The value itself. A point is considered invalid when one of the recognized + // value fields is not present inside this oneof. + oneof value { + double as_double = 4; + sfixed64 as_int = 6; + } + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 5; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 8; +} + +// HistogramDataPoint is a single data point in a timeseries that describes the +// time-varying values of a Histogram. A Histogram contains summary statistics +// for a population of values, it may optionally contain the distribution of +// those values across a set of buckets. +// +// If the histogram contains the distribution of values, then both +// "explicit_bounds" and "bucket counts" fields must be defined. +// If the histogram does not contain the distribution of values, then both +// "explicit_bounds" and "bucket_counts" must be omitted and only "count" and +// "sum" are known. +message HistogramDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 9; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be non-negative. This + // value must be equal to the sum of the "count" fields in buckets if a + // histogram is provided. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#histogram + optional double sum = 5; + + // bucket_counts is an optional field contains the count values of histogram + // for each bucket. + // + // The sum of the bucket_counts must equal the value in the count field. + // + // The number of elements in bucket_counts array must be by one greater than + // the number of elements in explicit_bounds array. The exception to this rule + // is when the length of bucket_counts is 0, then the length of explicit_bounds + // must also be 0. + repeated fixed64 bucket_counts = 6; + + // explicit_bounds specifies buckets with explicitly defined bounds for values. + // + // The boundaries for bucket at index i are: + // + // (-infinity, explicit_bounds[i]] for i == 0 + // (explicit_bounds[i-1], explicit_bounds[i]] for 0 < i < size(explicit_bounds) + // (explicit_bounds[i-1], +infinity) for i == size(explicit_bounds) + // + // The values in the explicit_bounds array must be strictly increasing. + // + // Histogram buckets are inclusive of their upper boundary, except the last + // bucket where the boundary is at infinity. This format is intentionally + // compatible with the OpenMetrics histogram definition. + // + // If bucket_counts length is 0 then explicit_bounds length must also be 0, + // otherwise the data point is invalid. + repeated double explicit_bounds = 7; + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 8; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 10; + + // min is the minimum value over (start_time, end_time]. + optional double min = 11; + + // max is the maximum value over (start_time, end_time]. + optional double max = 12; +} + +// ExponentialHistogramDataPoint is a single data point in a timeseries that describes the +// time-varying values of a ExponentialHistogram of double values. A ExponentialHistogram contains +// summary statistics for a population of values, it may optionally contain the +// distribution of those values across a set of buckets. +// +message ExponentialHistogramDataPoint { + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 1; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // The number of values in the population. Must be + // non-negative. This value must be equal to the sum of the "bucket_counts" + // values in the positive and negative Buckets plus the "zero_count" field. + fixed64 count = 4; + + // The sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#histogram + optional double sum = 5; + + // scale describes the resolution of the histogram. Boundaries are + // located at powers of the base, where: + // + // base = (2^(2^-scale)) + // + // The histogram bucket identified by `index`, a signed integer, + // contains values that are greater than (base^index) and + // less than or equal to (base^(index+1)). + // + // The positive and negative ranges of the histogram are expressed + // separately. Negative values are mapped by their absolute value + // into the negative range using the same scale as the positive range. + // + // scale is not restricted by the protocol, as the permissible + // values depend on the range of the data. + sint32 scale = 6; + + // The count of values that are either exactly zero or + // within the region considered zero by the instrumentation at the + // tolerated degree of precision. This bucket stores values that + // cannot be expressed using the standard exponential formula as + // well as values that have been rounded to zero. + // + // Implementations MAY consider the zero bucket to have probability + // mass equal to (zero_count / count). + fixed64 zero_count = 7; + + // positive carries the positive range of exponential bucket counts. + Buckets positive = 8; + + // negative carries the negative range of exponential bucket counts. + Buckets negative = 9; + + // Buckets are a set of bucket counts, encoded in a contiguous array + // of counts. + message Buckets { + // The bucket index of the first entry in the bucket_counts array. + // + // Note: This uses a varint encoding as a simple form of compression. + sint32 offset = 1; + + // An array of count values, where bucket_counts[i] carries + // the count of the bucket at index (offset+i). bucket_counts[i] is the count + // of values greater than base^(offset+i) and less than or equal to + // base^(offset+i+1). + // + // Note: By contrast, the explicit HistogramDataPoint uses + // fixed64. This field is expected to have many buckets, + // especially zeros, so uint64 has been selected to ensure + // varint encoding. + repeated uint64 bucket_counts = 2; + } + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 10; + + // (Optional) List of exemplars collected from + // measurements that were used to form the data point + repeated Exemplar exemplars = 11; + + // The minimum value over (start_time, end_time]. + optional double min = 12; + + // The maximum value over (start_time, end_time]. + optional double max = 13; + + // ZeroThreshold may be optionally set to convey the width of the zero + // region. Where the zero region is defined as the closed interval + // [-ZeroThreshold, ZeroThreshold]. + // When ZeroThreshold is 0, zero count bucket stores values that cannot be + // expressed using the standard exponential formula as well as values that + // have been rounded to zero. + double zero_threshold = 14; +} + +// SummaryDataPoint is a single data point in a timeseries that describes the +// time-varying values of a Summary metric. The count and sum fields represent +// cumulative values. +message SummaryDataPoint { + reserved 1; + + // The set of key/value pairs that uniquely identify the timeseries from + // where this point belongs. The list may be empty (may contain 0 elements). + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 7; + + // StartTimeUnixNano is optional but strongly encouraged, see the + // the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 start_time_unix_nano = 2; + + // TimeUnixNano is required, see the detailed comments above Metric. + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 3; + + // count is the number of values in the population. Must be non-negative. + fixed64 count = 4; + + // sum of the values in the population. If count is zero then this field + // must be zero. + // + // Note: Sum should only be filled out when measuring non-negative discrete + // events, and is assumed to be monotonic over the values of these events. + // Negative events *can* be recorded, but sum should not be filled out when + // doing so. This is specifically to enforce compatibility w/ OpenMetrics, + // see: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#summary + double sum = 5; + + // Represents the value at a given quantile of a distribution. + // + // To record Min and Max values following conventions are used: + // - The 1.0 quantile is equivalent to the maximum value observed. + // - The 0.0 quantile is equivalent to the minimum value observed. + // + // See the following issue for more context: + // https://github.com/open-telemetry/opentelemetry-proto/issues/125 + message ValueAtQuantile { + // The quantile of a distribution. Must be in the interval + // [0.0, 1.0]. + double quantile = 1; + + // The value at the given quantile of a distribution. + // + // Quantile values must NOT be negative. + double value = 2; + } + + // (Optional) list of values at different quantiles of the distribution calculated + // from the current snapshot. The quantiles must be strictly increasing. + repeated ValueAtQuantile quantile_values = 6; + + // Flags that apply to this specific data point. See DataPointFlags + // for the available flags and their meaning. + uint32 flags = 8; +} + +// A representation of an exemplar, which is a sample input measurement. +// Exemplars also hold information about the environment when the measurement +// was recorded, for example the span and trace ID of the active span when the +// exemplar was recorded. +message Exemplar { + reserved 1; + + // The set of key/value pairs that were filtered out by the aggregator, but + // recorded alongside the original measurement. Only key/value pairs that were + // filtered out by the aggregator should be included + repeated opentelemetry.proto.common.v1.KeyValue filtered_attributes = 7; + + // time_unix_nano is the exact time when this exemplar was recorded + // + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + // 1970. + fixed64 time_unix_nano = 2; + + // The value of the measurement that was recorded. An exemplar is + // considered invalid when one of the recognized value fields is not present + // inside this oneof. + oneof value { + double as_double = 3; + sfixed64 as_int = 6; + } + + // (Optional) Span ID of the exemplar trace. + // span_id may be missing if the measurement is not recorded inside a trace + // or if the trace is not sampled. + bytes span_id = 4; + + // (Optional) Trace ID of the exemplar trace. + // trace_id may be missing if the measurement is not recorded inside a trace + // or if the trace is not sampled. + bytes trace_id = 5; +} + +// clang-format on +#endif + +namespace zen::otel { + +// Metrics + +enum class MetricsData : protozero::pbf_tag_type +{ + repeated_ResourceMetrics_resource_metrics = 1 +}; + +enum class ResourceMetrics : protozero::pbf_tag_type +{ + Resource_resource = 1, + repeated_ScopeMetrics_scope_metrics = 2, + string_schema_url = 3 +}; + +// A collection of Metrics produced by an Scope. +enum class ScopeMetrics : protozero::pbf_tag_type +{ + InstrumentationScope_scope = 1, + repeated_Metric_metrics = 2, + string_schema_url = 3 +}; + +enum class Metric : protozero::pbf_tag_type +{ + string_name = 1, + string_description = 2, + string_unit = 3, + + oneof_data_Gauge_gauge = 5, + oneof_data_Sum_sum = 7, + oneof_data_Histogram_histogram = 9, + oneof_data_ExponentialHistogram_exponential_histogram = 10, + oneof_data_Summary_summary = 11, + + repeated_KeyValue_metadata = 12 +}; + +enum class Gauge : protozero::pbf_tag_type +{ + repeated_NumberDataPoint_data_points = 1 +}; + +enum class Sum : protozero::pbf_tag_type +{ + repeated_NumberDataPoint_data_points = 1, + AggregationTemporality_aggregation_temporality = 2, + bool_is_monotonic = 3 +}; + +enum class Histogram : protozero::pbf_tag_type +{ + repeated_HistogramDataPoint_data_points = 1, + AggregationTemporality_aggregation_temporality = 2 +}; + +enum class ExponentialHistogram : protozero::pbf_tag_type +{ + repeated_ExponentialHistogramDataPoint_data_points = 1, + AggregationTemporality_aggregation_temporality = 2 +}; + +enum class Summary : protozero::pbf_tag_type +{ + repeated_SummaryDataPoint_data_points = 1 +}; + +enum AggregationTemporality +{ + AGGREGATION_TEMPORALITY_UNSPECIFIED = 0, + AGGREGATION_TEMPORALITY_DELTA = 1, + AGGREGATION_TEMPORALITY_CUMULATIVE = 2 +}; + +enum DataPointFlags +{ + DATA_POINT_FLAGS_DO_NOT_USE = 0, + DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK = 1, +}; + +enum class NumberDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 7, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + + oneof_value_double_as_double = 4, + oneof_value_sfixed64_as_int = 6, + + repeated_Exemplar_exemplars = 5, + uint32_flags = 8 +}; + +enum class HistogramDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 9, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + fixed64_count = 4, + optional_double_sum = 5, + repeated_fixed64_bucket_counts = 6, + repeated_double_explicit_bounds = 7, + repeated_Exemplar_exemplars = 8, + uint32_flags = 10, + optional_double_min = 11, + optional_double_max = 12 +}; + +enum class Buckets : protozero::pbf_tag_type +{ + sint32_offset = 1, + repeated_uint64_bucket_counts = 2 +}; + +enum class ExponentialHistogramDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 1, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + fixed64_count = 4, + optional_double_sum = 5, + sint32_scale = 6, + fixed64_zero_count = 7, + Buckets_positive = 8, + Buckets_negative = 9, + uint32_flags = 10, + repeated_Exemplar_exemplars = 11, + optional_double_min = 12, + optional_double_max = 13, + double_zero_threshold = 14 +}; + +enum class ValueAtQuantile : protozero::pbf_tag_type +{ + double_quantile = 1, + double_value = 2 +}; + +enum class SummaryDataPoint : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 7, + fixed64_start_time_unix_nano = 2, + fixed64_time_unix_nano = 3, + fixed64_count = 4, + double_sum = 5, + repeated_ValueAtQuantile_quantile_values = 6, + uint32_flags = 8 +}; + +enum class Exemplar : protozero::pbf_tag_type +{ + repeated_KeyValue_filtered_attributes = 7, + fixed64_time_unix_nano = 2, + oneof_value_double_as_double = 3, + oneof_value_sfixed64_as_int = 6, + bytes_span_id = 4, + bytes_trace_id = 5 +}; + +} // namespace zen::otel
\ No newline at end of file diff --git a/src/zentelemetry/otelprotozero.h b/src/zentelemetry/otelprotozero.h new file mode 100644 index 000000000..8ad69e766 --- /dev/null +++ b/src/zentelemetry/otelprotozero.h @@ -0,0 +1,166 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <protozero/types.hpp> + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +//////////////////////////////////////////////////////////////////////// +// resource/v1/resource.proto +// + +// Resource information. +message Resource { + // Set of attributes that describe the resource. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 1; + + // The number of dropped attributes. If the value is 0, then + // no attributes were dropped. + uint32 dropped_attributes_count = 2; + + // Set of entities that participate in this Resource. + // + // Note: keys in the references MUST exist in attributes of this message. + // + // Status: [Development] + repeated opentelemetry.proto.common.v1.EntityRef entity_refs = 3; +} + +///////////////////////////////////////////////////////////////////////// +// common/v1/commmon.proto +// + +// Represents any type of attribute value. AnyValue may contain a +// primitive value such as a string or integer or it may contain an arbitrary nested +// object containing arrays, key-value lists and primitives. +message AnyValue { + // The value is one of the listed fields. It is valid for all values to be unspecified + // in which case this AnyValue is considered to be "empty". + oneof value { + string string_value = 1; + bool bool_value = 2; + int64 int_value = 3; + double double_value = 4; + ArrayValue array_value = 5; + KeyValueList kvlist_value = 6; + bytes bytes_value = 7; + } +} + +// ArrayValue is a list of AnyValue messages. We need ArrayValue as a message +// since oneof in AnyValue does not allow repeated fields. +message ArrayValue { + // Array of values. The array may be empty (contain 0 elements). + repeated AnyValue values = 1; +} + +// KeyValueList is a list of KeyValue messages. We need KeyValueList as a message +// since `oneof` in AnyValue does not allow repeated fields. Everywhere else where we need +// a list of KeyValue messages (e.g. in Span) we use `repeated KeyValue` directly to +// avoid unnecessary extra wrapping (which slows down the protocol). The 2 approaches +// are semantically equivalent. +message KeyValueList { + // A collection of key/value pairs of key-value pairs. The list may be empty (may + // contain 0 elements). + // The keys MUST be unique (it is not allowed to have more than one + // value with the same key). + repeated KeyValue values = 1; +} + +// Represents a key-value pair that is used to store Span attributes, Link +// attributes, etc. +message KeyValue { + // The key name of the pair. + string key = 1; + + // The value of the pair. + AnyValue value = 2; +} + +// InstrumentationScope is a message representing the instrumentation scope information +// such as the fully qualified name and version. +message InstrumentationScope { + // A name denoting the Instrumentation scope. + // An empty instrumentation scope name means the name is unknown. + string name = 1; + + // Defines the version of the instrumentation scope. + // An empty instrumentation scope version means the version is unknown. + string version = 2; + + // Additional attributes that describe the scope. [Optional]. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + repeated KeyValue attributes = 3; + + // The number of attributes that were discarded. Attributes + // can be discarded because their keys are too long or because there are too many + // attributes. If this value is 0, then no attributes were dropped. + uint32 dropped_attributes_count = 4; +} + +// clang-format on +#endif + +////////////////////////////////////////////////////////////////////////// + +namespace zen::otel { + +enum class KeyValueList : protozero::pbf_tag_type +{ + repeated_KeyValue_values = 1 +}; + +enum class KeyValue : protozero::pbf_tag_type +{ + string_key = 1, + AnyValue_value = 2 +}; + +enum class Resource : protozero::pbf_tag_type +{ + repeated_KeyValue_attributes = 1, + uint32_dropped_attributes_count = 2, + repeated_EntityRef_entity_refs = 3 +}; + +enum class AnyValue : protozero::pbf_tag_type +{ + string_string_value = 1, + bool_bool_value = 2, + int64_int_value = 3, + double_double_value = 4, + ArrayValue_array_value = 5, + KeyValueList_kvlist_value = 6, + bytes_bytes_value = 7 +}; + +enum class InstrumentationScope : protozero::pbf_tag_type +{ + string_name = 1, + string_version = 2, + repeated_KeyValue_attributes = 3, + uint32_dropped_attributes_count = 4 +}; + +} // namespace zen::otel diff --git a/src/zentelemetry/oteltraceprotozero.h b/src/zentelemetry/oteltraceprotozero.h new file mode 100644 index 000000000..8b80c8f7f --- /dev/null +++ b/src/zentelemetry/oteltraceprotozero.h @@ -0,0 +1,474 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "otelprotozero.h" + +////////////////////////////////////////////////////////////////////////// +// +// OTEL .proto definitions, for reference +// + +#if 0 + +// clang-format off + +///////////////////////////////////////////////////////////////////////// +// trace/v1/trace.proto +// + +// TracesData represents the traces data that can be stored in a persistent storage, +// OR can be embedded by other protocols that transfer OTLP traces data but do +// not implement the OTLP protocol. +// +// The main difference between this message and collector protocol is that +// in this message there will not be any "control" or "metadata" specific to +// OTLP protocol. +// +// When new fields are added into this message, the OTLP request MUST be updated +// as well. +message TracesData { + // An array of ResourceSpans. + // For data coming from a single resource this array will typically contain + // one element. Intermediary nodes that receive data from multiple origins + // typically batch the data before forwarding further and in that case this + // array will contain multiple elements. + repeated ResourceSpans resource_spans = 1; +} + +// A collection of ScopeSpans from a Resource. +message ResourceSpans { + reserved 1000; + + // The resource for the spans in this message. + // If this field is not set then no resource info is known. + opentelemetry.proto.resource.v1.Resource resource = 1; + + // A list of ScopeSpans that originate from a resource. + repeated ScopeSpans scope_spans = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the resource data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/<version>. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to the data in the "resource" field. It does not apply + // to the data in the "scope_spans" field which have their own schema_url field. + string schema_url = 3; +} + +// A collection of Spans produced by an InstrumentationScope. +message ScopeSpans { + // The instrumentation scope information for the spans in this message. + // Semantically when InstrumentationScope isn't set, it is equivalent with + // an empty instrumentation scope name (unknown). + opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + + // A list of Spans that originate from an instrumentation scope. + repeated Span spans = 2; + + // The Schema URL, if known. This is the identifier of the Schema that the span data + // is recorded in. Notably, the last part of the URL path is the version number of the + // schema: http[s]://server[:port]/path/<version>. To learn more about Schema URL see + // https://opentelemetry.io/docs/specs/otel/schemas/#schema-url + // This schema_url applies to all spans and span events in the "spans" field. + string schema_url = 3; +} + +// A Span represents a single operation performed by a single component of the system. +// +// The next available field id is 17. +message Span { + // A unique identifier for a trace. All spans from the same trace share + // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes OR + // of length other than 16 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is required. + bytes trace_id = 1; + + // A unique identifier for a span within a trace, assigned when the span + // is created. The ID is an 8-byte array. An ID with all zeroes OR of length + // other than 8 bytes is considered invalid (empty string in OTLP/JSON + // is zero-length and thus is also invalid). + // + // This field is required. + bytes span_id = 2; + + // trace_state conveys information about request position in multiple distributed tracing graphs. + // It is a trace_state in w3c-trace-context format: https://www.w3.org/TR/trace-context/#tracestate-header + // See also https://github.com/w3c/distributed-tracing for more details about this field. + string trace_state = 3; + + // The `span_id` of this span's parent span. If this is a root span, then this + // field must be empty. The ID is an 8-byte array. + bytes parent_span_id = 4; + + // Flags, a bit field. + // + // Bits 0-7 (8 least significant bits) are the trace flags as defined in W3C Trace + // Context specification. To read the 8-bit W3C trace flag, use + // `flags & SPAN_FLAGS_TRACE_FLAGS_MASK`. + // + // See https://www.w3.org/TR/trace-context-2/#trace-flags for the flag definitions. + // + // Bits 8 and 9 represent the 3 states of whether a span's parent + // is remote. The states are (unknown, is not remote, is remote). + // To read whether the value is known, use `(flags & SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK) != 0`. + // To read whether the span is remote, use `(flags & SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK) != 0`. + // + // When creating span messages, if the message is logically forwarded from another source + // with an equivalent flags fields (i.e., usually another OTLP span message), the field SHOULD + // be copied as-is. If creating from a source that does not have an equivalent flags field + // (such as a runtime representation of an OpenTelemetry span), the high 22 bits MUST + // be set to zero. + // Readers MUST NOT assume that bits 10-31 (22 most significant bits) will be zero. + // + // [Optional]. + fixed32 flags = 16; + + // A description of the span's operation. + // + // For example, the name can be a qualified method name or a file name + // and a line number where the operation is called. A best practice is to use + // the same display name at the same call point in an application. + // This makes it easier to correlate spans in different traces. + // + // This field is semantically required to be set to non-empty string. + // Empty value is equivalent to an unknown span name. + // + // This field is required. + string name = 5; + + // SpanKind is the type of span. Can be used to specify additional relationships between spans + // in addition to a parent/child relationship. + enum SpanKind { + // Unspecified. Do NOT use as default. + // Implementations MAY assume SpanKind to be INTERNAL when receiving UNSPECIFIED. + SPAN_KIND_UNSPECIFIED = 0; + + // Indicates that the span represents an internal operation within an application, + // as opposed to an operation happening at the boundaries. Default value. + SPAN_KIND_INTERNAL = 1; + + // Indicates that the span covers server-side handling of an RPC or other + // remote network request. + SPAN_KIND_SERVER = 2; + + // Indicates that the span describes a request to some remote service. + SPAN_KIND_CLIENT = 3; + + // Indicates that the span describes a producer sending a message to a broker. + // Unlike CLIENT and SERVER, there is often no direct critical path latency relationship + // between producer and consumer spans. A PRODUCER span ends when the message was accepted + // by the broker while the logical processing of the message might span a much longer time. + SPAN_KIND_PRODUCER = 4; + + // Indicates that the span describes consumer receiving a message from a broker. + // Like the PRODUCER kind, there is often no direct critical path latency relationship + // between producer and consumer spans. + SPAN_KIND_CONSUMER = 5; + } + + // Distinguishes between spans generated in a particular context. For example, + // two spans with the same name may be distinguished using `CLIENT` (caller) + // and `SERVER` (callee) to identify queueing latency associated with the span. + SpanKind kind = 6; + + // The start time of the span. On the client side, this is the time + // kept by the local machine where the span execution starts. On the server side, this + // is the time when the server's application handler starts running. + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // + // This field is semantically required and it is expected that end_time >= start_time. + fixed64 start_time_unix_nano = 7; + + // The end time of the span. On the client side, this is the time + // kept by the local machine where the span execution ends. On the server side, this + // is the time when the server application handler stops running. + // Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January 1970. + // + // This field is semantically required and it is expected that end_time >= start_time. + fixed64 end_time_unix_nano = 8; + + // A collection of key/value pairs. Note, global attributes + // like server name can be set using the resource API. Examples of attributes: + // + // "/http/user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" + // "/http/server_latency": 300 + // "example.com/myattribute": true + // "example.com/score": 10.239 + // + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 9; + + // The number of attributes that were discarded. Attributes + // can be discarded because their keys are too long or because there are too many + // attributes. If this value is 0, then no attributes were dropped. + uint32 dropped_attributes_count = 10; + + // Event is a time-stamped annotation of the span, consisting of user-supplied + // text description and key-value pairs. + message Event { + // The time the event occurred. + fixed64 time_unix_nano = 1; + + // The name of the event. + // This field is semantically required to be set to non-empty string. + string name = 2; + + // A collection of attribute key/value pairs on the event. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 3; + + // The number of dropped attributes. If the value is 0, + // then no attributes were dropped. + uint32 dropped_attributes_count = 4; + } + + // A collection of Event items. + repeated Event events = 11; + + // The number of dropped events. If the value is 0, then no + // events were dropped. + uint32 dropped_events_count = 12; + + // A pointer from the current span to another span in the same trace or in a + // different trace. For example, this can be used in batching operations, + // where a single batch handler processes multiple requests from different + // traces or when the handler receives a request from a different project. + message Link { + // A unique identifier of a trace that this linked span is part of. The ID is a + // 16-byte array. + bytes trace_id = 1; + + // A unique identifier for the linked span. The ID is an 8-byte array. + bytes span_id = 2; + + // The trace_state associated with the link. + string trace_state = 3; + + // A collection of attribute key/value pairs on the link. + // Attribute keys MUST be unique (it is not allowed to have more than one + // attribute with the same key). + // + // The attribute values SHOULD NOT contain empty values. + // The attribute values SHOULD NOT contain bytes values. + // The attribute values SHOULD NOT contain array values different than array of string values, bool values, int values, + // double values. + // The attribute values SHOULD NOT contain kvlist values. + // The behavior of software that receives attributes containing such values can be unpredictable. + // These restrictions can change in a minor release. + // The restrictions take origin from the OpenTelemetry specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. + repeated opentelemetry.proto.common.v1.KeyValue attributes = 4; + + // The number of dropped attributes. If the value is 0, + // then no attributes were dropped. + uint32 dropped_attributes_count = 5; + + // Flags, a bit field. + // + // Bits 0-7 (8 least significant bits) are the trace flags as defined in W3C Trace + // Context specification. To read the 8-bit W3C trace flag, use + // `flags & SPAN_FLAGS_TRACE_FLAGS_MASK`. + // + // See https://www.w3.org/TR/trace-context-2/#trace-flags for the flag definitions. + // + // Bits 8 and 9 represent the 3 states of whether the link is remote. + // The states are (unknown, is not remote, is remote). + // To read whether the value is known, use `(flags & SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK) != 0`. + // To read whether the link is remote, use `(flags & SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK) != 0`. + // + // Readers MUST NOT assume that bits 10-31 (22 most significant bits) will be zero. + // When creating new spans, bits 10-31 (most-significant 22-bits) MUST be zero. + // + // [Optional]. + fixed32 flags = 6; + } + + // A collection of Links, which are references from this span to a span + // in the same or different trace. + repeated Link links = 13; + + // The number of dropped links after the maximum size was + // enforced. If this value is 0, then no links were dropped. + uint32 dropped_links_count = 14; + + // An optional final status for this span. Semantically when Status isn't set, it means + // span's status code is unset, i.e. assume STATUS_CODE_UNSET (code = 0). + Status status = 15; +} + +// The Status type defines a logical error model that is suitable for different +// programming environments, including REST APIs and RPC APIs. +message Status { + reserved 1; + + // A developer-facing human readable error message. + string message = 2; + + // For the semantics of status codes see + // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md#set-status + enum StatusCode { + // The default status. + STATUS_CODE_UNSET = 0; + // The Span has been validated by an Application developer or Operator to + // have completed successfully. + STATUS_CODE_OK = 1; + // The Span contains an error. + STATUS_CODE_ERROR = 2; + }; + + // The status code. + StatusCode code = 3; +} + +// SpanFlags represents constants used to interpret the +// Span.flags field, which is protobuf 'fixed32' type and is to +// be used as bit-fields. Each non-zero value defined in this enum is +// a bit-mask. To extract the bit-field, for example, use an +// expression like: +// +// (span.flags & SPAN_FLAGS_TRACE_FLAGS_MASK) +// +// See https://www.w3.org/TR/trace-context-2/#trace-flags for the flag definitions. +// +// Note that Span flags were introduced in version 1.1 of the +// OpenTelemetry protocol. Older Span producers do not set this +// field, consequently consumers should not rely on the absence of a +// particular flag bit to indicate the presence of a particular feature. +enum SpanFlags { + // The zero value for the enum. Should not be used for comparisons. + // Instead use bitwise "and" with the appropriate mask as shown above. + SPAN_FLAGS_DO_NOT_USE = 0; + + // Bits 0-7 are used for trace flags. + SPAN_FLAGS_TRACE_FLAGS_MASK = 0x000000FF; + + // Bits 8 and 9 are used to indicate that the parent span or link span is remote. + // Bit 8 (`HAS_IS_REMOTE`) indicates whether the value is known. + // Bit 9 (`IS_REMOTE`) indicates whether the span or link is remote. + SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK = 0x00000100; + SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK = 0x00000200; + + // Bits 10-31 are reserved for future use. +} + +// clang-format on +#endif + +namespace zen::otel::pbf { + +// Traces + +enum class TracesData : protozero::pbf_tag_type +{ + repeated_ResourceSpans_resource_spans = 1 +}; + +enum class ResourceSpans : protozero::pbf_tag_type +{ + Resource_resource = 1, + repeated_ScopeSpans_scope_spans = 2, + string_schema_url = 3 +}; + +enum class ScopeSpans : protozero::pbf_tag_type +{ + InstrumentationScope_scope = 1, + repeated_Span_spans = 2, + string_schema_url = 3 +}; + +enum SpanFlags +{ + SPAN_FLAGS_DO_NOT_USE = 0, + SPAN_FLAGS_TRACE_FLAGS_MASK = 0x000000FF, + SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK = 0x00000100, + SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK = 0x00000200, +}; + +enum SpanKind +{ + SPAN_KIND_UNSPECIFIED = 0, + SPAN_KIND_INTERNAL = 1, + SPAN_KIND_SERVER = 2, + SPAN_KIND_CLIENT = 3, + SPAN_KIND_PRODUCER = 4, + SPAN_KIND_CONSUMER = 5 +}; + +enum class Span_Event : protozero::pbf_tag_type +{ + fixed64_time_unix_nano = 1, + string_name = 2, + repeated_KeyValue_attributes = 3, + uint32_dropped_attributes_count = 4 +}; + +enum class Span_Link : protozero::pbf_tag_type +{ + bytes_trace_id = 1, + bytes_span_id = 2, + string_trace_state = 3, + repeated_KeyValue_attributes = 4, + uint32_dropped_attributes_count = 5, + fixed32_flags = 6 +}; + +enum class Span : protozero::pbf_tag_type +{ + required_bytes_trace_id = 1, + required_bytes_span_id = 2, + string_trace_state = 3, + bytes_parent_span_id = 4, + fixed32_flags = 16, + required_string_name = 5, + SpanKind_kind = 6, + required_fixed64_start_time_unix_nano = 7, + required_fixed64_end_time_unix_nano = 8, + repeated_KeyValue_attributes = 9, + uint32_dropped_attributes_count = 10, + repeated_Event_events = 11, + uint32_dropped_events_count = 12, + repeated_Link_links = 13, + uint32_dropped_links_count = 14, + Status_status = 15 +}; + +enum StatusCode +{ + STATUS_CODE_UNSET = 0, + STATUS_CODE_OK = 1, + STATUS_CODE_ERROR = 2 +}; + +enum class Status : protozero::pbf_tag_type +{ + string_message = 2, + StatusCode_code = 3 +}; + +} // namespace zen::otel::pbf diff --git a/src/zentelemetry/otlpencoder.cpp b/src/zentelemetry/otlpencoder.cpp new file mode 100644 index 000000000..677545066 --- /dev/null +++ b/src/zentelemetry/otlpencoder.cpp @@ -0,0 +1,478 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/otlpencoder.h" + +#include <zenbase/zenbase.h> +#include <zentelemetry/otlptrace.h> + +#include <spdlog/sinks/sink.h> +#include <zencore/testing.h> + +#include <protozero/buffer_string.hpp> +#include <protozero/pbf_builder.hpp> + +#include "otellogprotozero.h" +#include "otelmetricsprotozero.h" +#include "otelprotozero.h" +#include "oteltraceprotozero.h" + +#if ZEN_WITH_OTEL + +namespace zen { + +OtlpEncoder::OtlpEncoder() +{ +} + +OtlpEncoder::~OtlpEncoder() +{ +} + +static int +MapSeverity(const spdlog::level::level_enum Level) +{ + switch (Level) + { + case spdlog::level::critical: + return otel::SEVERITY_NUMBER_FATAL; + case spdlog::level::err: + return otel::SEVERITY_NUMBER_ERROR; + case spdlog::level::warn: + return otel::SEVERITY_NUMBER_WARN; + case spdlog::level::info: + return otel::SEVERITY_NUMBER_INFO; + case spdlog::level::debug: + return otel::SEVERITY_NUMBER_DEBUG; + default: + case spdlog::level::trace: + return otel::SEVERITY_NUMBER_TRACE; + } +} + +static const char* +MapSeverityText(const spdlog::level::level_enum Level) +{ + switch (Level) + { + case spdlog::level::critical: + return "fatal"; + case spdlog::level::err: + return "error"; + case spdlog::level::warn: + return "warn"; + case spdlog::level::info: + return "info"; + case spdlog::level::debug: + return "debug"; + default: + case spdlog::level::trace: + return "trace"; + } +} + +std::string +OtlpEncoder::FormatOtelProtobuf(const spdlog::details::log_msg& Msg) const +{ + std::string Data; + + // LogsData + { + protozero::pbf_builder<otel::LogsData> Builder{Data}; + + // ResourceLogs + { + protozero::pbf_builder<otel::ResourceLogs> RlBuilder{Builder, otel::LogsData::required_repeated_ResourceLogs_resource_logs}; + + // ResourceLogs / Resource + { + protozero::pbf_builder<otel::Resource> Res{RlBuilder, otel::ResourceLogs::optional_Resource_resource}; + + AppendResourceAttributes(Res); + } + + // ScopeLogs scope_logs + { + protozero::pbf_builder<otel::ScopeLogs> SlBuilder{RlBuilder, otel::ResourceLogs::required_repeated_ScopeLogs_scope_logs}; + + { + protozero::pbf_builder<otel::InstrumentationScope> IsBuilder{SlBuilder, + otel::ScopeLogs::required_InstrumentationScope_scope}; + + IsBuilder.add_string(otel::InstrumentationScope::string_name, Msg.logger_name.data(), Msg.logger_name.size()); + } + + // LogRecord log_records + { + protozero::pbf_builder<otel::LogRecord> LrBuilder{SlBuilder, otel::ScopeLogs::required_repeated_LogRecord_log_records}; + + LrBuilder.add_fixed64(otel::LogRecord::required_fixed64_time_unix_nano, + std::chrono::duration_cast<std::chrono::nanoseconds>(Msg.time.time_since_epoch()).count()); + + const int Severity = MapSeverity(Msg.level); + + LrBuilder.add_enum(otel::LogRecord::optional_SeverityNumber_severity_number, Severity); + + LrBuilder.add_string(otel::LogRecord::optional_string_severity_text, MapSeverityText(Msg.level)); + + otel::TraceId TraceId; + const otel::SpanId SpanId = otel::Span::GetCurrentSpanId(TraceId); + + if (SpanId && TraceId) + { + LrBuilder.add_bytes(otel::LogRecord::optional_bytes_trace_id, TraceId.GetData(), TraceId.kSize); + LrBuilder.add_bytes(otel::LogRecord::optional_bytes_span_id, SpanId.GetData(), SpanId.kSize); + } + + // body + { + protozero::pbf_builder<otel::AnyValue> BodyBuilder{LrBuilder, otel::LogRecord::optional_anyvalue_body}; + + BodyBuilder.add_string(otel::AnyValue::string_string_value, Msg.payload.data(), Msg.payload.size()); + } + + // attributes + + { + protozero::pbf_builder<otel::KeyValue> KvBuilder{LrBuilder, otel::LogRecord::optional_repeated_kv_attributes}; + KvBuilder.add_string(otel::KeyValue::string_key, "thread_id"); + + { + protozero::pbf_builder<otel::AnyValue> AvBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + + AvBuilder.add_int64(otel::AnyValue::int64_int_value, Msg.thread_id); + } + } + } + } + } + } + + return Data; +} + +std::string +OtlpEncoder::FormatOtelMetrics() const +{ + std::string Data; + +# if 0 + static int64_t LastNanos = 0; + + const int64_t NowNanos = + std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()).count(); + + // MetricsData + { + protozero::pbf_builder<otel::MetricsData> Builder{Data}; + + // ResourceMetrics + protozero::pbf_builder<otel::ResourceMetrics> Rm{Builder, otel::MetricsData::repeated_ResourceMetrics_resource_metrics}; + + { + protozero::pbf_builder<otel::Resource> Res{Rm, otel::ResourceMetrics::Resource_resource}; + + AppendResourceAttributes(Res); + } + + // ScopeMetrics + protozero::pbf_builder<otel::ScopeMetrics> Sm{Rm, otel::ResourceMetrics::repeated_ScopeMetrics_scope_metrics}; + + { + // InstrumentationScope + protozero::pbf_builder<otel::InstrumentationScope> Is{Sm, otel::ScopeMetrics::InstrumentationScope_scope}; + Is.add_string(otel::InstrumentationScope::string_name, "scope_name"); + } + + { + protozero::pbf_builder<otel::Metric> Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "metric_name"); + Metric.add_string(otel::Metric::string_unit, "KiB"); + + // Gauge + { + protozero::pbf_builder<otel::Gauge> Gauge{Metric, otel::Metric::oneof_data_Gauge_gauge}; + + protozero::pbf_builder<otel::NumberDataPoint> Dp{Gauge, otel::Gauge::repeated_NumberDataPoint_data_points}; + Dp.add_fixed64(otel::NumberDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::NumberDataPoint::fixed64_start_time_unix_nano, LastNanos); + Dp.add_sfixed64(otel::NumberDataPoint::oneof_value_sfixed64_as_int, rand() * 470 / RAND_MAX); + } + } + + const int RequestCount = rand() % 600; + + { + protozero::pbf_builder<otel::Metric> Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "request_count"); + Metric.add_string(otel::Metric::string_unit, "requests"); + + static int SumValue = 0; + SumValue += RequestCount; + + // Sum + { + protozero::pbf_builder<otel::Sum> Sum{Metric, otel::Metric::oneof_data_Sum_sum}; + Sum.add_enum(otel::Sum::AggregationTemporality_aggregation_temporality, otel::AGGREGATION_TEMPORALITY_CUMULATIVE); + Sum.add_bool(otel::Sum::bool_is_monotonic, true); + + protozero::pbf_builder<otel::NumberDataPoint> Dp{Sum, otel::Sum::repeated_NumberDataPoint_data_points}; + Dp.add_fixed64(otel::NumberDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::NumberDataPoint::fixed64_start_time_unix_nano, LastNanos); + Dp.add_double(otel::NumberDataPoint::oneof_value_double_as_double, SumValue); + } + } + + { + protozero::pbf_builder<otel::Metric> Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "request_latency"); + Metric.add_string(otel::Metric::string_unit, "ms"); + + // Histogram + { + protozero::pbf_builder<otel::Histogram> Histogram{Metric, otel::Metric::oneof_data_Histogram_histogram}; + Histogram.add_enum(otel::Histogram::AggregationTemporality_aggregation_temporality, + otel::AGGREGATION_TEMPORALITY_CUMULATIVE); + + protozero::pbf_builder<otel::HistogramDataPoint> Dp{Histogram, otel::Histogram::repeated_HistogramDataPoint_data_points}; + Dp.add_fixed64(otel::HistogramDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::HistogramDataPoint::fixed64_start_time_unix_nano, LastNanos); + + // Simulated latency value + + double Sum = 0, Min = 0, Max = 0; + int Buckets[6] = {0}; + + for (int i = 0; i < RequestCount; ++i) + { + const int Latency = rand() % 250; + + if (i == 0 || Latency < Min) + { + Min = Latency; + } + if (i == 0 || Latency > Max) + { + Max = Latency; + } + + Sum += Latency; + + if (Latency >= 0 && Latency < 10) + { + ++Buckets[0]; // [0,10) + } + else if (Latency >= 10 && Latency < 25) + { + ++Buckets[1]; // [10,25) + } + else if (Latency >= 25 && Latency < 50) + { + ++Buckets[2]; // [25,50) + } + else if (Latency >= 50 && Latency < 100) + { + ++Buckets[3]; // [50,100) + } + else if (Latency >= 100 && Latency < 250) + { + ++Buckets[4]; // [100,250) + } + else + { + ++Buckets[5]; // [250,+inf) + } + } + + Dp.add_fixed64(otel::HistogramDataPoint::fixed64_count, RequestCount); + Dp.add_double(otel::HistogramDataPoint::optional_double_sum, Sum); + Dp.add_double(otel::HistogramDataPoint::optional_double_min, Min); + Dp.add_double(otel::HistogramDataPoint::optional_double_max, Max); + + // Bucket bounds + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 10.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 25.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 50.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 100.0); + Dp.add_double(otel::HistogramDataPoint::repeated_double_explicit_bounds, 250.0); + + // Buckets + for (int i = 0; i < 6; ++i) + { + Dp.add_fixed64(otel::HistogramDataPoint::repeated_fixed64_bucket_counts, Buckets[i]); + } + } + } + +# if 0 + { + protozero::pbf_builder<otel::Metric> Metric{Sm, otel::ScopeMetrics::repeated_Metric_metrics}; + Metric.add_string(otel::Metric::string_name, "request_payload"); + Metric.add_string(otel::Metric::string_unit, "KiB"); + + // ExponentialHistogram + { + protozero::pbf_builder<otel::ExponentialHistogram> ExponentialHistogram{ + Metric, otel::Metric::oneof_data_ExponentialHistogram_exponential_histogram}; + ExponentialHistogram.add_enum(otel::ExponentialHistogram::AggregationTemporality_aggregation_temporality, + otel::AGGREGATION_TEMPORALITY_CUMULATIVE); + + protozero::pbf_builder<otel::ExponentialHistogramDataPoint> Dp{ + ExponentialHistogram, otel::ExponentialHistogram::repeated_ExponentialHistogramDataPoint_data_points}; + Dp.add_fixed64(otel::ExponentialHistogramDataPoint::fixed64_time_unix_nano, NowNanos); + Dp.add_fixed64(otel::ExponentialHistogramDataPoint::fixed64_start_time_unix_nano, LastNanos); + + // Simulated payload size values + + int64_t sum = 0; + int count = 0; + + for (int i = 0; i < RequestCount; ++i) + { + const int PayloadSize = rand() % 1'500'000; // [0,1500000) bytes + + sum += PayloadSize; + ++count; + } + + Dp.add_fixed64(otel::ExponentialHistogramDataPoint::fixed64_count, count); + Dp.add_double(otel::ExponentialHistogramDataPoint::optional_double_sum, sum); + Dp.add_sint32(otel::ExponentialHistogramDataPoint::sint32_scale, 4); + } + } +# endif + } + LastNanos = NowNanos; +# endif + return Data; +} + +void +OtlpEncoder::AddResourceAttribute(const std::string_view& Key, const std::string_view& Value) +{ + m_ResourceLock.WithExclusiveLock([&] { m_ResourceAttributes[std::string(Key)] = std::string(Value); }); +} + +void +OtlpEncoder::AddResourceAttribute(const std::string_view& Key, int64_t Value) +{ + m_ResourceLock.WithExclusiveLock([&] { m_ResourceIntAttributes[std::string(Key)] = Value; }); +} + +void +OtlpEncoder::AppendResourceAttributes(protozero::pbf_builder<otel::Resource>& Res) const +{ + using namespace otel; + + m_ResourceLock.WithSharedLock([&] { + for (auto const& [K, V] : m_ResourceAttributes) + { + protozero::pbf_builder<otel::KeyValue> KvBuilder{Res, otel::Resource::repeated_KeyValue_attributes}; + KvBuilder.add_string(otel::KeyValue::string_key, K.c_str()); + protozero::pbf_builder<otel::AnyValue> AnyBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + AnyBuilder.add_string(otel::AnyValue::string_string_value, V.c_str()); + } + + for (auto const& [K, V] : m_ResourceIntAttributes) + { + protozero::pbf_builder<otel::KeyValue> KvBuilder{Res, otel::Resource::repeated_KeyValue_attributes}; + KvBuilder.add_string(otel::KeyValue::string_key, K.c_str()); + protozero::pbf_builder<otel::AnyValue> AnyBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + AnyBuilder.add_int64(otel::AnyValue::int64_int_value, V); + } + }); +} + +template<typename ParentBuilder> +static void +AppendAttributesToBuilder(ParentBuilder& Parent, auto KeyValueField, const otel::AttributePair* Attrs) +{ + for (const otel::AttributePair* Attr = Attrs; Attr != nullptr; Attr = Attr->Next) + { + protozero::pbf_builder<otel::KeyValue> KvBuilder{Parent, KeyValueField}; + KvBuilder.add_string(otel::KeyValue::string_key, Attr->Key); + + protozero::pbf_builder<otel::AnyValue> AnyBuilder{KvBuilder, otel::KeyValue::AnyValue_value}; + + if (Attr->IsNumeric()) + { + AnyBuilder.add_int64(otel::AnyValue::int64_int_value, Attr->GetNumericValue()); + } + else if (Attr->IsString()) + { + AnyBuilder.add_string(otel::AnyValue::string_string_value, Attr->GetStringValue()); + } + } +} + +std::string +OtlpEncoder::FormatOtelTrace(zen::otel::TraceId Id, std::span<const zen::otel::Span*> Spans) const +{ + std::string Data; + + using namespace otel; + + // TracesData + { + protozero::pbf_builder<pbf::TracesData> Builder{Data}; + + { + protozero::pbf_builder<pbf::ResourceSpans> Rs{Builder, pbf::TracesData::repeated_ResourceSpans_resource_spans}; + + { + protozero::pbf_builder<otel::Resource> Res{Rs, pbf::ResourceSpans::Resource_resource}; + + AppendResourceAttributes(Res); + } + + for (const zen::otel::Span* SpanPtr : Spans) + { + protozero::pbf_builder<pbf::ScopeSpans> Ss{Rs, pbf::ResourceSpans::repeated_ScopeSpans_scope_spans}; + + { + // InstrumentationScope + protozero::pbf_builder<otel::InstrumentationScope> Is{Ss, pbf::ScopeSpans::InstrumentationScope_scope}; + Is.add_string(otel::InstrumentationScope::string_name, "scope_name"); + } + + const SpanId ThisSpanId = SpanPtr->GetSpanId(); + + { + protozero::pbf_builder<pbf::Span> Sb{Ss, pbf::ScopeSpans::repeated_Span_spans}; + + Sb.add_bytes(pbf::Span::required_bytes_trace_id, Id.GetData(), TraceId::kSize); + Sb.add_bytes(pbf::Span::required_bytes_span_id, ThisSpanId.GetData(), SpanId::kSize); + // Sb.add_string(pbf::Span::string_trace_state, "state-value"); + // + if (const otel::Span* ParentSpan = SpanPtr->GetParentSpan()) + { + const SpanId ParentSpanId = ParentSpan->GetSpanId(); + Sb.add_bytes(pbf::Span::bytes_parent_span_id, ParentSpanId.GetData(), SpanId::kSize); + } + + Sb.add_fixed32(pbf::Span::fixed32_flags, 0); + Sb.add_string(pbf::Span::required_string_name, SpanPtr->GetName()); + Sb.add_enum(pbf::Span::SpanKind_kind, (int)SpanPtr->GetKind()); + Sb.add_fixed64(pbf::Span::required_fixed64_start_time_unix_nano, SpanPtr->GetStartTime()); + Sb.add_fixed64(pbf::Span::required_fixed64_end_time_unix_nano, SpanPtr->GetEndTime()); + + AppendAttributesToBuilder(Sb, pbf::Span::repeated_KeyValue_attributes, SpanPtr->GetAttributes()); + + for (const otel::Event* Event = SpanPtr->GetEvents(); Event != nullptr; Event = Event->NextEvent) + { + protozero::pbf_builder<pbf::Span_Event> EventBuilder{Sb, pbf::Span::repeated_Event_events}; + EventBuilder.add_fixed64(pbf::Span_Event::fixed64_time_unix_nano, Event->Timestamp); + EventBuilder.add_string(pbf::Span_Event::string_name, Event->Name); + + AppendAttributesToBuilder(EventBuilder, pbf::Span_Event::repeated_KeyValue_attributes, Event->Attributes); + } + } + } + } + } + + return Data; +} + +} // namespace zen + +#endif diff --git a/src/zentelemetry/otlptrace.cpp b/src/zentelemetry/otlptrace.cpp new file mode 100644 index 000000000..b634eacf6 --- /dev/null +++ b/src/zentelemetry/otlptrace.cpp @@ -0,0 +1,398 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/otlptrace.h" +#include "oteltraceprotozero.h" + +#if ZEN_WITH_OTEL + +# include <zencore/endian.h> +# include <zencore/memory/memoryarena.h> +# include <zencore/session.h> +# include <zencore/testing.h> +# include <zencore/uid.h> + +# include <EASTL/list.h> +# include <EASTL/vector.h> + +# include <protozero/pbf_builder.hpp> +# include <random> + +namespace zen::otel { + +////////////////////////////////////////////////////////////////////////// + +uint64_t +NowInNanoseconds() +{ + const uint64_t NowNanos = + std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()).count(); + return static_cast<uint64_t>(NowNanos); +} + +////////////////////////////////////////////////////////////////////////// + +/** + * xorshiftr128+ random number generator + * + * https://en.wikipedia.org/wiki/Xorshift#xorshift+ + */ +class XorShiftr128p +{ +public: + XorShiftr128p() + { + std::seed_seq SeedSeq{1, 2, 3, 5}; + do + { + SeedSeq.generate(reinterpret_cast<uint32_t*>(m_State), reinterpret_cast<uint32_t*>(&m_State[2])); + } while (m_State[0] == 0 && m_State[1] == 0); + } + + uint64_t Next() + { + uint64_t x = m_State[0]; + uint64_t const y = m_State[1]; + m_State[0] = y; + x ^= x << 23; + x ^= x >> 17; + x ^= y; + m_State[1] = x + y; + return x; + } + +private: + uint64_t m_State[2]; +}; + +////////////////////////////////////////////////////////////////////////// + +struct TraceState +{ + Span* ActiveSpan = nullptr; + TraceId CurrentTraceId; +}; + +thread_local TraceState t_TraceState; + +////////////////////////////////////////////////////////////////////////// + +Span::Span(MemoryArena& Arena, std::string_view Name, Span* SpanChain) +: m_Arena(Arena) +, m_SpanChain(SpanChain) +, m_SpanId(SpanId::NewSpanId()) +, m_Name(m_Arena.DuplicateString(Name)) +{ + if (Span* ActiveSpan = t_TraceState.ActiveSpan) + { + m_ParentSpan = ActiveSpan; + m_NextSibling = ActiveSpan->m_FirstChild; + ActiveSpan->m_FirstChild = this; + } + else + { + m_ParentSpan = nullptr; + t_TraceState.CurrentTraceId = TraceId::NewTraceId(); + } + + t_TraceState.ActiveSpan = this; + m_StartTime = NowInNanoseconds(); +} + +Span::~Span() +{ + End(); +} + +void +Span::End() +{ + if (m_Ended) + { + return; + } + + ZEN_ASSERT(t_TraceState.ActiveSpan == this); + + m_Ended = true; + t_TraceState.ActiveSpan = m_ParentSpan; + m_EndTime = NowInNanoseconds(); +} + +Span* +Span::GetCurrentSpan() +{ + return t_TraceState.ActiveSpan; +} + +SpanId +Span::GetCurrentSpanId(TraceId& OutTraceId) +{ + if (t_TraceState.ActiveSpan) + { + OutTraceId = t_TraceState.CurrentTraceId; + return t_TraceState.ActiveSpan->m_SpanId; + } + else + { + OutTraceId = TraceId(); + return SpanId(); + } +} + +void +Span::AddEvent(std::string_view Name) +{ + Event* NewEvent = new (m_Arena) Event(); + NewEvent->Name = m_Arena.DuplicateString(Name); + NewEvent->Timestamp = NowInNanoseconds(); + NewEvent->NextEvent = m_Events; + m_Events = NewEvent; +} + +void +Span::AddEvent(std::string_view Name, uint64_t Timestamp) +{ + Event* NewEvent = new (m_Arena) Event(); + NewEvent->Name = m_Arena.DuplicateString(Name); + NewEvent->Timestamp = Timestamp; + NewEvent->NextEvent = m_Events; + m_Events = NewEvent; +} + +void +Span::AddEvent(std::string_view Name, const AttributeList& Attributes) +{ + Event* NewEvent = new (m_Arena) Event(); + NewEvent->Name = m_Arena.DuplicateString(Name); + NewEvent->Timestamp = NowInNanoseconds(); + NewEvent->NextEvent = m_Events; + m_Events = NewEvent; + + for (auto& [K, V] : Attributes) + { + AttributePair* NewAttr = new (m_Arena) AttributePair(); + NewAttr->Key = m_Arena.DuplicateString(K); + NewAttr->SetStringValue(m_Arena.DuplicateString(V)); + NewAttr->Next = NewEvent->Attributes; + NewEvent->Attributes = NewAttr; + } +} + +void +Span::AddAttribute(std::string_view Key, std::string_view Value) +{ + AttributePair* NewAttr = new (m_Arena) AttributePair(); + NewAttr->Key = m_Arena.DuplicateString(Key); + NewAttr->SetStringValue(m_Arena.DuplicateString(Value)); + NewAttr->Next = m_Attributes; + m_Attributes = NewAttr; +} + +void +Span::AddAttribute(std::string_view Key, uint64_t Value) +{ + AttributePair* NewAttr = new (m_Arena) AttributePair(); + NewAttr->Key = m_Arena.DuplicateString(Key); + NewAttr->SetNumericValue(Value); + NewAttr->Next = m_Attributes; + m_Attributes = NewAttr; +} + +void +Span::AddAttributes(const AttributeList& Attributes) +{ + for (const auto& [K, V] : Attributes) + { + AddAttribute(K, V); + } +} + +void* +Span::operator new(size_t Size, zen::MemoryArena& Arena) +{ + return Arena.Allocate(Size); +} + +void +Span::operator delete(void* Ptr, zen::MemoryArena& Arena) +{ + // This exists because operator new with placement arguments + // requires a matching operator delete, but we don't actually + // need to do anything here + ZEN_UNUSED(Ptr, Arena); +} + +////////////////////////////////////////////////////////////////////////// + +std::atomic<uint32_t> g_TraceCounter; + +TraceId +TraceId::NewTraceId() +{ + uint8_t TraceIdBytes[kSize]; + + // We use our session ID as the basis for trace IDs + const Oid SessionId = GetSessionId(); + memcpy(TraceIdBytes, SessionId.OidBits, sizeof SessionId.OidBits); + + const uint32_t TraceCounterBe = ByteSwap(g_TraceCounter.fetch_add(1)); + + memcpy(&TraceIdBytes[12], &TraceCounterBe, 4); + + return TraceId(std::span<const uint8_t, kSize>(TraceIdBytes, kSize)); +} + +////////////////////////////////////////////////////////////////////////// + +SpanId +SpanId::NewSpanId() +{ + // Just use a new OID and take the first 8 bytes. We'll probably want + // a more native solution later + + return SpanId(Oid::NewOid()); +} + +////////////////////////////////////////////////////////////////////////// + +Ref<TraceRecorder> g_TraceRecorder; + +void +SetTraceRecorder(Ref<TraceRecorder> Recorder) +{ + g_TraceRecorder = std::move(Recorder); +} + +bool +IsRecording() +{ + return !!g_TraceRecorder; +} + +////////////////////////////////////////////////////////////////////////// + +thread_local Tracer* t_Tracer; + +struct Tracer::Impl +{ + Impl() : m_PrevTracer(t_Tracer) {} + ~Impl() { t_Tracer = m_PrevTracer; } + + MemoryArena m_Arena; + Tracer* m_PrevTracer; + Span* m_SpanChain = nullptr; + std::atomic<uint64_t> m_SpanCount; +}; + +Tracer::Tracer() : m_Impl(new Impl()) +{ + t_Tracer = this; +} + +Tracer::~Tracer() +{ + if (Ref<TraceRecorder> Recorder = g_TraceRecorder) + { + const uint64_t SpanCount = m_Impl->m_SpanCount.load(); + const Span** Spans = new (m_Impl->m_Arena) const Span*[SpanCount]; + + uint64_t Index = 0; + + for (Span* CurrentSpan = m_Impl->m_SpanChain; CurrentSpan != nullptr; CurrentSpan = CurrentSpan->m_SpanChain) + { + Spans[Index++] = CurrentSpan; + } + + Recorder->RecordSpans(t_TraceState.CurrentTraceId, std::span<const Span*>(Spans, SpanCount)); + } + + delete m_Impl; +} + +Tracer* +Tracer::GetTracer() +{ + Tracer* TracerPtr = t_Tracer; + + if (!TracerPtr) + { + TracerPtr = new Tracer(); + } + + return TracerPtr; +} + +ScopedSpan +Tracer::CreateSpan(std::string_view Name) +{ + Tracer* TracerPtr = GetTracer(); + + Impl* const ImplPtr = TracerPtr->m_Impl; + + Span* NewSpan = new (ImplPtr->m_Arena) Span(ImplPtr->m_Arena, Name, ImplPtr->m_SpanChain); + + ImplPtr->m_SpanChain = NewSpan; + ImplPtr->m_SpanCount.fetch_add(1); + + return ScopedSpan(NewSpan, TracerPtr); +} + +////////////////////////////////////////////////////////////////////////// + +ScopedSpan::ScopedSpan(std::string_view Name) +{ + Tracer* TracerPtr = Tracer::GetTracer(); + + Tracer::Impl* const ImplPtr = TracerPtr->m_Impl; + + Span* NewSpan = new (ImplPtr->m_Arena) Span(ImplPtr->m_Arena, Name, ImplPtr->m_SpanChain); + + ImplPtr->m_SpanChain = NewSpan; + ImplPtr->m_SpanCount.fetch_add(1); + + m_Tracer = TracerPtr; + m_Span = NewSpan; +} + +ScopedSpan::ScopedSpan(Span* InSpan, Tracer* InTracer) : m_Tracer(InTracer), m_Span(InSpan) +{ +} + +ScopedSpan::~ScopedSpan() +{ +} + +} // namespace zen::otel + +////////////////////////////////////////////////////////////////////////// + +namespace zen::otel { + +using namespace std::literals; + +void +otlptrace_forcelink() +{ +} + +# if ZEN_WITH_TESTS + +TEST_CASE("otlp.trace") +{ + { + ScopedSpan Span = Tracer::CreateSpan("span0"); + Span->AddEvent("TestEvent1"sv); + Span->AddEvent("TestEvent2"sv); + + { + ScopedSpan Span2 = Tracer::CreateSpan("span1"); + Span2->AddEvent("TestEvent3"sv); + Span2->AddEvent("TestEvent4"sv); + } + } +} + +# endif + +} // namespace zen::otel +#endif diff --git a/src/zencore/stats.cpp b/src/zentelemetry/stats.cpp index 8a424c5ad..c67fa3c66 100644 --- a/src/zencore/stats.cpp +++ b/src/zentelemetry/stats.cpp @@ -1,6 +1,6 @@ // Copyright Epic Games, Inc. All Rights Reserved. -#include "zencore/stats.h" +#include "zentelemetry/stats.h" #include <zencore/compactbinarybuilder.h> #include <zencore/intmath.h> diff --git a/src/zentelemetry/xmake.lua b/src/zentelemetry/xmake.lua new file mode 100644 index 000000000..2f90d7b90 --- /dev/null +++ b/src/zentelemetry/xmake.lua @@ -0,0 +1,10 @@ +-- Copyright Epic Games, Inc. All Rights Reserved. + +target('zentelemetry') + set_kind("static") + set_group("libs") + add_headerfiles("**.h") + add_files("**.cpp") + add_includedirs("include", {public=true}) + add_deps("zencore", "protozero") + add_packages("vcpkg::robin-map", "vcpkg::spdlog") diff --git a/src/zentelemetry/zentelemetry.cpp b/src/zentelemetry/zentelemetry.cpp new file mode 100644 index 000000000..ed6ad13b9 --- /dev/null +++ b/src/zentelemetry/zentelemetry.cpp @@ -0,0 +1,19 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zentelemetry/zentelemetry.h" + +#include "zentelemetry/otlptrace.h" +#include "zentelemetry/stats.h" + +namespace zen { + +void +zentelemetry_forcelinktests() +{ + zen::stats_forcelink(); +#if ZEN_WITH_OTEL + zen::otel::otlptrace_forcelink(); +#endif +} + +} // namespace zen |