aboutsummaryrefslogtreecommitdiff
path: root/src/zentelemetry/include
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-03-23 12:54:14 +0100
committerGitHub Enterprise <[email protected]>2026-03-23 12:54:14 +0100
commit8e2c307bdb501db0ab0ce2d51bc61b552855ee11 (patch)
tree8f9be7e926bc555318a68794ee75ad5ad0dd979f /src/zentelemetry/include
parentLogger simplification (#883) (diff)
downloadzen-8e2c307bdb501db0ab0ce2d51bc61b552855ee11.tar.xz
zen-8e2c307bdb501db0ab0ce2d51bc61b552855ee11.zip
Unique session/client tracking using HyperLogLog (#884)
## Summary Adds probabilistic cardinality estimation for tracking unique HTTP clients and sessions using a HyperLogLog implementation. - Add a `HyperLogLog<Precision>` template in `zentelemetry` with thread-safe lock-free register updates, merge support, and XXH3 hashing - Feed client IP addresses (via raw bytes) and session IDs (via `Oid` bytes) into their respective HyperLogLog estimators from both the ASIO and http.sys server backends - Emit `distinct_clients` and `distinct_sessions` cardinality estimates in HTTP `CollectStats()` - Add tests covering empty, single, duplicates, accuracy, merge, and clear scenarios ## Why HyperLogLog Tracking exact unique counts would require storing every observed IP or session ID. HyperLogLog provides a memory-bounded probabilistic estimate (~1–2% error) using only a few KB of memory regardless of traffic volume.
Diffstat (limited to 'src/zentelemetry/include')
-rw-r--r--src/zentelemetry/include/zentelemetry/hyperloglog.h165
1 files changed, 165 insertions, 0 deletions
diff --git a/src/zentelemetry/include/zentelemetry/hyperloglog.h b/src/zentelemetry/include/zentelemetry/hyperloglog.h
new file mode 100644
index 000000000..2daf75a43
--- /dev/null
+++ b/src/zentelemetry/include/zentelemetry/hyperloglog.h
@@ -0,0 +1,165 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include "zentelemetry.h"
+
+#include <zencore/intmath.h>
+#include <zencore/xxhash.h>
+
+#include <array>
+#include <atomic>
+#include <cstdint>
+#include <string_view>
+
+namespace zen::metrics {
+
+/** HyperLogLog cardinality estimator.
+ *
+ * Estimates the number of distinct elements in a stream using O(2^Precision)
+ * bytes of memory. The relative error is approximately 1.04 / sqrt(2^Precision).
+ *
+ * @tparam Precision Number of bits used for register indexing (4..16).
+ * Higher precision = more memory but lower error.
+ * Default 14 gives 16384 registers (~16 KiB) and ~0.81% error.
+ *
+ * All operations are thread-safe. Add() uses relaxed atomic compare-exchange
+ * so concurrent updates are lock-free but may occasionally lose a race (which
+ * only means a slightly delayed register update, not a correctness issue).
+ */
+template<uint8_t Precision = 14>
+class HyperLogLog
+{
+ static_assert(Precision >= 4 && Precision <= 16, "Precision must be between 4 and 16");
+
+ static constexpr uint32_t RegisterCount = 1u << Precision;
+ static constexpr uint64_t IndexMask = RegisterCount - 1;
+
+public:
+ HyperLogLog() { Clear(); }
+
+ /** Add a raw byte sequence to the sketch. */
+ void Add(const void* Data, size_t Size)
+ {
+ XXH3_128 Hash = XXH3_128::HashMemory(Data, Size);
+ AddHash(Hash);
+ }
+
+ /** Add a string value to the sketch. */
+ void Add(std::string_view Value) { Add(Value.data(), Value.size()); }
+
+ /** Add a pre-computed hash to the sketch. */
+ void AddHash(const XXH3_128& Hash)
+ {
+ // Use the first 8 bytes of the hash as our 64-bit value
+ uint64_t H;
+ memcpy(&H, Hash.Hash, sizeof(H));
+
+ uint32_t Index = static_cast<uint32_t>(H >> (64 - Precision)) & IndexMask;
+ uint64_t W = H << Precision;
+
+ // Count leading zeros of W + 1 (the +1 ensures we never store 0)
+ uint8_t Rank = static_cast<uint8_t>(CountLeadingZeros64(W) + 1);
+
+ // Atomically update the register to the maximum observed rank
+ uint8_t Current = m_Registers[Index].load(std::memory_order_relaxed);
+ while (Rank > Current)
+ {
+ if (m_Registers[Index].compare_exchange_weak(Current, Rank, std::memory_order_relaxed))
+ {
+ break;
+ }
+ }
+ }
+
+ /** Estimate the number of distinct elements added. */
+ double Estimate() const
+ {
+ // Compute the raw harmonic mean estimate
+ double Sum = 0.0;
+ uint32_t ZeroCount = 0;
+
+ for (uint32_t i = 0; i < RegisterCount; ++i)
+ {
+ uint8_t Val = m_Registers[i].load(std::memory_order_relaxed);
+ Sum += 1.0 / static_cast<double>(1ull << Val);
+ if (Val == 0)
+ {
+ ++ZeroCount;
+ }
+ }
+
+ double Alpha = AlphaM();
+ double RawEstimate = Alpha * RegisterCount * RegisterCount / Sum;
+
+ // Small range correction using LinearCounting
+ if (RawEstimate <= 2.5 * RegisterCount && ZeroCount > 0)
+ {
+ return RegisterCount * std::log(static_cast<double>(RegisterCount) / ZeroCount);
+ }
+
+ return RawEstimate;
+ }
+
+ /** Return the estimate as an integer. */
+ uint64_t Count() const
+ {
+ double E = Estimate();
+ return E < 0.5 ? 0 : static_cast<uint64_t>(E + 0.5);
+ }
+
+ /** Reset all registers to zero. */
+ void Clear()
+ {
+ for (uint32_t i = 0; i < RegisterCount; ++i)
+ {
+ m_Registers[i].store(0, std::memory_order_relaxed);
+ }
+ }
+
+ /** Merge another HyperLogLog sketch into this one (union). */
+ void Merge(const HyperLogLog& Other)
+ {
+ for (uint32_t i = 0; i < RegisterCount; ++i)
+ {
+ uint8_t OtherVal = Other.m_Registers[i].load(std::memory_order_relaxed);
+ uint8_t Current = m_Registers[i].load(std::memory_order_relaxed);
+ while (OtherVal > Current)
+ {
+ if (m_Registers[i].compare_exchange_weak(Current, OtherVal, std::memory_order_relaxed))
+ {
+ break;
+ }
+ }
+ }
+ }
+
+private:
+ std::array<std::atomic<uint8_t>, RegisterCount> m_Registers;
+
+ static double AlphaM()
+ {
+ if constexpr (Precision == 4)
+ {
+ return 0.673;
+ }
+ else if constexpr (Precision == 5)
+ {
+ return 0.697;
+ }
+ else if constexpr (Precision == 6)
+ {
+ return 0.709;
+ }
+ else
+ {
+ return 0.7213 / (1.0 + 1.079 / RegisterCount);
+ }
+ }
+};
+
+} // namespace zen::metrics
+
+namespace zen {
+void hyperloglog_forcelink();
+} // namespace zen