diff options
| author | Stefan Boberg <[email protected]> | 2026-03-23 12:54:14 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-03-23 12:54:14 +0100 |
| commit | 8e2c307bdb501db0ab0ce2d51bc61b552855ee11 (patch) | |
| tree | 8f9be7e926bc555318a68794ee75ad5ad0dd979f /src/zentelemetry/hyperloglog.cpp | |
| parent | Logger simplification (#883) (diff) | |
| download | zen-8e2c307bdb501db0ab0ce2d51bc61b552855ee11.tar.xz zen-8e2c307bdb501db0ab0ce2d51bc61b552855ee11.zip | |
Unique session/client tracking using HyperLogLog (#884)
## Summary
Adds probabilistic cardinality estimation for tracking unique HTTP clients and sessions using a HyperLogLog implementation.
- Add a `HyperLogLog<Precision>` template in `zentelemetry` with thread-safe lock-free register updates, merge support, and XXH3 hashing
- Feed client IP addresses (via raw bytes) and session IDs (via `Oid` bytes) into their respective HyperLogLog estimators from both the ASIO and http.sys server backends
- Emit `distinct_clients` and `distinct_sessions` cardinality estimates in HTTP `CollectStats()`
- Add tests covering empty, single, duplicates, accuracy, merge, and clear scenarios
## Why HyperLogLog
Tracking exact unique counts would require storing every observed IP or session ID. HyperLogLog provides a memory-bounded probabilistic estimate (~1–2% error) using only a few KB of memory regardless of traffic volume.
Diffstat (limited to 'src/zentelemetry/hyperloglog.cpp')
| -rw-r--r-- | src/zentelemetry/hyperloglog.cpp | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/src/zentelemetry/hyperloglog.cpp b/src/zentelemetry/hyperloglog.cpp new file mode 100644 index 000000000..3a06fc59f --- /dev/null +++ b/src/zentelemetry/hyperloglog.cpp @@ -0,0 +1,127 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zentelemetry/hyperloglog.h> + +namespace zen { +void +hyperloglog_forcelink() +{ +} +} // namespace zen + +#if ZEN_WITH_TESTS + +# include <zencore/testing.h> + +# include <fmt/format.h> + +# include <set> +# include <string> + +namespace zen::tests { + +TEST_SUITE_BEGIN("telemetry.hyperloglog"); + +TEST_CASE("hyperloglog.empty") +{ + metrics::HyperLogLog<> Hll; + CHECK_EQ(Hll.Count(), 0); +} + +TEST_CASE("hyperloglog.single_element") +{ + metrics::HyperLogLog<> Hll; + Hll.Add("hello"); + CHECK(Hll.Count() >= 1); + CHECK(Hll.Count() <= 2); +} + +TEST_CASE("hyperloglog.duplicates") +{ + metrics::HyperLogLog<> Hll; + for (int i = 0; i < 1000; ++i) + { + Hll.Add("same_value"); + } + CHECK_EQ(Hll.Count(), 1); +} + +TEST_CASE("hyperloglog.estimate_accuracy") +{ + // With precision 14, expected error is ~0.81% + // Use a generous margin (5%) for the test to be reliable + constexpr uint64_t N = 100000; + + metrics::HyperLogLog<14> Hll; + for (uint64_t i = 0; i < N; ++i) + { + std::string Value = fmt::format("item_{}", i); + Hll.Add(Value); + } + + uint64_t Estimate = Hll.Count(); + double Error = std::abs(static_cast<double>(Estimate) - static_cast<double>(N)) / static_cast<double>(N); + + CHECK(Error < 0.05); +} + +TEST_CASE("hyperloglog.small_cardinality") +{ + metrics::HyperLogLog<> Hll; + for (int i = 0; i < 10; ++i) + { + std::string Value = fmt::format("key_{}", i); + Hll.Add(Value); + } + + uint64_t Estimate = Hll.Count(); + CHECK(Estimate >= 8); + CHECK(Estimate <= 12); +} + +TEST_CASE("hyperloglog.clear") +{ + metrics::HyperLogLog<> Hll; + for (int i = 0; i < 100; ++i) + { + std::string Value = fmt::format("v_{}", i); + Hll.Add(Value); + } + CHECK(Hll.Count() > 0); + + Hll.Clear(); + CHECK_EQ(Hll.Count(), 0); +} + +TEST_CASE("hyperloglog.merge") +{ + constexpr uint64_t N = 50000; + + metrics::HyperLogLog<14> A; + metrics::HyperLogLog<14> B; + + // A gets even numbers, B gets odd numbers + for (uint64_t i = 0; i < N; ++i) + { + std::string Value = fmt::format("item_{}", i * 2); + A.Add(Value); + } + for (uint64_t i = 0; i < N; ++i) + { + std::string Value = fmt::format("item_{}", i * 2 + 1); + B.Add(Value); + } + + A.Merge(B); + + uint64_t Estimate = A.Count(); + double Error = std::abs(static_cast<double>(Estimate) - static_cast<double>(N * 2)) / static_cast<double>(N * 2); + + CHECK(Error < 0.05); +} + +TEST_SUITE_END(); + +} // namespace zen::tests + +#endif // ZEN_WITH_TESTS |