aboutsummaryrefslogtreecommitdiff
path: root/src/zentelemetry/hyperloglog.cpp
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-03-23 12:54:14 +0100
committerGitHub Enterprise <[email protected]>2026-03-23 12:54:14 +0100
commit8e2c307bdb501db0ab0ce2d51bc61b552855ee11 (patch)
tree8f9be7e926bc555318a68794ee75ad5ad0dd979f /src/zentelemetry/hyperloglog.cpp
parentLogger simplification (#883) (diff)
downloadzen-8e2c307bdb501db0ab0ce2d51bc61b552855ee11.tar.xz
zen-8e2c307bdb501db0ab0ce2d51bc61b552855ee11.zip
Unique session/client tracking using HyperLogLog (#884)
## Summary Adds probabilistic cardinality estimation for tracking unique HTTP clients and sessions using a HyperLogLog implementation. - Add a `HyperLogLog<Precision>` template in `zentelemetry` with thread-safe lock-free register updates, merge support, and XXH3 hashing - Feed client IP addresses (via raw bytes) and session IDs (via `Oid` bytes) into their respective HyperLogLog estimators from both the ASIO and http.sys server backends - Emit `distinct_clients` and `distinct_sessions` cardinality estimates in HTTP `CollectStats()` - Add tests covering empty, single, duplicates, accuracy, merge, and clear scenarios ## Why HyperLogLog Tracking exact unique counts would require storing every observed IP or session ID. HyperLogLog provides a memory-bounded probabilistic estimate (~1–2% error) using only a few KB of memory regardless of traffic volume.
Diffstat (limited to 'src/zentelemetry/hyperloglog.cpp')
-rw-r--r--src/zentelemetry/hyperloglog.cpp127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/zentelemetry/hyperloglog.cpp b/src/zentelemetry/hyperloglog.cpp
new file mode 100644
index 000000000..3a06fc59f
--- /dev/null
+++ b/src/zentelemetry/hyperloglog.cpp
@@ -0,0 +1,127 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zentelemetry/hyperloglog.h>
+
+namespace zen {
+void
+hyperloglog_forcelink()
+{
+}
+} // namespace zen
+
+#if ZEN_WITH_TESTS
+
+# include <zencore/testing.h>
+
+# include <fmt/format.h>
+
+# include <set>
+# include <string>
+
+namespace zen::tests {
+
+TEST_SUITE_BEGIN("telemetry.hyperloglog");
+
+TEST_CASE("hyperloglog.empty")
+{
+ metrics::HyperLogLog<> Hll;
+ CHECK_EQ(Hll.Count(), 0);
+}
+
+TEST_CASE("hyperloglog.single_element")
+{
+ metrics::HyperLogLog<> Hll;
+ Hll.Add("hello");
+ CHECK(Hll.Count() >= 1);
+ CHECK(Hll.Count() <= 2);
+}
+
+TEST_CASE("hyperloglog.duplicates")
+{
+ metrics::HyperLogLog<> Hll;
+ for (int i = 0; i < 1000; ++i)
+ {
+ Hll.Add("same_value");
+ }
+ CHECK_EQ(Hll.Count(), 1);
+}
+
+TEST_CASE("hyperloglog.estimate_accuracy")
+{
+ // With precision 14, expected error is ~0.81%
+ // Use a generous margin (5%) for the test to be reliable
+ constexpr uint64_t N = 100000;
+
+ metrics::HyperLogLog<14> Hll;
+ for (uint64_t i = 0; i < N; ++i)
+ {
+ std::string Value = fmt::format("item_{}", i);
+ Hll.Add(Value);
+ }
+
+ uint64_t Estimate = Hll.Count();
+ double Error = std::abs(static_cast<double>(Estimate) - static_cast<double>(N)) / static_cast<double>(N);
+
+ CHECK(Error < 0.05);
+}
+
+TEST_CASE("hyperloglog.small_cardinality")
+{
+ metrics::HyperLogLog<> Hll;
+ for (int i = 0; i < 10; ++i)
+ {
+ std::string Value = fmt::format("key_{}", i);
+ Hll.Add(Value);
+ }
+
+ uint64_t Estimate = Hll.Count();
+ CHECK(Estimate >= 8);
+ CHECK(Estimate <= 12);
+}
+
+TEST_CASE("hyperloglog.clear")
+{
+ metrics::HyperLogLog<> Hll;
+ for (int i = 0; i < 100; ++i)
+ {
+ std::string Value = fmt::format("v_{}", i);
+ Hll.Add(Value);
+ }
+ CHECK(Hll.Count() > 0);
+
+ Hll.Clear();
+ CHECK_EQ(Hll.Count(), 0);
+}
+
+TEST_CASE("hyperloglog.merge")
+{
+ constexpr uint64_t N = 50000;
+
+ metrics::HyperLogLog<14> A;
+ metrics::HyperLogLog<14> B;
+
+ // A gets even numbers, B gets odd numbers
+ for (uint64_t i = 0; i < N; ++i)
+ {
+ std::string Value = fmt::format("item_{}", i * 2);
+ A.Add(Value);
+ }
+ for (uint64_t i = 0; i < N; ++i)
+ {
+ std::string Value = fmt::format("item_{}", i * 2 + 1);
+ B.Add(Value);
+ }
+
+ A.Merge(B);
+
+ uint64_t Estimate = A.Count();
+ double Error = std::abs(static_cast<double>(Estimate) - static_cast<double>(N * 2)) / static_cast<double>(N * 2);
+
+ CHECK(Error < 0.05);
+}
+
+TEST_SUITE_END();
+
+} // namespace zen::tests
+
+#endif // ZEN_WITH_TESTS