diff options
| author | Stefan Boberg <[email protected]> | 2024-01-10 12:33:31 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-01-10 12:33:31 +0100 |
| commit | 304ff53705d6a0f5140ba9e82ce66d6d7f9cb554 (patch) | |
| tree | 306cfc79a45a0aac74124db358a3c2584bbfed89 | |
| parent | Merge branch 'main' of https://github.com/EpicGames/zen (diff) | |
| download | zen-304ff53705d6a0f5140ba9e82ce66d6d7f9cb554.tar.xz zen-304ff53705d6a0f5140ba9e82ce66d6d7f9cb554.zip | |
remove use of <random> in stats (#628)
The previous implementation was quite slow due to its use of std::mt19937_64 (Mersenne Twister) and std::uniform_int_distribution.
| -rw-r--r-- | CHANGELOG.md | 1 | ||||
| -rw-r--r-- | src/zencore/include/zencore/stats.h | 3 | ||||
| -rw-r--r-- | src/zencore/stats.cpp | 48 |
3 files changed, 46 insertions, 6 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 449b5ffdf..c388f263a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ## +- Improvement: Removed use of <random> in stats, for better performance (runtime as well as build) - Improvement: Separated cache RPC handling code from general structured cache HTTP code - Bugfix: RPC recording would not release memory as early as intended which resulted in memory buildup during long recording sessions. Previously certain memory was only released when recording stopped, now it gets released immediately when a segment is complete and written to disk. diff --git a/src/zencore/include/zencore/stats.h b/src/zencore/include/zencore/stats.h index 7e745dc16..f232cf2f4 100644 --- a/src/zencore/include/zencore/stats.h +++ b/src/zencore/include/zencore/stats.h @@ -7,7 +7,8 @@ #include <zenbase/concepts.h> #include <atomic> -#include <random> +#include <string_view> +#include <vector> namespace zen { class CbObjectWriter; diff --git a/src/zencore/stats.cpp b/src/zencore/stats.cpp index 087319fa4..7c1a9e086 100644 --- a/src/zencore/stats.cpp +++ b/src/zencore/stats.cpp @@ -178,10 +178,49 @@ Meter::Mark(uint64_t Count) ////////////////////////////////////////////////////////////////////////// -// TODO: should consider a cheaper RNG here, this will run for every thread -// that gets created +uint64_t +rol64(uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +struct xoshiro256ss_state +{ + uint64_t s[4]; +}; + +uint64_t +xoshiro256ss(struct xoshiro256ss_state* state) +{ + uint64_t* s = state->s; + uint64_t const result = rol64(s[1] * 5, 7) * 9; + uint64_t const t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; -thread_local std::mt19937_64 ThreadLocalRng; + s[2] ^= t; + s[3] = rol64(s[3], 45); + + return result; +} + +class xoshiro256 +{ +public: + uint64_t operator()() { return xoshiro256ss(&m_State); } + static constexpr uint64_t min() { return 0; } + static constexpr uint64_t max() { 
return ~(0ull); } + +private: + xoshiro256ss_state m_State{0xf0fefaf9, 0xbeeb5238, 0x48472397, 0x58858558}; +}; + +thread_local xoshiro256 ThreadLocalRng; + +////////////////////////////////////////////////////////////////////////// UniformSample::UniformSample(uint32_t ReservoirSize) : m_Values(ReservoirSize) { @@ -222,8 +261,7 @@ UniformSample::Update(int64_t Value) // Randomly choose an old entry to potentially replace (the probability // of replacing an entry diminishes with time) - std::uniform_int_distribution<uint64_t> UniformDist(0, Count); - uint64_t SampleIndex = UniformDist(ThreadLocalRng); + const uint64_t SampleIndex = ThreadLocalRng() % Count; if (SampleIndex < Size) { |