aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2024-01-10 12:33:31 +0100
committerGitHub <[email protected]>2024-01-10 12:33:31 +0100
commit304ff53705d6a0f5140ba9e82ce66d6d7f9cb554 (patch)
tree306cfc79a45a0aac74124db358a3c2584bbfed89
parentMerge branch 'main' of https://github.com/EpicGames/zen (diff)
downloadzen-304ff53705d6a0f5140ba9e82ce66d6d7f9cb554.tar.xz
zen-304ff53705d6a0f5140ba9e82ce66d6d7f9cb554.zip
remove use of <random> in stats (#628)
The previous implementation was quite slow due to its use of std::mt19937_64 and std::uniform_int_distribution.
-rw-r--r--CHANGELOG.md1
-rw-r--r--src/zencore/include/zencore/stats.h3
-rw-r--r--src/zencore/stats.cpp48
3 files changed, 46 insertions, 6 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 449b5ffdf..c388f263a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
##
+- Improvement: Removed use of <random> in stats, for better performance (runtime as well as build)
- Improvement: Separated cache RPC handling code from general structured cache HTTP code
- Bugfix: RPC recording would not release memory as early as intended which resulted in memory buildup during long recording sessions. Previously certain memory was only released when recording stopped, now it gets released immediately when a segment is complete and written to disk.
diff --git a/src/zencore/include/zencore/stats.h b/src/zencore/include/zencore/stats.h
index 7e745dc16..f232cf2f4 100644
--- a/src/zencore/include/zencore/stats.h
+++ b/src/zencore/include/zencore/stats.h
@@ -7,7 +7,8 @@
#include <zenbase/concepts.h>
#include <atomic>
-#include <random>
+#include <string_view>
+#include <vector>
namespace zen {
class CbObjectWriter;
diff --git a/src/zencore/stats.cpp b/src/zencore/stats.cpp
index 087319fa4..7c1a9e086 100644
--- a/src/zencore/stats.cpp
+++ b/src/zencore/stats.cpp
@@ -178,10 +178,49 @@ Meter::Mark(uint64_t Count)
//////////////////////////////////////////////////////////////////////////
-// TODO: should consider a cheaper RNG here, this will run for every thread
-// that gets created
+uint64_t  // returns x rotated left by k bit positions
+rol64(uint64_t x, int k)  // k must be in [1, 63]: with k == 0 the right shift below becomes a shift by 64, which is undefined for a 64-bit operand
+{
+ return (x << k) | (x >> (64 - k));  // bits shifted out at the top re-enter at the bottom
+}
+
+struct xoshiro256ss_state  // generator state consumed and updated by xoshiro256ss()
+{
+ uint64_t s[4];  // four 64-bit state words (256 bits in total)
+};
+
+uint64_t  // returns the next 64-bit output value
+xoshiro256ss(struct xoshiro256ss_state* state)  // advances *state in place by one step; state must not be null (it is dereferenced unconditionally)
+{
+ uint64_t* s = state->s;
+ uint64_t const result = rol64(s[1] * 5, 7) * 9;  // output is computed from s[1] before any state word is modified
+ uint64_t const t = s[1] << 17;  // captured now because s[1] is overwritten below
+
+ s[2] ^= s[0];  // the four XORs are order-dependent: each one reads words updated by the previous lines
+ s[3] ^= s[1];
+ s[1] ^= s[2];
+ s[0] ^= s[3];
-thread_local std::mt19937_64 ThreadLocalRng;
+ s[2] ^= t;  // fold in the shifted copy of the pre-update s[1]
+ s[3] = rol64(s[3], 45);
+
+ return result;
+}
+
+class xoshiro256  // callable wrapper that owns an xoshiro256ss_state and steps it via xoshiro256ss()
+{
+public:
+ uint64_t operator()() { return xoshiro256ss(&m_State); }  // returns the next value and mutates m_State
+ static constexpr uint64_t min() { return 0; }  // declared lower bound of the output range
+ static constexpr uint64_t max() { return ~(0ull); }  // declared upper bound: all 64 bits set
+
+private:
+ xoshiro256ss_state m_State{0xf0fefaf9, 0xbeeb5238, 0x48472397, 0x58858558};  // fixed seed, so every instance starts from this same state; each constant fits in 32 bits, leaving the upper half of every word zero initially
+};
+
+thread_local xoshiro256 ThreadLocalRng;  // one generator per thread; all start from the same fixed seed, so each thread's generator yields the same sequence of values
+
+//////////////////////////////////////////////////////////////////////////
UniformSample::UniformSample(uint32_t ReservoirSize) : m_Values(ReservoirSize)
{
@@ -222,8 +261,7 @@ UniformSample::Update(int64_t Value)
// Randomly choose an old entry to potentially replace (the probability
// of replacing an entry diminishes with time)
- std::uniform_int_distribution<uint64_t> UniformDist(0, Count);
- uint64_t SampleIndex = UniformDist(ThreadLocalRng);
+ const uint64_t SampleIndex = ThreadLocalRng() % Count;
if (SampleIndex < Size)
{