aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-03-23 19:22:08 +0100
committerGitHub Enterprise <[email protected]>2026-03-23 19:22:08 +0100
commit440ef03df8d8bba4432126f36168c1f7631c18dc (patch)
tree07d4bd4446a11589c9a842255bf37c25aaded74b /src/zenutil
parentMerge branch 'de/v5.7.25-hotpatch' (#880) (diff)
downloadzen-440ef03df8d8bba4432126f36168c1f7631c18dc.tar.xz
zen-440ef03df8d8bba4432126f36168c1f7631c18dc.zip
Cross-platform process metrics support (#887)
- **Cross-platform `GetProcessMetrics`**: Implement Linux (`/proc/{pid}/stat`, `/proc/{pid}/statm`, `/proc/{pid}/status`) and macOS (`proc_pidinfo(PROC_PIDTASKINFO)`) support for CPU times and memory metrics. Fix Windows to populate the `MemoryBytes` field (was always 0). All platforms now set `MemoryBytes = WorkingSetSize`.
- **`ProcessMetricsTracker`**: Experimental utility class (`zenutil`) that periodically samples resource usage for a set of tracked child processes. Supports both a dedicated background thread and an ASIO steady_timer mode. Computes delta-based CPU usage percentage across samples, with batched sampling (8 processes per tick) to limit per-cycle overhead.
- **`ProcessHandle` documentation**: Add Doxygen comments to all public methods describing platform-specific behavior.
- **Cleanup**: Remove unused `ZEN_RUN_TESTS` macro (inlined at its single call site in `zenserver/main.cpp`), remove dead `#if 0` thread-shutdown workaround block.
- **Minor fixes**: Use `HttpClientAccessToken` constructor in hordeclient instead of setting private members directly. Log ASIO version at startup and include it in the server settings list.
Diffstat (limited to 'src/zenutil')
-rw-r--r--src/zenutil/include/zenutil/processmetricstracker.h105
-rw-r--r--src/zenutil/processmetricstracker.cpp392
-rw-r--r--src/zenutil/zenutil.cpp2
3 files changed, 499 insertions, 0 deletions
diff --git a/src/zenutil/include/zenutil/processmetricstracker.h b/src/zenutil/include/zenutil/processmetricstracker.h
new file mode 100644
index 000000000..fdeae2bfa
--- /dev/null
+++ b/src/zenutil/include/zenutil/processmetricstracker.h
@@ -0,0 +1,105 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/process.h>
+#include <zencore/zencore.h>
+
+#include <memory>
+#include <vector>
+
+namespace asio {
+class io_context;
+}
+
+namespace zen {
+
+/** One tracked process as reported to callers: its pid together with the
+ *  most recent metrics sample recorded for it.
+ */
+struct TrackedProcessEntry
+{
+    int            Pid{0};
+    ProcessMetrics Metrics;
+
+    // CPU usage in percent, derived from the CPU-time delta between two
+    // samples. Remains -1.0 for as long as no such delta has been computed.
+    float CpuUsagePercent{-1.0f};
+};
+
+/** Totals accumulated over every process that is currently tracked.
+ *
+ *  Each Total* field is the sum of the corresponding per-process value;
+ *  ProcessCount is the number of processes that contributed to the sums.
+ */
+struct AggregateProcessMetrics
+{
+    uint64_t TotalWorkingSetSize{0};
+    uint64_t TotalPeakWorkingSetSize{0};
+    uint64_t TotalUserTimeMs{0};
+    uint64_t TotalKernelTimeMs{0};
+    uint32_t ProcessCount{0};
+};
+
+/** Background process metrics tracker.
+ *
+ * Maintains a set of child processes keyed by pid and periodically samples
+ * their resource usage (CPU times, memory) in a background thread or via
+ * an ASIO timer on an external io_context.
+ *
+ * Sampling is batched: each tick samples at most 8 processes and the next
+ * tick continues round-robin where the previous one stopped, so with more
+ * than 8 tracked processes an individual entry is refreshed less often than
+ * once per sample interval.
+ *
+ * The tracker does not take ownership of the handles passed to Add(). It
+ * reads the pid from the handle and initializes its own ProcessHandle from
+ * that pid.
+ *
+ * The tracked set is guarded by an internal reader/writer lock that Add(),
+ * Remove(), Clear(), GetSnapshot() and GetAggregate() all acquire.
+ *
+ * Usage (dedicated thread):
+ * ProcessMetricsTracker Tracker;
+ * Tracker.Start();
+ * Tracker.Add(ChildHandle);
+ *
+ * Usage (ASIO timer):
+ * ProcessMetricsTracker Tracker(IoContext);
+ * Tracker.Start();
+ * Tracker.Add(ChildHandle);
+ */
+class ProcessMetricsTracker
+{
+public:
+ /// Construct with a dedicated background thread for sampling. SampleIntervalMs is the wait between sampling ticks, in milliseconds.
+ explicit ProcessMetricsTracker(uint64_t SampleIntervalMs = 5000);
+
+ /// Construct with an external io_context — uses an asio::steady_timer
+ /// instead of a dedicated thread. The caller must ensure the io_context
+ /// outlives this tracker and that its run loop is active (samples are only taken from timer handlers).
+ ProcessMetricsTracker(asio::io_context& IoContext, uint64_t SampleIntervalMs = 5000);
+
+ ~ProcessMetricsTracker();  // destroys the Impl, whose destructor calls Stop()
+
+ ProcessMetricsTracker(const ProcessMetricsTracker&) = delete;
+ ProcessMetricsTracker& operator=(const ProcessMetricsTracker&) = delete;
+
+ /// Start sampling. Spawns the background thread or enqueues the first timer. Does nothing if already running.
+ void Start();
+
+ /// Stop sampling: cancels the timer (timer mode) and joins the sampling thread (thread mode). Safe to call multiple times.
+ void Stop();
+
+ /// Add a process to track. Only the pid is read from Handle; the tracker initializes its own
+ /// ProcessHandle from that pid. If the pid is already tracked, its entry is replaced and its sample history starts over.
+ void Add(const ProcessHandle& Handle);
+
+ /// Remove a tracked process by pid. Does nothing if the pid is not tracked.
+ void Remove(int Pid);
+
+ /// Remove all tracked processes.
+ void Clear();
+
+ /// Returns a snapshot of metrics for all tracked processes, in unspecified order. Entries not sampled yet carry default-constructed Metrics and CpuUsagePercent == -1.
+ std::vector<TrackedProcessEntry> GetSnapshot() const;
+
+ /// Returns aggregate metrics across all tracked processes (sums of the most recent per-process samples).
+ AggregateProcessMetrics GetAggregate() const;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> m_Impl;
+};
+
+void processmetricstracker_forcelink(); // internal
+
+} // namespace zen
diff --git a/src/zenutil/processmetricstracker.cpp b/src/zenutil/processmetricstracker.cpp
new file mode 100644
index 000000000..555d0ae1a
--- /dev/null
+++ b/src/zenutil/processmetricstracker.cpp
@@ -0,0 +1,392 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenutil/processmetricstracker.h>
+
+#include <zencore/thread.h>
+#include <zencore/timer.h>
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <limits>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <asio/io_context.hpp>
+#include <asio/steady_timer.hpp>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+/** Private implementation of ProcessMetricsTracker.
+ *
+ *  Holds the table of tracked processes and drives sampling either from a
+ *  dedicated thread (constructed without an io_context) or from an
+ *  asio::steady_timer (constructed with one).
+ *
+ *  m_Lock guards m_Entries, m_KeyOrder and m_NextSampleIndex. Readers
+ *  (GetSnapshot/GetAggregate) take it shared; everything that writes,
+ *  including SampleBatch(), takes it exclusively.
+ */
+struct ProcessMetricsTracker::Impl
+{
+    // Maximum number of processes sampled per tick.
+    static constexpr size_t kBatchSize = 8;
+
+    // Book-keeping for a single tracked process.
+    struct Entry
+    {
+        ProcessHandle  Handle;
+        ProcessMetrics LastMetrics;
+        float          CpuUsagePercent = -1.0f;  // -1 until a CPU delta has been computed
+
+        // Values remembered from the previous sample, used for the CPU delta.
+        uint64_t PrevUserTimeMs   = 0;
+        uint64_t PrevKernelTimeMs = 0;
+        uint64_t PrevSampleTicks  = 0;  // 0 doubles as "never sampled"
+    };
+
+    uint64_t m_SampleIntervalMs;
+
+    mutable RwLock                 m_Lock;
+    std::unordered_map<int, Entry> m_Entries;
+    size_t                         m_NextSampleIndex = 0;  // round-robin cursor into m_KeyOrder
+    std::vector<int>               m_KeyOrder;             // tracked pids in insertion order
+
+    std::atomic<bool> m_Running{false};
+
+    // Thread-based sampling
+    std::thread m_Thread;
+    Event       m_StopEvent;
+
+    // Timer-based sampling (null in thread mode)
+    std::unique_ptr<asio::steady_timer> m_Timer;
+
+    /// Thread mode: no timer is created, so Start() spawns a sampling thread.
+    explicit Impl(uint64_t SampleIntervalMs) : m_SampleIntervalMs(SampleIntervalMs) {}
+
+    /// Timer mode: sampling runs from handlers of a steady_timer bound to IoContext.
+    Impl(asio::io_context& IoContext, uint64_t SampleIntervalMs)
+    : m_SampleIntervalMs(SampleIntervalMs)
+    , m_Timer(std::make_unique<asio::steady_timer>(IoContext))
+    {
+    }
+
+    ~Impl() { Stop(); }
+
+    /// Begin sampling. Does nothing if sampling is already running.
+    void Start()
+    {
+        if (m_Running.exchange(true))
+        {
+            return;
+        }
+
+        if (m_Timer)
+        {
+            EnqueueTimer();
+        }
+        else
+        {
+            m_Thread = std::thread([this] { SamplingLoop(); });
+        }
+    }
+
+    /// Stop sampling. Does nothing if sampling is not running.
+    ///
+    /// NOTE(review): m_StopEvent is set here and never reset in this file, so
+    /// whether Start() works again after Stop() in thread mode depends on the
+    /// reset semantics of Event — confirm.
+    void Stop()
+    {
+        if (!m_Running.exchange(false))
+        {
+            return;
+        }
+
+        if (m_Timer)
+        {
+            m_Timer->cancel();
+        }
+
+        if (m_Thread.joinable())
+        {
+            m_StopEvent.Set();
+            m_Thread.join();
+        }
+    }
+
+    /// Track the process identified by Handle.Pid().
+    ///
+    /// Only the pid is taken from Handle; the entry gets its own ProcessHandle
+    /// initialized from that pid. An existing entry for the same pid is
+    /// discarded first, so its sample history starts over while its position
+    /// in the sampling order is kept.
+    void Add(const ProcessHandle& Handle)
+    {
+        int Pid = Handle.Pid();
+
+        RwLock::ExclusiveLockScope $(m_Lock);
+
+        auto It = m_Entries.find(Pid);
+        if (It != m_Entries.end())
+        {
+            m_Entries.erase(It);
+        }
+        else
+        {
+            m_KeyOrder.push_back(Pid);
+        }
+
+        auto [NewIt, Inserted] = m_Entries.try_emplace(Pid);
+        NewIt->second.Handle.Initialize(Pid);
+    }
+
+    /// Stop tracking Pid. Does nothing if the pid is not tracked.
+    void Remove(int Pid)
+    {
+        RwLock::ExclusiveLockScope $(m_Lock);
+        m_Entries.erase(Pid);
+        m_KeyOrder.erase(std::remove(m_KeyOrder.begin(), m_KeyOrder.end(), Pid), m_KeyOrder.end());
+
+        // Keep the round-robin cursor inside the (possibly shrunk) key list.
+        if (m_NextSampleIndex >= m_KeyOrder.size())
+        {
+            m_NextSampleIndex = 0;
+        }
+    }
+
+    /// Stop tracking every process and reset the round-robin cursor.
+    void Clear()
+    {
+        RwLock::ExclusiveLockScope $(m_Lock);
+        m_Entries.clear();
+        m_KeyOrder.clear();
+        m_NextSampleIndex = 0;
+    }
+
+    /// Copy the latest metrics of every tracked process. The order of the
+    /// result follows the iteration order of the unordered map.
+    std::vector<TrackedProcessEntry> GetSnapshot() const
+    {
+        std::vector<TrackedProcessEntry> Result;
+
+        RwLock::SharedLockScope $(m_Lock);
+        Result.reserve(m_Entries.size());
+
+        for (const auto& [Pid, E] : m_Entries)
+        {
+            TrackedProcessEntry Out;
+            Out.Pid             = Pid;
+            Out.Metrics         = E.LastMetrics;
+            Out.CpuUsagePercent = E.CpuUsagePercent;
+            Result.push_back(std::move(Out));
+        }
+
+        return Result;
+    }
+
+    /// Sum the latest metrics of every tracked process.
+    AggregateProcessMetrics GetAggregate() const
+    {
+        AggregateProcessMetrics Agg;
+
+        RwLock::SharedLockScope $(m_Lock);
+
+        for (const auto& [Pid, E] : m_Entries)
+        {
+            Agg.TotalWorkingSetSize += E.LastMetrics.WorkingSetSize;
+            Agg.TotalPeakWorkingSetSize += E.LastMetrics.PeakWorkingSetSize;
+            Agg.TotalUserTimeMs += E.LastMetrics.UserTimeMs;
+            Agg.TotalKernelTimeMs += E.LastMetrics.KernelTimeMs;
+            Agg.ProcessCount++;
+        }
+
+        return Agg;
+    }
+
+    /// Sample up to kBatchSize processes, continuing round-robin from where
+    /// the previous call stopped.
+    void SampleBatch()
+    {
+        // Exclusive, not shared: this function writes to the entries and to
+        // m_NextSampleIndex, and GetSnapshot()/GetAggregate() read the same
+        // fields while holding the lock shared.
+        RwLock::ExclusiveLockScope $(m_Lock);
+
+        if (m_KeyOrder.empty())
+        {
+            return;
+        }
+
+        const uint64_t NowTicks = GetHifreqTimerValue();
+
+        for (size_t Remaining = std::min(kBatchSize, m_KeyOrder.size()); Remaining > 0; --Remaining, ++m_NextSampleIndex)
+        {
+            if (m_NextSampleIndex >= m_KeyOrder.size())
+            {
+                m_NextSampleIndex = 0;
+            }
+
+            const int Pid = m_KeyOrder[m_NextSampleIndex];
+            auto      It  = m_Entries.find(Pid);
+
+            if (It == m_Entries.end())
+            {
+                continue;
+            }
+
+            Entry& E = It->second;
+
+            ProcessMetrics Metrics;
+            GetProcessMetrics(E.Handle, Metrics);
+
+            if (E.PrevSampleTicks > 0)
+            {
+                const uint64_t ElapsedMs = Stopwatch::GetElapsedTimeMs(NowTicks - E.PrevSampleTicks);
+                if (ElapsedMs > 0)
+                {
+                    const uint64_t PrevCpuMs = E.PrevUserTimeMs + E.PrevKernelTimeMs;
+                    const uint64_t CurCpuMs  = Metrics.UserTimeMs + Metrics.KernelTimeMs;
+
+                    // The counters are unsigned: if the current reading is lower than
+                    // the previous one, a plain subtraction would wrap around to an
+                    // enormous value. Report 0% in that case.
+                    const uint64_t DeltaCpuTimeMs = (CurCpuMs >= PrevCpuMs) ? (CurCpuMs - PrevCpuMs) : 0;
+
+                    E.CpuUsagePercent = static_cast<float>(static_cast<double>(DeltaCpuTimeMs) / ElapsedMs * 100.0);
+                }
+            }
+
+            E.PrevUserTimeMs   = Metrics.UserTimeMs;
+            E.PrevKernelTimeMs = Metrics.KernelTimeMs;
+            E.PrevSampleTicks  = NowTicks;
+            E.LastMetrics      = Metrics;
+        }
+    }
+
+    /// Body of the sampling thread: wait one interval, sample a batch, repeat
+    /// until the stop event is signalled or m_Running is cleared.
+    void SamplingLoop()
+    {
+        // The wait takes an int; clamp so that a very large interval cannot
+        // turn into a negative timeout through the narrowing cast.
+        constexpr uint64_t kMaxWaitMs = static_cast<uint64_t>(std::numeric_limits<int>::max());
+        const int          WaitMs     = static_cast<int>(std::min(m_SampleIntervalMs, kMaxWaitMs));
+
+        while (!m_StopEvent.Wait(WaitMs))
+        {
+            if (!m_Running.load())
+            {
+                return;
+            }
+
+            SampleBatch();
+        }
+    }
+
+    /// Arm the timer for one interval; the handler samples a batch and re-arms.
+    ///
+    /// NOTE(review): the handler captures `this`. cancel() in Stop() does not
+    /// wait for a handler that is already queued, so the owner has to make sure
+    /// the io_context runs no handler of this timer after this object has been
+    /// destroyed — confirm against callers.
+    void EnqueueTimer()
+    {
+        if (!m_Timer || !m_Running.load())
+        {
+            return;
+        }
+
+        m_Timer->expires_after(std::chrono::milliseconds(m_SampleIntervalMs));
+        m_Timer->async_wait([this](const asio::error_code& Ec) {
+            if (Ec || !m_Running.load())
+            {
+                return;
+            }
+
+            SampleBatch();
+            EnqueueTimer();
+        });
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+ProcessMetricsTracker::ProcessMetricsTracker(uint64_t SampleIntervalMs) : m_Impl(std::make_unique<Impl>(SampleIntervalMs))
+{  // thread mode: the Impl is built without a timer, so Start() spawns a sampling thread
+}
+
+ProcessMetricsTracker::ProcessMetricsTracker(asio::io_context& IoContext, uint64_t SampleIntervalMs)
+: m_Impl(std::make_unique<Impl>(IoContext, SampleIntervalMs))
+{  // timer mode: the Impl creates a steady_timer on IoContext and samples from its handlers
+}
+
+ProcessMetricsTracker::~ProcessMetricsTracker() = default;  // destroying m_Impl runs Impl::~Impl(), which calls Stop()
+
+void
+ProcessMetricsTracker::Start()
+{
+ m_Impl->Start();  // does nothing if sampling is already running
+}
+
+void
+ProcessMetricsTracker::Stop()
+{
+ m_Impl->Stop();  // does nothing if sampling is not running
+}
+
+void
+ProcessMetricsTracker::Add(const ProcessHandle& Handle)
+{
+ m_Impl->Add(Handle);  // an entry already tracked under the same pid is replaced
+}
+
+void
+ProcessMetricsTracker::Remove(int Pid)
+{
+ m_Impl->Remove(Pid);  // does nothing if Pid is not tracked
+}
+
+void
+ProcessMetricsTracker::Clear()
+{
+ m_Impl->Clear();  // drops every tracked process
+}
+
+std::vector<TrackedProcessEntry>
+ProcessMetricsTracker::GetSnapshot() const
+{
+ return m_Impl->GetSnapshot();  // copy of the latest per-process metrics
+}
+
+AggregateProcessMetrics
+ProcessMetricsTracker::GetAggregate() const
+{
+ return m_Impl->GetAggregate();  // sums over the latest per-process metrics
+}
+
+} // namespace zen
+
+#if ZEN_WITH_TESTS
+
+# include <zencore/testing.h>
+
+using namespace zen;
+
+void
+zen::processmetricstracker_forcelink()  // called from zenutil_forcelinktests()
+{  // intentionally empty; presumably exists only so the linker keeps this translation unit and its test cases — TODO confirm
+}
+
+TEST_SUITE_BEGIN("util.processmetricstracker");
+
+// Tracks the test process itself with the dedicated sampling thread and checks
+// that per-process metrics, CPU usage and the aggregate all get populated.
+TEST_CASE("ProcessMetricsTracker.SelfProcess")
+{
+    ProcessMetricsTracker SelfTracker(100);
+    SelfTracker.Start();
+
+    ProcessHandle Self;
+    Self.Initialize(zen::GetCurrentProcessId());
+    REQUIRE(Self.IsValid());
+
+    const int SelfPid = Self.Pid();
+    SelfTracker.Add(Self);
+
+    // With a 100 ms interval, 350 ms leaves room for more than one sample,
+    // which the delta-based CPU% needs.
+    std::this_thread::sleep_for(std::chrono::milliseconds(350));
+
+    {
+        const std::vector<TrackedProcessEntry> Entries = SelfTracker.GetSnapshot();
+        REQUIRE(Entries.size() == 1);
+
+        const TrackedProcessEntry& Only = Entries.front();
+        CHECK(Only.Pid == SelfPid);
+        CHECK(Only.Metrics.WorkingSetSize > 0);
+        CHECK(Only.Metrics.MemoryBytes > 0);
+        CHECK((Only.Metrics.UserTimeMs + Only.Metrics.KernelTimeMs) > 0);
+        CHECK(Only.CpuUsagePercent >= 0.0f);
+    }
+
+    const AggregateProcessMetrics Totals = SelfTracker.GetAggregate();
+    CHECK(Totals.ProcessCount == 1);
+    CHECK(Totals.TotalWorkingSetSize > 0);
+
+    // Once removed, the process must no longer show up in snapshots.
+    SelfTracker.Remove(SelfPid);
+
+    const std::vector<TrackedProcessEntry> AfterRemove = SelfTracker.GetSnapshot();
+    CHECK(AfterRemove.empty());
+
+    SelfTracker.Stop();
+}
+
+// Same scenario as the thread-based test, but sampling is driven by a
+// steady_timer on an io_context that the test pumps itself.
+TEST_CASE("ProcessMetricsTracker.AsioTimer")
+{
+    asio::io_context Io;
+
+    ProcessMetricsTracker TimerTracker(Io, 100);
+    TimerTracker.Start();
+
+    ProcessHandle Self;
+    Self.Initialize(zen::GetCurrentProcessId());
+    REQUIRE(Self.IsValid());
+
+    TimerTracker.Add(Self);
+
+    // Pump the io_context long enough for the 100 ms timer to fire more than once.
+    Io.run_for(std::chrono::milliseconds(350));
+
+    const std::vector<TrackedProcessEntry> Entries = TimerTracker.GetSnapshot();
+    REQUIRE(Entries.size() == 1);
+
+    const TrackedProcessEntry& Only = Entries.front();
+    CHECK(Only.Metrics.WorkingSetSize > 0);
+    CHECK(Only.CpuUsagePercent >= 0.0f);
+
+    TimerTracker.Stop();
+}
+
+TEST_SUITE_END();
+
+#endif
diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp
index c4d01554d..032f21c9b 100644
--- a/src/zenutil/zenutil.cpp
+++ b/src/zenutil/zenutil.cpp
@@ -10,6 +10,7 @@
# include <zenutil/config/commandlineoptions.h>
# include <zenutil/rpcrecording.h>
# include <zenutil/splitconsole/logstreamlistener.h>
+# include <zenutil/processmetricstracker.h>
# include <zenutil/wildcard.h>
namespace zen {
@@ -21,6 +22,7 @@ zenutil_forcelinktests()
commandlineoptions_forcelink();
imdscredentials_forcelink();
logstreamlistener_forcelink();
+ processmetricstracker_forcelink();
s3client_forcelink();
sigv4_forcelink();
wildcard_forcelink();