diff options
| author | Stefan Boberg <[email protected]> | 2026-03-23 19:22:08 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-03-23 19:22:08 +0100 |
| commit | 440ef03df8d8bba4432126f36168c1f7631c18dc (patch) | |
| tree | 07d4bd4446a11589c9a842255bf37c25aaded74b /src/zenutil | |
| parent | Merge branch 'de/v5.7.25-hotpatch' (#880) (diff) | |
| download | zen-440ef03df8d8bba4432126f36168c1f7631c18dc.tar.xz zen-440ef03df8d8bba4432126f36168c1f7631c18dc.zip | |
Cross-platform process metrics support (#887)
- **Cross-platform `GetProcessMetrics`**: Implement Linux (`/proc/{pid}/stat`, `/proc/{pid}/statm`, `/proc/{pid}/status`) and macOS (`proc_pidinfo(PROC_PIDTASKINFO)`) support for CPU times and memory metrics. Fix Windows to populate the `MemoryBytes` field (was always 0). All platforms now set `MemoryBytes = WorkingSetSize`.
- **`ProcessMetricsTracker`**: Experimental utility class (`zenutil`) that periodically samples resource usage for a set of tracked child processes. Supports both a dedicated background thread and an ASIO steady_timer mode. Computes delta-based CPU usage percentage across samples, with batched sampling (8 processes per tick) to limit per-cycle overhead.
- **`ProcessHandle` documentation**: Add Doxygen comments to all public methods describing platform-specific behavior.
- **Cleanup**: Remove unused `ZEN_RUN_TESTS` macro (inlined at its single call site in `zenserver/main.cpp`), remove dead `#if 0` thread-shutdown workaround block.
- **Minor fixes**: Use `HttpClientAccessToken` constructor in hordeclient instead of setting private members directly. Log ASIO version at startup and include it in the server settings list.
Diffstat (limited to 'src/zenutil')
| -rw-r--r-- | src/zenutil/include/zenutil/processmetricstracker.h | 105 | ||||
| -rw-r--r-- | src/zenutil/processmetricstracker.cpp | 392 | ||||
| -rw-r--r-- | src/zenutil/zenutil.cpp | 2 |
3 files changed, 499 insertions, 0 deletions
diff --git a/src/zenutil/include/zenutil/processmetricstracker.h b/src/zenutil/include/zenutil/processmetricstracker.h new file mode 100644 index 000000000..fdeae2bfa --- /dev/null +++ b/src/zenutil/include/zenutil/processmetricstracker.h @@ -0,0 +1,105 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/process.h> +#include <zencore/zencore.h> + +#include <memory> +#include <vector> + +namespace asio { +class io_context; +} + +namespace zen { + +/** Tracked process entry with latest metrics snapshot. + */ +struct TrackedProcessEntry +{ + int Pid = 0; + ProcessMetrics Metrics; + + // Derived CPU usage percentage (delta-based, requires two samples). + // -1.0 means not yet sampled. + float CpuUsagePercent = -1.0f; +}; + +/** Aggregate metrics across all tracked processes. + */ +struct AggregateProcessMetrics +{ + uint64_t TotalWorkingSetSize = 0; + uint64_t TotalPeakWorkingSetSize = 0; + uint64_t TotalUserTimeMs = 0; + uint64_t TotalKernelTimeMs = 0; + uint32_t ProcessCount = 0; +}; + +/** Background process metrics tracker. + * + * Maintains a set of child processes keyed by pid and periodically samples + * their resource usage (CPU times, memory) in a background thread or via + * an ASIO timer on an external io_context. + * + * The tracker does not take ownership of process handles. On Windows it + * duplicates the handle internally; on other platforms it uses the pid + * directly. + * + * Usage (dedicated thread): + * ProcessMetricsTracker Tracker; + * Tracker.Start(); + * Tracker.Add(ChildHandle); + * + * Usage (ASIO timer): + * ProcessMetricsTracker Tracker(IoContext); + * Tracker.Start(); + * Tracker.Add(ChildHandle); + */ +class ProcessMetricsTracker +{ +public: + /// Construct with a dedicated background thread for sampling. + explicit ProcessMetricsTracker(uint64_t SampleIntervalMs = 5000); + + /// Construct with an external io_context — uses an asio::steady_timer + /// instead of a dedicated thread. The caller must ensure the io_context + /// outlives this tracker and that its run loop is active. + ProcessMetricsTracker(asio::io_context& IoContext, uint64_t SampleIntervalMs = 5000); + + ~ProcessMetricsTracker(); + + ProcessMetricsTracker(const ProcessMetricsTracker&) = delete; + ProcessMetricsTracker& operator=(const ProcessMetricsTracker&) = delete; + + /// Start sampling. Spawns the background thread or enqueues the first timer. + void Start(); + + /// Stop sampling. Safe to call multiple times. + void Stop(); + + /// Add a process to track. Internally clones the handle (Windows) or + /// copies the pid (Linux/macOS). If the pid is already tracked, replaces it. + void Add(const ProcessHandle& Handle); + + /// Remove a tracked process by pid. + void Remove(int Pid); + + /// Remove all tracked processes. + void Clear(); + + /// Returns a snapshot of metrics for all tracked processes. + std::vector<TrackedProcessEntry> GetSnapshot() const; + + /// Returns aggregate metrics across all tracked processes. + AggregateProcessMetrics GetAggregate() const; + +private: + struct Impl; + std::unique_ptr<Impl> m_Impl; +}; + +void processmetricstracker_forcelink(); // internal + +} // namespace zen diff --git a/src/zenutil/processmetricstracker.cpp b/src/zenutil/processmetricstracker.cpp new file mode 100644 index 000000000..555d0ae1a --- /dev/null +++ b/src/zenutil/processmetricstracker.cpp @@ -0,0 +1,392 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/processmetricstracker.h> + +#include <zencore/thread.h> +#include <zencore/timer.h> + +#include <algorithm> +#include <thread> +#include <unordered_map> +#include <vector> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <asio/io_context.hpp> +#include <asio/steady_timer.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +struct ProcessMetricsTracker::Impl +{ + static constexpr size_t kBatchSize = 8; + + struct Entry + { + ProcessHandle Handle; + ProcessMetrics LastMetrics; + float CpuUsagePercent = -1.0f; + + uint64_t PrevUserTimeMs = 0; + uint64_t PrevKernelTimeMs = 0; + uint64_t PrevSampleTicks = 0; + }; + + uint64_t m_SampleIntervalMs; + + mutable RwLock m_Lock; + std::unordered_map<int, Entry> m_Entries; + size_t m_NextSampleIndex = 0; + std::vector<int> m_KeyOrder; + + std::atomic<bool> m_Running{false}; + + // Thread-based sampling + std::thread m_Thread; + Event m_StopEvent; + + // Timer-based sampling + std::unique_ptr<asio::steady_timer> m_Timer; + + explicit Impl(uint64_t SampleIntervalMs) : m_SampleIntervalMs(SampleIntervalMs) {} + + Impl(asio::io_context& IoContext, uint64_t SampleIntervalMs) + : m_SampleIntervalMs(SampleIntervalMs) + , m_Timer(std::make_unique<asio::steady_timer>(IoContext)) + { + } + + ~Impl() { Stop(); } + + void Start() + { + if (m_Running.exchange(true)) + { + return; + } + + if (m_Timer) + { + EnqueueTimer(); + } + else + { + m_Thread = std::thread([this] { SamplingLoop(); }); + } + } + + void Stop() + { + if (!m_Running.exchange(false)) + { + return; + } + + if (m_Timer) + { + m_Timer->cancel(); + } + + if (m_Thread.joinable()) + { + m_StopEvent.Set(); + m_Thread.join(); + } + } + + void Add(const ProcessHandle& Handle) + { + int Pid = Handle.Pid(); + + RwLock::ExclusiveLockScope $(m_Lock); + + auto It = m_Entries.find(Pid); + if (It != m_Entries.end()) + { + m_Entries.erase(It); + } + else + { + m_KeyOrder.push_back(Pid); + } + + auto [NewIt, Inserted] = m_Entries.try_emplace(Pid); + NewIt->second.Handle.Initialize(Pid); + } + + void Remove(int Pid) + { + RwLock::ExclusiveLockScope $(m_Lock); + m_Entries.erase(Pid); + m_KeyOrder.erase(std::remove(m_KeyOrder.begin(), m_KeyOrder.end(), Pid), m_KeyOrder.end()); + + if (m_NextSampleIndex >= m_KeyOrder.size()) + { + m_NextSampleIndex = 0; + } + } + + void Clear() + { + RwLock::ExclusiveLockScope $(m_Lock); + m_Entries.clear(); + m_KeyOrder.clear(); + m_NextSampleIndex = 0; + } + + std::vector<TrackedProcessEntry> GetSnapshot() const + { + std::vector<TrackedProcessEntry> Result; + + RwLock::SharedLockScope $(m_Lock); + Result.reserve(m_Entries.size()); + + for (const auto& [Pid, E] : m_Entries) + { + TrackedProcessEntry Out; + Out.Pid = Pid; + Out.Metrics = E.LastMetrics; + Out.CpuUsagePercent = E.CpuUsagePercent; + Result.push_back(std::move(Out)); + } + + return Result; + } + + AggregateProcessMetrics GetAggregate() const + { + AggregateProcessMetrics Agg; + + RwLock::SharedLockScope $(m_Lock); + + for (const auto& [Pid, E] : m_Entries) + { + Agg.TotalWorkingSetSize += E.LastMetrics.WorkingSetSize; + Agg.TotalPeakWorkingSetSize += E.LastMetrics.PeakWorkingSetSize; + Agg.TotalUserTimeMs += E.LastMetrics.UserTimeMs; + Agg.TotalKernelTimeMs += E.LastMetrics.KernelTimeMs; + Agg.ProcessCount++; + } + + return Agg; + } + + void SampleBatch() + { + RwLock::SharedLockScope $(m_Lock); + + if (m_KeyOrder.empty()) + { + return; + } + + const uint64_t NowTicks = GetHifreqTimerValue(); + size_t Remaining = std::min(kBatchSize, m_KeyOrder.size()); + + while (Remaining > 0) + { + if (m_NextSampleIndex >= m_KeyOrder.size()) + { + m_NextSampleIndex = 0; + } + + int Pid = m_KeyOrder[m_NextSampleIndex]; + auto It = m_Entries.find(Pid); + + if (It == m_Entries.end()) + { + m_NextSampleIndex++; + Remaining--; + continue; + } + + Entry& E = It->second; + + ProcessMetrics Metrics; + GetProcessMetrics(E.Handle, Metrics); + + if (E.PrevSampleTicks > 0) + { + uint64_t ElapsedMs = Stopwatch::GetElapsedTimeMs(NowTicks - E.PrevSampleTicks); + uint64_t DeltaCpuTimeMs = (Metrics.UserTimeMs + Metrics.KernelTimeMs) - (E.PrevUserTimeMs + E.PrevKernelTimeMs); + if (ElapsedMs > 0) + { + E.CpuUsagePercent = static_cast<float>(static_cast<double>(DeltaCpuTimeMs) / ElapsedMs * 100.0); + } + } + + E.PrevUserTimeMs = Metrics.UserTimeMs; + E.PrevKernelTimeMs = Metrics.KernelTimeMs; + E.PrevSampleTicks = NowTicks; + E.LastMetrics = Metrics; + + m_NextSampleIndex++; + Remaining--; + } + } + + void SamplingLoop() + { + while (!m_StopEvent.Wait(static_cast<int>(m_SampleIntervalMs))) + { + if (!m_Running.load()) + { + return; + } + + SampleBatch(); + } + } + + void EnqueueTimer() + { + if (!m_Timer || !m_Running.load()) + { + return; + } + + m_Timer->expires_after(std::chrono::milliseconds(m_SampleIntervalMs)); + m_Timer->async_wait([this](const asio::error_code& Ec) { + if (Ec || !m_Running.load()) + { + return; + } + + SampleBatch(); + EnqueueTimer(); + }); + } +}; + +////////////////////////////////////////////////////////////////////////// + +ProcessMetricsTracker::ProcessMetricsTracker(uint64_t SampleIntervalMs) : m_Impl(std::make_unique<Impl>(SampleIntervalMs)) +{ +} + +ProcessMetricsTracker::ProcessMetricsTracker(asio::io_context& IoContext, uint64_t SampleIntervalMs) +: m_Impl(std::make_unique<Impl>(IoContext, SampleIntervalMs)) +{ +} + +ProcessMetricsTracker::~ProcessMetricsTracker() = default; + +void +ProcessMetricsTracker::Start() +{ + m_Impl->Start(); +} + +void +ProcessMetricsTracker::Stop() +{ + m_Impl->Stop(); +} + +void +ProcessMetricsTracker::Add(const ProcessHandle& Handle) +{ + m_Impl->Add(Handle); +} + +void +ProcessMetricsTracker::Remove(int Pid) +{ + m_Impl->Remove(Pid); +} + +void +ProcessMetricsTracker::Clear() +{ + m_Impl->Clear(); +} + +std::vector<TrackedProcessEntry> +ProcessMetricsTracker::GetSnapshot() const +{ + return m_Impl->GetSnapshot(); +} + +AggregateProcessMetrics +ProcessMetricsTracker::GetAggregate() const +{ + return m_Impl->GetAggregate(); +} + +} // namespace zen + +#if ZEN_WITH_TESTS + +# include <zencore/testing.h> + +using namespace zen; + +void +zen::processmetricstracker_forcelink() +{ +} + +TEST_SUITE_BEGIN("util.processmetricstracker"); + +TEST_CASE("ProcessMetricsTracker.SelfProcess") +{ + ProcessMetricsTracker Tracker(100); + Tracker.Start(); + + ProcessHandle Handle; + Handle.Initialize(zen::GetCurrentProcessId()); + REQUIRE(Handle.IsValid()); + + int Pid = Handle.Pid(); + Tracker.Add(Handle); + + // Wait for at least two samples so CPU% is computed + std::this_thread::sleep_for(std::chrono::milliseconds(350)); + + auto Snapshot = Tracker.GetSnapshot(); + REQUIRE(Snapshot.size() == 1); + CHECK(Snapshot[0].Pid == Pid); + CHECK(Snapshot[0].Metrics.WorkingSetSize > 0); + CHECK(Snapshot[0].Metrics.MemoryBytes > 0); + CHECK((Snapshot[0].Metrics.UserTimeMs + Snapshot[0].Metrics.KernelTimeMs) > 0); + CHECK(Snapshot[0].CpuUsagePercent >= 0.0f); + + auto Agg = Tracker.GetAggregate(); + CHECK(Agg.ProcessCount == 1); + CHECK(Agg.TotalWorkingSetSize > 0); + + Tracker.Remove(Pid); + + Snapshot = Tracker.GetSnapshot(); + CHECK(Snapshot.empty()); + + Tracker.Stop(); +} + +TEST_CASE("ProcessMetricsTracker.AsioTimer") +{ + asio::io_context IoContext; + + ProcessMetricsTracker Tracker(IoContext, 100); + Tracker.Start(); + + ProcessHandle Handle; + Handle.Initialize(zen::GetCurrentProcessId()); + REQUIRE(Handle.IsValid()); + + Tracker.Add(Handle); + + // Run the io_context for enough time to get two samples + IoContext.run_for(std::chrono::milliseconds(350)); + + auto Snapshot = Tracker.GetSnapshot(); + REQUIRE(Snapshot.size() == 1); + CHECK(Snapshot[0].Metrics.WorkingSetSize > 0); + CHECK(Snapshot[0].CpuUsagePercent >= 0.0f); + + Tracker.Stop(); +} + +TEST_SUITE_END(); + +#endif diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index c4d01554d..032f21c9b 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -10,6 +10,7 @@ # include <zenutil/config/commandlineoptions.h> # include <zenutil/rpcrecording.h> # include <zenutil/splitconsole/logstreamlistener.h> +# include <zenutil/processmetricstracker.h> # include <zenutil/wildcard.h> namespace zen { @@ -21,6 +22,7 @@ zenutil_forcelinktests() commandlineoptions_forcelink(); imdscredentials_forcelink(); logstreamlistener_forcelink(); + processmetricstracker_forcelink(); s3client_forcelink(); sigv4_forcelink(); wildcard_forcelink(); |