aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-04-20 21:50:41 +0200
committerGitHub Enterprise <[email protected]>2026-04-20 21:50:41 +0200
commit2dfb5da16b97a6c12e01977af5b5188522178a4e (patch)
tree428aa0aa8e6079c64438931e0fd4f828c613c94d /src/zenutil
parentAdd CompactString utility type (#990) (diff)
downloadarchived-zen-2dfb5da16b97a6c12e01977af5b5188522178a4e.tar.xz
archived-zen-2dfb5da16b97a6c12e01977af5b5188522178a4e.zip
zen trace analysis support (#945)
Integrates the **tourist** trace analysis library and builds a full `zen trace` command suite for working with Unreal Engine `.utrace` files. ### Trace analysis library (`thirdparty/tourist/`) - Adds the tourist library as a third-party dependency with three modules: **foundation** (platform primitives, memory, scheduling), **trace** (UE Trace protocol decoding), and **analysis** (event dispatching and analyzer framework). - Cross-platform support for Windows, Linux, and macOS. ### `zen trace` CLI commands (`src/zen/cmds/`, `src/zen/trace/`) - **`zen trace analyze`** — Summarize a `.utrace` file: session metadata, thread inventory, command line + build configuration, CPU profiling scopes, timing, event rates, log messages, and (with symbols) memory allocation metrics including live-allocs dumps, callstack-keyed aggregation, and allocation churn. Optional HTML output for memory reports. - **`zen trace inspect`** — Dump the event schema (declared types, fields, sizes) from a trace file. - **`zen trace trim`** — Extract a time-window from a trace into a new `.utrace` file. - **`zen trace serve`** — Launch a local HTTP server hosting an interactive trace viewer; opens in the default browser. ### Symbolication (`src/zen/trace/symbol_resolver.*`, `thirdparty/raw_pdb/`) - Pluggable resolver with multiple backends: `pdb` (in-tree raw_pdb), `dbghelp` (Windows), `llvm-symbolizer` (all platforms), `atos` (macOS). An `auto` backend picks the best available tool per platform. - Microsoft Symbol Server support: downloads PDBs on demand using a redirect-aware HTTP client. - Local PDB cache keyed by image GUID preserves symbols across binary recompilation. - Callstack trimming heuristic strips UE internal noise from reports. - Binary analysis cache (`.ucache_z`) avoids re-resolving the same trace. ### Interactive trace viewer (`src/zen/frontend/html/`, `src/zen/trace/trace_viewer_service.*`) - Timeline: scope-level detail, horizontal zoom/pan, vertical scrolling, viewport-driven loading with pre-computed LOD for responsive navigation of large traces. - Thread grouping (collapsible sidebar sections) synthesized from name suffixes, natural sort order, visual distinction between lane threads and OS threads. - Bookmark and region annotations; region categories with per-category toggles; bookmark marker toggle in the toolbar. - Filterable Logs tab showing captured `UE_LOG` output. - Stats tab with per-scope aggregate statistics. - Memory tab with interactive allocation analysis and an allocation size histogram. - CsvProfiler event parsing and chart UI. ### Other in-branch supporting changes - **Cross-platform browser launcher** (`browser_launcher.{h,cpp}`) used by `trace serve`. - **`ReciprocalU64`** fast 64-bit integer division (zencore/intmath) for trace analyzers. - **`parallelsort`** cross-platform parallel sort helper (zenutil). - Frontend zip build rule so the viewer's HTML assets are bundled into `zen.exe`. - `/Zo` flag for better optimized debug info on Windows release builds. - `trace-tests.cpp` in the `zen-test` harness (harness itself landed on main via #985).
Diffstat (limited to 'src/zenutil')
-rw-r--r--src/zenutil/include/zenutil/parallelsort.h119
-rw-r--r--src/zenutil/parallelsort.cpp148
-rw-r--r--src/zenutil/xmake.lua3
-rw-r--r--src/zenutil/zenutil.cpp2
4 files changed, 272 insertions, 0 deletions
diff --git a/src/zenutil/include/zenutil/parallelsort.h b/src/zenutil/include/zenutil/parallelsort.h
new file mode 100644
index 000000000..ed455ce9d
--- /dev/null
+++ b/src/zenutil/include/zenutil/parallelsort.h
@@ -0,0 +1,119 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/scopeguard.h>
+#include <zencore/thread.h>
+#include <zencore/workthreadpool.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <EASTL/sort.h>
+#include <EASTL/vector.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#include <algorithm>
+
+namespace zen {
+
+// Bottom-up parallel merge sort using WorkerThreadPool + Latch.
+//
+// Splits the range into chunks, sorts each chunk in parallel via
+// eastl::sort, then iteratively merges adjacent pairs in parallel
+// using std::inplace_merge until a single sorted range remains.
+//
+// Falls back to eastl::sort for ranges below kMinParallelSortSize.
+template<typename RandomIt, typename Compare>
+void
+ParallelSort(WorkerThreadPool& Pool, RandomIt First, RandomIt Last, Compare Comp)
+{
+ constexpr size_t kMinParallelSortSize = 65536;
+ constexpr size_t kMinChunkSize = 65536;
+ constexpr size_t kMaxChunks = 64;
+
+ size_t Count = size_t(Last - First);
+ if (Count <= kMinParallelSortSize)
+ {
+ eastl::sort(First, Last, Comp);
+ return;
+ }
+
+ // Determine chunk count: enough to saturate workers, but not so many
+ // that scheduling overhead dominates.
+ size_t ChunkCount = (Count + kMinChunkSize - 1) / kMinChunkSize;
+ if (ChunkCount > kMaxChunks)
+ {
+ ChunkCount = kMaxChunks;
+ }
+ if (ChunkCount < 2)
+ {
+ ChunkCount = 2;
+ }
+
+ // Compute chunk boundaries.
+ eastl::vector<RandomIt> Boundaries;
+ Boundaries.reserve(ChunkCount + 1);
+ size_t ChunkSize = Count / ChunkCount;
+ for (size_t I = 0; I < ChunkCount; ++I)
+ {
+ Boundaries.push_back(First + ptrdiff_t(I * ChunkSize));
+ }
+ Boundaries.push_back(Last);
+
+ // Phase 1: Sort each chunk in parallel.
+ {
+ Latch Done(1);
+ for (size_t I = 0; I < ChunkCount; ++I)
+ {
+ Done.AddCount(1);
+ Pool.ScheduleWork(
+ [&Done, Begin = Boundaries[I], End = Boundaries[I + 1], &Comp]() {
+ auto Guard = MakeGuard([&Done]() { Done.CountDown(); });
+ eastl::sort(Begin, End, Comp);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ Done.CountDown();
+ Done.Wait();
+ }
+
+ // Phase 2: Pairwise merge rounds until a single sorted range remains.
+ // Each round merges non-overlapping adjacent pairs in parallel, then
+ // compacts the boundary list. An odd trailing chunk is carried forward.
+ while (ChunkCount > 1)
+ {
+ size_t Pairs = ChunkCount / 2;
+
+ {
+ Latch Done(1);
+ for (size_t I = 0; I < Pairs; ++I)
+ {
+ size_t Left = 2 * I;
+ Done.AddCount(1);
+ Pool.ScheduleWork(
+ [&Done, F = Boundaries[Left], M = Boundaries[Left + 1], L = Boundaries[Left + 2], &Comp]() {
+ auto Guard = MakeGuard([&Done]() { Done.CountDown(); });
+ std::inplace_merge(F, M, L, Comp);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ Done.CountDown();
+ Done.Wait();
+ }
+
+ // Compact boundaries: merged pairs collapse, odd chunk carried forward.
+ eastl::vector<RandomIt> NewBounds;
+ NewBounds.reserve((ChunkCount + 1) / 2 + 1);
+ for (size_t I = 0; I < ChunkCount; I += 2)
+ {
+ NewBounds.push_back(Boundaries[I]);
+ }
+ NewBounds.push_back(Last);
+
+ ChunkCount = NewBounds.size() - 1;
+ Boundaries = eastl::move(NewBounds);
+ }
+}
+
+void parallelsort_forcelink(); // internal
+
+} // namespace zen
diff --git a/src/zenutil/parallelsort.cpp b/src/zenutil/parallelsort.cpp
new file mode 100644
index 000000000..8a9f547bc
--- /dev/null
+++ b/src/zenutil/parallelsort.cpp
@@ -0,0 +1,148 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenutil/parallelsort.h>
+
+#include <zencore/testing.h>
+
+namespace zen {
+
+#if ZEN_WITH_TESTS
+
+void
+parallelsort_forcelink()
+{
+}
+
+TEST_SUITE_BEGIN("util.parallelsort");
+
+TEST_CASE("empty range")
+{
+ WorkerThreadPool Pool(2);
+ eastl::vector<int> Vec;
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](int A, int B) { return A < B; });
+ CHECK(Vec.empty());
+}
+
+TEST_CASE("single element")
+{
+ WorkerThreadPool Pool(2);
+ eastl::vector<int> Vec = {42};
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](int A, int B) { return A < B; });
+ CHECK(Vec.size() == 1);
+ CHECK(Vec[0] == 42);
+}
+
+TEST_CASE("small array below threshold")
+{
+ WorkerThreadPool Pool(2);
+ eastl::vector<int> Vec = {5, 3, 8, 1, 9, 2, 7, 4, 6, 0};
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](int A, int B) { return A < B; });
+ for (size_t I = 0; I < Vec.size(); ++I)
+ {
+ CHECK(Vec[I] == int(I));
+ }
+}
+
+TEST_CASE("large array triggers parallel path")
+{
+ WorkerThreadPool Pool(4);
+
+ // 200K elements — well above the 64K threshold.
+ constexpr size_t N = 200'000;
+ eastl::vector<uint32_t> Vec(N);
+
+ // Fill with descending values.
+ for (size_t I = 0; I < N; ++I)
+ {
+ Vec[I] = uint32_t(N - 1 - I);
+ }
+
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; });
+
+ for (size_t I = 0; I < N; ++I)
+ {
+ CHECK_MESSAGE(Vec[I] == uint32_t(I), "index=", I, " got=", Vec[I]);
+ }
+}
+
+TEST_CASE("already sorted")
+{
+ WorkerThreadPool Pool(4);
+
+ constexpr size_t N = 200'000;
+ eastl::vector<uint32_t> Vec(N);
+ for (size_t I = 0; I < N; ++I)
+ {
+ Vec[I] = uint32_t(I);
+ }
+
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; });
+
+ for (size_t I = 0; I < N; ++I)
+ {
+ CHECK_MESSAGE(Vec[I] == uint32_t(I), "index=", I, " got=", Vec[I]);
+ }
+}
+
+TEST_CASE("reverse sorted")
+{
+ WorkerThreadPool Pool(4);
+
+ constexpr size_t N = 200'000;
+ eastl::vector<uint32_t> Vec(N);
+ for (size_t I = 0; I < N; ++I)
+ {
+ Vec[I] = uint32_t(N - 1 - I);
+ }
+
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; });
+
+ for (size_t I = 0; I < N; ++I)
+ {
+ CHECK_MESSAGE(Vec[I] == uint32_t(I), "index=", I, " got=", Vec[I]);
+ }
+}
+
+TEST_CASE("duplicate keys")
+{
+ WorkerThreadPool Pool(4);
+
+ constexpr size_t N = 200'000;
+ eastl::vector<uint32_t> Vec(N);
+ for (size_t I = 0; I < N; ++I)
+ {
+ Vec[I] = uint32_t(I % 100); // only 100 distinct values
+ }
+
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; });
+
+ for (size_t I = 1; I < N; ++I)
+ {
+ CHECK_MESSAGE(Vec[I - 1] <= Vec[I], "not sorted at index=", I);
+ }
+}
+
+TEST_CASE("custom comparator descending")
+{
+ WorkerThreadPool Pool(4);
+
+ constexpr size_t N = 200'000;
+ eastl::vector<uint32_t> Vec(N);
+ for (size_t I = 0; I < N; ++I)
+ {
+ Vec[I] = uint32_t(I);
+ }
+
+ ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A > B; });
+
+ for (size_t I = 0; I < N; ++I)
+ {
+ CHECK_MESSAGE(Vec[I] == uint32_t(N - 1 - I), "index=", I, " got=", Vec[I]);
+ }
+}
+
+TEST_SUITE_END();
+
+#endif
+
+} // namespace zen
diff --git a/src/zenutil/xmake.lua b/src/zenutil/xmake.lua
index 83a6b7f93..e28f6e345 100644
--- a/src/zenutil/xmake.lua
+++ b/src/zenutil/xmake.lua
@@ -9,6 +9,9 @@ target('zenutil')
add_deps("zencore", "zenhttp")
add_deps("cxxopts")
add_deps("robin-map")
+ if is_plat("linux", "macosx") then
+ add_syslinks("dl")
+ end
add_packages("json11")
if is_plat("linux", "macosx") then
diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp
index d2b8258eb..b9617b1ed 100644
--- a/src/zenutil/zenutil.cpp
+++ b/src/zenutil/zenutil.cpp
@@ -11,6 +11,7 @@
# include <zenutil/config/commandlineoptions.h>
# include <zenutil/filesystemutils.h>
# include <zenutil/invocationhistory.h>
+# include <zenutil/parallelsort.h>
# include <zenutil/rpcrecording.h>
# include <zenutil/splitconsole/logstreamlistener.h>
# include <zenutil/process/subprocessmanager.h>
@@ -28,6 +29,7 @@ zenutil_forcelinktests()
filesystemutils_forcelink();
imdscredentials_forcelink();
invocationhistory_forcelink();
+ parallelsort_forcelink();
logstreamlistener_forcelink();
subprocessmanager_forcelink();
s3client_forcelink();