diff options
| author | Stefan Boberg <[email protected]> | 2026-04-20 21:50:41 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-04-20 21:50:41 +0200 |
| commit | 2dfb5da16b97a6c12e01977af5b5188522178a4e (patch) | |
| tree | 428aa0aa8e6079c64438931e0fd4f828c613c94d /src/zenutil | |
| parent | Add CompactString utility type (#990) (diff) | |
| download | archived-zen-2dfb5da16b97a6c12e01977af5b5188522178a4e.tar.xz archived-zen-2dfb5da16b97a6c12e01977af5b5188522178a4e.zip | |
zen trace analysis support (#945)
Integrates the **tourist** trace analysis library and builds a full `zen trace` command suite for working with Unreal Engine `.utrace` files.
### Trace analysis library (`thirdparty/tourist/`)
- Adds the tourist library as a third-party dependency with three modules: **foundation** (platform primitives, memory, scheduling), **trace** (UE Trace protocol decoding), and **analysis** (event dispatching and analyzer framework).
- Cross-platform support for Windows, Linux, and macOS.
### `zen trace` CLI commands (`src/zen/cmds/`, `src/zen/trace/`)
- **`zen trace analyze`** — Summarize a `.utrace` file: session metadata, thread inventory, command line + build configuration, CPU profiling scopes, timing, event rates, log messages, and (with symbols) memory allocation metrics including live-allocs dumps, callstack-keyed aggregation, and allocation churn. Optional HTML output for memory reports.
- **`zen trace inspect`** — Dump the event schema (declared types, fields, sizes) from a trace file.
- **`zen trace trim`** — Extract a time-window from a trace into a new `.utrace` file.
- **`zen trace serve`** — Launch a local HTTP server hosting an interactive trace viewer; opens in the default browser.
### Symbolication (`src/zen/trace/symbol_resolver.*`, `thirdparty/raw_pdb/`)
- Pluggable resolver with multiple backends: `pdb` (in-tree raw_pdb), `dbghelp` (Windows), `llvm-symbolizer` (all platforms), `atos` (macOS). An `auto` backend picks the best available tool per platform.
- Microsoft Symbol Server support: downloads PDBs on demand using a redirect-aware HTTP client.
- Local PDB cache keyed by image GUID preserves symbols across binary recompilation.
- Callstack trimming heuristic strips UE internal noise from reports.
- Binary analysis cache (`.ucache_z`) avoids re-resolving the same trace.
### Interactive trace viewer (`src/zen/frontend/html/`, `src/zen/trace/trace_viewer_service.*`)
- Timeline: scope-level detail, horizontal zoom/pan, vertical scrolling, viewport-driven loading with pre-computed LOD for responsive navigation of large traces.
- Thread grouping (collapsible sidebar sections) synthesized from name suffixes, natural sort order, visual distinction between lane threads and OS threads.
- Bookmark and region annotations; region categories with per-category toggles; bookmark marker toggle in the toolbar.
- Filterable Logs tab showing captured `UE_LOG` output.
- Stats tab with per-scope aggregate statistics.
- Memory tab with interactive allocation analysis and an allocation size histogram.
- CsvProfiler event parsing and chart UI.
### Other in-branch supporting changes
- **Cross-platform browser launcher** (`browser_launcher.{h,cpp}`) used by `trace serve`.
- **`ReciprocalU64`** fast 64-bit integer division (zencore/intmath) for trace analyzers.
- **`parallelsort`** cross-platform parallel sort helper (zenutil).
- Frontend zip build rule so the viewer's HTML assets are bundled into `zen.exe`.
- `/Zo` flag for better optimized debug info on Windows release builds.
- `trace-tests.cpp` in the `zen-test` harness (harness itself landed on main via #985).
Diffstat (limited to 'src/zenutil')
| -rw-r--r-- | src/zenutil/include/zenutil/parallelsort.h | 119 | ||||
| -rw-r--r-- | src/zenutil/parallelsort.cpp | 148 | ||||
| -rw-r--r-- | src/zenutil/xmake.lua | 3 | ||||
| -rw-r--r-- | src/zenutil/zenutil.cpp | 2 |
4 files changed, 272 insertions, 0 deletions
diff --git a/src/zenutil/include/zenutil/parallelsort.h b/src/zenutil/include/zenutil/parallelsort.h new file mode 100644 index 000000000..ed455ce9d --- /dev/null +++ b/src/zenutil/include/zenutil/parallelsort.h @@ -0,0 +1,119 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/scopeguard.h> +#include <zencore/thread.h> +#include <zencore/workthreadpool.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/sort.h> +#include <EASTL/vector.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <algorithm> + +namespace zen { + +// Bottom-up parallel merge sort using WorkerThreadPool + Latch. +// +// Splits the range into chunks, sorts each chunk in parallel via +// eastl::sort, then iteratively merges adjacent pairs in parallel +// using std::inplace_merge until a single sorted range remains. +// +// Falls back to eastl::sort for ranges below kMinParallelSortSize. +template<typename RandomIt, typename Compare> +void +ParallelSort(WorkerThreadPool& Pool, RandomIt First, RandomIt Last, Compare Comp) +{ + constexpr size_t kMinParallelSortSize = 65536; + constexpr size_t kMinChunkSize = 65536; + constexpr size_t kMaxChunks = 64; + + size_t Count = size_t(Last - First); + if (Count <= kMinParallelSortSize) + { + eastl::sort(First, Last, Comp); + return; + } + + // Determine chunk count: enough to saturate workers, but not so many + // that scheduling overhead dominates. + size_t ChunkCount = (Count + kMinChunkSize - 1) / kMinChunkSize; + if (ChunkCount > kMaxChunks) + { + ChunkCount = kMaxChunks; + } + if (ChunkCount < 2) + { + ChunkCount = 2; + } + + // Compute chunk boundaries. + eastl::vector<RandomIt> Boundaries; + Boundaries.reserve(ChunkCount + 1); + size_t ChunkSize = Count / ChunkCount; + for (size_t I = 0; I < ChunkCount; ++I) + { + Boundaries.push_back(First + ptrdiff_t(I * ChunkSize)); + } + Boundaries.push_back(Last); + + // Phase 1: Sort each chunk in parallel. + { + Latch Done(1); + for (size_t I = 0; I < ChunkCount; ++I) + { + Done.AddCount(1); + Pool.ScheduleWork( + [&Done, Begin = Boundaries[I], End = Boundaries[I + 1], &Comp]() { + auto Guard = MakeGuard([&Done]() { Done.CountDown(); }); + eastl::sort(Begin, End, Comp); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + Done.CountDown(); + Done.Wait(); + } + + // Phase 2: Pairwise merge rounds until a single sorted range remains. + // Each round merges non-overlapping adjacent pairs in parallel, then + // compacts the boundary list. An odd trailing chunk is carried forward. + while (ChunkCount > 1) + { + size_t Pairs = ChunkCount / 2; + + { + Latch Done(1); + for (size_t I = 0; I < Pairs; ++I) + { + size_t Left = 2 * I; + Done.AddCount(1); + Pool.ScheduleWork( + [&Done, F = Boundaries[Left], M = Boundaries[Left + 1], L = Boundaries[Left + 2], &Comp]() { + auto Guard = MakeGuard([&Done]() { Done.CountDown(); }); + std::inplace_merge(F, M, L, Comp); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + Done.CountDown(); + Done.Wait(); + } + + // Compact boundaries: merged pairs collapse, odd chunk carried forward. + eastl::vector<RandomIt> NewBounds; + NewBounds.reserve((ChunkCount + 1) / 2 + 1); + for (size_t I = 0; I < ChunkCount; I += 2) + { + NewBounds.push_back(Boundaries[I]); + } + NewBounds.push_back(Last); + + ChunkCount = NewBounds.size() - 1; + Boundaries = eastl::move(NewBounds); + } +} + +void parallelsort_forcelink(); // internal + +} // namespace zen diff --git a/src/zenutil/parallelsort.cpp b/src/zenutil/parallelsort.cpp new file mode 100644 index 000000000..8a9f547bc --- /dev/null +++ b/src/zenutil/parallelsort.cpp @@ -0,0 +1,148 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/parallelsort.h> + +#include <zencore/testing.h> + +namespace zen { + +#if ZEN_WITH_TESTS + +void +parallelsort_forcelink() +{ +} + +TEST_SUITE_BEGIN("util.parallelsort"); + +TEST_CASE("empty range") +{ + WorkerThreadPool Pool(2); + eastl::vector<int> Vec; + ParallelSort(Pool, Vec.begin(), Vec.end(), [](int A, int B) { return A < B; }); + CHECK(Vec.empty()); +} + +TEST_CASE("single element") +{ + WorkerThreadPool Pool(2); + eastl::vector<int> Vec = {42}; + ParallelSort(Pool, Vec.begin(), Vec.end(), [](int A, int B) { return A < B; }); + CHECK(Vec.size() == 1); + CHECK(Vec[0] == 42); +} + +TEST_CASE("small array below threshold") +{ + WorkerThreadPool Pool(2); + eastl::vector<int> Vec = {5, 3, 8, 1, 9, 2, 7, 4, 6, 0}; + ParallelSort(Pool, Vec.begin(), Vec.end(), [](int A, int B) { return A < B; }); + for (size_t I = 0; I < Vec.size(); ++I) + { + CHECK(Vec[I] == int(I)); + } +} + +TEST_CASE("large array triggers parallel path") +{ + WorkerThreadPool Pool(4); + + // 200K elements — well above the 64K threshold. + constexpr size_t N = 200'000; + eastl::vector<uint32_t> Vec(N); + + // Fill with descending values. + for (size_t I = 0; I < N; ++I) + { + Vec[I] = uint32_t(N - 1 - I); + } + + ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; }); + + for (size_t I = 0; I < N; ++I) + { + CHECK_MESSAGE(Vec[I] == uint32_t(I), "index=", I, " got=", Vec[I]); + } +} + +TEST_CASE("already sorted") +{ + WorkerThreadPool Pool(4); + + constexpr size_t N = 200'000; + eastl::vector<uint32_t> Vec(N); + for (size_t I = 0; I < N; ++I) + { + Vec[I] = uint32_t(I); + } + + ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; }); + + for (size_t I = 0; I < N; ++I) + { + CHECK_MESSAGE(Vec[I] == uint32_t(I), "index=", I, " got=", Vec[I]); + } +} + +TEST_CASE("reverse sorted") +{ + WorkerThreadPool Pool(4); + + constexpr size_t N = 200'000; + eastl::vector<uint32_t> Vec(N); + for (size_t I = 0; I < N; ++I) + { + Vec[I] = uint32_t(N - 1 - I); + } + + ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; }); + + for (size_t I = 0; I < N; ++I) + { + CHECK_MESSAGE(Vec[I] == uint32_t(I), "index=", I, " got=", Vec[I]); + } +} + +TEST_CASE("duplicate keys") +{ + WorkerThreadPool Pool(4); + + constexpr size_t N = 200'000; + eastl::vector<uint32_t> Vec(N); + for (size_t I = 0; I < N; ++I) + { + Vec[I] = uint32_t(I % 100); // only 100 distinct values + } + + ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A < B; }); + + for (size_t I = 1; I < N; ++I) + { + CHECK_MESSAGE(Vec[I - 1] <= Vec[I], "not sorted at index=", I); + } +} + +TEST_CASE("custom comparator descending") +{ + WorkerThreadPool Pool(4); + + constexpr size_t N = 200'000; + eastl::vector<uint32_t> Vec(N); + for (size_t I = 0; I < N; ++I) + { + Vec[I] = uint32_t(I); + } + + ParallelSort(Pool, Vec.begin(), Vec.end(), [](uint32_t A, uint32_t B) { return A > B; }); + + for (size_t I = 0; I < N; ++I) + { + CHECK_MESSAGE(Vec[I] == uint32_t(N - 1 - I), "index=", I, " got=", Vec[I]); + } +} + +TEST_SUITE_END(); + +#endif + +} // namespace zen diff --git a/src/zenutil/xmake.lua b/src/zenutil/xmake.lua index 83a6b7f93..e28f6e345 100644 --- a/src/zenutil/xmake.lua +++ b/src/zenutil/xmake.lua @@ -9,6 +9,9 @@ target('zenutil') add_deps("zencore", "zenhttp") add_deps("cxxopts") add_deps("robin-map") + if is_plat("linux", "macosx") then + add_syslinks("dl") + end add_packages("json11") if is_plat("linux", "macosx") then diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index d2b8258eb..b9617b1ed 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -11,6 +11,7 @@ # include <zenutil/config/commandlineoptions.h> # include <zenutil/filesystemutils.h> # include <zenutil/invocationhistory.h> +# include <zenutil/parallelsort.h> # include <zenutil/rpcrecording.h> # include <zenutil/splitconsole/logstreamlistener.h> # include <zenutil/process/subprocessmanager.h> @@ -28,6 +29,7 @@ zenutil_forcelinktests() filesystemutils_forcelink(); imdscredentials_forcelink(); invocationhistory_forcelink(); + parallelsort_forcelink(); logstreamlistener_forcelink(); subprocessmanager_forcelink(); s3client_forcelink(); |