From 2dfb5da16b97a6c12e01977af5b5188522178a4e Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 20 Apr 2026 21:50:41 +0200 Subject: zen trace analysis support (#945) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrates the **tourist** trace analysis library and builds a full `zen trace` command suite for working with Unreal Engine `.utrace` files. ### Trace analysis library (`thirdparty/tourist/`) - Adds the tourist library as a third-party dependency with three modules: **foundation** (platform primitives, memory, scheduling), **trace** (UE Trace protocol decoding), and **analysis** (event dispatching and analyzer framework). - Cross-platform support for Windows, Linux, and macOS. ### `zen trace` CLI commands (`src/zen/cmds/`, `src/zen/trace/`) - **`zen trace analyze`** — Summarize a `.utrace` file: session metadata, thread inventory, command line + build configuration, CPU profiling scopes, timing, event rates, log messages, and (with symbols) memory allocation metrics including live-allocs dumps, callstack-keyed aggregation, and allocation churn. Optional HTML output for memory reports. - **`zen trace inspect`** — Dump the event schema (declared types, fields, sizes) from a trace file. - **`zen trace trim`** — Extract a time-window from a trace into a new `.utrace` file. - **`zen trace serve`** — Launch a local HTTP server hosting an interactive trace viewer; opens in the default browser. ### Symbolication (`src/zen/trace/symbol_resolver.*`, `thirdparty/raw_pdb/`) - Pluggable resolver with multiple backends: `pdb` (in-tree raw_pdb), `dbghelp` (Windows), `llvm-symbolizer` (all platforms), `atos` (macOS). An `auto` backend picks the best available tool per platform. - Microsoft Symbol Server support: downloads PDBs on demand using a redirect-aware HTTP client. - Local PDB cache keyed by image GUID preserves symbols across binary recompilation. - Callstack trimming heuristic strips UE internal noise from reports. - Binary analysis cache (`.ucache_z`) avoids re-resolving the same trace. ### Interactive trace viewer (`src/zen/frontend/html/`, `src/zen/trace/trace_viewer_service.*`) - Timeline: scope-level detail, horizontal zoom/pan, vertical scrolling, viewport-driven loading with pre-computed LOD for responsive navigation of large traces. - Thread grouping (collapsible sidebar sections) synthesized from name suffixes, natural sort order, visual distinction between lane threads and OS threads. - Bookmark and region annotations; region categories with per-category toggles; bookmark marker toggle in the toolbar. - Filterable Logs tab showing captured `UE_LOG` output. - Stats tab with per-scope aggregate statistics. - Memory tab with interactive allocation analysis and an allocation size histogram. - CsvProfiler event parsing and chart UI. ### Other in-branch supporting changes - **Cross-platform browser launcher** (`browser_launcher.{h,cpp}`) used by `trace serve`. - **`ReciprocalU64`** fast 64-bit integer division (zencore/intmath) for trace analyzers. - **`parallelsort`** cross-platform parallel sort helper (zenutil). - Frontend zip build rule so the viewer's HTML assets are bundled into `zen.exe`. - `/Zo` flag for better optimized debug info on Windows release builds. - `trace-tests.cpp` in the `zen-test` harness (harness itself landed on main via #985). --- src/zen/trace/trace_analyze.cpp | 812 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 812 insertions(+) create mode 100644 src/zen/trace/trace_analyze.cpp (limited to 'src/zen/trace/trace_analyze.cpp') diff --git a/src/zen/trace/trace_analyze.cpp b/src/zen/trace/trace_analyze.cpp new file mode 100644 index 000000000..ff168cd9c --- /dev/null +++ b/src/zen/trace/trace_analyze.cpp @@ -0,0 +1,812 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "trace_analyze.h" + +#include "callstack_formatter.h" +#include "trace_cache.h" +#include "zen.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +#include +#include +ZEN_THIRD_PARTY_INCLUDES_END + +#include + +namespace { + +using namespace zen::trace_detail; + +static void +AppendHtmlEscaped(zen::StringBuilderBase& Out, std::string_view Text) +{ + for (char Ch : Text) + { + switch (Ch) + { + case '&': + Out << "&"; + break; + case '<': + Out << "<"; + break; + case '>': + Out << ">"; + break; + case '"': + Out << """; + break; + case '\'': + Out << "'"; + break; + default: + Out.Append(Ch); + break; + } + } +} + +static CallstackFilterOptions +BuildCallstackFilterOptions(const AnalyzeOptions& Options) +{ + CallstackFilterOptions Result; + Result.EnableHeuristic = Options.EnableCallstackHeuristic; + Result.SkipPatterns = Options.CallstackSkipPatterns; + return Result; +} + +static std::string +BuildThreadSummary(const TraceModel& Model, const eastl::fixed_vector& ThreadIds) +{ + std::string Result; + for (uint32_t Tid : ThreadIds) + { + if (!Result.empty()) + { + Result += ", "; + } + auto TIt = std::find_if(Model.Threads.begin(), Model.Threads.end(), [Tid](const ThreadInfoEntry& T) { return T.ThreadId == Tid; }); + if (TIt != Model.Threads.end() && !TIt->Name.empty()) + { + Result += TIt->Name; + } + else + { + Result += fmt::format("tid:{}", Tid); + } + } + return Result; +} + +static void +AppendHtmlCallstack(zen::StringBuilderBase& Out, const AnalyzeOptions& Options, CallstackFormatter& Formatter, uint32_t CallstackId) +{ + const CallstackEntry* Entry = Formatter.FindCallstackEntry(CallstackId); + if (Entry == nullptr || Entry->Frames.empty()) + { + Out << "
No callstack frames recorded.
"; + return; + } + + FilteredCallstackView Filtered = Formatter.BuildView(*Entry, BuildCallstackFilterOptions(Options)); + if (Filtered.HiddenPrefixCount > 0) + { + Out << "
Skipped " << uint64_t(Filtered.HiddenPrefixCount) << " leading frame(s)"; + if (Filtered.IncludedThirdPartyBoundary) + { + Out << "; kept boundary third-party callsite"; + } + Out << ".
"; + } + + Out << "
    "; + for (const FilteredCallstackFrame& Frame : Filtered.Frames) + { + Out << "
  1. "; + AppendHtmlEscaped(Out, Frame.Display); + Out << "
  2. "; + } + Out << "
"; +} + +static std::string_view +FindHeapName(const TraceModel& Model, uint32_t HeapId) +{ + for (const HeapInfo& Heap : Model.Heaps) + { + if (Heap.Id == HeapId && !Heap.Name.empty()) + { + return Heap.Name; + } + } + return "unknown"; +} + +static bool +PassesChurnThreshold(const AnalyzeOptions& Options, const CallstackChurnStat& Stat) +{ + return Stat.MeanDistance <= double(Options.ChurnDistanceThreshold); +} + +static uint64_t +CountShownChurnSites(const TraceModel& Model, const AnalyzeOptions& Options, uint64_t Limit = 100) +{ + uint64_t Result = 0; + for (const CallstackChurnStat& Stat : Model.ChurnStats) + { + if (PassesChurnThreshold(Options, Stat) && Result < Limit) + { + ++Result; + } + } + return Result; +} + +class ConsoleAnalyzeWriter +{ +public: + ConsoleAnalyzeWriter(const TraceModel& InModel, + const AnalyzeOptions& InOptions, + const std::filesystem::path& InFilePath, + CallstackFormatter& InFrameFormatter) + : m_Model(InModel) + , m_Options(InOptions) + , m_FilePath(InFilePath) + , m_FrameFormatter(InFrameFormatter) + { + } + + void Write() const + { + AppendSession(); + AppendGeneralSummary(); + AppendEventTypes(); + AppendThreads(); + AppendChannels(); + AppendCpuScopeStats(); + AppendMemorySummary(); + AppendLiveAllocationCallstacks(); + AppendChurnCallstacks(); + } + +private: + void AppendSession() const + { + const SessionInfo& Session = m_Model.Session; + if (!Session.HasSession) + { + return; + } + + ZEN_CONSOLE("Session:"); + if (!Session.Platform.empty()) + { + ZEN_CONSOLE(" Platform: {}", Session.Platform); + } + if (!Session.AppName.empty()) + { + ZEN_CONSOLE(" App: {}", Session.AppName); + } + if (!Session.ProjectName.empty()) + { + ZEN_CONSOLE(" Project: {}", Session.ProjectName); + } + if (!Session.Branch.empty()) + { + ZEN_CONSOLE(" Branch: {}", Session.Branch); + } + if (!Session.BuildVersion.empty()) + { + ZEN_CONSOLE(" Build: {}", Session.BuildVersion); + } + if (Session.ConfigurationType != 0) + { + constexpr const char* kConfigNames[] = {"Unknown", "Debug", "DebugGame", "Development", "Shipping", "Test"}; + uint8_t Idx = Session.ConfigurationType; + const char* Name = (Idx < std::size(kConfigNames)) ? kConfigNames[Idx] : "Unknown"; + ZEN_CONSOLE(" Config: {}", Name); + } + if (Session.Changelist != 0) + { + ZEN_CONSOLE(" CL: {}", Session.Changelist); + } + if (!Session.CommandLine.empty()) + { + ZEN_CONSOLE(" Cmd: {}", Session.CommandLine); + } + ZEN_CONSOLE(""); + } + + void AppendGeneralSummary() const + { + uint64_t DurationUs = (m_Model.TraceEndUs > m_Model.TraceStartUs) ? (m_Model.TraceEndUs - m_Model.TraceStartUs) : 0; + + ZEN_CONSOLE("Trace: {}", m_FilePath); + ZEN_CONSOLE("Size: {}", zen::NiceBytes(m_Model.FileSize)); + ZEN_CONSOLE("Events: {}", zen::ThousandsNum(m_Model.TotalEvents)); + ZEN_CONSOLE("Duration: {}", zen::NiceTimeSpanMs((DurationUs + 500) / 1000)); + ZEN_CONSOLE("Threads: {}", m_Model.Threads.size()); + ZEN_CONSOLE("Modules: {}", m_Model.Modules.size()); + ZEN_CONSOLE("Parsed: {}", zen::NiceTimeSpanMs(m_Model.ParseTimeMs)); + if (m_Model.ParseTimeMs > 0) + { + ZEN_CONSOLE("Rate: {} events/s", zen::ThousandsNum(m_Model.TotalEvents * 1000 / m_Model.ParseTimeMs)); + } + ZEN_CONSOLE(""); + } + + void AppendEventTypes() const + { + if (m_Model.EventTypeCounts.empty()) + { + return; + } + + size_t MaxNameLen = 10; + for (const auto& Entry : m_Model.EventTypeCounts) + { + MaxNameLen = std::max(MaxNameLen, Entry.Name.size()); + } + + ZEN_CONSOLE("{:<{}} {:>14}", "Event Type", MaxNameLen, "Count"); + ZEN_CONSOLE("{:-<{}}", "", MaxNameLen + 16); + for (const auto& Entry : m_Model.EventTypeCounts) + { + ZEN_CONSOLE("{:<{}} {:>14}", Entry.Name, MaxNameLen, zen::ThousandsNum(Entry.Count)); + } + ZEN_CONSOLE(""); + } + + void AppendThreads() const + { + if (m_Model.Threads.empty()) + { + return; + } + + ZEN_CONSOLE("Threads:"); + for (const ThreadInfoEntry& Thread : m_Model.Threads) + { + auto TimelineIt = std::find_if(m_Model.Timelines.begin(), + m_Model.Timelines.end(), + [Tid = Thread.ThreadId](const ThreadTimeline& T) { return T.ThreadId == Tid; }); + uint64_t ScopeCount = (TimelineIt != m_Model.Timelines.end()) ? TimelineIt->Scopes.size() : 0; + + if (!Thread.Name.empty()) + { + ZEN_CONSOLE(" {:>5} {:<32} {} scopes", Thread.ThreadId, Thread.Name, zen::ThousandsNum(ScopeCount)); + } + } + ZEN_CONSOLE(""); + } + + void AppendChannels() const + { + if (m_Model.Channels.empty()) + { + return; + } + + ZEN_CONSOLE("Channels:"); + for (const ChannelInfo& Channel : m_Model.Channels) + { + ZEN_CONSOLE(" {:<32} {}", Channel.Name, Channel.Enabled ? "enabled" : "disabled"); + } + ZEN_CONSOLE(""); + } + + void AppendCpuScopeStats() const + { + if (m_Model.ScopeStats.empty()) + { + return; + } + + ZEN_CONSOLE("CPU Profiling Scopes:"); + ZEN_CONSOLE(""); + ZEN_CONSOLE("{:<48} {:>8} {:>9} {:>9} {:>9} {:>9}", "Scope", "Count", "Min(ms)", "Mean(ms)", "Max(ms)", "SD(ms)"); + ZEN_CONSOLE("{:-<{}}", "", 48 + 8 + 9 + 9 + 9 + 9 + 5); + + constexpr double UsToMs = 1.0 / 1000.0; + for (const CpuScopeStat& Stat : m_Model.ScopeStats) + { + if (Stat.MaxUs < 500) + { + continue; + } + + ZEN_CONSOLE("{:<48.48} {:>8} {:>9.3f} {:>9.3f} {:>9.3f} {:>9.3f}", + Stat.Name, + zen::ThousandsNum(Stat.Count), + double(Stat.MinUs) * UsToMs, + Stat.MeanUs * UsToMs, + double(Stat.MaxUs) * UsToMs, + Stat.StdDevUs * UsToMs); + } + ZEN_CONSOLE(""); + } + + void AppendMemorySummary() const + { + const AllocationSummary& AllocSummary = m_Model.AllocSummary; + if (!AllocSummary.HasMemoryData) + { + return; + } + + ZEN_CONSOLE("Memory Allocations:"); + ZEN_CONSOLE(""); + ZEN_CONSOLE(" Allocs: {}", zen::ThousandsNum(AllocSummary.TotalAllocs)); + ZEN_CONSOLE(" Frees: {}", zen::ThousandsNum(AllocSummary.TotalFrees)); + ZEN_CONSOLE(" Reallocs: {} alloc / {} free", + zen::ThousandsNum(AllocSummary.TotalReallocAllocs), + zen::ThousandsNum(AllocSummary.TotalReallocFrees)); + ZEN_CONSOLE(" Peak: {}", zen::NiceBytes(uint64_t(AllocSummary.PeakBytes))); + ZEN_CONSOLE(" End: {}", zen::NiceBytes(uint64_t(AllocSummary.EndBytes))); + ZEN_CONSOLE(" Live allocs: {}", zen::ThousandsNum(AllocSummary.LiveAllocations)); + + if (!m_Model.HeapStats.empty()) + { + ZEN_CONSOLE(""); + ZEN_CONSOLE(" {:<20} {:>14} {:>14} {:>10} {:>10}", "Heap", "Current", "Peak", "Allocs", "Frees"); + ZEN_CONSOLE(" {:-<{}}", "", 20 + 14 + 14 + 10 + 10 + 4); + + for (const HeapStat& Stat : m_Model.HeapStats) + { + std::string_view HeapName = FindHeapName(m_Model, Stat.HeapId); + + ZEN_CONSOLE(" {:<20.20} {:>14} {:>14} {:>10} {:>10}", + HeapName, + zen::NiceBytes(uint64_t(Stat.CurrentBytes)), + zen::NiceBytes(uint64_t(Stat.PeakBytes)), + zen::ThousandsNum(Stat.AllocCount), + zen::ThousandsNum(Stat.FreeCount)); + } + } + ZEN_CONSOLE(""); + } + + void PrintCallstack(uint32_t CallstackId) const + { + const CallstackEntry* Entry = m_FrameFormatter.FindCallstackEntry(CallstackId); + if (Entry == nullptr) + { + return; + } + + FilteredCallstackView Filtered = m_FrameFormatter.BuildView(*Entry, BuildCallstackFilterOptions(m_Options)); + if (Filtered.HiddenPrefixCount > 0) + { + if (Filtered.IncludedThirdPartyBoundary) + { + ZEN_CONSOLE(" [skipped {} leading frame(s); kept boundary third-party callsite]", Filtered.HiddenPrefixCount); + } + else + { + ZEN_CONSOLE(" [skipped {} leading frame(s)]", Filtered.HiddenPrefixCount); + } + } + for (const FilteredCallstackFrame& Frame : Filtered.Frames) + { + ZEN_CONSOLE(" {}", Frame.Display); + } + } + + void AppendLiveAllocationCallstacks() const + { + if (m_Options.LiveAllocsLimit <= 0 || m_Model.CallstackStats.empty()) + { + return; + } + + size_t Count = std::min(size_t(m_Options.LiveAllocsLimit), m_Model.CallstackStats.size()); + ZEN_CONSOLE("Live Allocation Callstacks (top {} by bytes):", Count); + ZEN_CONSOLE(""); + + for (size_t I = 0; I < Count; ++I) + { + const CallstackAllocStat& Stat = m_Model.CallstackStats[I]; + std::string ThreadInfo = BuildThreadSummary(m_Model, Stat.ThreadIds); + ZEN_CONSOLE(" #{} {} in {} allocation(s) [callstack {}, {}]", + I + 1, + zen::NiceBytes(uint64_t(Stat.LiveBytes)), + zen::ThousandsNum(Stat.LiveCount), + Stat.CallstackId, + ThreadInfo); + PrintCallstack(Stat.CallstackId); + ZEN_CONSOLE(""); + } + } + + void AppendChurnCallstacks() const + { + if (m_Options.ChurnLimit <= 0 || m_Model.ChurnStats.empty()) + { + return; + } + + size_t Emitted = 0; + size_t Limit = size_t(m_Options.ChurnLimit); + ZEN_CONSOLE("Allocation Churn (top {}, event distance <= {}):", Limit, m_Options.ChurnDistanceThreshold); + ZEN_CONSOLE(""); + + for (const CallstackChurnStat& Stat : m_Model.ChurnStats) + { + if (Emitted >= Limit) + { + break; + } + if (!PassesChurnThreshold(m_Options, Stat)) + { + continue; + } + + ZEN_CONSOLE(" #{} {} short-lived allocs ({} total), {} churned, avg distance {:.0f} events [callstack {}]", + Emitted + 1, + zen::ThousandsNum(Stat.ChurnAllocs), + zen::ThousandsNum(Stat.TotalAllocs), + zen::NiceBytes(Stat.ChurnBytes), + Stat.MeanDistance, + Stat.CallstackId); + PrintCallstack(Stat.CallstackId); + ZEN_CONSOLE(""); + ++Emitted; + } + } + + const TraceModel& m_Model; + const AnalyzeOptions& m_Options; + const std::filesystem::path& m_FilePath; + CallstackFormatter& m_FrameFormatter; +}; + +class HtmlReportWriter +{ +public: + HtmlReportWriter(const TraceModel& InModel, + const AnalyzeOptions& InOptions, + const std::filesystem::path& InFilePath, + CallstackFormatter& InFrameFormatter) + : m_Model(InModel) + , m_Options(InOptions) + , m_FilePath(InFilePath) + , m_FrameFormatter(InFrameFormatter) + { + } + + void Write(const std::filesystem::path& OutputPath) + { + AppendDocument(); + zen::WriteFile(OutputPath, zen::IoBuffer(zen::IoBuffer::Wrap, m_Html.Data(), m_Html.Size())); + } + +private: + void AppendDocument() + { + m_Html << "zen trace analyze report"; + AppendStyles(); + m_Html << ""; + AppendHeader(); + AppendSummaryCards(); + AppendLeaksSection(); + AppendChurnSection(); + m_Html << ""; + } + + void AppendStyles() + { + m_Html << ""; + } + + void AppendHeader() + { + m_Html << "

zen trace analyze memory report

"; + m_Html << "
offline HTMLtop 100 churn " + "sites

Trace: "; + AppendHtmlEscaped(m_Html, m_FilePath.string()); + m_Html << "

"; + if (m_Model.Session.HasSession && !m_Model.Session.AppName.empty()) + { + m_Html << "

App: "; + AppendHtmlEscaped(m_Html, m_Model.Session.AppName); + m_Html << "

"; + } + m_Html << "

Generated by zen trace analyze. Churn threshold: "; + AppendHtmlEscaped(m_Html, fmt::format("{} events", m_Options.ChurnDistanceThreshold)); + m_Html << "

"; + } + + void AppendSummaryCards() + { + uint64_t DurationUs = (m_Model.TraceEndUs > m_Model.TraceStartUs) ? (m_Model.TraceEndUs - m_Model.TraceStartUs) : 0; + m_Html << "
"; + m_Html << "
Trace size
" << zen::NiceBytes(m_Model.FileSize) + << "
"; + m_Html << "
Duration
" << zen::NiceTimeSpanMs((DurationUs + 500) / 1000) + << "
"; + m_Html << "
Peak memory
" + << zen::NiceBytes(uint64_t(m_Model.AllocSummary.PeakBytes)) << "
"; + m_Html << "
End memory
" + << zen::NiceBytes(uint64_t(m_Model.AllocSummary.EndBytes)) << "
"; + m_Html << "
Live allocations
" + << zen::ThousandsNum(m_Model.AllocSummary.LiveAllocations) << "
"; + m_Html << "
Leak callstacks
" + << zen::ThousandsNum(m_Model.CallstackStats.size()) << "
"; + m_Html << "
Churn sites shown
" + << zen::ThousandsNum(::CountShownChurnSites(m_Model, m_Options)) << "
"; + m_Html << "
"; + } + + void AppendLeaksSection() + { + m_Html << "

Memory leaks (all live-allocation callstacks)

"; + if (m_Model.CallstackStats.empty()) + { + m_Html << "
No live allocation callstacks were present at the end of the trace.
"; + return; + } + + m_Html << ""; + for (size_t I = 0; I < m_Model.CallstackStats.size(); ++I) + { + const CallstackAllocStat& Stat = m_Model.CallstackStats[I]; + std::string ThreadInfo = BuildThreadSummary(m_Model, Stat.ThreadIds); + m_Html << ""; + } + m_Html << "
#Live bytesAlloc " + "countThreadsCallstack
" << uint64_t(I + 1) << "" << zen::NiceBytes(uint64_t(Stat.LiveBytes)) + << "" << zen::ThousandsNum(Stat.LiveCount) << ""; + AppendHtmlEscaped(m_Html, ThreadInfo); + m_Html << "
Callstack " << Stat.CallstackId << ""; + AppendHtmlCallstack(m_Html, m_Options, m_FrameFormatter, Stat.CallstackId); + m_Html << "
"; + } + + void AppendChurnSection() + { + m_Html << "

Allocation churn sites (top 100)

"; + if (m_Model.ChurnStats.empty()) + { + m_Html << "
No churn statistics were available in this trace.
"; + return; + } + + m_Html << ""; + size_t Emitted = 0; + for (const CallstackChurnStat& Stat : m_Model.ChurnStats) + { + if (Emitted >= 100) + { + break; + } + if (!PassesChurnThreshold(m_Options, Stat)) + { + continue; + } + m_Html << ""; + ++Emitted; + } + m_Html << "
#Short-lived allocsChurn bytesTotal allocsAvg " + "distanceCallstack
" << uint64_t(Emitted + 1) << "" << zen::ThousandsNum(Stat.ChurnAllocs) + << "" << zen::NiceBytes(Stat.ChurnBytes) << "" + << zen::ThousandsNum(Stat.TotalAllocs) << "" << fmt::format("{:.0f} events", Stat.MeanDistance) + << "
Callstack " << Stat.CallstackId << ""; + AppendHtmlCallstack(m_Html, m_Options, m_FrameFormatter, Stat.CallstackId); + m_Html << "
"; + } + + const TraceModel& m_Model; + const AnalyzeOptions& m_Options; + const std::filesystem::path& m_FilePath; + CallstackFormatter& m_FrameFormatter; + zen::ExtendableStringBuilder<32768> m_Html; +}; + +static void +WriteAnalyzeHtmlReport(const TraceModel& Model, + const AnalyzeOptions& Options, + const std::filesystem::path& FilePath, + CallstackFormatter& FrameFormatter) +{ + std::filesystem::path OutputPath = std::filesystem::absolute(Options.HtmlReportPath); + if (OutputPath.empty()) + { + return; + } + + std::error_code Ec; + std::filesystem::path ParentPath = OutputPath.parent_path(); + if (!ParentPath.empty()) + { + std::filesystem::create_directories(ParentPath, Ec); + } + + HtmlReportWriter Writer(Model, Options, FilePath, FrameFormatter); + Writer.Write(OutputPath); + ZEN_CONSOLE("HTML report: {}", OutputPath.string()); +} + +} // namespace + +namespace zen::trace_detail { + +void +RunAnalyze(const std::filesystem::path& FilePath, const AnalyzeOptions& Options) +{ + std::filesystem::path CachePath = FilePath; + CachePath.replace_extension(".ucache_z"); + + TraceModel Model; + std::unique_ptr Symbols; + bool LoadedFromCache = false; + + // Try loading from cache + if (!Options.NoCache) + { + std::optional Cached = TryLoadAnalyzeCache(CachePath, FilePath); + if (Cached) + { + Model = std::move(Cached->Model); + Symbols = std::move(Cached->Symbols); + LoadedFromCache = true; + } + } + + if (!LoadedFromCache) + { + WorkerThreadPool ThreadPool(gsl::narrow(GetHardwareConcurrency())); + Model = BuildTraceModel(FilePath, ThreadPool); + + if (Options.Symbols != SymbolBackend::Off) + { + Symbols = CreateSymbolResolver(Options.Symbols); + for (const ModuleInfo& Mod : Model.Modules) + { + Symbols->LoadModule(Mod); + } + } + } + + CallstackFormatter FrameFormatter(Model, Symbols.get()); + ConsoleAnalyzeWriter ConsoleWriter(Model, Options, FilePath, FrameFormatter); + ConsoleWriter.Write(); + + if (!Options.HtmlReportPath.empty()) + { + WriteAnalyzeHtmlReport(Model, Options, FilePath, FrameFormatter); + } + + // Write cache on fresh parse + if (!LoadedFromCache && !Options.NoCache) + { + // Build the complete symbol map for the cache. Start with whatever + // the formatter already resolved during display, then resolve every + // remaining callstack address in parallel. + eastl::hash_map AllSymbols = FrameFormatter.GetResolvedCache(); + + // Collect unique addresses that still need resolving. + eastl::hash_set Needed; + for (const CallstackEntry& CS : Model.Callstacks) + { + for (const ResolvedFrame& Frame : CS.Frames) + { + if (AllSymbols.find(Frame.Address) == AllSymbols.end()) + { + Needed.insert(Frame.Address); + } + } + } + + if (!Needed.empty() && Symbols) + { + // Flatten to a vector so we can partition into chunks. + eastl::vector Addresses(Needed.begin(), Needed.end()); + Needed.clear(); + + uint32_t ThreadCount = gsl::narrow(GetHardwareConcurrency()); + WorkerThreadPool ResolvePool(gsl::narrow(ThreadCount)); + + // Each worker resolves a chunk and writes into its own local map. + eastl::vector> PerThread(ThreadCount); + uint32_t ChunkSize = uint32_t((Addresses.size() + ThreadCount - 1) / ThreadCount); + + Latch Done(ThreadCount); + for (uint32_t T = 0; T < ThreadCount; ++T) + { + uint32_t Begin = T * ChunkSize; + uint32_t End = std::min(Begin + ChunkSize, uint32_t(Addresses.size())); + if (Begin >= End) + { + Done.CountDown(); + continue; + } + + ResolvePool.ScheduleWork( + [&Addresses, &PerThread, &Model, &Symbols, &Done, T, Begin, End]() { + auto _ = MakeGuard([&Done]() { Done.CountDown(); }); + for (uint32_t I = Begin; I < End; ++I) + { + uint64_t Addr = Addresses[I]; + std::string Symbol = Symbols->Resolve(Addr); + if (!Symbol.empty()) + { + PerThread[T].emplace(Addr, std::move(Symbol)); + } + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + Done.Wait(); + + // Merge per-thread results. + for (auto& Map : PerThread) + { + for (auto& [Addr, Sym] : Map) + { + AllSymbols.emplace(Addr, std::move(Sym)); + } + } + } + + // Fill in module-name fallbacks for any addresses not resolved by the + // symbol resolver (same logic as CallstackFormatter::Describe). + for (const CallstackEntry& CS : Model.Callstacks) + { + for (const ResolvedFrame& Frame : CS.Frames) + { + if (AllSymbols.find(Frame.Address) != AllSymbols.end()) + { + continue; + } + std::string Fallback; + if (Frame.ModuleIndex != ~0u && Frame.ModuleIndex < Model.Modules.size()) + { + Fallback = fmt::format("{} + 0x{:X}", Model.Modules[Frame.ModuleIndex].Name, Frame.Offset); + } + else + { + Fallback = fmt::format("0x{:X}", Frame.Address); + } + AllSymbols.emplace(Frame.Address, std::move(Fallback)); + } + } + + WriteAnalyzeCache(CachePath, FilePath, Model, AllSymbols); + } +} + +} // namespace zen::trace_detail -- cgit v1.2.3