aboutsummaryrefslogtreecommitdiff
path: root/src/zen/trace/trace_model.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zen/trace/trace_model.cpp')
-rw-r--r--src/zen/trace/trace_model.cpp3898
1 files changed, 3898 insertions, 0 deletions
diff --git a/src/zen/trace/trace_model.cpp b/src/zen/trace/trace_model.cpp
new file mode 100644
index 000000000..ac81161a1
--- /dev/null
+++ b/src/zen/trace/trace_model.cpp
@@ -0,0 +1,3898 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "trace_model.h"
+
+#include <zencore/basicfile.h>
+#include <zencore/except_fmt.h>
+#include <zencore/fmtutils.h>
+#include <zencore/intmath.h>
+#include <zencore/logging.h>
+#include <zencore/logging/tracelog.h>
+#include <zencore/scopeguard.h>
+#include <zencore/string.h>
+#include <zencore/thread.h>
+#include <zencore/timer.h>
+#include <zenutil/parallelsort.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <EASTL/hash_map.h>
+#include <EASTL/map.h>
+#include <EASTL/set.h>
+#include <EASTL/sort.h>
+#include <EASTL/vector.h>
+#include <analysis/analyzer.h>
+#include <analysis/dispatcher.h>
+#include <fmt/format.h>
+#include <trace/trace.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+using namespace std::literals;
+
+// Toggle to A/B test cross-platform parallel sort vs sequential eastl::sort.
+constexpr bool kUseParallelSort = true;
+
+namespace eastl {
+
+template<>
+struct hash<std::string>
+{
+ size_t operator()(const std::string& S) const { return eastl::hash<const char*>()(S.c_str()); }
+};
+
+} // namespace eastl
+
+//////////////////////////////////////////////////////////////////////////////
+// Trace analysis types (global namespace alongside tourist types)
+
+namespace {
+
+using zen::ReciprocalU64;
+
+// Welford's online algorithm for computing mean and standard deviation
+class Distribution
+{
+public:
+ void add(double X)
+ {
+ m_Count++;
+ if (m_Count == 1)
+ {
+ m_OldM = m_NewM = X;
+ m_OldS = 0.0;
+ }
+ else
+ {
+ m_NewM = m_OldM + (X - m_OldM) / double(m_Count);
+ m_NewS = m_OldS + (X - m_OldM) * (X - m_NewM);
+ m_OldM = m_NewM;
+ m_OldS = m_NewS;
+ }
+ }
+
+ uint32_t Count() const { return m_Count; }
+ double Mean() const { return (m_Count > 0) ? m_NewM : 0.0; }
+ double Variance() const { return (m_Count > 1) ? m_NewS / double(m_Count - 1) : 0.0; }
+ double StdDev() const { return std::sqrt(Variance()); }
+
+private:
+ double m_OldM = 0.0;
+ double m_NewM = 0.0;
+ double m_OldS = 0.0;
+ double m_NewS = 0.0;
+ uint32_t m_Count = 0;
+};
+
+class NameDepot
+{
+public:
+ uint64 Add(StringView Name)
+ {
+ uint64 NameHash = Hash(Name);
+ Add(NameHash, Name);
+ return NameHash;
+ }
+
+ void Add(uint64 NameHash, StringView Name)
+ {
+ if (auto It = m_Names.insert({NameHash, String()}); It.second)
+ {
+ It.first->second = Name;
+ }
+ }
+
+ StringView Get(uint64 NameHash) const
+ {
+ auto Iter = m_Names.find(NameHash);
+ if (Iter == m_Names.end())
+ {
+ return "???";
+ }
+ return Iter->second;
+ }
+
+private:
+ eastl::hash_map<uint64, String> m_Names;
+};
+
+struct CpuEventStat
+{
+ Distribution Dist;
+ uint32_t Min = ~0u;
+ uint32_t Max = 0;
+};
+
+class EventStats
+{
+public:
+ void Record(uint64 NameHash, uint32 DurationUs)
+ {
+ CpuEventStat& Stat = m_Stats[NameHash];
+ Stat.Min = std::min(Stat.Min, DurationUs);
+ Stat.Max = std::max(Stat.Max, DurationUs);
+ Stat.Dist.add(double(DurationUs));
+ }
+
+ auto begin() const { return m_Stats.begin(); }
+ auto end() const { return m_Stats.end(); }
+ bool empty() const { return m_Stats.empty(); }
+
+private:
+ eastl::hash_map<uint64, CpuEventStat> m_Stats;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Event outlines
+
+// clang-format off
+begin_outline($Trace, NewTrace)
+ field(uint64, CycleFrequency)
+ field(uint64, StartCycle)
+end_outline()
+
+begin_outline(CpuProfiler, EventSpec)
+ field(uint32, Id)
+ field(FieldStr, Name)
+end_outline()
+
+begin_outline(CpuProfiler, EventBatch)
+ field(uint32, ThreadId)
+ field(uint8[], Data)
+end_outline()
+
+begin_outline(CpuProfiler, EventBatchV2)
+ field(uint8[], Data)
+end_outline()
+
+begin_outline(CpuProfiler, EventBatchV3)
+ field(uint8[], Data)
+end_outline()
+
+begin_outline(CpuProfiler, Metadata)
+ field(uint32, Id)
+ field(uint32, SpecId)
+ field(uint8[], Metadata)
+end_outline()
+
+begin_outline($Trace, ThreadInfo)
+ field(FieldStr, Name)
+ field(int32, SortHint)
+ field(uint32, ThreadId)
+ field(uint32, SystemId)
+end_outline()
+
+begin_outline($Trace, ThreadGroupBegin)
+ field(FieldStr, Name)
+end_outline()
+
+begin_outline($Trace, ThreadGroupEnd)
+end_outline()
+
+begin_outline(Diagnostics, Session2)
+ field(FieldStr, Platform)
+ field(FieldStr, AppName)
+ field(FieldStr, ProjectName)
+ field(FieldStr, CommandLine)
+ field(FieldStr, Branch)
+ field(FieldStr, BuildVersion)
+ field(uint32, Changelist)
+ field(uint8, ConfigurationType)
+ field(uint8, TargetType)
+end_outline()
+
+begin_outline(Diagnostics, ModuleInit)
+ field(FieldStr, SymbolFormat)
+ field(uint8, ModuleBaseShift)
+end_outline()
+
+begin_outline(Diagnostics, ModuleLoad)
+ field(FieldStr, Name)
+ field(uint64, Base)
+ field(uint32, Size)
+ field(uint8[], ImageId)
+end_outline()
+
+begin_outline(Diagnostics, ModuleUnload)
+ field(uint64, Base)
+end_outline()
+
+begin_outline(Trace, ChannelAnnounce)
+ field(uint32, Id)
+ field(uint8, IsEnabled)
+ field(uint8, ReadOnly)
+ field(FieldStr, Name)
+end_outline()
+
+begin_outline(Trace, ChannelToggle)
+ field(uint32, Id)
+ field(uint8, IsEnabled)
+end_outline()
+
+begin_outline(Logging, LogCategory)
+ field(uint64, CategoryPointer)
+ field(uint8, DefaultVerbosity)
+ field(FieldStr, Name)
+end_outline()
+
+begin_outline(Logging, LogMessageSpec)
+ field(uint64, LogPoint)
+ field(uint64, CategoryPointer)
+ field(int32, Line)
+ field(uint8, Verbosity)
+ field(FieldStr, FileName)
+ field(FieldStr, FormatString)
+end_outline()
+
+begin_outline(Logging, LogMessage)
+ field(uint64, LogPoint)
+ field(uint64, Cycle)
+ field(uint8[], FormatArgs)
+end_outline()
+
+// Analyzer-level schema for the zencore log events defined in
+// src/zencore/logging/tracelog.cpp. The field layout mirrors the Logging.*
+// outline above so the analyzer plumbing is symmetric, but the events carry
+// fmt-style `{}` format strings instead of printf-style `%` specifiers and
+// the FormatArgs blob uses a zen-specific descriptor encoding.
+begin_outline(ZenLog, Category)
+ field(uint64, CategoryPointer)
+ field(uint8, DefaultVerbosity)
+ field(FieldStr, Name)
+end_outline()
+
+begin_outline(ZenLog, MessageSpec)
+ field(uint64, LogPoint)
+ field(uint64, CategoryPointer)
+ field(int32, Line)
+ field(uint8, Verbosity)
+ field(FieldStr, FileName)
+ field(FieldStr, FormatString)
+end_outline()
+
+begin_outline(ZenLog, Message)
+ field(uint64, LogPoint)
+ field(uint64, Cycle)
+ field(uint8[], FormatArgs)
+end_outline()
+
+begin_outline(Misc, BookmarkSpec)
+ field(uint64, BookmarkPoint)
+ field(int32, Line)
+ field(FieldStr, FormatString)
+ field(FieldStr, FileName)
+end_outline()
+
+begin_outline(Misc, Bookmark)
+ field(uint64, Cycle)
+ field(uint64, BookmarkPoint)
+ field(uint8[], FormatArgs)
+end_outline()
+
+begin_outline(Misc, RegionBegin)
+ field(uint64, Cycle)
+ field(uint8[], RegionName)
+ field(uint8[], Category)
+end_outline()
+
+begin_outline(Misc, RegionBeginWithId)
+ field(uint64, CycleAndId)
+ field(uint8[], RegionName)
+ field(uint8[], Category)
+end_outline()
+
+begin_outline(Misc, RegionEnd)
+ field(uint64, Cycle)
+ field(uint8[], RegionName)
+end_outline()
+
+begin_outline(Misc, RegionEndWithId)
+ field(uint64, Cycle)
+ field(uint64, RegionId)
+end_outline()
+
+// CsvProfiler events
+begin_outline(CsvProfiler, RegisterCategory)
+ field(int32, Index)
+ field(uint8[], Name)
+end_outline()
+
+begin_outline(CsvProfiler, DefineInlineStat)
+ field(uint64, StatId)
+ field(int32, CategoryIndex)
+ field(uint8[], Name)
+end_outline()
+
+begin_outline(CsvProfiler, DefineDeclaredStat)
+ field(uint64, StatId)
+ field(int32, CategoryIndex)
+ field(uint8[], Name)
+end_outline()
+
+begin_outline(CsvProfiler, BeginStat)
+ field(uint64, StatId)
+ field(uint64, Cycle)
+end_outline()
+
+begin_outline(CsvProfiler, EndStat)
+ field(uint64, StatId)
+ field(uint64, Cycle)
+end_outline()
+
+begin_outline(CsvProfiler, BeginExclusiveStat)
+ field(uint64, StatId)
+ field(uint64, Cycle)
+end_outline()
+
+begin_outline(CsvProfiler, EndExclusiveStat)
+ field(uint64, StatId)
+ field(uint64, Cycle)
+end_outline()
+
+begin_outline(CsvProfiler, CustomStatInt)
+ field(uint64, StatId)
+ field(uint64, Cycle)
+ field(int32, Value)
+ field(uint8, OpType)
+end_outline()
+
+begin_outline(CsvProfiler, CustomStatFloat)
+ field(uint64, StatId)
+ field(uint64, Cycle)
+ field(float, Value)
+ field(uint8, OpType)
+end_outline()
+
+begin_outline(CsvProfiler, Event)
+ field(uint64, Cycle)
+ field(int32, CategoryIndex)
+ field(uint8[], Text)
+end_outline()
+
+begin_outline(CsvProfiler, BeginCapture)
+ field(uint64, Cycle)
+ field(uint32, RenderThreadId)
+ field(uint32, RHIThreadId)
+ field(uint8, EnableCounts)
+ field(uint8[], FileName)
+end_outline()
+
+begin_outline(CsvProfiler, EndCapture)
+ field(uint64, Cycle)
+end_outline()
+
+begin_outline(CsvProfiler, Metadata)
+ field(uint8[], Key)
+ field(uint8[], Value)
+end_outline()
+ // clang-format on
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Forward declarations needed by the helper analyzers below.
+
+ using zen::trace_detail::SafeFieldStr;
+
+//////////////////////////////////////////////////////////////////////////////
+// Minimal CBOR formatter
+//
+// CpuProfiler.Metadata payloads are CBOR-encoded (RFC 7049) blobs produced
+// by UE's FCborWriter. We don't need a full decoder -- we just walk the
+// bytes and append human-readable values to an output string. Handles the
+// subset actually emitted by UE's metadata scopes: unsigned / negative
+// integers, byte / text strings, arrays, maps, floats, and the boolean /
+// null simple values.
+
+static bool CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth);
+
+static bool
+CborReadArg(const uint8*& p, const uint8* end, uint8 info, uint64& value)
+{
+ if (info < 24)
+ {
+ value = info;
+ return true;
+ }
+ if (info == 24)
+ {
+ if (end - p < 1)
+ return false;
+ value = *p++;
+ return true;
+ }
+ if (info == 25)
+ {
+ if (end - p < 2)
+ return false;
+ value = (uint64(p[0]) << 8) | p[1];
+ p += 2;
+ return true;
+ }
+ if (info == 26)
+ {
+ if (end - p < 4)
+ return false;
+ value = (uint64(p[0]) << 24) | (uint64(p[1]) << 16) | (uint64(p[2]) << 8) | p[3];
+ p += 4;
+ return true;
+ }
+ if (info == 27)
+ {
+ if (end - p < 8)
+ return false;
+ value = 0;
+ for (int i = 0; i < 8; ++i)
+ {
+ value = (value << 8) | p[i];
+ }
+ p += 8;
+ return true;
+ }
+ return false;
+}
+
+static bool
+CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth)
+{
+ if (depth > 4 || p >= end)
+ {
+ return false;
+ }
+
+ const uint8 ib = *p++;
+ const uint8 major = ib >> 5;
+ const uint8 info = ib & 0x1f;
+
+ switch (major)
+ {
+ case 0: // unsigned integer
+ {
+ uint64 v;
+ if (!CborReadArg(p, end, info, v))
+ return false;
+ out += fmt::format("{}", v);
+ return true;
+ }
+ case 1: // negative integer: -1 - value
+ {
+ uint64 v;
+ if (!CborReadArg(p, end, info, v))
+ return false;
+ out += fmt::format("{}", -1 - int64_t(v));
+ return true;
+ }
+ case 2: // byte string
+ case 3: // text string
+ {
+ uint64 len;
+ if (!CborReadArg(p, end, info, len))
+ return false;
+ if (len > uint64(end - p))
+ return false;
+ out.append(reinterpret_cast<const char*>(p), size_t(len));
+ p += len;
+ return true;
+ }
+ case 4: // array
+ {
+ uint64 count;
+ if (!CborReadArg(p, end, info, count))
+ return false;
+ for (uint64 i = 0; i < count; ++i)
+ {
+ if (i > 0)
+ out += ", ";
+ if (!CborAppendValue(p, end, out, depth + 1))
+ return false;
+ }
+ return true;
+ }
+ case 5: // map
+ {
+ uint64 count;
+ if (!CborReadArg(p, end, info, count))
+ return false;
+ for (uint64 i = 0; i < count; ++i)
+ {
+ if (i > 0)
+ out += ", ";
+ if (!CborAppendValue(p, end, out, depth + 1))
+ return false;
+ out += "=";
+ if (!CborAppendValue(p, end, out, depth + 1))
+ return false;
+ }
+ return true;
+ }
+ case 7: // simple values / floats
+ {
+ if (info == 20)
+ {
+ out += "false";
+ return true;
+ }
+ if (info == 21)
+ {
+ out += "true";
+ return true;
+ }
+ if (info == 22)
+ {
+ out += "null";
+ return true;
+ }
+ if (info == 26)
+ {
+ if (end - p < 4)
+ return false;
+ uint32 bits = (uint32(p[0]) << 24) | (uint32(p[1]) << 16) | (uint32(p[2]) << 8) | p[3];
+ float v;
+ std::memcpy(&v, &bits, 4);
+ out += fmt::format("{}", v);
+ p += 4;
+ return true;
+ }
+ if (info == 27)
+ {
+ if (end - p < 8)
+ return false;
+ uint64 bits = 0;
+ for (int i = 0; i < 8; ++i)
+ {
+ bits = (bits << 8) | p[i];
+ }
+ p += 8;
+ double v;
+ std::memcpy(&v, &bits, 8);
+ out += fmt::format("{}", v);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static std::string
+FormatMetadataValues(const uint8_t* Bytes, size_t Size)
+{
+ std::string out;
+ if (Size == 0)
+ {
+ return out;
+ }
+ const uint8* p = Bytes;
+ CborAppendValue(p, Bytes + Size, out, 0);
+ return out;
+}
+
+using zen::trace_detail::TraceTiming;
+
+//////////////////////////////////////////////////////////////////////////////
+// Metadata registry
+//
+// Subscribes to CpuProfiler.Metadata events and stores each payload's
+// CBOR-encoded bytes keyed by MetadataId, along with the SpecId they
+// reference. Both CpuAnalyzer and TimelineAnalyzer query the registry when
+// they encounter a V3 scope with the metadata bit set so the scope can be
+// rendered as `{base name} - {formatted values}`.
+
+struct MetadataEntry
+{
+ uint32_t SpecId = 0;
+ eastl::vector<uint8_t> Bytes;
+};
+
+class MetadataRegistry : public Analyzer
+{
+public:
+ void subscribe(Vector<Subscription>& Subs) override { Subs.emplace_back(this, &MetadataRegistry::OnMetadata); }
+
+ const MetadataEntry* Lookup(uint32_t MetadataId) const
+ {
+ auto It = m_Entries.find(MetadataId);
+ return (It != m_Entries.end()) ? &It->second : nullptr;
+ }
+
+private:
+ void OnMetadata(const CpuProfiler_Metadata& Ev)
+ {
+ uint32_t MetadataId = Ev.Id();
+ uint32_t SpecId = Ev.SpecId();
+ Array<uint8[]> Data = Ev.Metadata();
+
+ MetadataEntry& Entry = m_Entries[MetadataId];
+ Entry.SpecId = SpecId;
+ Entry.Bytes.assign(Data.get(), Data.get() + Data.get_size());
+ }
+
+ eastl::hash_map<uint32_t, MetadataEntry> m_Entries;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Log message formatting (upstream UE Logging.* wire)
+//
+// UE's trace emits log messages as a sequence of typed arguments that need
+// to be substituted into a printf-style format string. The wire format is:
+//
+// [ArgumentCount: uint8]
+// [Descriptors: uint8 * ArgumentCount] // each byte = category | size
+// [Payload: bytes]
+//
+// Category bits live in the upper 2 bits (shifted by FormatArgTypeCode_-
+// CategoryBitShift == 6): Integer=1, Float=2, String=3. The low 6 bits are
+// the argument size in bytes; for strings the size is the per-character
+// width (1 == ANSI, 2 == UTF-16).
+//
+// We walk the format string, extract each specifier, pull the matching arg
+// from the stream and hand both to std::snprintf. Width/precision stars
+// (e.g. "%*.*f") are not supported; they're rare in log formats.
+//
+// Note: this is the upstream UE printf-style wire only. The zen-specific
+// ZenLog.* events use a different descriptor encoding (3-bit category /
+// 5-bit size, plus a dedicated bool and pointer category) and a different
+// format-spec grammar. See zen::logging::FormatLogArgs in
+// src/zencore/logging/tracelog.cpp for that path.
+
+struct FormatArgStream
+{
+ const uint8_t* Descriptors;
+ const uint8_t* Payload;
+ uint8_t Remaining;
+
+ bool HasNext() const { return Remaining > 0; }
+
+ uint8_t PeekCategory() const { return (*Descriptors) & 0xC0; }
+ uint8_t PeekSize() const { return (*Descriptors) & 0x3F; }
+
+ void Advance(size_t PayloadBytes)
+ {
+ Payload += PayloadBytes;
+ ++Descriptors;
+ --Remaining;
+ }
+};
+
+static bool
+InitFormatArgStream(FormatArgStream& Ctx, const uint8_t* Data, size_t Size)
+{
+ if (!Data || Size == 0)
+ {
+ Ctx.Remaining = 0;
+ return false;
+ }
+ uint8_t Count = Data[0];
+ if (size_t(1) + Count > Size)
+ {
+ Ctx.Remaining = 0;
+ return false;
+ }
+ Ctx.Descriptors = Data + 1;
+ Ctx.Payload = Data + 1 + Count;
+ Ctx.Remaining = Count;
+ return true;
+}
+
+static bool
+IsPrintfSpecifierChar(char c)
+{
+ switch (c)
+ {
+ case 'd':
+ case 'i':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'c':
+ case 'p':
+ case 'f':
+ case 'F':
+ case 'e':
+ case 'E':
+ case 'g':
+ case 'G':
+ case 'a':
+ case 'A':
+ case 's':
+ case 'S':
+ case 'n':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static std::string
+FormatLogMessage(std::string_view Format, const uint8_t* ArgsData, size_t ArgsSize)
+{
+ FormatArgStream Stream{};
+ InitFormatArgStream(Stream, ArgsData, ArgsSize);
+
+ std::string Out;
+ Out.reserve(Format.size() + 32);
+
+ size_t i = 0;
+ while (i < Format.size())
+ {
+ char c = Format[i];
+ if (c != '%')
+ {
+ Out.push_back(c);
+ ++i;
+ continue;
+ }
+
+ // Handle "%%" -> literal percent.
+ if (i + 1 < Format.size() && Format[i + 1] == '%')
+ {
+ Out.push_back('%');
+ i += 2;
+ continue;
+ }
+
+ // Walk the specifier until we find a terminating character.
+ size_t SpecStart = i++;
+ while (i < Format.size() && !IsPrintfSpecifierChar(Format[i]))
+ {
+ ++i;
+ }
+ if (i >= Format.size())
+ {
+ // Truncated specifier -- copy the remainder literally.
+ Out.append(Format.substr(SpecStart));
+ break;
+ }
+
+ char Specifier = Format[i++];
+ std::string Spec(Format.substr(SpecStart, i - SpecStart));
+
+ if (!Stream.HasNext())
+ {
+ // Not enough arguments: emit the raw specifier so the user can
+ // at least tell something is missing.
+ Out.append(Spec);
+ continue;
+ }
+
+ const uint8_t Category = Stream.PeekCategory();
+ const uint8_t Size = Stream.PeekSize();
+
+ char Buf[512];
+ Buf[0] = '\0';
+
+ if (Category == 0x40) // integer
+ {
+ uint64_t Raw = 0;
+ if (Size <= sizeof(Raw) && Size > 0)
+ {
+ std::memcpy(&Raw, Stream.Payload, Size);
+ }
+
+ // Route through the correct snprintf type based on the
+ // specifier. Cast to int64_t for signed integer specifiers.
+ switch (Specifier)
+ {
+ case 'd':
+ case 'i':
+ {
+ // Sign-extend based on Size.
+ int64_t Signed = 0;
+ switch (Size)
+ {
+ case 1:
+ Signed = int8_t(Raw & 0xff);
+ break;
+ case 2:
+ Signed = int16_t(Raw & 0xffff);
+ break;
+ case 4:
+ Signed = int32_t(Raw & 0xffffffff);
+ break;
+ case 8:
+ Signed = int64_t(Raw);
+ break;
+ default:
+ Signed = int64_t(Raw);
+ break;
+ }
+ // Replace length modifier so snprintf interprets the
+ // correctly-sized value. Simplest: append "ll".
+ std::string AdjustedSpec = Spec;
+ AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll");
+ std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast<long long>(Signed));
+ break;
+ }
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'p':
+ {
+ std::string AdjustedSpec = Spec;
+ AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll");
+ std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast<unsigned long long>(Raw));
+ break;
+ }
+ case 'c':
+ {
+ std::snprintf(Buf, sizeof(Buf), Spec.c_str(), int(Raw & 0xff));
+ break;
+ }
+ default:
+ std::snprintf(Buf, sizeof(Buf), "%llu", static_cast<unsigned long long>(Raw));
+ break;
+ }
+ Stream.Advance(Size);
+ }
+ else if (Category == 0x80) // floating point
+ {
+ double Value = 0.0;
+ if (Size == 4)
+ {
+ float F;
+ std::memcpy(&F, Stream.Payload, 4);
+ Value = double(F);
+ }
+ else if (Size == 8)
+ {
+ std::memcpy(&Value, Stream.Payload, 8);
+ }
+ std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Value);
+ Stream.Advance(Size);
+ }
+ else if (Category == 0xC0) // string
+ {
+ std::string Tmp;
+ if (Size == 1)
+ {
+ const char* S = reinterpret_cast<const char*>(Stream.Payload);
+ size_t Len = std::strlen(S);
+ Tmp.assign(S, Len);
+ std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str());
+ Stream.Advance(Len + 1);
+ }
+ else if (Size == 2)
+ {
+ const char16_t* W = reinterpret_cast<const char16_t*>(Stream.Payload);
+ size_t Len = 0;
+ while (W[Len] != 0)
+ ++Len;
+ Tmp.reserve(Len);
+ for (size_t k = 0; k < Len; ++k)
+ {
+ char16_t ch = W[k];
+ Tmp.push_back(ch < 0x80 ? char(ch) : '?');
+ }
+ std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str());
+ Stream.Advance((Len + 1) * 2);
+ }
+ else
+ {
+ std::snprintf(Buf, sizeof(Buf), "<unsupported string width %u>", unsigned(Size));
+ Stream.Advance(0);
+ ++Stream.Descriptors;
+ --Stream.Remaining;
+ }
+ }
+ else
+ {
+ std::snprintf(Buf, sizeof(Buf), "<arg>");
+ Stream.Advance(Size);
+ }
+
+ Out.append(Buf);
+ }
+
+ return Out;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Log analyzer
+
+class LogAnalyzer : public Analyzer
+{
+public:
+ explicit LogAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {}
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &LogAnalyzer::OnLogCategory);
+ Subs.emplace_back(this, &LogAnalyzer::OnLogMessageSpec);
+ Subs.emplace_back(this, &LogAnalyzer::OnLogMessage);
+ Subs.emplace_back(this, &LogAnalyzer::OnZenLogCategory);
+ Subs.emplace_back(this, &LogAnalyzer::OnZenLogMessageSpec);
+ Subs.emplace_back(this, &LogAnalyzer::OnZenLogMessage);
+ }
+
+ eastl::vector<zen::trace_detail::LogCategoryInfo> BuildCategories(eastl::hash_map<uint64_t, uint32_t>& OutPointerToIndex) const
+ {
+ eastl::vector<zen::trace_detail::LogCategoryInfo> Cats;
+ Cats.reserve(m_Categories.size());
+ OutPointerToIndex.clear();
+ for (const auto& [Ptr, Info] : m_Categories)
+ {
+ OutPointerToIndex[Ptr] = uint32_t(Cats.size());
+ Cats.push_back(Info);
+ }
+ return Cats;
+ }
+
+ const eastl::vector<zen::trace_detail::LogEntry>& Entries() const { return m_Entries; }
+ eastl::vector<zen::trace_detail::LogEntry>& MutableEntries() { return m_Entries; }
+
+ // The shared TraceTiming pointer lets external callers read the trace's
+ // cycle base / frequency without each analyzer having to own a copy.
+ const TraceTiming* Timing() const { return m_Timing; }
+
+ struct MessageSpec
+ {
+ uint64_t CategoryPointer = 0;
+ int32_t Line = 0;
+ uint8_t Verbosity = 0;
+ std::string File;
+ std::string FormatString;
+ };
+
+ const eastl::hash_map<uint64_t, MessageSpec>& MessageSpecs() const { return m_Specs; }
+
+private:
+ // Both Logging.* and ZenLog.* use identical event fields; the difference is
+ // only how their respective FormatArgs get rendered (printf vs fmt), which
+ // is decided at message-emit time.
+ template<typename CategoryEvent>
+ void IngestCategory(const CategoryEvent& Ev)
+ {
+ uint64_t Ptr = Ev.CategoryPointer();
+ zen::trace_detail::LogCategoryInfo& Info = m_Categories[Ptr];
+ Info.Name = SafeFieldStr(Ev.Name());
+ Info.DefaultVerbosity = Ev.DefaultVerbosity();
+ }
+
+ template<typename SpecEvent>
+ void IngestSpec(const SpecEvent& Ev)
+ {
+ MessageSpec& Spec = m_Specs[Ev.LogPoint()];
+ Spec.CategoryPointer = Ev.CategoryPointer();
+ Spec.Line = Ev.Line();
+ Spec.Verbosity = Ev.Verbosity();
+ Spec.File = SafeFieldStr(Ev.FileName());
+ Spec.FormatString = SafeFieldStr(Ev.FormatString());
+ }
+
+ void OnLogCategory(const Logging_LogCategory& Ev) { IngestCategory(Ev); }
+ void OnLogMessageSpec(const Logging_LogMessageSpec& Ev) { IngestSpec(Ev); }
+ void OnLogMessage(const Logging_LogMessage& Ev) { EmitEntry(Ev.LogPoint(), Ev.Cycle(), Ev.FormatArgs(), /*IsZenLog*/ false); }
+
+ // ZenLog.* shares the same MessageSpec table. LogPoint pointers are emitted
+ // by distinct processes (zenserver vs. a hypothetical UE trace we imported)
+ // so the two keyspaces don't collide in practice.
+ void OnZenLogCategory(const ZenLog_Category& Ev) { IngestCategory(Ev); }
+ void OnZenLogMessageSpec(const ZenLog_MessageSpec& Ev) { IngestSpec(Ev); }
+ void OnZenLogMessage(const ZenLog_Message& Ev) { EmitEntry(Ev.LogPoint(), Ev.Cycle(), Ev.FormatArgs(), /*IsZenLog*/ true); }
+
+ void EmitEntry(uint64_t LogPoint, uint64_t Cycle, Array<uint8[]> Args, bool IsZenLog)
+ {
+ auto SpecIt = m_Specs.find(LogPoint);
+ if (SpecIt == m_Specs.end())
+ {
+ return;
+ }
+ const MessageSpec& Spec = SpecIt->second;
+
+ uint32_t TimeUs = m_Timing ? m_Timing->CycleToTimeUs(Cycle) : 0;
+
+ std::string Msg = IsZenLog ? zen::logging::FormatLogArgs(std::string_view(Spec.FormatString), Args.get(), Args.get_size())
+ : FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size());
+
+ zen::trace_detail::LogEntry Entry;
+ Entry.TimeUs = TimeUs;
+ Entry.Verbosity = Spec.Verbosity;
+ Entry.Line = Spec.Line;
+ Entry.File = Spec.File;
+ Entry.Message = std::move(Msg);
+ // Use the category pointer temporarily so BuildTraceModel can resolve
+ // it against the categories table.
+ Entry.CategoryIndex = SpecToCategoryIndex(Spec.CategoryPointer);
+ m_Entries.push_back(std::move(Entry));
+ }
+
+ uint32_t SpecToCategoryIndex(uint64_t Ptr)
+ {
+ // Encoded pointer stuffed into uint32_t so BuildTraceModel can remap.
+ // Lossy but deterministic: use a stable sequential index per unique
+ // pointer so we never need the full 64-bit value beyond build time.
+ auto It = m_CategoryIndex.find(Ptr);
+ if (It != m_CategoryIndex.end())
+ {
+ return It->second;
+ }
+ uint32_t Idx = uint32_t(m_CategoryIndex.size());
+ m_CategoryIndex[Ptr] = Idx;
+ return Idx;
+ }
+
+public:
+ // Mapping from the intermediate index stored in LogEntry::CategoryIndex
+ // during capture to the real category pointer; BuildTraceModel uses
+ // this to remap entries against the flattened LogCategories array.
+ const eastl::hash_map<uint64_t, uint32_t>& CategoryPointerIndex() const { return m_CategoryIndex; }
+
+private:
+ const TraceTiming* m_Timing = nullptr;
+ eastl::hash_map<uint64_t, zen::trace_detail::LogCategoryInfo> m_Categories;
+ eastl::hash_map<uint64_t, MessageSpec> m_Specs;
+ eastl::hash_map<uint64_t, uint32_t> m_CategoryIndex;
+ eastl::vector<zen::trace_detail::LogEntry> m_Entries;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Bookmarks and regions
+//
+// UE's bookmark wire format mirrors LogMessage: a BookmarkSpec introduces
+// a (FileName, Line, FormatString) triple keyed by a BookmarkPoint pointer,
+// and each Misc.Bookmark event carries that pointer, a cycle, and the same
+// FFormatArgsTrace payload the log pipeline already knows how to decode.
+// Region events come in two flavours: the legacy name-paired
+// RegionBegin/RegionEnd and the newer *WithId variants that pack a unique
+// id into the begin event's cycle.
+
+class BookmarksAnalyzer : public Analyzer
+{
+public:
+ explicit BookmarksAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {}
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmarkSpec);
+ Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmark);
+ Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBegin);
+ Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBeginWithId);
+ Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEnd);
+ Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEndWithId);
+ }
+
+ eastl::vector<zen::trace_detail::Bookmark>& MutableBookmarks() { return m_Bookmarks; }
+ eastl::vector<zen::trace_detail::RegionEntry>& MutableRegions() { return m_Regions; }
+
+private:
+ struct BookmarkSpec
+ {
+ int32_t Line = 0;
+ std::string File;
+ std::string FormatString;
+ };
+
+ uint32_t CycleToTimeUs(uint64_t Cycle) const { return m_Timing ? m_Timing->CycleToTimeUs(Cycle) : 0; }
+
+ void OnBookmarkSpec(const Misc_BookmarkSpec& Ev)
+ {
+ BookmarkSpec& Spec = m_Specs[Ev.BookmarkPoint()];
+ Spec.Line = Ev.Line();
+ Spec.File = SafeFieldStr(Ev.FileName());
+ Spec.FormatString = SafeFieldStr(Ev.FormatString());
+ }
+
+ void OnBookmark(const Misc_Bookmark& Ev)
+ {
+ auto SpecIt = m_Specs.find(Ev.BookmarkPoint());
+ if (SpecIt == m_Specs.end())
+ {
+ return;
+ }
+ const BookmarkSpec& Spec = SpecIt->second;
+
+ Array<uint8[]> Args = Ev.FormatArgs();
+ std::string Text = FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size());
+
+ zen::trace_detail::Bookmark Out;
+ Out.TimeUs = CycleToTimeUs(Ev.Cycle());
+ Out.Line = Spec.Line;
+ Out.File = Spec.File;
+ Out.Text = std::move(Text);
+ m_Bookmarks.push_back(std::move(Out));
+ }
+
+ uint32_t CreatePartialRegion(uint32_t TimeUs, std::string Name, std::string Category)
+ {
+ zen::trace_detail::RegionEntry Entry;
+ Entry.BeginUs = TimeUs;
+ Entry.EndUs = ~uint32_t(0); // sentinel: still open
+ Entry.Depth = 0;
+ Entry.Reserved = 0;
+ Entry.Name = std::move(Name);
+ Entry.Category = std::move(Category);
+ uint32_t Idx = uint32_t(m_Regions.size());
+ m_Regions.push_back(std::move(Entry));
+ return Idx;
+ }
+
+ // Decodes the raw array bytes of a RegionName field into a std::string.
+ // UE emits RegionName as either AnsiString (1-byte) or WideString (2-byte)
+ // depending on the trace's age -- for the 2-byte case we do the same
+ // lossy ASCII fold tourist's FieldStr does, which is all we need for
+ // display.
+ static std::string DecodeRegionName(const Array<uint8[]>& Data)
+ {
+ const uint8_t* p = Data.get();
+ size_t size = Data.get_size();
+ uint32_t count = Data.get_count();
+ if (!p || size == 0 || count == 0)
+ {
+ return {};
+ }
+ if (size == count)
+ {
+ // 1 byte per element -- AnsiString.
+ return std::string(reinterpret_cast<const char*>(p), count);
+ }
+ if (size == count * 2)
+ {
+ // 2 bytes per element -- WideString. Lossy ASCII fold.
+ std::string out;
+ out.reserve(count);
+ const char16_t* w = reinterpret_cast<const char16_t*>(p);
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ out.push_back(w[i] < 0x80 ? char(w[i]) : '?');
+ }
+ return out;
+ }
+ return {};
+ }
+
+ void OnRegionBegin(const Misc_RegionBegin& Ev)
+ {
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ Array<uint8[]> NameArr = Ev.RegionName();
+ std::string Name = DecodeRegionName(NameArr);
+ std::string Category = DecodeRegionName(Ev.Category());
+ uint32_t Idx = CreatePartialRegion(TimeUs, Name, std::move(Category));
+ m_OpenByName[Name].push_back(Idx);
+ }
+
+ void OnRegionBeginWithId(const Misc_RegionBeginWithId& Ev)
+ {
+ // Despite its name, CycleAndId is just Cycles64() -- a plain 64-bit
+ // cycle count that doubles as a unique region identifier. The caller
+ // keeps the returned value and passes it back as RegionId at end.
+ uint64_t CycleAndId = Ev.CycleAndId();
+ uint32_t TimeUs = CycleToTimeUs(CycleAndId);
+ Array<uint8[]> NameArr = Ev.RegionName();
+ std::string Name = DecodeRegionName(NameArr);
+ std::string Category = DecodeRegionName(Ev.Category());
+ uint32_t Idx = CreatePartialRegion(TimeUs, std::move(Name), std::move(Category));
+ m_OpenById[CycleAndId] = Idx;
+ }
+
+ void OnRegionEnd(const Misc_RegionEnd& Ev)
+ {
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ Array<uint8[]> NameArr = Ev.RegionName();
+ std::string Name = DecodeRegionName(NameArr);
+ auto It = m_OpenByName.find(Name);
+ if (It == m_OpenByName.end() || It->second.empty())
+ {
+ return;
+ }
+ uint32_t Idx = It->second.back();
+ It->second.pop_back();
+ m_Regions[Idx].EndUs = TimeUs;
+ }
+
+ void OnRegionEndWithId(const Misc_RegionEndWithId& Ev)
+ {
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ uint64_t Id = Ev.RegionId();
+ auto It = m_OpenById.find(Id);
+ if (It == m_OpenById.end())
+ {
+ return;
+ }
+ m_Regions[It->second].EndUs = TimeUs;
+ m_OpenById.erase(It);
+ }
+
+ const TraceTiming* m_Timing = nullptr;
+ eastl::hash_map<uint64_t, BookmarkSpec> m_Specs;
+ eastl::vector<zen::trace_detail::Bookmark> m_Bookmarks;
+ eastl::vector<zen::trace_detail::RegionEntry> m_Regions;
+ eastl::hash_map<std::string, eastl::vector<uint32_t>> m_OpenByName;
+ eastl::hash_map<uint64_t, uint32_t> m_OpenById;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// CsvProfiler analyzer -- parses CSV stat categories, definitions, timing,
+// custom values, events, capture markers, and metadata.
+
+class CsvProfilerAnalyzer : public Analyzer
+{
+public:
+ explicit CsvProfilerAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {}
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnRegisterCategory);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineInlineStat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineDeclaredStat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginStat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndStat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginExclusiveStat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndExclusiveStat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatInt);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatFloat);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEvent);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginCapture);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndCapture);
+ Subs.emplace_back(this, &CsvProfilerAnalyzer::OnMetadata);
+ }
+
+ eastl::vector<zen::trace_detail::TraceModel::CsvCategory>& MutableCategories() { return m_Categories; }
+ eastl::vector<zen::trace_detail::TraceModel::CsvStatDef>& MutableStatDefs() { return m_StatDefs; }
+ eastl::vector<zen::trace_detail::TraceModel::CsvEvent>& MutableEvents() { return m_Events; }
+ eastl::vector<zen::trace_detail::TraceModel::CsvMeta>& MutableMetadata() { return m_Metadata; }
+
+ // Build the per-stat+thread time series from the accumulated samples.
+ eastl::vector<zen::trace_detail::TraceModel::CsvSeries> BuildTimeSeries()
+ {
+ eastl::vector<zen::trace_detail::TraceModel::CsvSeries> Result;
+ for (auto& [Key, Samples] : m_SeriesMap)
+ {
+ eastl::sort(Samples.begin(), Samples.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; });
+ zen::trace_detail::TraceModel::CsvSeries S;
+ S.StatId = Key.StatId;
+ S.ThreadId = Key.ThreadId;
+ S.Samples = std::move(Samples);
+ Result.push_back(std::move(S));
+ }
+ return Result;
+ }
+
+private:
+ uint32_t CycleToTimeUs(uint64_t Cycle) const
+ {
+ if (!m_Timing || m_Timing->Freq == 0)
+ {
+ return 0;
+ }
+ uint64_t Elapsed = (Cycle >= m_Timing->Base) ? (Cycle - m_Timing->Base) : 0;
+ return uint32_t(Elapsed * 1'000'000 / m_Timing->Freq);
+ }
+
+ static std::string DecodeAnsiName(const Array<uint8[]>& Data)
+ {
+ const uint8_t* P = Data.get();
+ size_t Size = Data.get_size();
+ if (!P || Size == 0)
+ {
+ return {};
+ }
+ return std::string(reinterpret_cast<const char*>(P), Size);
+ }
+
+ static std::string DecodeWideName(const Array<uint8[]>& Data)
+ {
+ const uint8_t* P = Data.get();
+ size_t Size = Data.get_size();
+ uint32_t Count = Data.get_count();
+ if (!P || Size == 0 || Count == 0)
+ {
+ return {};
+ }
+ uint32_t ElemSize = Data.get_element_size();
+ if (ElemSize == 2)
+ {
+ std::string Out;
+ Out.reserve(Count);
+ const char16_t* W = reinterpret_cast<const char16_t*>(P);
+ for (uint32_t I = 0; I < Count; ++I)
+ {
+ Out.push_back(W[I] < 0x80 ? char(W[I]) : '?');
+ }
+ return Out;
+ }
+ return std::string(reinterpret_cast<const char*>(P), Size);
+ }
+
+ struct SeriesKey
+ {
+ uint64_t StatId;
+ uint32_t ThreadId;
+ bool operator==(const SeriesKey& O) const { return StatId == O.StatId && ThreadId == O.ThreadId; }
+ };
+ struct SeriesKeyHash
+ {
+ size_t operator()(const SeriesKey& K) const
+ {
+ return eastl::hash<uint64_t>{}(K.StatId) ^ (eastl::hash<uint32_t>{}(K.ThreadId) * 2654435761u);
+ }
+ };
+
+ void AddSample(uint64_t StatId, uint32_t ThreadId, uint32_t TimeUs, float Value)
+ {
+ m_SeriesMap[SeriesKey{StatId, ThreadId}].push_back({TimeUs, Value});
+ }
+
+ // -- Event handlers -----------------------------------------------
+
+ void OnRegisterCategory(const CsvProfiler_RegisterCategory& Ev)
+ {
+ zen::trace_detail::TraceModel::CsvCategory Cat;
+ Cat.Index = Ev.Index();
+ Cat.Name = DecodeAnsiName(Ev.Name());
+ m_Categories.push_back(std::move(Cat));
+ }
+
+ void OnDefineInlineStat(const CsvProfiler_DefineInlineStat& Ev)
+ {
+ DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name()));
+ }
+
+ void OnDefineDeclaredStat(const CsvProfiler_DefineDeclaredStat& Ev)
+ {
+ DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name()));
+ }
+
+ void DefineStat(uint64_t StatId, int32_t CategoryIndex, std::string Name)
+ {
+ if (m_StatIdToIndex.count(StatId))
+ {
+ return; // already defined
+ }
+ m_StatIdToIndex[StatId] = uint32_t(m_StatDefs.size());
+ zen::trace_detail::TraceModel::CsvStatDef Def;
+ Def.StatId = StatId;
+ Def.CategoryIndex = CategoryIndex;
+ Def.Name = std::move(Name);
+ m_StatDefs.push_back(std::move(Def));
+ }
+
+ void OnBeginStat(const CsvProfiler_BeginStat& Ev)
+ {
+ uint32_t ThreadId = Ev.get_thread_id();
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs);
+ }
+
+ void OnEndStat(const CsvProfiler_EndStat& Ev)
+ {
+ uint32_t ThreadId = Ev.get_thread_id();
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ auto Key = SeriesKey{Ev.StatId(), ThreadId};
+ auto It = m_OpenStacks.find(Key);
+ if (It == m_OpenStacks.end() || It->second.empty())
+ {
+ return;
+ }
+ uint32_t BeginUs = It->second.back();
+ It->second.pop_back();
+ float DurationMs = float(TimeUs - BeginUs) / 1000.0f;
+ AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs);
+ }
+
+ void OnBeginExclusiveStat(const CsvProfiler_BeginExclusiveStat& Ev)
+ {
+ // For basic support, treat exclusive stats like regular stats.
+ uint32_t ThreadId = Ev.get_thread_id();
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs);
+ }
+
+ void OnEndExclusiveStat(const CsvProfiler_EndExclusiveStat& Ev)
+ {
+ uint32_t ThreadId = Ev.get_thread_id();
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ auto Key = SeriesKey{Ev.StatId(), ThreadId};
+ auto It = m_OpenStacks.find(Key);
+ if (It == m_OpenStacks.end() || It->second.empty())
+ {
+ return;
+ }
+ uint32_t BeginUs = It->second.back();
+ It->second.pop_back();
+ float DurationMs = float(TimeUs - BeginUs) / 1000.0f;
+ AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs);
+ }
+
+ void OnCustomStatInt(const CsvProfiler_CustomStatInt& Ev)
+ {
+ uint32_t ThreadId = Ev.get_thread_id();
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ AddSample(Ev.StatId(), ThreadId, TimeUs, float(Ev.Value()));
+ }
+
+ void OnCustomStatFloat(const CsvProfiler_CustomStatFloat& Ev)
+ {
+ uint32_t ThreadId = Ev.get_thread_id();
+ uint32_t TimeUs = CycleToTimeUs(Ev.Cycle());
+ AddSample(Ev.StatId(), ThreadId, TimeUs, Ev.Value());
+ }
+
+ void OnEvent(const CsvProfiler_Event& Ev)
+ {
+ zen::trace_detail::TraceModel::CsvEvent E;
+ E.TimeUs = CycleToTimeUs(Ev.Cycle());
+ E.CategoryIndex = Ev.CategoryIndex();
+ E.Text = DecodeWideName(Ev.Text());
+ m_Events.push_back(std::move(E));
+ }
+
+ void OnBeginCapture(const CsvProfiler_BeginCapture& Ev) { m_CaptureStartUs = CycleToTimeUs(Ev.Cycle()); }
+
+ void OnEndCapture(const CsvProfiler_EndCapture& Ev) { m_CaptureEndUs = CycleToTimeUs(Ev.Cycle()); }
+
+ void OnMetadata(const CsvProfiler_Metadata& Ev)
+ {
+ zen::trace_detail::TraceModel::CsvMeta M;
+ M.Key = DecodeWideName(Ev.Key());
+ M.Value = DecodeWideName(Ev.Value());
+ m_Metadata.push_back(std::move(M));
+ }
+
+ const TraceTiming* m_Timing = nullptr;
+
+ eastl::vector<zen::trace_detail::TraceModel::CsvCategory> m_Categories;
+ eastl::vector<zen::trace_detail::TraceModel::CsvStatDef> m_StatDefs;
+ eastl::hash_map<uint64_t, uint32_t> m_StatIdToIndex;
+
+ // Timing stacks: (StatId, ThreadId) -> stack of begin times
+ eastl::hash_map<SeriesKey, eastl::vector<uint32_t>, SeriesKeyHash> m_OpenStacks;
+
+ // Accumulated samples: (StatId, ThreadId) -> samples
+ eastl::hash_map<SeriesKey, eastl::vector<zen::trace_detail::TraceModel::CsvSample>, SeriesKeyHash> m_SeriesMap;
+
+ eastl::vector<zen::trace_detail::TraceModel::CsvEvent> m_Events;
+ eastl::vector<zen::trace_detail::TraceModel::CsvMeta> m_Metadata;
+
+ uint32_t m_CaptureStartUs = 0;
+ uint32_t m_CaptureEndUs = 0;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Analyzers
+
+class CpuAnalyzer : public Analyzer
+{
+public:
+ CpuAnalyzer(EventStats& Stats, NameDepot& Names, const MetadataRegistry* Metadata)
+ : m_Names(Names)
+ , m_Stats(Stats)
+ , m_Metadata(Metadata)
+ {
+ Names.Add(NO_NAME, "???");
+ }
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &CpuAnalyzer::OnNewTrace);
+ Subs.emplace_back(this, &CpuAnalyzer::OnCpuSpec);
+ Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatch);
+ Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV2);
+ Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV3);
+ }
+
+private:
+ static constexpr uint32 NO_INDEX = ~0u;
+ static constexpr uint64 NO_NAME = ~0ull;
+ static constexpr uint32 METADATA_BIT = 0x8000'0000u;
+
+ uint64 ResolveNameHash(uint32 PackedId)
+ {
+ const bool IsMetadata = (PackedId & METADATA_BIT) != 0;
+ const uint32 Id = PackedId & ~METADATA_BIT;
+
+ if (IsMetadata && m_Metadata)
+ {
+ auto CachedIt = m_MetadataNames.find(Id);
+ if (CachedIt != m_MetadataNames.end())
+ {
+ return CachedIt->second;
+ }
+
+ const MetadataEntry* Entry = m_Metadata->Lookup(Id);
+ if (Entry)
+ {
+ auto BaseIt = m_Specs.find(Entry->SpecId);
+ StringView BaseName = (BaseIt != m_Specs.end()) ? m_Names.Get(BaseIt->second) : StringView("???");
+ std::string Formatted(BaseName);
+ std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size());
+ if (!Values.empty())
+ {
+ Formatted += " - ";
+ Formatted += Values;
+ }
+ uint64 Hash = m_Names.Add(StringView(Formatted));
+ m_MetadataNames[Id] = Hash;
+ return Hash;
+ }
+ return NO_NAME;
+ }
+
+ auto It = m_Specs.find(Id);
+ return (It != m_Specs.end()) ? It->second : NO_NAME;
+ }
+
+ struct EventStack
+ {
+ uint32 Tail = NO_INDEX;
+ };
+
+ struct ScopeEvent
+ {
+ uint32 Id;
+ uint32 TimeUs;
+ union
+ {
+ uint32 Next;
+ uint32 Index;
+ };
+ };
+
+ struct EventPool
+ {
+ uint32 Alloc()
+ {
+ if (m_FreeHead == NO_INDEX)
+ {
+ uint32 Idx = uint32(m_Pool.size());
+ m_Pool.push_back({.Index = Idx});
+ return Idx;
+ }
+ uint32 Idx = m_FreeHead;
+ m_FreeHead = m_Pool[Idx].Index;
+ return Idx;
+ }
+
+ void Free(uint32 Idx)
+ {
+ m_Pool[Idx].Index = m_FreeHead;
+ m_FreeHead = Idx;
+ }
+
+ ScopeEvent& Get(uint32 Idx) { return m_Pool[Idx]; }
+
+ eastl::vector<ScopeEvent> m_Pool;
+ uint32 m_FreeHead = NO_INDEX;
+ };
+
+ void OnNewTrace(const $Trace_NewTrace& NewTrace)
+ {
+ m_Freq = NewTrace.CycleFrequency();
+ m_Base = NewTrace.StartCycle();
+ m_UsDiv = m_Freq / 1'000'000;
+ if (m_UsDiv == 0)
+ {
+ m_UsDiv = 1;
+ }
+ m_UsDivRecip = ReciprocalU64(m_UsDiv);
+ }
+
+ void OnCpuSpec(const CpuProfiler_EventSpec& Spec)
+ {
+ uint32 SpecId = Spec.Id();
+ FieldStr SpecName = Spec.Name();
+
+ StringView NameView = SpecName.as_view();
+
+ if (NameView.starts_with("Frame "))
+ {
+ NameView = "Frame";
+ }
+ if (size_t Pos = NameView.find("\""); Pos != StringView::npos)
+ {
+ NameView = NameView.substr(0, Pos);
+ }
+ if (size_t Pos = NameView.find("\\"); Pos != StringView::npos)
+ {
+ NameView = NameView.substr(0, Pos);
+ }
+
+ m_Specs[SpecId] = m_Names.Add(NameView);
+ }
+
+ void OnCpuBatch(const CpuProfiler_EventBatch& Batch)
+ {
+ uint32 ThreadId = Batch.get_thread_id();
+ Array<uint8[]> Data = Batch.Data();
+ AbsorbBatch(/*Version=*/1, ThreadId, Data);
+ }
+
+ void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch)
+ {
+ uint32 ThreadId = Batch.get_thread_id();
+ Array<uint8[]> Data = Batch.Data();
+ AbsorbBatch(/*Version=*/2, ThreadId, Data);
+ }
+
+ void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch)
+ {
+ uint32 ThreadId = Batch.get_thread_id();
+ Array<uint8[]> Data = Batch.Data();
+ AbsorbBatch(/*Version=*/3, ThreadId, Data);
+ }
+
+ // Decodes a CpuProfiler scope batch. Mirrors UE's reference
+ // TraceServices/.../CpuProfilerTraceAnalysis.cpp ProcessBuffer /
+ // ProcessBufferV2.
+ //
+ // Version 1 (`CpuProfiler.EventBatch`): cycle is `value >> 1`; bit 0 is
+ // IsEnter; IsEnter events carry a SpecId varint.
+ //
+ // Version 2 (`CpuProfiler.EventBatchV2`, UE 5.1..5.5) and Version 3
+ // (`CpuProfiler.EventBatchV3`, UE 5.6+): cycle is `value >> 2`; bit 0 is
+ // IsEnter, bit 1 is IsCoroutine. Coroutine begin events carry CoroutineId
+ // and TimerScopeDepth varints; coroutine end events carry a single
+ // TimerScopeDepth varint. V3 additionally reserves the low bit of the
+ // SpecId to mark metadata-bearing timers, so SpecId must be shifted
+ // right by 1 to recover the actual spec id.
+ void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data)
+ {
+ const uint8* Cursor = Data.get();
+ const uint8* End = Cursor + Data.get_size();
+
+ auto Decode = [&]() {
+ uint64 Value = 0;
+ for (uint32 I = 1, J = 0; I; J += 7)
+ {
+ I = *Cursor++;
+ Value |= uint64(I & 0x7f) << J;
+ I &= 0x80;
+ }
+ return Value;
+ };
+
+ if (ThreadId >= m_Threads.size())
+ {
+ m_Threads.resize(ThreadId + 1);
+ }
+ EventStack& Stack = m_Threads[ThreadId];
+
+ const uint32 CycleShift = (Version == 1) ? 1u : 2u;
+
+ uint64 Base = m_Base;
+
+ uint64 Cycle = ~Base + 1;
+ while (Cursor < End)
+ {
+ uint64 Value = Decode();
+ uint32 IsEnter = (Value & 0b01);
+
+ if (Version > 1 && (Value & 0b10))
+ {
+ // Coroutine event -- not visualised, but the trailing varints
+ // still need to be consumed so we stay in sync with the
+ // stream.
+ if (IsEnter)
+ {
+ (void)Decode(); // CoroutineId
+ (void)Decode(); // TimerScopeDepth
+ }
+ else
+ {
+ (void)Decode(); // TimerScopeDepth
+ }
+ continue;
+ }
+
+ uint64 EventId = IsEnter ? Decode() : ~0ull;
+
+ Cycle += (Value >> CycleShift);
+ uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1));
+
+ if (IsEnter)
+ {
+ uint32 ScopeId = uint32(EventId);
+ bool IsMetadata = false;
+ if (Version == 3)
+ {
+ IsMetadata = (ScopeId & 1u) != 0;
+ ScopeId >>= 1;
+ }
+ uint32 EvIdx = m_Events.Alloc();
+ ScopeEvent& Ev = m_Events.Get(EvIdx);
+ // Pack the metadata flag in the high bit so the close path
+ // can distinguish metadata-id scopes from regular ones without
+ // an extra field.
+ Ev.Id = IsMetadata ? (ScopeId | 0x8000'0000u) : ScopeId;
+ Ev.TimeUs = TimeUs;
+ Ev.Next = Stack.Tail;
+ Stack.Tail = EvIdx;
+ continue;
+ }
+
+ if (Stack.Tail == NO_INDEX)
+ {
+ continue;
+ }
+
+ ScopeEvent& Ev = m_Events.Get(Stack.Tail);
+ uint32 DurationUs = TimeUs - Ev.TimeUs;
+ uint64 NameHash = ResolveNameHash(Ev.Id);
+ m_Stats.Record(NameHash, DurationUs);
+
+ uint32 NextIdx = Ev.Next;
+ m_Events.Free(Stack.Tail);
+ Stack.Tail = NextIdx;
+ }
+ }
+
+ uint64 m_Freq = 0;
+ uint64 m_Base = 0;
+ uint64 m_UsDiv = 1;
+ ReciprocalU64 m_UsDivRecip;
+ eastl::hash_map<uint32, uint64> m_Specs;
+ NameDepot& m_Names;
+ EventPool m_Events;
+ eastl::vector<EventStack> m_Threads;
+ EventStats& m_Stats;
+ const MetadataRegistry* m_Metadata = nullptr;
+ // Caches the resolved name hash for each MetadataId so we don't
+ // re-format the same CBOR payload on every scope-close.
+ eastl::hash_map<uint32, uint64> m_MetadataNames;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Per-event CPU scope capture for the interactive trace viewer.
+//
+// Mirrors CpuAnalyzer's decode loop but instead of aggregating statistics,
+// it records one TimelineScope per closed CPU scope so the viewer can draw a
+// flame graph. Scope names are interned into a flat vector so each event only
+// stores a compact uint32 NameId.
+
+class TimelineAnalyzer : public Analyzer
+{
+public:
+ explicit TimelineAnalyzer(const MetadataRegistry* Metadata = nullptr, TraceTiming* SharedTiming = nullptr)
+ : m_SharedTiming(SharedTiming)
+ , m_Metadata(Metadata)
+ {
+ }
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &TimelineAnalyzer::OnNewTrace);
+ Subs.emplace_back(this, &TimelineAnalyzer::OnCpuSpec);
+ Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatch);
+ Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV2);
+ Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV3);
+ }
+
+ struct ThreadData
+ {
+ eastl::vector<zen::trace_detail::TimelineScope> Scopes;
+ // Open-scope stack: parallel arrays keeping begin time and name id.
+ eastl::vector<uint32_t> OpenBeginUs;
+ eastl::vector<uint32_t> OpenNameIds;
+ };
+
+ const eastl::vector<std::string>& ScopeNames() const { return m_ScopeNames; }
+ const eastl::map<uint32_t, ThreadData>& Threads() const { return m_Threads; }
+ uint32_t MinBeginUs() const { return m_MinBeginUs; }
+ uint32_t MaxEndUs() const { return m_MaxEndUs; }
+
+private:
+ static constexpr uint32_t INVALID_NAME = ~0u;
+
+ uint32_t InternName(StringView Name)
+ {
+ String Key(Name);
+ auto [It, Inserted] = m_NameIndex.try_emplace(std::move(Key), 0);
+ if (Inserted)
+ {
+ It->second = uint32_t(m_ScopeNames.size());
+ m_ScopeNames.emplace_back(Name);
+ }
+ return It->second;
+ }
+
+ void OnNewTrace(const $Trace_NewTrace& NewTrace)
+ {
+ m_Freq = NewTrace.CycleFrequency();
+ m_Base = NewTrace.StartCycle();
+ m_UsDiv = m_Freq / 1'000'000;
+ if (m_UsDiv == 0)
+ {
+ m_UsDiv = 1;
+ }
+ m_UsDivRecip = ReciprocalU64(m_UsDiv);
+ if (m_SharedTiming)
+ {
+ m_SharedTiming->Freq = m_Freq;
+ m_SharedTiming->Base = m_Base;
+ m_SharedTiming->UsDiv = m_UsDiv;
+ }
+ }
+
+ void OnCpuSpec(const CpuProfiler_EventSpec& Spec)
+ {
+ uint32 SpecId = Spec.Id();
+ FieldStr SpecName = Spec.Name();
+
+ StringView NameView = SpecName.as_view();
+
+ if (NameView.starts_with("Frame "))
+ {
+ NameView = "Frame";
+ }
+ if (size_t Pos = NameView.find("\""); Pos != StringView::npos)
+ {
+ NameView = NameView.substr(0, Pos);
+ }
+ if (size_t Pos = NameView.find("\\"); Pos != StringView::npos)
+ {
+ NameView = NameView.substr(0, Pos);
+ }
+
+ m_Specs[SpecId] = InternName(NameView);
+ }
+
+ void OnCpuBatch(const CpuProfiler_EventBatch& Batch)
+ {
+ uint32 ThreadId = Batch.get_thread_id();
+ Array<uint8[]> Data = Batch.Data();
+ AbsorbBatch(/*Version=*/1, ThreadId, Data);
+ }
+
+ void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch)
+ {
+ uint32 ThreadId = Batch.get_thread_id();
+ Array<uint8[]> Data = Batch.Data();
+ AbsorbBatch(/*Version=*/2, ThreadId, Data);
+ }
+
+ void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch)
+ {
+ uint32 ThreadId = Batch.get_thread_id();
+ Array<uint8[]> Data = Batch.Data();
+ AbsorbBatch(/*Version=*/3, ThreadId, Data);
+ }
+
+ TraceTiming* m_SharedTiming = nullptr;
+
+ // See CpuAnalyzer::AbsorbBatch for a detailed description of the wire
+ // format for each version.
+ void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data)
+ {
+ const uint8* Cursor = Data.get();
+ const uint8* End = Cursor + Data.get_size();
+
+ auto Decode = [&]() {
+ uint64 Value = 0;
+ for (uint32 I = 1, J = 0; I; J += 7)
+ {
+ I = *Cursor++;
+ Value |= uint64(I & 0x7f) << J;
+ I &= 0x80;
+ }
+ return Value;
+ };
+
+ ThreadData& Thread = m_Threads[ThreadId];
+
+ const uint32 CycleShift = (Version == 1) ? 1u : 2u;
+
+ uint64 Base = m_Base;
+
+ uint64 Cycle = ~Base + 1;
+ while (Cursor < End)
+ {
+ uint64 Value = Decode();
+ uint32 IsEnter = (Value & 0b01);
+
+ if (Version > 1 && (Value & 0b10))
+ {
+ // Coroutine event -- consume the trailing varints so we stay
+ // in sync with the stream, but drop the event on the floor.
+ if (IsEnter)
+ {
+ (void)Decode(); // CoroutineId
+ (void)Decode(); // TimerScopeDepth
+ }
+ else
+ {
+ (void)Decode(); // TimerScopeDepth
+ }
+ continue;
+ }
+
+ uint64 EventId = IsEnter ? Decode() : ~0ull;
+
+ Cycle += (Value >> CycleShift);
+ uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1));
+
+ if (IsEnter)
+ {
+ uint32 ScopeId = uint32(EventId);
+ bool IsMetadata = false;
+ if (Version == 3)
+ {
+ IsMetadata = (ScopeId & 1u) != 0;
+ ScopeId >>= 1;
+ }
+ uint32_t NameId = IsMetadata ? ResolveMetadataNameId(ScopeId) : LookupSpecNameId(ScopeId);
+ Thread.OpenBeginUs.push_back(TimeUs);
+ Thread.OpenNameIds.push_back(NameId);
+ continue;
+ }
+
+ if (Thread.OpenBeginUs.empty())
+ {
+ continue;
+ }
+
+ uint32_t BeginUs = Thread.OpenBeginUs.back();
+ uint32_t NameId = Thread.OpenNameIds.back();
+ Thread.OpenBeginUs.pop_back();
+ Thread.OpenNameIds.pop_back();
+
+ if (NameId == INVALID_NAME)
+ {
+ continue;
+ }
+
+ uint16_t Depth = uint16_t(Thread.OpenBeginUs.size());
+ Thread.Scopes.push_back(zen::trace_detail::TimelineScope{
+ .BeginUs = BeginUs,
+ .DurationUs = TimeUs - BeginUs,
+ .NameId = NameId,
+ .Depth = Depth,
+ .MergeCount = 0,
+ });
+
+ if (BeginUs < m_MinBeginUs)
+ {
+ m_MinBeginUs = BeginUs;
+ }
+ if (TimeUs > m_MaxEndUs)
+ {
+ m_MaxEndUs = TimeUs;
+ }
+ }
+ }
+
+ uint32_t LookupSpecNameId(uint32 SpecId) const
+ {
+ auto It = m_Specs.find(SpecId);
+ return (It != m_Specs.end()) ? It->second : INVALID_NAME;
+ }
+
+ uint32_t ResolveMetadataNameId(uint32 MetadataId)
+ {
+ if (!m_Metadata)
+ {
+ return INVALID_NAME;
+ }
+
+ auto CachedIt = m_MetadataNameIds.find(MetadataId);
+ if (CachedIt != m_MetadataNameIds.end())
+ {
+ return CachedIt->second;
+ }
+
+ const MetadataEntry* Entry = m_Metadata->Lookup(MetadataId);
+ if (!Entry)
+ {
+ return INVALID_NAME;
+ }
+
+ auto BaseIt = m_Specs.find(Entry->SpecId);
+ const std::string& BaseName = (BaseIt != m_Specs.end()) ? m_ScopeNames[BaseIt->second] : kUnknownName;
+ std::string Formatted = BaseName;
+ std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size());
+ if (!Values.empty())
+ {
+ Formatted += " - ";
+ Formatted += Values;
+ }
+
+ uint32_t NameId = InternName(StringView(Formatted.data(), Formatted.size()));
+ m_MetadataNameIds[MetadataId] = NameId;
+ return NameId;
+ }
+
+ static inline const std::string kUnknownName{"???"};
+
+ uint64 m_Freq = 0;
+ uint64 m_Base = 0;
+ uint64 m_UsDiv = 1;
+ ReciprocalU64 m_UsDivRecip;
+ uint32_t m_MinBeginUs = ~0u;
+ uint32_t m_MaxEndUs = 0;
+ eastl::hash_map<uint32, uint32_t> m_Specs;
+ eastl::hash_map<String, uint32_t> m_NameIndex;
+ eastl::vector<std::string> m_ScopeNames;
+ eastl::map<uint32_t, ThreadData> m_Threads;
+ const MetadataRegistry* m_Metadata = nullptr;
+ eastl::hash_map<uint32_t, uint32_t> m_MetadataNameIds;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+} // anonymous namespace
+
+std::string
+zen::trace_detail::SafeFieldStr(FieldStr&& Field)
+{
+ try
+ {
+ std::string_view View = Field.as_view();
+ // Some trace writers include the NUL terminator in the field length
+ // (see UE trace ToAnsiCheap / ThreadRegister). Strip any trailing NULs
+ // so downstream consumers don't see garbage.
+ while (!View.empty() && View.back() == '\0')
+ {
+ View.remove_suffix(1);
+ }
+ return std::string(View);
+ }
+ catch (const std::exception& E)
+ {
+ ZEN_DEBUG("Failed to decode trace string field: {}", E.what());
+ return {};
+ }
+}
+
+namespace {
+
+// Derive a thread group name from a thread name by stripping a trailing
+// integer suffix (optionally preceded by a separator). E.g. "IoPool Worker 3"
+// -> "IoPool Worker", "DbWorker_12" -> "DbWorker", "HttpThread42" ->
+// "HttpThread". Returns an empty string if no suffix is present or the
+// resulting prefix would be empty.
+static std::string
+SynthesizeThreadGroupFromName(std::string_view Name)
+{
+ size_t I = Name.size();
+ while (I > 0 && Name[I - 1] >= '0' && Name[I - 1] <= '9')
+ {
+ --I;
+ }
+ if (I == Name.size())
+ {
+ return {}; // no trailing digits
+ }
+ if (I > 0)
+ {
+ char C = Name[I - 1];
+ if (C == '_' || C == '-' || C == '.' || C == ':' || C == '#' || C == '/' || C == ' ' || C == '\t')
+ {
+ --I;
+ }
+ }
+ while (I > 0 && (Name[I - 1] == ' ' || Name[I - 1] == '\t'))
+ {
+ --I;
+ }
+ if (I == 0)
+ {
+ return {}; // pure-numeric name
+ }
+ return std::string(Name.substr(0, I));
+}
+
+class SessionAnalyzer : public Analyzer
+{
+public:
+ zen::trace_detail::SessionInfo Session;
+ eastl::map<uint32_t, zen::trace_detail::ThreadInfoEntry> ThreadNames;
+ eastl::map<uint32_t, zen::trace_detail::ChannelInfo> Channels;
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &SessionAnalyzer::OnSession);
+ Subs.emplace_back(this, &SessionAnalyzer::OnThreadGroupBegin);
+ Subs.emplace_back(this, &SessionAnalyzer::OnThreadGroupEnd);
+ Subs.emplace_back(this, &SessionAnalyzer::OnThreadInfo);
+ Subs.emplace_back(this, &SessionAnalyzer::OnChannelAnnounce);
+ Subs.emplace_back(this, &SessionAnalyzer::OnChannelToggle);
+ }
+
+private:
+ eastl::vector<String> m_GroupStack;
+
+ void OnSession(const Diagnostics_Session2& Ev)
+ {
+ Session.Platform = SafeFieldStr(Ev.Platform());
+ Session.AppName = SafeFieldStr(Ev.AppName());
+ Session.ProjectName = SafeFieldStr(Ev.ProjectName());
+ Session.CommandLine = SafeFieldStr(Ev.CommandLine());
+ Session.Branch = SafeFieldStr(Ev.Branch());
+ Session.BuildVersion = SafeFieldStr(Ev.BuildVersion());
+ Session.Changelist = Ev.Changelist();
+ Session.ConfigurationType = Ev.ConfigurationType();
+ Session.HasSession = true;
+ }
+
+ void OnThreadGroupBegin(const $Trace_ThreadGroupBegin& Ev) { m_GroupStack.push_back(SafeFieldStr(Ev.Name())); }
+
+ void OnThreadGroupEnd(const $Trace_ThreadGroupEnd&)
+ {
+ if (!m_GroupStack.empty())
+ {
+ m_GroupStack.pop_back();
+ }
+ }
+
+ void OnThreadInfo(const $Trace_ThreadInfo& Ev)
+ {
+ uint32_t Tid = Ev.ThreadId();
+ zen::trace_detail::ThreadInfoEntry& Info = ThreadNames[Tid];
+ Info.ThreadId = Tid;
+ Info.Name = SafeFieldStr(Ev.Name());
+ Info.GroupName = m_GroupStack.empty() ? "" : m_GroupStack.back();
+ if (Info.GroupName.empty())
+ {
+ Info.GroupName = SynthesizeThreadGroupFromName(Info.Name);
+ }
+ Info.SystemId = Ev.SystemId();
+ Info.SortHint = Ev.SortHint();
+ }
+
+ void OnChannelAnnounce(const Trace_ChannelAnnounce& Ev)
+ {
+ uint32_t Id = Ev.Id();
+ zen::trace_detail::ChannelInfo& Info = Channels[Id];
+ Info.Name = SafeFieldStr(Ev.Name());
+ Info.Enabled = Ev.IsEnabled() != 0;
+ Info.ReadOnly = Ev.ReadOnly() != 0;
+ }
+
+ void OnChannelToggle(const Trace_ChannelToggle& Ev)
+ {
+ uint32_t Id = Ev.Id();
+ auto It = Channels.find(Id);
+ if (It != Channels.end())
+ {
+ It->second.Enabled = Ev.IsEnabled() != 0;
+ }
+ }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Module analyzer
+//
+// Captures Diagnostics.Module{Init,Load,Unload} so TraceModel::Modules has a
+// populated list of loaded DLLs. These events are NoSync+Important so they
+// don't carry a Cycle field (no load/unload timestamps available) but they
+// do survive reconnects and the trim filter. The analyzer is intentionally
+// passive -- we stash the raw data here and leave symbolication and memory
+// attribution to whatever consumes TraceModel::Modules later.
+
+class ModuleAnalyzer : public Analyzer
+{
+public:
+ eastl::map<uint64_t, zen::trace_detail::ModuleInfo> ModulesByBase;
+ std::string SymbolFormat;
+ uint8_t BaseShift = 0;
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &ModuleAnalyzer::OnModuleInit);
+ Subs.emplace_back(this, &ModuleAnalyzer::OnModuleLoad);
+ Subs.emplace_back(this, &ModuleAnalyzer::OnModuleUnload);
+ }
+
+private:
+ void OnModuleInit(const Diagnostics_ModuleInit& Ev)
+ {
+ SymbolFormat = SafeFieldStr(Ev.SymbolFormat());
+ BaseShift = Ev.ModuleBaseShift();
+ }
+
+ void OnModuleLoad(const Diagnostics_ModuleLoad& Ev)
+ {
+ // Older traces stored Base as a 32-bit value shifted right by
+ // ModuleBaseShift to fit. Modern traces set BaseShift to zero and
+ // Base is a full 64-bit address; applying the shift then is a
+ // harmless no-op.
+ uint64_t Base = uint64_t(Ev.Base()) << BaseShift;
+
+ zen::trace_detail::ModuleInfo& Info = ModulesByBase[Base];
+ Info.FullPath = SafeFieldStr(Ev.Name());
+ Info.Base = Base;
+ Info.Size = Ev.Size();
+ Info.Unloaded = false;
+
+ // Extract the basename without pulling in the whole filesystem
+ // library for a single operation. UE emits forward- or backslashes
+ // depending on platform, so handle both.
+ const std::string& Path = Info.FullPath;
+ size_t Cut = Path.find_last_of("/\\");
+ Info.Name = (Cut == std::string::npos) ? Path : Path.substr(Cut + 1);
+
+ ::Array<uint8[]> ImageId = Ev.ImageId();
+ const uint8* IdPtr = ImageId.get();
+ const uint32 IdSize = ImageId.get_size();
+ Info.ImageId.assign(IdPtr, IdPtr + IdSize);
+ }
+
+ void OnModuleUnload(const Diagnostics_ModuleUnload& Ev)
+ {
+ uint64_t Base = uint64_t(Ev.Base()) << BaseShift;
+ auto It = ModulesByBase.find(Base);
+ if (It != ModulesByBase.end())
+ {
+ It->second.Unloaded = true;
+ }
+ }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Trim analyzer
+//
+// Decodes CpuProfiler batch events to extract per-batch timestamp ranges AND
+// to track open/close scope bracketing per thread. The scope tracker lets us
+// identify "must-keep" packets: any packet containing the Leave event for a
+// scope whose Enter was at or before the user's trim EndUs. Preserving those
+// Leaves is what lets the downstream TimelineAnalyzer (and Unreal Insights)
+// still render long-running scopes that span the window end -- if we dropped
+// their closing event the scope would sit unmatched on the open-scope stack
+// and not render at all.
+//
+// Attribution to raw packet indices is approximate due to Tourist's internal
+// per-thread packet buffering; the trim driver processes the trace one packet
+// at a time (bundle of size 1) to keep it as tight as possible. Packets that
+// never get an attributed time range are conservatively retained by the
+// caller.
+
+class TrimAnalyzer : public Analyzer
+{
+public:
+ // Maps packet index (matching both Tourist's Packet::get_index() and our
+ // raw walker's vector position) -> (MinUs, MaxUs) of all events attributed
+ // to that packet.
+ struct Range
+ {
+ uint32_t MinUs = ~0u;
+ uint32_t MaxUs = 0;
+ };
+
+ eastl::hash_map<uint32_t, Range> PacketRanges;
+
+ // Maps thread id -> the maximum packet index that contains a Leave event
+ // for a scope whose matching Enter was at or before EndUs. These packets
+ // must be retained so the downstream analyzer can close the scope.
+ eastl::hash_map<uint32_t, uint32_t> MustKeepPacketByThread;
+
+ // Set by the trim driver from TraceTrimArgs::EndSec before the analysis
+ // pass begins. Used by the scope tracker to decide which leaves are
+ // "must keep".
+ uint32_t EndUs = ~0u;
+
+ // Updated by the trim driver before each Proto.read call when the next
+ // packet is on a normal thread. Maps normal-thread id -> the most
+ // recently scattered packet's index.
+ eastl::hash_map<uint32_t, uint32_t> LastPacketIndexByThread;
+
+ void subscribe(Vector<Subscription>& Subs) override
+ {
+ Subs.emplace_back(this, &TrimAnalyzer::OnNewTrace);
+ Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatch);
+ Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV2);
+ Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV3);
+ }
+
+ bool HasTimeBase() const { return m_Freq != 0; }
+
+private:
+ void OnNewTrace(const $Trace_NewTrace& NewTrace)
+ {
+ m_Freq = NewTrace.CycleFrequency();
+ m_Base = NewTrace.StartCycle();
+ m_UsDiv = (m_Freq > 0) ? (m_Freq / 1'000'000) : 1;
+ if (m_UsDiv == 0)
+ {
+ m_UsDiv = 1;
+ }
+ m_UsDivRecip = ReciprocalU64(m_UsDiv);
+ }
+
+ void OnCpuBatch(const CpuProfiler_EventBatch& Batch) { AbsorbBatchTimes(/*Version=*/1, Batch.get_thread_id(), Batch.Data()); }
+
+ void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) { AbsorbBatchTimes(/*Version=*/2, Batch.get_thread_id(), Batch.Data()); }
+
+ void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) { AbsorbBatchTimes(/*Version=*/3, Batch.get_thread_id(), Batch.Data()); }
+
+ // Decodes cycle deltas in a CpuProfiler batch to find the timestamp range
+ // AND to maintain a per-thread open-scope stack. Mirrors the wire format
+ // documented in CpuAnalyzer::AbsorbBatch. Scope ids are decoded just far
+ // enough to keep the varint cursor in sync; we don't store them.
+ void AbsorbBatchTimes(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data)
+ {
+ if (m_Freq == 0)
+ {
+ return;
+ }
+
+ auto It = LastPacketIndexByThread.find(ThreadId);
+ if (It == LastPacketIndexByThread.end())
+ {
+ return;
+ }
+ const uint32_t PacketIndex = It->second;
+
+ // The open-scope stack is maintained across every batch on a thread.
+ // Each entry stores the Enter time in microseconds from trace start.
+ eastl::vector<uint32_t>& OpenStack = m_OpenScopes[ThreadId];
+
+ const uint8* Cursor = Data.get();
+ const uint8* End = Cursor + Data.get_size();
+
+ auto Decode = [&]() {
+ uint64 Value = 0;
+ for (uint32 I = 1, J = 0; I; J += 7)
+ {
+ I = *Cursor++;
+ Value |= uint64(I & 0x7f) << J;
+ I &= 0x80;
+ }
+ return Value;
+ };
+
+ const uint32 CycleShift = (Version == 1) ? 1u : 2u;
+ const uint64 Base = m_Base;
+
+ uint32_t BatchMinUs = ~0u;
+ uint32_t BatchMaxUs = 0;
+ bool HasAny = false;
+
+ uint64 Cycle = ~Base + 1;
+ while (Cursor < End)
+ {
+ uint64 Value = Decode();
+ uint32 IsEnter = (Value & 0b01);
+
+ if (Version > 1 && (Value & 0b10))
+ {
+ // Coroutine event -- consume the trailing varints and skip.
+ // These don't participate in the scope bracket tracking; the
+ // existing TimelineAnalyzer ignores them for the same reason.
+ if (IsEnter)
+ {
+ (void)Decode(); // CoroutineId
+ (void)Decode(); // TimerScopeDepth
+ }
+ else
+ {
+ (void)Decode(); // TimerScopeDepth
+ }
+ continue;
+ }
+
+ if (IsEnter)
+ {
+ (void)Decode(); // EventId / SpecId
+ }
+
+ Cycle += (Value >> CycleShift);
+ uint32_t TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1));
+
+ if (!HasAny || TimeUs < BatchMinUs)
+ {
+ BatchMinUs = TimeUs;
+ }
+ if (!HasAny || TimeUs > BatchMaxUs)
+ {
+ BatchMaxUs = TimeUs;
+ }
+ HasAny = true;
+
+ if (IsEnter)
+ {
+ OpenStack.push_back(TimeUs);
+ }
+ else if (!OpenStack.empty())
+ {
+ uint32_t EnterTimeUs = OpenStack.back();
+ OpenStack.pop_back();
+
+ // If the scope started at or before the window end, we need
+ // its closing Leave event to survive so the downstream
+ // analyzer can render it. Mark the current packet (the one
+ // holding this Leave) as must-keep for the thread.
+ if (EnterTimeUs <= EndUs)
+ {
+ uint32_t& MustKeep = MustKeepPacketByThread[ThreadId];
+ if (PacketIndex > MustKeep)
+ {
+ MustKeep = PacketIndex;
+ }
+ }
+ }
+ }
+
+ if (!HasAny)
+ {
+ return;
+ }
+
+ Range& R = PacketRanges[PacketIndex];
+ R.MinUs = std::min(R.MinUs, BatchMinUs);
+ R.MaxUs = std::max(R.MaxUs, BatchMaxUs);
+ }
+
+ uint64 m_Freq = 0;
+ uint64 m_Base = 0;
+ uint64 m_UsDiv = 1;
+ ReciprocalU64 m_UsDivRecip;
+
+ // Per-thread open scope stack, carrying the Enter times in microseconds
+ // from trace start. Entries are pushed on Enter and popped on Leave; the
+ // stack may contain unclosed entries when decoding ends (scopes that
+ // outlive the captured trace).
+ eastl::hash_map<uint32_t, eastl::vector<uint32_t>> m_OpenScopes;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Common trace iteration
+
+struct TraceSummary
+{
+ eastl::map<uint32_t, std::pair<std::string, uint64_t>> TypeInfo;
+ eastl::set<uint16_t> Threads;
+ uint64_t TotalEvents = 0;
+};
+
+template<typename ParcelCallback>
+static TraceSummary
+IterateTrace(::DataSource& Source, ParcelCallback OnParcel, const zen::trace_detail::ProgressCallback& OnProgress = {})
+{
+ TraceSummary Summary;
+
+ try
+ {
+ uint64_t TotalFileBytes = uint64_t(std::max(Source.get_size(), int64(0)));
+
+ ::Allocator TraceAllocator;
+ ::Preamble Pream(Source, TraceAllocator);
+ ::Transport Xport = Pream.get_transport();
+ ::Protocol Proto = Pream.get_protocol();
+
+ ::Packet Packets[128];
+ ::EventParcel Parcel;
+
+ while (::Bundle Bndl = Xport.read_packets(Packets))
+ {
+ Parcel.reset();
+ Proto.read(Parcel, Bndl);
+
+ OnParcel(Parcel);
+
+ for (const ::Type* TraceType : Parcel.new_types)
+ {
+ auto [LoggerName, EventName] = TraceType->get_name();
+ std::string TypeName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName));
+ Summary.TypeInfo[TraceType->get_uid()] = {std::move(TypeName), 0};
+ }
+
+ for (const ::Event& Ev : Parcel.events)
+ {
+ Summary.TotalEvents++;
+ Summary.Threads.insert(Ev.thread_id);
+
+ auto It = Summary.TypeInfo.find(Ev.uid);
+ if (It != Summary.TypeInfo.end())
+ {
+ It->second.second++;
+ }
+ }
+
+ if (OnProgress)
+ {
+ OnProgress(Xport.tell(), TotalFileBytes, Summary.TotalEvents);
+ }
+ }
+ }
+ catch (const DataStream::Eof&)
+ {
+ }
+ catch (const Exception::StreamError& E)
+ {
+ throw std::runtime_error(fmt::format("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value));
+ }
+
+ return Summary;
+}
+
+// Print session metadata
+static void
+PrintSessionInfo(const SessionAnalyzer& SessionAn)
+{
+ const zen::trace_detail::SessionInfo& Sess = SessionAn.Session;
+ if (!Sess.HasSession)
+ {
+ return;
+ }
+
+ ZEN_CONSOLE("Platform: {}", Sess.Platform);
+ ZEN_CONSOLE("App: {}", Sess.AppName);
+ if (!Sess.ProjectName.empty())
+ {
+ ZEN_CONSOLE("Project: {}", Sess.ProjectName);
+ }
+ if (!Sess.Branch.empty())
+ {
+ ZEN_CONSOLE("Branch: {}", Sess.Branch);
+ }
+ if (!Sess.BuildVersion.empty())
+ {
+ ZEN_CONSOLE("Build: {}", Sess.BuildVersion);
+ }
+ if (Sess.Changelist)
+ {
+ ZEN_CONSOLE("Changelist: {}", Sess.Changelist);
+ }
+ if (!Sess.CommandLine.empty())
+ {
+ ZEN_CONSOLE("CommandLine: {}", Sess.CommandLine);
+ }
+ ZEN_CONSOLE("");
+}
+
+// Print thread names
+static void
+PrintThreadInfo(const SessionAnalyzer& SessionAn)
+{
+ if (SessionAn.ThreadNames.empty())
+ {
+ return;
+ }
+
+ eastl::vector<std::pair<uint32_t, const zen::trace_detail::ThreadInfoEntry*>> ThreadsSorted;
+ for (const auto& [Tid, Info] : SessionAn.ThreadNames)
+ {
+ ThreadsSorted.emplace_back(Tid, &Info);
+ }
+ eastl::sort(ThreadsSorted.begin(), ThreadsSorted.end(), [](const auto& A, const auto& B) {
+ return A.second->SortHint < B.second->SortHint;
+ });
+
+ ZEN_CONSOLE("");
+ ZEN_CONSOLE("Threads:");
+ ZEN_CONSOLE("");
+ ZEN_CONSOLE("{:>6} {:>10} {}", "TID", "SystemID", "Name");
+ ZEN_CONSOLE("{:-<{}}", "", 6 + 10 + 40 + 4);
+ for (const auto& [Tid, Info] : ThreadsSorted)
+ {
+ ZEN_CONSOLE("{:>6} {:>10} {}", Tid, Info->SystemId, Info->Name);
+ }
+}
+
+// Print trace channel info
+static void
+PrintChannelInfo(const SessionAnalyzer& SessionAn)
+{
+ if (SessionAn.Channels.empty())
+ {
+ return;
+ }
+
+ eastl::vector<const zen::trace_detail::ChannelInfo*> ChannelsSorted;
+ for (const auto& [Id, Info] : SessionAn.Channels)
+ {
+ ChannelsSorted.push_back(&Info);
+ }
+ eastl::sort(ChannelsSorted.begin(), ChannelsSorted.end(), [](const auto* A, const auto* B) { return A->Name < B->Name; });
+
+ ZEN_CONSOLE("");
+ ZEN_CONSOLE("Trace Channels:");
+ ZEN_CONSOLE("");
+ for (const zen::trace_detail::ChannelInfo* Ch : ChannelsSorted)
+ {
+ std::string_view State = Ch->Enabled ? "enabled" : "disabled";
+ if (Ch->ReadOnly)
+ {
+ ZEN_CONSOLE(" {} ({}, read-only)", Ch->Name, State);
+ }
+ else
+ {
+ ZEN_CONSOLE(" {} ({})", Ch->Name, State);
+ }
+ }
+}
+
+} // namespace
+
+//////////////////////////////////////////////////////////////////////////////
+
+namespace zen::trace_detail {
+
+std::filesystem::path
+ResolveTraceFile(const std::filesystem::path& Input, cxxopts::Options& HelpOptions)
+{
+ if (Input.empty())
+ {
+ throw zen::OptionParseException("File path is required", HelpOptions.help());
+ }
+
+ std::filesystem::path FilePath = std::filesystem::absolute(Input);
+ if (!std::filesystem::exists(FilePath))
+ {
+ throw std::runtime_error(fmt::format("File not found: {}", FilePath));
+ }
+
+ return FilePath;
+}
+
+void
+RunInspect(const std::filesystem::path& FilePath)
+{
+ ::DataSource Source(FilePath);
+
+ SessionAnalyzer SessionAn;
+ ::Dispatcher Dispatch;
+ Dispatch.add_analyzer(SessionAn);
+
+ // Collect type schemas
+ struct TypeSchema
+ {
+ std::string FullName;
+ uint32_t Uid = 0;
+ uint32_t FieldCount = 0;
+ uint32_t Flags = 0;
+ uint64_t EventCount = 0;
+ eastl::vector<std::string> FieldNames;
+ eastl::vector<uint32_t> FieldSizes;
+ eastl::vector<uint32_t> FieldTypeInfos;
+ };
+
+ eastl::map<uint32_t, TypeSchema> Schemas;
+
+ TraceSummary Summary = IterateTrace(Source, [&](const ::EventParcel& Parcel) {
+ Dispatch.on_parcel(Parcel);
+
+ for (const ::Type* TraceType : Parcel.new_types)
+ {
+ auto [LoggerName, EventName] = TraceType->get_name();
+ uint32_t Uid = TraceType->get_uid();
+
+ TypeSchema& Schema = Schemas[Uid];
+ Schema.FullName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName));
+ Schema.Uid = Uid;
+ Schema.FieldCount = TraceType->get_field_count();
+ Schema.Flags = 0;
+ if (TraceType->has_flag(TYPE_FLAG_IMPORTANT))
+ {
+ Schema.Flags |= TYPE_FLAG_IMPORTANT;
+ }
+ if (TraceType->has_flag(TYPE_FLAG_AUX))
+ {
+ Schema.Flags |= TYPE_FLAG_AUX;
+ }
+
+ for (uint32_t I = 0; I < Schema.FieldCount; I++)
+ {
+ auto [FieldName, Field] = TraceType->get_field_info(I);
+ Schema.FieldNames.emplace_back(FieldName);
+ Schema.FieldSizes.push_back(Field.get_size());
+ Schema.FieldTypeInfos.push_back(Field.get_type_info());
+ }
+ }
+
+ for (const ::Event& Ev : Parcel.events)
+ {
+ auto It = Schemas.find(Ev.uid);
+ if (It != Schemas.end())
+ {
+ It->second.EventCount++;
+ }
+ }
+ });
+
+ // -- Session info --
+ PrintSessionInfo(SessionAn);
+
+ ZEN_CONSOLE("Trace: {}", FilePath);
+ ZEN_CONSOLE("Size: {}", zen::NiceBytes(uint64_t(std::filesystem::file_size(FilePath))));
+ ZEN_CONSOLE("Events: {}", zen::ThousandsNum(Summary.TotalEvents));
+ ZEN_CONSOLE("Threads: {}", Summary.Threads.size());
+ ZEN_CONSOLE("Types: {}", Schemas.size());
+
+ // -- Thread names --
+ PrintThreadInfo(SessionAn);
+
+ // -- Trace channels --
+ PrintChannelInfo(SessionAn);
+
+ // -- Event schemas --
+ ZEN_CONSOLE("");
+ ZEN_CONSOLE("Event Schemas:");
+ ZEN_CONSOLE("");
+
+ eastl::vector<const TypeSchema*> SortedSchemas;
+ SortedSchemas.reserve(Schemas.size());
+ for (const auto& [Uid, Schema] : Schemas)
+ {
+ SortedSchemas.push_back(&Schema);
+ }
+ eastl::sort(SortedSchemas.begin(), SortedSchemas.end(), [](const auto* A, const auto* B) { return A->FullName < B->FullName; });
+
+ auto FieldTypeStr = [](uint32_t TypeInfo, uint32_t Size) -> std::string_view {
+ uint32_t Cat = TypeInfo & TYPE_INFO_CAT_MASK;
+ if (Cat == TYPE_INFO_CAT_ARRAY)
+ {
+ return "array";
+ }
+ if (Cat == TYPE_INFO_CAT_FLOAT)
+ {
+ return (Size == 8) ? "float64" : "float32";
+ }
+ bool IsSigned = (TypeInfo & TYPE_INFO_SPECIAL_MASK) == TYPE_INFO_SPECIAL_SIGNED;
+ switch (Size)
+ {
+ case 1:
+ return IsSigned ? "int8" : "uint8";
+ case 2:
+ return IsSigned ? "int16" : "uint16";
+ case 4:
+ return IsSigned ? "int32" : "uint32";
+ case 8:
+ return IsSigned ? "int64" : "uint64";
+ default:
+ return "unknown";
+ }
+ };
+
+ for (const TypeSchema* Schema : SortedSchemas)
+ {
+ std::string Flags;
+ if (Schema->Flags & TYPE_FLAG_IMPORTANT)
+ {
+ Flags += " [important]";
+ }
+ if (Schema->Flags & TYPE_FLAG_AUX)
+ {
+ Flags += " [aux]";
+ }
+
+ ZEN_CONSOLE("{} (uid={}, events={}){}", Schema->FullName, Schema->Uid, zen::ThousandsNum(Schema->EventCount), Flags);
+
+ for (uint32_t I = 0; I < Schema->FieldCount; I++)
+ {
+ ZEN_CONSOLE(" {} {}", FieldTypeStr(Schema->FieldTypeInfos[I], Schema->FieldSizes[I]), Schema->FieldNames[I]);
+ }
+
+ if (Schema->FieldCount > 0)
+ {
+ ZEN_CONSOLE("");
+ }
+ }
+}
+
+// Build a single LOD level by merging Lod0 scopes below the given resolution.
+// Lod0 must already be sorted by BeginUs. Safe to call concurrently for
+// different (Level, Resolution) pairs sharing the same Lod0.
+static void
+BuildSingleLod(const eastl::vector<TimelineScope>& Lod0, TimelineDetailLevel& Level, uint32_t Resolution)
+{
+ Level.ResolutionUs = Resolution;
+
+ // Per-depth merge accumulators. Since depths are typically small (< 64),
+ // a flat array indexed by depth is more cache-friendly than a hash map.
+ struct PendingMerge
+ {
+ uint32_t BeginUs = 0;
+ uint32_t EndUs = 0;
+ uint32_t NameId = 0;
+ uint32_t MaxChildDur = 0;
+ uint16_t Depth = 0;
+ uint16_t Count = 0;
+ bool Active = false;
+ };
+
+ eastl::vector<PendingMerge> Pending(64); // grows if needed
+
+ auto FlushPending = [&Level](PendingMerge& P) {
+ if (!P.Active)
+ {
+ return;
+ }
+ Level.Scopes.push_back(TimelineScope{
+ .BeginUs = P.BeginUs,
+ .DurationUs = P.EndUs - P.BeginUs,
+ .NameId = P.NameId,
+ .Depth = P.Depth,
+ .MergeCount = P.Count,
+ });
+ P.Active = false;
+ };
+
+ // Single O(n) sweep over LOD 0 scopes (sorted by BeginUs). For each
+ // depth, merge adjacent small scopes that fall within one resolution
+ // bucket of each other. Large scopes (>= Resolution) pass through.
+ for (const TimelineScope& Scope : Lod0)
+ {
+ uint16_t Depth = Scope.Depth;
+ if (Depth >= Pending.size())
+ {
+ Pending.resize(Depth + 1);
+ }
+
+ if (Scope.DurationUs >= Resolution)
+ {
+ // Large scope -- flush any pending merge for this depth,
+ // then emit the scope un-merged.
+ FlushPending(Pending[Depth]);
+ Level.Scopes.push_back(TimelineScope{
+ .BeginUs = Scope.BeginUs,
+ .DurationUs = Scope.DurationUs,
+ .NameId = Scope.NameId,
+ .Depth = Scope.Depth,
+ .MergeCount = 1,
+ });
+ continue;
+ }
+
+ PendingMerge& P = Pending[Depth];
+ uint32_t EndUs = Scope.BeginUs + Scope.DurationUs;
+
+ if (P.Active && Scope.BeginUs < P.EndUs + Resolution)
+ {
+ // Extend the pending merge.
+ if (EndUs > P.EndUs)
+ {
+ P.EndUs = EndUs;
+ }
+ ++P.Count;
+ if (Scope.DurationUs > P.MaxChildDur)
+ {
+ P.MaxChildDur = Scope.DurationUs;
+ P.NameId = Scope.NameId;
+ }
+ }
+ else
+ {
+ // Start a new pending merge (flush previous if any).
+ FlushPending(P);
+ P.BeginUs = Scope.BeginUs;
+ P.EndUs = EndUs;
+ P.NameId = Scope.NameId;
+ P.MaxChildDur = Scope.DurationUs;
+ P.Depth = Scope.Depth;
+ P.Count = 1;
+ P.Active = true;
+ }
+ }
+
+ // Flush remaining per-depth accumulators.
+ for (PendingMerge& P : Pending)
+ {
+ FlushPending(P);
+ }
+
+ // Sort by (BeginUs, Depth) -- the per-depth flush may have interleaved
+ // entries from different depths. Tie-breaking on depth keeps the
+ // ordering consistent with LOD 0 (parents before nested children) so
+ // the front-end never sees a child rendered before its parent.
+ eastl::sort(Level.Scopes.begin(), Level.Scopes.end(), [](const TimelineScope& A, const TimelineScope& B) {
+ if (A.BeginUs != B.BeginUs)
+ {
+ return A.BeginUs < B.BeginUs;
+ }
+ return A.Depth < B.Depth;
+ });
+}
+
+void
+BuildTimelineLods(ThreadTimeline& Timeline)
+{
+ if (Timeline.Scopes.empty())
+ {
+ return;
+ }
+
+ for (size_t LodIdx = 0; LodIdx < kTimelineLodCount; ++LodIdx)
+ {
+ BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[LodIdx], kTimelineLodResolutions[LodIdx]);
+ }
+}
+
+namespace {
+
+ // Post-iteration phases, extracted from BuildTraceModel for clarity. Each one
+ // runs after the event-iteration pass has populated the analyzers and mutates
+ // only the pieces of TraceModel it owns.
+
+ void ComputeScopeStats(const TimelineAnalyzer& TimelineAn, TraceModel& Model)
+ {
+ const eastl::vector<std::string>& ScopeNames = TimelineAn.ScopeNames();
+ eastl::vector<Distribution> Dists(ScopeNames.size());
+ eastl::vector<uint32_t> Mins(ScopeNames.size(), ~0u);
+ eastl::vector<uint32_t> Maxs(ScopeNames.size(), 0u);
+
+ for (const auto& [Tid, Thread] : TimelineAn.Threads())
+ {
+ for (const TimelineScope& Scope : Thread.Scopes)
+ {
+ if (Scope.NameId >= Dists.size())
+ {
+ continue;
+ }
+ Dists[Scope.NameId].add(double(Scope.DurationUs));
+ Mins[Scope.NameId] = std::min(Mins[Scope.NameId], Scope.DurationUs);
+ Maxs[Scope.NameId] = std::max(Maxs[Scope.NameId], Scope.DurationUs);
+ }
+ }
+
+ Model.ScopeStats.reserve(ScopeNames.size());
+ for (size_t I = 0; I < ScopeNames.size(); ++I)
+ {
+ if (Dists[I].Count() == 0)
+ {
+ continue;
+ }
+ CpuScopeStat Entry;
+ Entry.Name = ScopeNames[I];
+ Entry.Count = Dists[I].Count();
+ Entry.MinUs = Mins[I];
+ Entry.MaxUs = Maxs[I];
+ Entry.MeanUs = Dists[I].Mean();
+ Entry.StdDevUs = Dists[I].StdDev();
+ Model.ScopeStats.push_back(std::move(Entry));
+ }
+ eastl::sort(Model.ScopeStats.begin(), Model.ScopeStats.end(), [](const CpuScopeStat& A, const CpuScopeStat& B) {
+ return A.Count > B.Count;
+ });
+ }
+
+ // Translate each LogEntry's captured CategoryIndex (a sequential id keyed on
+ // the source category pointer) into the flat LogCategories index the frontend
+ // consumes. Entries whose category pointer never got a matching LogCategory
+ // event are bucketed into a synthetic "(unknown)" category.
+ void ResolveLogCategories(LogAnalyzer& LogAn, TraceModel& Model)
+ {
+ const eastl::hash_map<uint64_t, uint32_t>& CategoryPtrToSeqIdx = LogAn.CategoryPointerIndex();
+
+ eastl::hash_map<uint64_t, uint32_t> RealPtrToFlatIdx;
+ Model.LogCategories = LogAn.BuildCategories(RealPtrToFlatIdx);
+
+ const uint32_t UnknownIdx = uint32_t(Model.LogCategories.size());
+ Model.LogCategories.push_back(LogCategoryInfo{.Name = "(unknown)", .DefaultVerbosity = 0});
+
+ eastl::vector<uint32_t> SeqToFlat(CategoryPtrToSeqIdx.size(), UnknownIdx);
+ for (const auto& [Ptr, SeqIdx] : CategoryPtrToSeqIdx)
+ {
+ auto It = RealPtrToFlatIdx.find(Ptr);
+ if (It != RealPtrToFlatIdx.end())
+ {
+ SeqToFlat[SeqIdx] = It->second;
+ }
+ }
+
+ Model.LogEntries = LogAn.MutableEntries();
+ for (LogEntry& E : Model.LogEntries)
+ {
+ E.CategoryIndex = (E.CategoryIndex < SeqToFlat.size()) ? SeqToFlat[E.CategoryIndex] : UnknownIdx;
+ }
+
+ eastl::sort(Model.LogEntries.begin(), Model.LogEntries.end(), [](const LogEntry& A, const LogEntry& B) {
+ return A.TimeUs < B.TimeUs;
+ });
+ }
+
+ // Finalize any still-open regions, group by category, and greedily pack each
+ // category's regions into non-overlapping lanes so the frontend can stack them
+ // without re-running collision detection.
+ void BuildRegionCategories(eastl::vector<RegionEntry>&& AllRegions, uint32_t TraceEndUs, TraceModel& Model)
+ {
+ for (RegionEntry& R : AllRegions)
+ {
+ if (R.EndUs == ~uint32_t(0))
+ {
+ R.EndUs = TraceEndUs;
+ }
+ if (R.EndUs < R.BeginUs)
+ {
+ R.EndUs = R.BeginUs;
+ }
+ }
+
+ eastl::map<std::string, eastl::vector<RegionEntry>> ByCategory;
+ for (RegionEntry& R : AllRegions)
+ {
+ ByCategory[R.Category].push_back(std::move(R));
+ }
+
+ for (auto& [CatName, Regions] : ByCategory)
+ {
+ eastl::sort(Regions.begin(), Regions.end(), [](const RegionEntry& A, const RegionEntry& B) {
+ if (A.BeginUs != B.BeginUs)
+ {
+ return A.BeginUs < B.BeginUs;
+ }
+ return A.EndUs < B.EndUs;
+ });
+
+ eastl::vector<uint32_t> LaneEndUs;
+ uint32_t MaxLane = 0;
+ for (RegionEntry& R : Regions)
+ {
+ uint16_t Depth = 0;
+ bool Assigned = false;
+ for (size_t I = 0; I < LaneEndUs.size(); ++I)
+ {
+ if (LaneEndUs[I] <= R.BeginUs)
+ {
+ Depth = uint16_t(I);
+ LaneEndUs[I] = R.EndUs;
+ Assigned = true;
+ break;
+ }
+ }
+ if (!Assigned)
+ {
+ Depth = uint16_t(LaneEndUs.size());
+ LaneEndUs.push_back(R.EndUs);
+ }
+ R.Depth = Depth;
+ if (Depth + 1u > MaxLane)
+ {
+ MaxLane = Depth + 1u;
+ }
+ }
+
+ RegionCategory Cat;
+ Cat.Name = CatName;
+ Cat.LaneCount = MaxLane;
+ Cat.Regions = std::move(Regions);
+ Model.RegionCategories.push_back(std::move(Cat));
+ }
+
+ // Sort: uncategorized (empty name) first, then alphabetical.
+ eastl::sort(Model.RegionCategories.begin(), Model.RegionCategories.end(), [](const RegionCategory& A, const RegionCategory& B) {
+ if (A.Name.empty() != B.Name.empty())
+ {
+ return A.Name.empty();
+ }
+ return A.Name < B.Name;
+ });
+ }
+
+ // Map callstack frame addresses to (module, offset) pairs using a sorted
+ // (Base, End) lookup over the already-populated Model.Modules.
+ void ResolveCallstacks(const ModuleAnalyzer& ModuleAn,
+ const CallstackAnalyzer& CallstackAn,
+ AllocationAnalyzer& AllocAn,
+ TraceModel& Model)
+ {
+ const auto& RawCallstacks = CallstackAn.RawCallstacks();
+
+ struct ModuleLookup
+ {
+ uint64_t Base;
+ uint64_t End;
+ uint32_t ModelIndex;
+ };
+ eastl::vector<ModuleLookup> Lookup;
+ Lookup.reserve(ModuleAn.ModulesByBase.size());
+ for (const auto& [Base, Info] : ModuleAn.ModulesByBase)
+ {
+ for (uint32_t I = 0; I < Model.Modules.size(); ++I)
+ {
+ if (Model.Modules[I].Base == Base)
+ {
+ Lookup.push_back({Base, Base + Info.Size, I});
+ break;
+ }
+ }
+ }
+ eastl::sort(Lookup.begin(), Lookup.end(), [](const ModuleLookup& A, const ModuleLookup& B) { return A.Base < B.Base; });
+
+ auto ResolveFrame = [&Lookup](uint64_t Address) -> ResolvedFrame {
+ ResolvedFrame F;
+ F.Address = Address;
+ auto It = eastl::upper_bound(Lookup.begin(), Lookup.end(), Address, [](uint64_t Addr, const ModuleLookup& M) {
+ return Addr < M.Base;
+ });
+ if (It != Lookup.begin())
+ {
+ --It;
+ if (Address < It->End)
+ {
+ F.ModuleIndex = It->ModelIndex;
+ F.Offset = Address - It->Base;
+ }
+ }
+ return F;
+ };
+
+ eastl::vector<uint32_t> SortedCallstackIds;
+ SortedCallstackIds.reserve(RawCallstacks.size());
+ for (const auto& [Id, RawFrames] : RawCallstacks)
+ {
+ ZEN_UNUSED(RawFrames);
+ SortedCallstackIds.push_back(Id);
+ }
+ eastl::sort(SortedCallstackIds.begin(), SortedCallstackIds.end());
+
+ Model.Callstacks.reserve(RawCallstacks.size());
+ for (uint32_t Id : SortedCallstackIds)
+ {
+ auto RawIt = RawCallstacks.find(Id);
+ ZEN_ASSERT(RawIt != RawCallstacks.end());
+ const eastl::vector<uint64_t>& RawFrames = RawIt->second;
+
+ CallstackEntry Entry;
+ Entry.Id = Id;
+ Entry.Frames.reserve(RawFrames.size());
+ for (uint64_t Addr : RawFrames)
+ {
+ Entry.Frames.push_back(ResolveFrame(Addr));
+ }
+ Model.Callstacks.push_back(std::move(Entry));
+ }
+
+ Model.CallstackStats = AllocAn.BuildCallstackStats();
+ Model.ChurnStats = AllocAn.BuildChurnStats(~uint64_t(0));
+ Model.AllocSizeHistogram = AllocAn.BuildSizeHistogram();
+ }
+
+} // namespace
+
+TraceModel
+BuildTraceModel(const std::filesystem::path& FilePath, WorkerThreadPool& ThreadPool, const ProgressCallback& OnProgress)
+{
+ ::DataSource Source(FilePath);
+
+ TraceTiming Timing;
+
+ SessionAnalyzer SessionAn;
+ ModuleAnalyzer ModuleAn;
+ MetadataRegistry MetadataReg;
+ TimelineAnalyzer TimelineAn(&MetadataReg, &Timing);
+ LogAnalyzer LogAn(&Timing);
+ BookmarksAnalyzer BookmarkAn(&Timing);
+ CsvProfilerAnalyzer CsvAn(&Timing);
+ AllocationAnalyzer AllocAn(&Timing);
+ CallstackAnalyzer CallstackAn;
+
+ // Tourist's Dispatcher only supports one subscription per event type, so we
+ // cannot run CpuAnalyzer alongside TimelineAnalyzer -- CpuAnalyzer would
+ // claim the CpuProfiler.Event* types first and TimelineAnalyzer would
+ // never receive any events. Instead, TimelineAnalyzer captures every
+ // scope interval and we derive the aggregate statistics from those
+ // intervals in a cheap post-pass below.
+ ::Dispatcher Dispatch;
+ Dispatch.add_analyzer(SessionAn);
+ Dispatch.add_analyzer(ModuleAn);
+ Dispatch.add_analyzer(MetadataReg);
+ Dispatch.add_analyzer(TimelineAn);
+ Dispatch.add_analyzer(LogAn);
+ Dispatch.add_analyzer(BookmarkAn);
+ Dispatch.add_analyzer(CsvAn);
+ Dispatch.add_analyzer(AllocAn);
+ Dispatch.add_analyzer(CallstackAn);
+
+ zen::Stopwatch Timer;
+ TraceSummary Summary = IterateTrace(
+ Source,
+ [&](const ::EventParcel& Parcel) { Dispatch.on_parcel(Parcel); },
+ OnProgress);
+ ZEN_INFO("Trace iteration complete: {} events in {}",
+ zen::ThousandsNum(Summary.TotalEvents),
+ zen::NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+
+ {
+ uint32_t StartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs();
+ uint32_t EndUs = TimelineAn.MaxEndUs();
+ uint64_t DurationMs = (EndUs > StartUs) ? (uint64_t(EndUs - StartUs) + 500) / 1000 : 0;
+ ZEN_INFO("Trace duration: {}", zen::NiceTimeSpanMs(DurationMs));
+ }
+
+ TraceModel Model;
+ Model.FilePath = FilePath;
+ Model.FileSize = uint64_t(std::filesystem::file_size(FilePath));
+ Model.TotalEvents = Summary.TotalEvents;
+ Model.ParseTimeMs = Timer.GetElapsedTimeMs();
+ Model.Session = SessionAn.Session;
+
+ // Event type counts (sorted by count descending)
+ Model.EventTypeCounts.reserve(Summary.TypeInfo.size());
+ for (auto& [Uid, Info] : Summary.TypeInfo)
+ {
+ Model.EventTypeCounts.push_back({std::move(Info.first), Info.second});
+ }
+ eastl::sort(Model.EventTypeCounts.begin(), Model.EventTypeCounts.end(), [](const auto& A, const auto& B) { return A.Count > B.Count; });
+
+ // Flatten and sort threads by sort hint
+ Model.Threads.reserve(SessionAn.ThreadNames.size());
+ for (const auto& [Tid, Info] : SessionAn.ThreadNames)
+ {
+ Model.Threads.push_back(Info);
+ }
+ eastl::sort(Model.Threads.begin(), Model.Threads.end(), [](const ThreadInfoEntry& A, const ThreadInfoEntry& B) {
+ return A.SortHint < B.SortHint;
+ });
+
+ // Flatten and sort channels by name
+ Model.Channels.reserve(SessionAn.Channels.size());
+ for (const auto& [Id, Info] : SessionAn.Channels)
+ {
+ Model.Channels.push_back(Info);
+ }
+ eastl::sort(Model.Channels.begin(), Model.Channels.end(), [](const ChannelInfo& A, const ChannelInfo& B) { return A.Name < B.Name; });
+
+ {
+ ExtendableStringBuilder<512> Enabled;
+ for (const ChannelInfo& Ch : Model.Channels)
+ {
+ if (Ch.Enabled)
+ {
+ if (Enabled.Size() > 0)
+ {
+ Enabled.Append(", ");
+ }
+ Enabled.Append(Ch.Name);
+ }
+ }
+ if (Enabled.Size() > 0)
+ {
+ ZEN_INFO("Enabled channels: {}", Enabled);
+ }
+ }
+
+ // Flatten and sort modules by name
+ Model.Modules.reserve(ModuleAn.ModulesByBase.size());
+ for (const auto& [Base, Info] : ModuleAn.ModulesByBase)
+ {
+ Model.Modules.push_back(Info);
+ }
+ eastl::sort(Model.Modules.begin(), Model.Modules.end(), [](const ModuleInfo& A, const ModuleInfo& B) { return A.Name < B.Name; });
+
+ // CPU scope statistics and timeline building read from TimelineAn
+ // independently and write to separate Model fields, so overlap them.
+ Model.ScopeNames = TimelineAn.ScopeNames();
+
+ ZEN_INFO("Computing CPU scope statistics ({} scope names)", TimelineAn.ScopeNames().size());
+
+ // Kick off scope stats on a worker -- runs concurrently with the
+ // timeline copy + sort below.
+ Latch StatsLatch(1);
+ ThreadPool.ScheduleWork(
+ [&StatsLatch, &Model, &TimelineAn]() {
+ auto _ = MakeGuard([&StatsLatch]() { StatsLatch.CountDown(); });
+ ComputeScopeStats(TimelineAn, Model);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+
+ // Timelines -- build per-thread sort + LODs in parallel.
+ {
+ const auto& Threads = TimelineAn.Threads();
+ size_t TotalScopes = 0;
+ for (const auto& [Tid, Thread] : Threads)
+ {
+ TotalScopes += Thread.Scopes.size();
+ }
+ ZEN_INFO("Building timelines: {} threads, {} scopes (sort + LODs)", Threads.size(), zen::ThousandsNum(TotalScopes));
+ Model.Timelines.resize(Threads.size());
+
+ // Populate timeline metadata on the main thread (cheap lookups).
+ size_t Idx = 0;
+ for (const auto& [Tid, Thread] : Threads)
+ {
+ ThreadTimeline& Timeline = Model.Timelines[Idx++];
+ Timeline.ThreadId = Tid;
+ auto It = SessionAn.ThreadNames.find(Tid);
+ if (It != SessionAn.ThreadNames.end())
+ {
+ Timeline.Name = It->second.Name;
+ Timeline.SortHint = It->second.SortHint;
+ }
+ Timeline.Scopes = Thread.Scopes;
+ }
+
+ // Phase 1: Sort LOD 0 scopes per thread.
+ // ParallelSort fans out internally using the pool, so it must be
+ // called from the main thread to avoid nested fan-out deadlocks.
+ // Small timelines are dispatched to workers first (they just call
+ // eastl::sort -- no nesting). Then large ones are sorted one at a
+ // time from the main thread with full pool utilisation each.
+ //
+ // Tie-break on Depth so that scopes which start at the same micro
+ // timestamp come out parent-first (lower depth wins). This keeps
+ // the scope ordering well-defined and lets the front-end rely on
+ // outer scopes appearing before their nested children regardless
+ // of the order the analyzer happened to emit them.
+ {
+ auto Cmp = [](const TimelineScope& A, const TimelineScope& B) {
+ if (A.BeginUs != B.BeginUs)
+ {
+ return A.BeginUs < B.BeginUs;
+ }
+ return A.Depth < B.Depth;
+ };
+
+ if constexpr (kUseParallelSort)
+ {
+ constexpr size_t kParallelThreshold = 65536;
+
+ // Dispatch small timelines to workers.
+ Latch SmallLatch(1);
+ for (size_t I = 0; I < Model.Timelines.size(); ++I)
+ {
+ if (Model.Timelines[I].Scopes.size() >= kParallelThreshold)
+ {
+ continue;
+ }
+ SmallLatch.AddCount(1);
+ ThreadPool.ScheduleWork(
+ [&SmallLatch, &Cmp, &Timeline = Model.Timelines[I]]() {
+ auto _ = MakeGuard([&SmallLatch]() { SmallLatch.CountDown(); });
+ eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ SmallLatch.CountDown();
+ SmallLatch.Wait();
+
+ // Sort large timelines from the main thread so ParallelSort
+ // can fan out across the (now idle) pool without deadlocking.
+ for (ThreadTimeline& Timeline : Model.Timelines)
+ {
+ if (Timeline.Scopes.size() >= kParallelThreshold)
+ {
+ zen::ParallelSort(ThreadPool, Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp);
+ }
+ }
+ }
+ else
+ {
+ Latch SortLatch(1);
+ for (size_t I = 0; I < Model.Timelines.size(); ++I)
+ {
+ SortLatch.AddCount(1);
+ ThreadPool.ScheduleWork(
+ [&SortLatch, &Cmp, &Timeline = Model.Timelines[I]]() {
+ auto _ = MakeGuard([&SortLatch]() { SortLatch.CountDown(); });
+ eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ SortLatch.CountDown();
+ SortLatch.Wait();
+ }
+ }
+
+ // Phase 2: Build LOD levels -- one task per (thread, LOD) pair.
+ // Flat dispatch avoids nested fan-out which could deadlock the pool.
+ Latch LodLatch(1);
+ for (size_t I = 0; I < Model.Timelines.size(); ++I)
+ {
+ if (Model.Timelines[I].Scopes.empty())
+ {
+ continue;
+ }
+ for (size_t L = 0; L < kTimelineLodCount; ++L)
+ {
+ LodLatch.AddCount(1);
+ ThreadPool.ScheduleWork(
+ [&LodLatch, &Timeline = Model.Timelines[I], L]() {
+ auto _ = MakeGuard([&LodLatch]() { LodLatch.CountDown(); });
+ BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[L], kTimelineLodResolutions[L]);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+ }
+ LodLatch.CountDown();
+ LodLatch.Wait();
+ }
+ eastl::sort(Model.Timelines.begin(), Model.Timelines.end(), [](const ThreadTimeline& A, const ThreadTimeline& B) {
+ return A.SortHint < B.SortHint;
+ });
+
+ Model.TraceStartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs();
+ Model.TraceEndUs = TimelineAn.MaxEndUs();
+
+ // Ensure scope stats computation (kicked off earlier) has finished.
+ StatsLatch.Wait();
+
+ ZEN_INFO("Processing {} log entries", zen::ThousandsNum(LogAn.Entries().size()));
+ ResolveLogCategories(LogAn, Model);
+
+ ZEN_INFO("Sorting {} bookmarks, {} regions", BookmarkAn.MutableBookmarks().size(), BookmarkAn.MutableRegions().size());
+
+ // Bookmarks: move and sort by TimeUs.
+ Model.Bookmarks = std::move(BookmarkAn.MutableBookmarks());
+ eastl::sort(Model.Bookmarks.begin(), Model.Bookmarks.end(), [](const Bookmark& A, const Bookmark& B) { return A.TimeUs < B.TimeUs; });
+
+ BuildRegionCategories(std::move(BookmarkAn.MutableRegions()), Model.TraceEndUs, Model);
+
+ // CsvProfiler data
+ {
+ Model.CsvCategories = std::move(CsvAn.MutableCategories());
+ Model.CsvStatDefs = std::move(CsvAn.MutableStatDefs());
+ Model.CsvTimeSeries = CsvAn.BuildTimeSeries();
+ Model.CsvEvents = std::move(CsvAn.MutableEvents());
+ eastl::sort(Model.CsvEvents.begin(), Model.CsvEvents.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; });
+ Model.CsvMetadata = std::move(CsvAn.MutableMetadata());
+ ZEN_INFO("CSV profiler: {} categories, {} stats, {} series, {} events",
+ Model.CsvCategories.size(),
+ Model.CsvStatDefs.size(),
+ Model.CsvTimeSeries.size(),
+ Model.CsvEvents.size());
+ }
+
+ // Memory allocation data
+ {
+ AllocAn.EmitFinalSample(Model.TraceEndUs);
+ Model.AllocSummary = AllocAn.Summary();
+
+ // Flatten heaps map into sorted vector
+ Model.Heaps.reserve(AllocAn.Heaps().size());
+ for (const auto& [Id, Info] : AllocAn.Heaps())
+ {
+ Model.Heaps.push_back(Info);
+ }
+ eastl::sort(Model.Heaps.begin(), Model.Heaps.end(), [](const HeapInfo& A, const HeapInfo& B) { return A.Id < B.Id; });
+
+ // Flatten tags map into sorted vector
+ Model.Tags.reserve(AllocAn.Tags().size());
+ for (const auto& [Tag, Info] : AllocAn.Tags())
+ {
+ Model.Tags.push_back(Info);
+ }
+ eastl::sort(Model.Tags.begin(), Model.Tags.end(), [](const TagInfo& A, const TagInfo& B) { return A.Tag < B.Tag; });
+
+ // Move timeline (already time-ordered from Marker events)
+ Model.MemoryTimeline = std::move(AllocAn.MutableTimeline());
+
+ // Flatten per-root-heap stats into sorted vector
+ Model.HeapStats.reserve(AllocAn.RootHeapStats().size());
+ for (const auto& [HeapId, Stat] : AllocAn.RootHeapStats())
+ {
+ Model.HeapStats.push_back(Stat);
+ }
+ eastl::sort(Model.HeapStats.begin(), Model.HeapStats.end(), [](const HeapStat& A, const HeapStat& B) {
+ return A.HeapId < B.HeapId;
+ });
+
+ if (Model.AllocSummary.HasMemoryData)
+ {
+ ZEN_INFO("Memory: {} allocs, {} frees, peak {}, {} live, {} timeline samples",
+ zen::ThousandsNum(Model.AllocSummary.TotalAllocs + Model.AllocSummary.TotalReallocAllocs),
+ zen::ThousandsNum(Model.AllocSummary.TotalFrees + Model.AllocSummary.TotalReallocFrees),
+ zen::NiceBytes(uint64_t(Model.AllocSummary.PeakBytes)),
+ zen::ThousandsNum(Model.AllocSummary.LiveAllocations),
+ zen::ThousandsNum(Model.MemoryTimeline.size()));
+ }
+ }
+
+ ResolveCallstacks(ModuleAn, CallstackAn, AllocAn, Model);
+ ZEN_INFO("Callstacks: {} unique, {} with live allocations",
+ zen::ThousandsNum(Model.Callstacks.size()),
+ zen::ThousandsNum(Model.CallstackStats.size()));
+
+ return Model;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Trace trim
+//
+// The trim pipeline operates entirely at the raw packet level: a .utrace on
+// disk is identical to the wire format (see src/zenserver/trace/tracerecorder.cpp
+// for the capture-side passthrough), so trimming reduces to "copy the preamble,
+// then copy only the packets we want to keep". We never re-encode or re-emit
+// any events, which sidesteps the fact that Tourist has no writer path.
+//
+// The algorithm:
+//
+// 1. Slurp the input file into memory and walk raw packets using the
+// [size:uint16][thread_id:uint16][payload] framing. This gives an ordered
+// list of packet descriptors keyed by file offset.
+//
+// 2. Classify packets by their on-disk thread_id:
+// TID_TYPE -> always keep (type definitions)
+// TID_IMPORTANT -> always keep (events of types marked TYPE_FLAG_IMPORTANT,
+// i.e. session info, thread names, channel state,
+// log categories, CPU specs, etc.)
+// TID_SYNC -> always keep (transport barriers)
+// TID_NORMAL+ -> keep only if the packet's events overlap the window
+//
+// 3. For normal-thread packets, run Tourist's reader with a bundle of size 1
+// so each Proto.read() scatters exactly one raw packet before emitting any
+// events. Before the read call, we record the file offset of the current
+// packet as the "latest packet" for its thread. TrimAnalyzer then decodes
+// CpuProfiler batch events and attributes their timestamp ranges back to
+// that thread's latest packet. The attribution can drift if Tourist
+// buffers multiple packets on one thread, but the failure mode is that
+// earlier packets lose attribution and are conservatively retained.
+//
+// 4. Write the output: the preamble bytes verbatim, followed by the raw
+// bytes of each kept packet in original order. There is no trailer; the
+// Tourist reader catches DataStream::Eof at the end of the stream.
+//
+// Coarse per-packet precision is accepted by design: a packet straddling a
+// window edge is kept in full. CpuProfiler batches are self-contained per
+// packet (each re-derives cycles from the trace-wide StartCycle), so dropping
+// packets does not desync delta decoding on surviving ones, and orphaned leave
+// events from half-open scopes are silently ignored by decoders.
+
+namespace {
+
+ struct TrimPacketDesc
+ {
+ uint64_t FileOffset = 0; // offset of the [size:uint16] header in the file
+ uint32_t Size = 0; // total size including the 4-byte header
+ uint16_t ThreadIdRaw = 0; // thread_id as stored on disk, including PACKET_FLAG_COMPRESSED
+ };
+
+ // Parses the .utrace preamble in place to determine the byte offset where
+ // packets begin. Mirrors Preamble::parse_header in Tourist so we can run the
+ // raw walker without spinning up a second DataSource. Throws on a malformed
+ // preamble.
+ static uint64_t ParsePreambleLength(const uint8_t* Data, uint64_t Size)
+ {
+ if (Size < 8)
+ {
+ throw zen::runtime_error("Trace file too small to contain a preamble ({} bytes)", Size);
+ }
+
+ uint32_t Magic = 0;
+ std::memcpy(&Magic, Data, sizeof(uint32_t));
+ if (Magic != 'TRC2')
+ {
+ throw zen::runtime_error("Unexpected trace file magic value 0x{:08x}", Magic);
+ }
+
+ uint16_t MetaSize = 0;
+ std::memcpy(&MetaSize, Data + 4, sizeof(uint16_t));
+
+ // magic(4) + meta_size(2) + metadata + transport(1) + protocol(1)
+ uint64_t PreambleLen = uint64_t(4) + 2 + MetaSize + 1 + 1;
+ if (PreambleLen > Size)
+ {
+ throw zen::runtime_error("Trace preamble extends past end of file ({} > {})", PreambleLen, Size);
+ }
+
+ return PreambleLen;
+ }
+
+ // Walks raw packets starting at PreambleLen. Returns one TrimPacketDesc per
+ // packet in original stream order. The walker stops gracefully on truncated
+ // data so partial traces still produce a usable packet list.
+ static eastl::vector<TrimPacketDesc> WalkRawPackets(const uint8_t* Data, uint64_t Size, uint64_t PreambleLen)
+ {
+ eastl::vector<TrimPacketDesc> Packets;
+ uint64_t Offset = PreambleLen;
+
+ while (Offset + 4 <= Size)
+ {
+ uint16_t PacketSize = 0;
+ uint16_t ThreadIdRaw = 0;
+ std::memcpy(&PacketSize, Data + Offset, sizeof(uint16_t));
+ std::memcpy(&ThreadIdRaw, Data + Offset + 2, sizeof(uint16_t));
+
+ if (PacketSize < 4)
+ {
+ // Malformed size; stop walking and accept whatever we have.
+ break;
+ }
+
+ if (Offset + PacketSize > Size)
+ {
+ // Truncated tail -- drop it.
+ break;
+ }
+
+ TrimPacketDesc Desc;
+ Desc.FileOffset = Offset;
+ Desc.Size = PacketSize;
+ Desc.ThreadIdRaw = ThreadIdRaw;
+ Packets.push_back(Desc);
+
+ Offset += PacketSize;
+ }
+
+ return Packets;
+ }
+
+} // namespace
+
+void
+RunTraceTrim(const TraceTrimArgs& Args)
+{
+ if (!(Args.EndSec > Args.StartSec))
+ {
+ throw zen::runtime_error("Invalid trim range: start={} end={}", Args.StartSec, Args.EndSec);
+ }
+
+ // --- Read the input file ---
+ zen::BasicFile InputFile(Args.InputPath, zen::BasicFile::Mode::kRead);
+ zen::IoBuffer InputBuffer = InputFile.ReadAll();
+ InputFile.Close();
+
+ const uint8_t* FileBytes = static_cast<const uint8_t*>(InputBuffer.GetData());
+ const uint64_t FileSize = InputBuffer.GetSize();
+
+ const uint64_t PreambleLen = ParsePreambleLength(FileBytes, FileSize);
+
+ // --- Raw packet walk ---
+ eastl::vector<TrimPacketDesc> Packets = WalkRawPackets(FileBytes, FileSize, PreambleLen);
+ if (Packets.empty())
+ {
+ throw zen::runtime_error("Trace file contains no packets");
+ }
+
+ // Initial keep classification: definitions, important events, sync are
+ // always retained. Normal-thread packets start as drop candidates and get
+ // promoted if their decoded time range overlaps the window.
+ eastl::vector<uint8_t> Keep(Packets.size(), 0);
+ size_t NumAlwaysKept = 0;
+ for (size_t I = 0; I < Packets.size(); ++I)
+ {
+ uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED;
+ if (Tid == TID_TYPE || Tid == TID_IMPORTANT || Tid == TID_SYNC)
+ {
+ Keep[I] = 1;
+ ++NumAlwaysKept;
+ }
+ }
+
+ // --- Time-range classification via Tourist (bundle of 1) ---
+ TrimAnalyzer TrimAn;
+ TrimAn.EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6);
+ ::Dispatcher Dispatch;
+ Dispatch.add_analyzer(TrimAn);
+
+ {
+ ::DataSource Source(Args.InputPath);
+ ::Allocator TraceAllocator;
+ ::Preamble Pream(Source, TraceAllocator);
+ ::Transport Xport = Pream.get_transport();
+ ::Protocol Proto = Pream.get_protocol();
+
+ ::Packet OnePacket[1];
+ ::EventParcel Parcel;
+
+ try
+ {
+ while (::Bundle Bndl = Xport.read_packets(OnePacket))
+ {
+ if (Bndl.empty())
+ {
+ break;
+ }
+
+ const ::Packet& P = Bndl[0];
+ uint32_t Tid = P.get_thread_id();
+
+ if (Tid >= TID_NORMAL && Tid != TID_SYNC)
+ {
+ // Tourist's Packet::get_index() is the same sequential
+ // packet counter as our raw walker's vector position,
+ // since both read the stream from the start in order.
+ TrimAn.LastPacketIndexByThread[Tid] = P.get_index();
+ }
+
+ Parcel.reset();
+ Proto.read(Parcel, Bndl);
+ Dispatch.on_parcel(Parcel);
+ }
+ }
+ catch (const DataStream::Eof&)
+ {
+ }
+ catch (const Exception::StreamError& E)
+ {
+ throw zen::runtime_error("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value);
+ }
+ }
+
+ // --- Apply the window filter ---
+ //
+ // Per-packet filtering in the middle of a thread's stream is unsafe:
+ // Tourist's event parser holds per-thread continuation state (see
+ // EventParser::_fragment / _missing in
+ // thirdparty/tourist/trace/src/protocol.cpp) so an event can straddle a
+ // packet boundary on a normal thread. Removing a packet from the middle
+ // leaves subsequent packets on the same thread decoded against the wrong
+ // position in an in-flight event and Tourist crashes. We therefore only
+ // drop packets in two safe ways:
+ //
+ // 1. Whole-thread drop: a thread whose attributed packets are all
+ // outside the window has every one of its packets dropped. No
+ // surviving packet references that thread, so there is no state
+ // machine to corrupt.
+ //
+ // 2. Per-thread tail truncation: for a thread that does have in-window
+ // activity, drop every packet AFTER the latest in-window packet on
+ // that thread. Tail drops are safe because no later packet on the
+ // same thread can be looking forward to the dropped bytes; the
+ // parser just ends its stream for that thread at the truncation
+ // point, exactly like a trace that naturally stopped recording.
+ //
+ // Threads for which we never attributed any CpuProfiler batch events are
+ // retained in full; we have no evidence about their time range and
+ // can't safely drop them.
+ const uint32_t StartUs = uint32_t(std::max(0.0, Args.StartSec) * 1e6);
+ const uint32_t EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6);
+
+ struct ThreadInfo
+ {
+ bool HasAnyBatch = false;
+ bool HasInWindowBatch = false;
+ // First packet index on this thread whose attributed CPU batches are
+ // *entirely* past EndUs. Every packet on this thread with an index
+ // >= this value is safe to tail-drop. Defaults to size_t(-1) (no cut
+ // point) when the thread has no such packet.
+ size_t FirstPastWindowIdx = size_t(-1);
+ };
+ eastl::hash_map<uint32_t, ThreadInfo> ThreadInfos;
+
+ for (size_t I = 0; I < Packets.size(); ++I)
+ {
+ uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED;
+ if (Tid < TID_NORMAL || Tid == TID_SYNC)
+ {
+ continue;
+ }
+
+ auto RangeIt = TrimAn.PacketRanges.find(uint32_t(I));
+ if (RangeIt == TrimAn.PacketRanges.end())
+ {
+ continue;
+ }
+
+ ThreadInfo& Info = ThreadInfos[Tid];
+ Info.HasAnyBatch = true;
+ const auto& Range = RangeIt->second;
+ if (Range.MaxUs >= StartUs && Range.MinUs <= EndUs)
+ {
+ Info.HasInWindowBatch = true;
+ }
+ if (Range.MinUs > EndUs && I < Info.FirstPastWindowIdx)
+ {
+ Info.FirstPastWindowIdx = I;
+ }
+ }
+
+ size_t NumThreadsKept = 0;
+ size_t NumThreadsDropped = 0;
+ for (const auto& [Tid, Info] : ThreadInfos)
+ {
+ if (Info.HasInWindowBatch)
+ {
+ ++NumThreadsKept;
+ }
+ else
+ {
+ ++NumThreadsDropped;
+ }
+ }
+
+ size_t NumInWindow = 0;
+ size_t NumTailDropped = 0;
+ size_t NumUnattributed = 0;
+ size_t NumDropped = 0;
+
+ for (size_t I = 0; I < Packets.size(); ++I)
+ {
+ if (Keep[I])
+ {
+ continue;
+ }
+
+ uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED;
+ auto It = ThreadInfos.find(Tid);
+ if (It == ThreadInfos.end() || !It->second.HasAnyBatch)
+ {
+ // We have no evidence for this thread's time range. Retain all
+ // its packets conservatively to avoid breaking Tourist's per-
+ // thread parser state.
+ Keep[I] = 1;
+ ++NumUnattributed;
+ continue;
+ }
+
+ if (!It->second.HasInWindowBatch)
+ {
+ // Thread's attributed packets are all outside the window -- drop
+ // every packet on this thread.
+ ++NumDropped;
+ continue;
+ }
+
+ if (I >= It->second.FirstPastWindowIdx)
+ {
+ // Past the first entirely-after-window packet on this thread --
+ // candidate for tail truncation. Before dropping, check whether
+ // this packet carries a Leave event that closes a scope whose
+ // Enter was at or before the window end. If so, we MUST keep it
+ // so the downstream analyzer can render the long-running scope;
+ // otherwise the scope would sit unmatched on the open stack.
+ auto MustKeepIt = TrimAn.MustKeepPacketByThread.find(Tid);
+ if (MustKeepIt != TrimAn.MustKeepPacketByThread.end() && I <= MustKeepIt->second)
+ {
+ Keep[I] = 1;
+ ++NumInWindow;
+ continue;
+ }
+
+ ++NumTailDropped;
+ continue;
+ }
+
+ Keep[I] = 1;
+ ++NumInWindow;
+ }
+
+ // --- Write output ---
+ std::error_code Ec;
+ std::filesystem::create_directories(Args.OutputPath.parent_path(), Ec);
+
+ zen::BasicFile OutputFile(Args.OutputPath, zen::BasicFile::Mode::kTruncate);
+
+ uint64_t OutOffset = 0;
+ OutputFile.Write(FileBytes, PreambleLen, OutOffset);
+ OutOffset += PreambleLen;
+
+ uint64_t KeptBytes = 0;
+ for (size_t I = 0; I < Packets.size(); ++I)
+ {
+ if (!Keep[I])
+ {
+ continue;
+ }
+ OutputFile.Write(FileBytes + Packets[I].FileOffset, Packets[I].Size, OutOffset);
+ OutOffset += Packets[I].Size;
+ KeptBytes += Packets[I].Size;
+ }
+
+ OutputFile.Flush();
+ OutputFile.Close();
+
+ ZEN_CONSOLE("Trimmed trace written to {}", Args.OutputPath);
+ ZEN_CONSOLE(" Input: {} ({} packets)", zen::NiceBytes(FileSize), zen::ThousandsNum(Packets.size()));
+ ZEN_CONSOLE(" Output: {} ({} packets)",
+ zen::NiceBytes(OutOffset),
+ zen::ThousandsNum(NumAlwaysKept + NumInWindow + NumUnattributed));
+ ZEN_CONSOLE(" Always kept: {} packets (types / important / sync)", zen::ThousandsNum(NumAlwaysKept));
+ ZEN_CONSOLE(" Thread kept: {} packets from {} threads with in-window activity",
+ zen::ThousandsNum(NumInWindow),
+ zen::ThousandsNum(NumThreadsKept));
+ ZEN_CONSOLE(" Thread dropped: {} packets from {} threads with no in-window activity",
+ zen::ThousandsNum(NumDropped),
+ zen::ThousandsNum(NumThreadsDropped));
+ ZEN_CONSOLE(" Tail dropped: {} packets past the latest in-window packet on their thread", zen::ThousandsNum(NumTailDropped));
+ ZEN_CONSOLE(" Unattributed: {} packets (retained conservatively)", zen::ThousandsNum(NumUnattributed));
+ ZEN_UNUSED(KeptBytes);
+
+ // --- Diagnostic: summarise the attributed time range distribution ---
+ {
+ uint32_t GlobalMin = ~0u;
+ uint32_t GlobalMax = 0;
+ for (const auto& [Idx, R] : TrimAn.PacketRanges)
+ {
+ GlobalMin = std::min(GlobalMin, R.MinUs);
+ GlobalMax = std::max(GlobalMax, R.MaxUs);
+ }
+ ZEN_CONSOLE(" Attributed: {} packets, window {:.3f}s .. {:.3f}s",
+ zen::ThousandsNum(TrimAn.PacketRanges.size()),
+ double(GlobalMin) / 1e6,
+ double(GlobalMax) / 1e6);
+ }
+}
+
+} // namespace zen::trace_detail