// Copyright Epic Games, Inc. All Rights Reserved. #include "trace_model.h" #include #include #include #include #include #include #include #include #include #include #include ZEN_THIRD_PARTY_INCLUDES_START #include #include #include #include #include #include #include #include #include ZEN_THIRD_PARTY_INCLUDES_END #include #include #include using namespace std::literals; // Toggle to A/B test cross-platform parallel sort vs sequential eastl::sort. constexpr bool kUseParallelSort = true; namespace eastl { template<> struct hash { size_t operator()(const std::string& S) const { return eastl::hash()(S.c_str()); } }; } // namespace eastl ////////////////////////////////////////////////////////////////////////////// // Trace analysis types (global namespace alongside tourist types) namespace { using zen::ReciprocalU64; // Welford's online algorithm for computing mean and standard deviation class Distribution { public: void add(double X) { m_Count++; if (m_Count == 1) { m_OldM = m_NewM = X; m_OldS = 0.0; } else { m_NewM = m_OldM + (X - m_OldM) / double(m_Count); m_NewS = m_OldS + (X - m_OldM) * (X - m_NewM); m_OldM = m_NewM; m_OldS = m_NewS; } } uint32_t Count() const { return m_Count; } double Mean() const { return (m_Count > 0) ? m_NewM : 0.0; } double Variance() const { return (m_Count > 1) ? m_NewS / double(m_Count - 1) : 0.0; } double StdDev() const { return std::sqrt(Variance()); } private: double m_OldM = 0.0; double m_NewM = 0.0; double m_OldS = 0.0; double m_NewS = 0.0; uint32_t m_Count = 0; }; class NameDepot { public: uint64 Add(StringView Name) { uint64 NameHash = Hash(Name); Add(NameHash, Name); return NameHash; } void Add(uint64 NameHash, StringView Name) { if (auto It = m_Names.insert({NameHash, String()}); It.second) { It.first->second = Name; } } StringView Get(uint64 NameHash) const { auto Iter = m_Names.find(NameHash); if (Iter == m_Names.end()) { return "???"; } return Iter->second; } private: eastl::hash_map m_Names; }; struct CpuEventStat { Distribution Dist; uint32_t Min = ~0u; uint32_t Max = 0; }; class EventStats { public: void Record(uint64 NameHash, uint32 DurationUs) { CpuEventStat& Stat = m_Stats[NameHash]; Stat.Min = std::min(Stat.Min, DurationUs); Stat.Max = std::max(Stat.Max, DurationUs); Stat.Dist.add(double(DurationUs)); } auto begin() const { return m_Stats.begin(); } auto end() const { return m_Stats.end(); } bool empty() const { return m_Stats.empty(); } private: eastl::hash_map m_Stats; }; ////////////////////////////////////////////////////////////////////////////// // Event outlines // clang-format off begin_outline($Trace, NewTrace) field(uint64, CycleFrequency) field(uint64, StartCycle) end_outline() begin_outline(CpuProfiler, EventSpec) field(uint32, Id) field(FieldStr, Name) end_outline() begin_outline(CpuProfiler, EventBatch) field(uint32, ThreadId) field(uint8[], Data) end_outline() begin_outline(CpuProfiler, EventBatchV2) field(uint8[], Data) end_outline() begin_outline(CpuProfiler, EventBatchV3) field(uint8[], Data) end_outline() begin_outline(CpuProfiler, Metadata) field(uint32, Id) field(uint32, SpecId) field(uint8[], Metadata) end_outline() begin_outline($Trace, ThreadInfo) field(FieldStr, Name) field(int32, SortHint) field(uint32, ThreadId) field(uint32, SystemId) end_outline() begin_outline($Trace, ThreadGroupBegin) field(FieldStr, Name) end_outline() begin_outline($Trace, ThreadGroupEnd) end_outline() begin_outline(Diagnostics, Session2) field(FieldStr, Platform) field(FieldStr, AppName) field(FieldStr, ProjectName) field(FieldStr, CommandLine) field(FieldStr, Branch) field(FieldStr, BuildVersion) field(uint32, Changelist) field(uint8, ConfigurationType) field(uint8, TargetType) end_outline() begin_outline(Diagnostics, ModuleInit) field(FieldStr, SymbolFormat) field(uint8, ModuleBaseShift) end_outline() begin_outline(Diagnostics, ModuleLoad) field(FieldStr, Name) field(uint64, Base) field(uint32, Size) field(uint8[], ImageId) end_outline() begin_outline(Diagnostics, ModuleUnload) field(uint64, Base) end_outline() begin_outline(Trace, ChannelAnnounce) field(uint32, Id) field(uint8, IsEnabled) field(uint8, ReadOnly) field(FieldStr, Name) end_outline() begin_outline(Trace, ChannelToggle) field(uint32, Id) field(uint8, IsEnabled) end_outline() begin_outline(Logging, LogCategory) field(uint64, CategoryPointer) field(uint8, DefaultVerbosity) field(FieldStr, Name) end_outline() begin_outline(Logging, LogMessageSpec) field(uint64, LogPoint) field(uint64, CategoryPointer) field(int32, Line) field(uint8, Verbosity) field(FieldStr, FileName) field(FieldStr, FormatString) end_outline() begin_outline(Logging, LogMessage) field(uint64, LogPoint) field(uint64, Cycle) field(uint8[], FormatArgs) end_outline() // Analyzer-level schema for the zencore log events defined in // src/zencore/logging/tracelog.cpp. The field layout mirrors the Logging.* // outline above so the analyzer plumbing is symmetric, but the events carry // fmt-style `{}` format strings instead of printf-style `%` specifiers and // the FormatArgs blob uses a zen-specific descriptor encoding. begin_outline(ZenLog, Category) field(uint64, CategoryPointer) field(uint8, DefaultVerbosity) field(FieldStr, Name) end_outline() begin_outline(ZenLog, MessageSpec) field(uint64, LogPoint) field(uint64, CategoryPointer) field(int32, Line) field(uint8, Verbosity) field(FieldStr, FileName) field(FieldStr, FormatString) end_outline() begin_outline(ZenLog, Message) field(uint64, LogPoint) field(uint64, Cycle) field(uint8[], FormatArgs) end_outline() begin_outline(Misc, BookmarkSpec) field(uint64, BookmarkPoint) field(int32, Line) field(FieldStr, FormatString) field(FieldStr, FileName) end_outline() begin_outline(Misc, Bookmark) field(uint64, Cycle) field(uint64, BookmarkPoint) field(uint8[], FormatArgs) end_outline() begin_outline(Misc, RegionBegin) field(uint64, Cycle) field(uint8[], RegionName) field(uint8[], Category) end_outline() begin_outline(Misc, RegionBeginWithId) field(uint64, CycleAndId) field(uint8[], RegionName) field(uint8[], Category) end_outline() begin_outline(Misc, RegionEnd) field(uint64, Cycle) field(uint8[], RegionName) end_outline() begin_outline(Misc, RegionEndWithId) field(uint64, Cycle) field(uint64, RegionId) end_outline() // CsvProfiler events begin_outline(CsvProfiler, RegisterCategory) field(int32, Index) field(uint8[], Name) end_outline() begin_outline(CsvProfiler, DefineInlineStat) field(uint64, StatId) field(int32, CategoryIndex) field(uint8[], Name) end_outline() begin_outline(CsvProfiler, DefineDeclaredStat) field(uint64, StatId) field(int32, CategoryIndex) field(uint8[], Name) end_outline() begin_outline(CsvProfiler, BeginStat) field(uint64, StatId) field(uint64, Cycle) end_outline() begin_outline(CsvProfiler, EndStat) field(uint64, StatId) field(uint64, Cycle) end_outline() begin_outline(CsvProfiler, BeginExclusiveStat) field(uint64, StatId) field(uint64, Cycle) end_outline() begin_outline(CsvProfiler, EndExclusiveStat) field(uint64, StatId) field(uint64, Cycle) end_outline() begin_outline(CsvProfiler, CustomStatInt) field(uint64, StatId) field(uint64, Cycle) field(int32, Value) field(uint8, OpType) end_outline() begin_outline(CsvProfiler, CustomStatFloat) field(uint64, StatId) field(uint64, Cycle) field(float, Value) field(uint8, OpType) end_outline() begin_outline(CsvProfiler, Event) field(uint64, Cycle) field(int32, CategoryIndex) field(uint8[], Text) end_outline() begin_outline(CsvProfiler, BeginCapture) field(uint64, Cycle) field(uint32, RenderThreadId) field(uint32, RHIThreadId) field(uint8, EnableCounts) field(uint8[], FileName) end_outline() begin_outline(CsvProfiler, EndCapture) field(uint64, Cycle) end_outline() begin_outline(CsvProfiler, Metadata) field(uint8[], Key) field(uint8[], Value) end_outline() // clang-format on ////////////////////////////////////////////////////////////////////////////// // Forward declarations needed by the helper analyzers below. using zen::trace_detail::SafeFieldStr; ////////////////////////////////////////////////////////////////////////////// // Minimal CBOR formatter // // CpuProfiler.Metadata payloads are CBOR-encoded (RFC 7049) blobs produced // by UE's FCborWriter. We don't need a full decoder -- we just walk the // bytes and append human-readable values to an output string. Handles the // subset actually emitted by UE's metadata scopes: unsigned / negative // integers, byte / text strings, arrays, maps, floats, and the boolean / // null simple values. static bool CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth); static bool CborReadArg(const uint8*& p, const uint8* end, uint8 info, uint64& value) { if (info < 24) { value = info; return true; } if (info == 24) { if (end - p < 1) return false; value = *p++; return true; } if (info == 25) { if (end - p < 2) return false; value = (uint64(p[0]) << 8) | p[1]; p += 2; return true; } if (info == 26) { if (end - p < 4) return false; value = (uint64(p[0]) << 24) | (uint64(p[1]) << 16) | (uint64(p[2]) << 8) | p[3]; p += 4; return true; } if (info == 27) { if (end - p < 8) return false; value = 0; for (int i = 0; i < 8; ++i) { value = (value << 8) | p[i]; } p += 8; return true; } return false; } static bool CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth) { if (depth > 4 || p >= end) { return false; } const uint8 ib = *p++; const uint8 major = ib >> 5; const uint8 info = ib & 0x1f; switch (major) { case 0: // unsigned integer { uint64 v; if (!CborReadArg(p, end, info, v)) return false; out += fmt::format("{}", v); return true; } case 1: // negative integer: -1 - value { uint64 v; if (!CborReadArg(p, end, info, v)) return false; out += fmt::format("{}", -1 - int64_t(v)); return true; } case 2: // byte string case 3: // text string { uint64 len; if (!CborReadArg(p, end, info, len)) return false; if (len > uint64(end - p)) return false; out.append(reinterpret_cast(p), size_t(len)); p += len; return true; } case 4: // array { uint64 count; if (!CborReadArg(p, end, info, count)) return false; for (uint64 i = 0; i < count; ++i) { if (i > 0) out += ", "; if (!CborAppendValue(p, end, out, depth + 1)) return false; } return true; } case 5: // map { uint64 count; if (!CborReadArg(p, end, info, count)) return false; for (uint64 i = 0; i < count; ++i) { if (i > 0) out += ", "; if (!CborAppendValue(p, end, out, depth + 1)) return false; out += "="; if (!CborAppendValue(p, end, out, depth + 1)) return false; } return true; } case 7: // simple values / floats { if (info == 20) { out += "false"; return true; } if (info == 21) { out += "true"; return true; } if (info == 22) { out += "null"; return true; } if (info == 26) { if (end - p < 4) return false; uint32 bits = (uint32(p[0]) << 24) | (uint32(p[1]) << 16) | (uint32(p[2]) << 8) | p[3]; float v; std::memcpy(&v, &bits, 4); out += fmt::format("{}", v); p += 4; return true; } if (info == 27) { if (end - p < 8) return false; uint64 bits = 0; for (int i = 0; i < 8; ++i) { bits = (bits << 8) | p[i]; } p += 8; double v; std::memcpy(&v, &bits, 8); out += fmt::format("{}", v); return true; } return false; } default: return false; } } static std::string FormatMetadataValues(const uint8_t* Bytes, size_t Size) { std::string out; if (Size == 0) { return out; } const uint8* p = Bytes; CborAppendValue(p, Bytes + Size, out, 0); return out; } using zen::trace_detail::TraceTiming; ////////////////////////////////////////////////////////////////////////////// // Metadata registry // // Subscribes to CpuProfiler.Metadata events and stores each payload's // CBOR-encoded bytes keyed by MetadataId, along with the SpecId they // reference. Both CpuAnalyzer and TimelineAnalyzer query the registry when // they encounter a V3 scope with the metadata bit set so the scope can be // rendered as `{base name} - {formatted values}`. struct MetadataEntry { uint32_t SpecId = 0; eastl::vector Bytes; }; class MetadataRegistry : public Analyzer { public: void subscribe(Vector& Subs) override { Subs.emplace_back(this, &MetadataRegistry::OnMetadata); } const MetadataEntry* Lookup(uint32_t MetadataId) const { auto It = m_Entries.find(MetadataId); return (It != m_Entries.end()) ? &It->second : nullptr; } private: void OnMetadata(const CpuProfiler_Metadata& Ev) { uint32_t MetadataId = Ev.Id(); uint32_t SpecId = Ev.SpecId(); Array Data = Ev.Metadata(); MetadataEntry& Entry = m_Entries[MetadataId]; Entry.SpecId = SpecId; Entry.Bytes.assign(Data.get(), Data.get() + Data.get_size()); } eastl::hash_map m_Entries; }; ////////////////////////////////////////////////////////////////////////////// // Log message formatting (upstream UE Logging.* wire) // // UE's trace emits log messages as a sequence of typed arguments that need // to be substituted into a printf-style format string. The wire format is: // // [ArgumentCount: uint8] // [Descriptors: uint8 * ArgumentCount] // each byte = category | size // [Payload: bytes] // // Category bits live in the upper 2 bits (shifted by FormatArgTypeCode_- // CategoryBitShift == 6): Integer=1, Float=2, String=3. The low 6 bits are // the argument size in bytes; for strings the size is the per-character // width (1 == ANSI, 2 == UTF-16). // // We walk the format string, extract each specifier, pull the matching arg // from the stream and hand both to std::snprintf. Width/precision stars // (e.g. "%*.*f") are not supported; they're rare in log formats. // // Note: this is the upstream UE printf-style wire only. The zen-specific // ZenLog.* events use a different descriptor encoding (3-bit category / // 5-bit size, plus a dedicated bool and pointer category) and a different // format-spec grammar. See zen::logging::FormatLogArgs in // src/zencore/logging/tracelog.cpp for that path. struct FormatArgStream { const uint8_t* Descriptors; const uint8_t* Payload; uint8_t Remaining; bool HasNext() const { return Remaining > 0; } uint8_t PeekCategory() const { return (*Descriptors) & 0xC0; } uint8_t PeekSize() const { return (*Descriptors) & 0x3F; } void Advance(size_t PayloadBytes) { Payload += PayloadBytes; ++Descriptors; --Remaining; } }; static bool InitFormatArgStream(FormatArgStream& Ctx, const uint8_t* Data, size_t Size) { if (!Data || Size == 0) { Ctx.Remaining = 0; return false; } uint8_t Count = Data[0]; if (size_t(1) + Count > Size) { Ctx.Remaining = 0; return false; } Ctx.Descriptors = Data + 1; Ctx.Payload = Data + 1 + Count; Ctx.Remaining = Count; return true; } static bool IsPrintfSpecifierChar(char c) { switch (c) { case 'd': case 'i': case 'u': case 'o': case 'x': case 'X': case 'c': case 'p': case 'f': case 'F': case 'e': case 'E': case 'g': case 'G': case 'a': case 'A': case 's': case 'S': case 'n': return true; default: return false; } } static std::string FormatLogMessage(std::string_view Format, const uint8_t* ArgsData, size_t ArgsSize) { FormatArgStream Stream{}; InitFormatArgStream(Stream, ArgsData, ArgsSize); std::string Out; Out.reserve(Format.size() + 32); size_t i = 0; while (i < Format.size()) { char c = Format[i]; if (c != '%') { Out.push_back(c); ++i; continue; } // Handle "%%" -> literal percent. if (i + 1 < Format.size() && Format[i + 1] == '%') { Out.push_back('%'); i += 2; continue; } // Walk the specifier until we find a terminating character. size_t SpecStart = i++; while (i < Format.size() && !IsPrintfSpecifierChar(Format[i])) { ++i; } if (i >= Format.size()) { // Truncated specifier -- copy the remainder literally. Out.append(Format.substr(SpecStart)); break; } char Specifier = Format[i++]; std::string Spec(Format.substr(SpecStart, i - SpecStart)); if (!Stream.HasNext()) { // Not enough arguments: emit the raw specifier so the user can // at least tell something is missing. Out.append(Spec); continue; } const uint8_t Category = Stream.PeekCategory(); const uint8_t Size = Stream.PeekSize(); char Buf[512]; Buf[0] = '\0'; if (Category == 0x40) // integer { uint64_t Raw = 0; if (Size <= sizeof(Raw) && Size > 0) { std::memcpy(&Raw, Stream.Payload, Size); } // Route through the correct snprintf type based on the // specifier. Cast to int64_t for signed integer specifiers. switch (Specifier) { case 'd': case 'i': { // Sign-extend based on Size. int64_t Signed = 0; switch (Size) { case 1: Signed = int8_t(Raw & 0xff); break; case 2: Signed = int16_t(Raw & 0xffff); break; case 4: Signed = int32_t(Raw & 0xffffffff); break; case 8: Signed = int64_t(Raw); break; default: Signed = int64_t(Raw); break; } // Replace length modifier so snprintf interprets the // correctly-sized value. Simplest: append "ll". std::string AdjustedSpec = Spec; AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll"); std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast(Signed)); break; } case 'u': case 'o': case 'x': case 'X': case 'p': { std::string AdjustedSpec = Spec; AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll"); std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast(Raw)); break; } case 'c': { std::snprintf(Buf, sizeof(Buf), Spec.c_str(), int(Raw & 0xff)); break; } default: std::snprintf(Buf, sizeof(Buf), "%llu", static_cast(Raw)); break; } Stream.Advance(Size); } else if (Category == 0x80) // floating point { double Value = 0.0; if (Size == 4) { float F; std::memcpy(&F, Stream.Payload, 4); Value = double(F); } else if (Size == 8) { std::memcpy(&Value, Stream.Payload, 8); } std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Value); Stream.Advance(Size); } else if (Category == 0xC0) // string { std::string Tmp; if (Size == 1) { const char* S = reinterpret_cast(Stream.Payload); size_t Len = std::strlen(S); Tmp.assign(S, Len); std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str()); Stream.Advance(Len + 1); } else if (Size == 2) { const char16_t* W = reinterpret_cast(Stream.Payload); size_t Len = 0; while (W[Len] != 0) ++Len; Tmp.reserve(Len); for (size_t k = 0; k < Len; ++k) { char16_t ch = W[k]; Tmp.push_back(ch < 0x80 ? char(ch) : '?'); } std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str()); Stream.Advance((Len + 1) * 2); } else { std::snprintf(Buf, sizeof(Buf), "", unsigned(Size)); Stream.Advance(0); ++Stream.Descriptors; --Stream.Remaining; } } else { std::snprintf(Buf, sizeof(Buf), ""); Stream.Advance(Size); } Out.append(Buf); } return Out; } ////////////////////////////////////////////////////////////////////////////// // Log analyzer class LogAnalyzer : public Analyzer { public: explicit LogAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} void subscribe(Vector& Subs) override { Subs.emplace_back(this, &LogAnalyzer::OnLogCategory); Subs.emplace_back(this, &LogAnalyzer::OnLogMessageSpec); Subs.emplace_back(this, &LogAnalyzer::OnLogMessage); Subs.emplace_back(this, &LogAnalyzer::OnZenLogCategory); Subs.emplace_back(this, &LogAnalyzer::OnZenLogMessageSpec); Subs.emplace_back(this, &LogAnalyzer::OnZenLogMessage); } eastl::vector BuildCategories(eastl::hash_map& OutPointerToIndex) const { eastl::vector Cats; Cats.reserve(m_Categories.size()); OutPointerToIndex.clear(); for (const auto& [Ptr, Info] : m_Categories) { OutPointerToIndex[Ptr] = uint32_t(Cats.size()); Cats.push_back(Info); } return Cats; } const eastl::vector& Entries() const { return m_Entries; } eastl::vector& MutableEntries() { return m_Entries; } // The shared TraceTiming pointer lets external callers read the trace's // cycle base / frequency without each analyzer having to own a copy. const TraceTiming* Timing() const { return m_Timing; } struct MessageSpec { uint64_t CategoryPointer = 0; int32_t Line = 0; uint8_t Verbosity = 0; std::string File; std::string FormatString; }; const eastl::hash_map& MessageSpecs() const { return m_Specs; } private: // Both Logging.* and ZenLog.* use identical event fields; the difference is // only how their respective FormatArgs get rendered (printf vs fmt), which // is decided at message-emit time. template void IngestCategory(const CategoryEvent& Ev) { uint64_t Ptr = Ev.CategoryPointer(); zen::trace_detail::LogCategoryInfo& Info = m_Categories[Ptr]; Info.Name = SafeFieldStr(Ev.Name()); Info.DefaultVerbosity = Ev.DefaultVerbosity(); } template void IngestSpec(const SpecEvent& Ev) { MessageSpec& Spec = m_Specs[Ev.LogPoint()]; Spec.CategoryPointer = Ev.CategoryPointer(); Spec.Line = Ev.Line(); Spec.Verbosity = Ev.Verbosity(); Spec.File = SafeFieldStr(Ev.FileName()); Spec.FormatString = SafeFieldStr(Ev.FormatString()); } void OnLogCategory(const Logging_LogCategory& Ev) { IngestCategory(Ev); } void OnLogMessageSpec(const Logging_LogMessageSpec& Ev) { IngestSpec(Ev); } void OnLogMessage(const Logging_LogMessage& Ev) { EmitEntry(Ev.LogPoint(), Ev.Cycle(), Ev.FormatArgs(), /*IsZenLog*/ false); } // ZenLog.* shares the same MessageSpec table. LogPoint pointers are emitted // by distinct processes (zenserver vs. a hypothetical UE trace we imported) // so the two keyspaces don't collide in practice. void OnZenLogCategory(const ZenLog_Category& Ev) { IngestCategory(Ev); } void OnZenLogMessageSpec(const ZenLog_MessageSpec& Ev) { IngestSpec(Ev); } void OnZenLogMessage(const ZenLog_Message& Ev) { EmitEntry(Ev.LogPoint(), Ev.Cycle(), Ev.FormatArgs(), /*IsZenLog*/ true); } void EmitEntry(uint64_t LogPoint, uint64_t Cycle, Array Args, bool IsZenLog) { auto SpecIt = m_Specs.find(LogPoint); if (SpecIt == m_Specs.end()) { return; } const MessageSpec& Spec = SpecIt->second; uint32_t TimeUs = m_Timing ? m_Timing->CycleToTimeUs(Cycle) : 0; std::string Msg = IsZenLog ? zen::logging::FormatLogArgs(std::string_view(Spec.FormatString), Args.get(), Args.get_size()) : FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size()); zen::trace_detail::LogEntry Entry; Entry.TimeUs = TimeUs; Entry.Verbosity = Spec.Verbosity; Entry.Line = Spec.Line; Entry.File = Spec.File; Entry.Message = std::move(Msg); // Use the category pointer temporarily so BuildTraceModel can resolve // it against the categories table. Entry.CategoryIndex = SpecToCategoryIndex(Spec.CategoryPointer); m_Entries.push_back(std::move(Entry)); } uint32_t SpecToCategoryIndex(uint64_t Ptr) { // Encoded pointer stuffed into uint32_t so BuildTraceModel can remap. // Lossy but deterministic: use a stable sequential index per unique // pointer so we never need the full 64-bit value beyond build time. auto It = m_CategoryIndex.find(Ptr); if (It != m_CategoryIndex.end()) { return It->second; } uint32_t Idx = uint32_t(m_CategoryIndex.size()); m_CategoryIndex[Ptr] = Idx; return Idx; } public: // Mapping from the intermediate index stored in LogEntry::CategoryIndex // during capture to the real category pointer; BuildTraceModel uses // this to remap entries against the flattened LogCategories array. const eastl::hash_map& CategoryPointerIndex() const { return m_CategoryIndex; } private: const TraceTiming* m_Timing = nullptr; eastl::hash_map m_Categories; eastl::hash_map m_Specs; eastl::hash_map m_CategoryIndex; eastl::vector m_Entries; }; ////////////////////////////////////////////////////////////////////////////// // Bookmarks and regions // // UE's bookmark wire format mirrors LogMessage: a BookmarkSpec introduces // a (FileName, Line, FormatString) triple keyed by a BookmarkPoint pointer, // and each Misc.Bookmark event carries that pointer, a cycle, and the same // FFormatArgsTrace payload the log pipeline already knows how to decode. // Region events come in two flavours: the legacy name-paired // RegionBegin/RegionEnd and the newer *WithId variants that pack a unique // id into the begin event's cycle. class BookmarksAnalyzer : public Analyzer { public: explicit BookmarksAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} void subscribe(Vector& Subs) override { Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmarkSpec); Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmark); Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBegin); Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBeginWithId); Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEnd); Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEndWithId); } eastl::vector& MutableBookmarks() { return m_Bookmarks; } eastl::vector& MutableRegions() { return m_Regions; } private: struct BookmarkSpec { int32_t Line = 0; std::string File; std::string FormatString; }; uint32_t CycleToTimeUs(uint64_t Cycle) const { return m_Timing ? m_Timing->CycleToTimeUs(Cycle) : 0; } void OnBookmarkSpec(const Misc_BookmarkSpec& Ev) { BookmarkSpec& Spec = m_Specs[Ev.BookmarkPoint()]; Spec.Line = Ev.Line(); Spec.File = SafeFieldStr(Ev.FileName()); Spec.FormatString = SafeFieldStr(Ev.FormatString()); } void OnBookmark(const Misc_Bookmark& Ev) { auto SpecIt = m_Specs.find(Ev.BookmarkPoint()); if (SpecIt == m_Specs.end()) { return; } const BookmarkSpec& Spec = SpecIt->second; Array Args = Ev.FormatArgs(); std::string Text = FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size()); zen::trace_detail::Bookmark Out; Out.TimeUs = CycleToTimeUs(Ev.Cycle()); Out.Line = Spec.Line; Out.File = Spec.File; Out.Text = std::move(Text); m_Bookmarks.push_back(std::move(Out)); } uint32_t CreatePartialRegion(uint32_t TimeUs, std::string Name, std::string Category) { zen::trace_detail::RegionEntry Entry; Entry.BeginUs = TimeUs; Entry.EndUs = ~uint32_t(0); // sentinel: still open Entry.Depth = 0; Entry.Reserved = 0; Entry.Name = std::move(Name); Entry.Category = std::move(Category); uint32_t Idx = uint32_t(m_Regions.size()); m_Regions.push_back(std::move(Entry)); return Idx; } // Decodes the raw array bytes of a RegionName field into a std::string. // UE emits RegionName as either AnsiString (1-byte) or WideString (2-byte) // depending on the trace's age -- for the 2-byte case we do the same // lossy ASCII fold tourist's FieldStr does, which is all we need for // display. static std::string DecodeRegionName(const Array& Data) { const uint8_t* p = Data.get(); size_t size = Data.get_size(); uint32_t count = Data.get_count(); if (!p || size == 0 || count == 0) { return {}; } if (size == count) { // 1 byte per element -- AnsiString. return std::string(reinterpret_cast(p), count); } if (size == count * 2) { // 2 bytes per element -- WideString. Lossy ASCII fold. std::string out; out.reserve(count); const char16_t* w = reinterpret_cast(p); for (uint32_t i = 0; i < count; ++i) { out.push_back(w[i] < 0x80 ? char(w[i]) : '?'); } return out; } return {}; } void OnRegionBegin(const Misc_RegionBegin& Ev) { uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); Array NameArr = Ev.RegionName(); std::string Name = DecodeRegionName(NameArr); std::string Category = DecodeRegionName(Ev.Category()); uint32_t Idx = CreatePartialRegion(TimeUs, Name, std::move(Category)); m_OpenByName[Name].push_back(Idx); } void OnRegionBeginWithId(const Misc_RegionBeginWithId& Ev) { // Despite its name, CycleAndId is just Cycles64() -- a plain 64-bit // cycle count that doubles as a unique region identifier. The caller // keeps the returned value and passes it back as RegionId at end. uint64_t CycleAndId = Ev.CycleAndId(); uint32_t TimeUs = CycleToTimeUs(CycleAndId); Array NameArr = Ev.RegionName(); std::string Name = DecodeRegionName(NameArr); std::string Category = DecodeRegionName(Ev.Category()); uint32_t Idx = CreatePartialRegion(TimeUs, std::move(Name), std::move(Category)); m_OpenById[CycleAndId] = Idx; } void OnRegionEnd(const Misc_RegionEnd& Ev) { uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); Array NameArr = Ev.RegionName(); std::string Name = DecodeRegionName(NameArr); auto It = m_OpenByName.find(Name); if (It == m_OpenByName.end() || It->second.empty()) { return; } uint32_t Idx = It->second.back(); It->second.pop_back(); m_Regions[Idx].EndUs = TimeUs; } void OnRegionEndWithId(const Misc_RegionEndWithId& Ev) { uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); uint64_t Id = Ev.RegionId(); auto It = m_OpenById.find(Id); if (It == m_OpenById.end()) { return; } m_Regions[It->second].EndUs = TimeUs; m_OpenById.erase(It); } const TraceTiming* m_Timing = nullptr; eastl::hash_map m_Specs; eastl::vector m_Bookmarks; eastl::vector m_Regions; eastl::hash_map> m_OpenByName; eastl::hash_map m_OpenById; }; ////////////////////////////////////////////////////////////////////////////// // CsvProfiler analyzer -- parses CSV stat categories, definitions, timing, // custom values, events, capture markers, and metadata. class CsvProfilerAnalyzer : public Analyzer { public: explicit CsvProfilerAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} void subscribe(Vector& Subs) override { Subs.emplace_back(this, &CsvProfilerAnalyzer::OnRegisterCategory); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineInlineStat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineDeclaredStat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginStat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndStat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginExclusiveStat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndExclusiveStat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatInt); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatFloat); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEvent); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginCapture); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndCapture); Subs.emplace_back(this, &CsvProfilerAnalyzer::OnMetadata); } eastl::vector& MutableCategories() { return m_Categories; } eastl::vector& MutableStatDefs() { return m_StatDefs; } eastl::vector& MutableEvents() { return m_Events; } eastl::vector& MutableMetadata() { return m_Metadata; } // Build the per-stat+thread time series from the accumulated samples. eastl::vector BuildTimeSeries() { eastl::vector Result; for (auto& [Key, Samples] : m_SeriesMap) { eastl::sort(Samples.begin(), Samples.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; }); zen::trace_detail::TraceModel::CsvSeries S; S.StatId = Key.StatId; S.ThreadId = Key.ThreadId; S.Samples = std::move(Samples); Result.push_back(std::move(S)); } return Result; } private: uint32_t CycleToTimeUs(uint64_t Cycle) const { if (!m_Timing || m_Timing->Freq == 0) { return 0; } uint64_t Elapsed = (Cycle >= m_Timing->Base) ? (Cycle - m_Timing->Base) : 0; return uint32_t(Elapsed * 1'000'000 / m_Timing->Freq); } static std::string DecodeAnsiName(const Array& Data) { const uint8_t* P = Data.get(); size_t Size = Data.get_size(); if (!P || Size == 0) { return {}; } return std::string(reinterpret_cast(P), Size); } static std::string DecodeWideName(const Array& Data) { const uint8_t* P = Data.get(); size_t Size = Data.get_size(); uint32_t Count = Data.get_count(); if (!P || Size == 0 || Count == 0) { return {}; } uint32_t ElemSize = Data.get_element_size(); if (ElemSize == 2) { std::string Out; Out.reserve(Count); const char16_t* W = reinterpret_cast(P); for (uint32_t I = 0; I < Count; ++I) { Out.push_back(W[I] < 0x80 ? char(W[I]) : '?'); } return Out; } return std::string(reinterpret_cast(P), Size); } struct SeriesKey { uint64_t StatId; uint32_t ThreadId; bool operator==(const SeriesKey& O) const { return StatId == O.StatId && ThreadId == O.ThreadId; } }; struct SeriesKeyHash { size_t operator()(const SeriesKey& K) const { return eastl::hash{}(K.StatId) ^ (eastl::hash{}(K.ThreadId) * 2654435761u); } }; void AddSample(uint64_t StatId, uint32_t ThreadId, uint32_t TimeUs, float Value) { m_SeriesMap[SeriesKey{StatId, ThreadId}].push_back({TimeUs, Value}); } // -- Event handlers ----------------------------------------------- void OnRegisterCategory(const CsvProfiler_RegisterCategory& Ev) { zen::trace_detail::TraceModel::CsvCategory Cat; Cat.Index = Ev.Index(); Cat.Name = DecodeAnsiName(Ev.Name()); m_Categories.push_back(std::move(Cat)); } void OnDefineInlineStat(const CsvProfiler_DefineInlineStat& Ev) { DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name())); } void OnDefineDeclaredStat(const CsvProfiler_DefineDeclaredStat& Ev) { DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name())); } void DefineStat(uint64_t StatId, int32_t CategoryIndex, std::string Name) { if (m_StatIdToIndex.count(StatId)) { return; // already defined } m_StatIdToIndex[StatId] = uint32_t(m_StatDefs.size()); zen::trace_detail::TraceModel::CsvStatDef Def; Def.StatId = StatId; Def.CategoryIndex = CategoryIndex; Def.Name = std::move(Name); m_StatDefs.push_back(std::move(Def)); } void OnBeginStat(const CsvProfiler_BeginStat& Ev) { uint32_t ThreadId = Ev.get_thread_id(); uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs); } void OnEndStat(const CsvProfiler_EndStat& Ev) { uint32_t ThreadId = Ev.get_thread_id(); uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); auto Key = SeriesKey{Ev.StatId(), ThreadId}; auto It = m_OpenStacks.find(Key); if (It == m_OpenStacks.end() || It->second.empty()) { return; } uint32_t BeginUs = It->second.back(); It->second.pop_back(); float DurationMs = float(TimeUs - BeginUs) / 1000.0f; AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs); } void OnBeginExclusiveStat(const CsvProfiler_BeginExclusiveStat& Ev) { // For basic support, treat exclusive stats like regular stats. uint32_t ThreadId = Ev.get_thread_id(); uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs); } void OnEndExclusiveStat(const CsvProfiler_EndExclusiveStat& Ev) { uint32_t ThreadId = Ev.get_thread_id(); uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); auto Key = SeriesKey{Ev.StatId(), ThreadId}; auto It = m_OpenStacks.find(Key); if (It == m_OpenStacks.end() || It->second.empty()) { return; } uint32_t BeginUs = It->second.back(); It->second.pop_back(); float DurationMs = float(TimeUs - BeginUs) / 1000.0f; AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs); } void OnCustomStatInt(const CsvProfiler_CustomStatInt& Ev) { uint32_t ThreadId = Ev.get_thread_id(); uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); AddSample(Ev.StatId(), ThreadId, TimeUs, float(Ev.Value())); } void OnCustomStatFloat(const CsvProfiler_CustomStatFloat& Ev) { uint32_t ThreadId = Ev.get_thread_id(); uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); AddSample(Ev.StatId(), ThreadId, TimeUs, Ev.Value()); } void OnEvent(const CsvProfiler_Event& Ev) { zen::trace_detail::TraceModel::CsvEvent E; E.TimeUs = CycleToTimeUs(Ev.Cycle()); E.CategoryIndex = Ev.CategoryIndex(); E.Text = DecodeWideName(Ev.Text()); m_Events.push_back(std::move(E)); } void OnBeginCapture(const CsvProfiler_BeginCapture& Ev) { m_CaptureStartUs = CycleToTimeUs(Ev.Cycle()); } void OnEndCapture(const CsvProfiler_EndCapture& Ev) { m_CaptureEndUs = CycleToTimeUs(Ev.Cycle()); } void OnMetadata(const CsvProfiler_Metadata& Ev) { zen::trace_detail::TraceModel::CsvMeta M; M.Key = DecodeWideName(Ev.Key()); M.Value = DecodeWideName(Ev.Value()); m_Metadata.push_back(std::move(M)); } const TraceTiming* m_Timing = nullptr; eastl::vector m_Categories; eastl::vector m_StatDefs; eastl::hash_map m_StatIdToIndex; // Timing stacks: (StatId, ThreadId) -> stack of begin times eastl::hash_map, SeriesKeyHash> m_OpenStacks; // Accumulated samples: (StatId, ThreadId) -> samples eastl::hash_map, SeriesKeyHash> m_SeriesMap; eastl::vector m_Events; eastl::vector m_Metadata; uint32_t m_CaptureStartUs = 0; uint32_t m_CaptureEndUs = 0; }; ////////////////////////////////////////////////////////////////////////////// // Analyzers class CpuAnalyzer : public Analyzer { public: CpuAnalyzer(EventStats& Stats, NameDepot& Names, const MetadataRegistry* Metadata) : m_Names(Names) , m_Stats(Stats) , m_Metadata(Metadata) { Names.Add(NO_NAME, "???"); } void subscribe(Vector& Subs) override { Subs.emplace_back(this, &CpuAnalyzer::OnNewTrace); Subs.emplace_back(this, &CpuAnalyzer::OnCpuSpec); Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatch); Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV2); Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV3); } private: static constexpr uint32 NO_INDEX = ~0u; static constexpr uint64 NO_NAME = ~0ull; static constexpr uint32 METADATA_BIT = 0x8000'0000u; uint64 ResolveNameHash(uint32 PackedId) { const bool IsMetadata = (PackedId & METADATA_BIT) != 0; const uint32 Id = PackedId & ~METADATA_BIT; if (IsMetadata && m_Metadata) { auto CachedIt = m_MetadataNames.find(Id); if (CachedIt != m_MetadataNames.end()) { return CachedIt->second; } const MetadataEntry* Entry = m_Metadata->Lookup(Id); if (Entry) { auto BaseIt = m_Specs.find(Entry->SpecId); StringView BaseName = (BaseIt != m_Specs.end()) ? m_Names.Get(BaseIt->second) : StringView("???"); std::string Formatted(BaseName); std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size()); if (!Values.empty()) { Formatted += " - "; Formatted += Values; } uint64 Hash = m_Names.Add(StringView(Formatted)); m_MetadataNames[Id] = Hash; return Hash; } return NO_NAME; } auto It = m_Specs.find(Id); return (It != m_Specs.end()) ? It->second : NO_NAME; } struct EventStack { uint32 Tail = NO_INDEX; }; struct ScopeEvent { uint32 Id; uint32 TimeUs; union { uint32 Next; uint32 Index; }; }; struct EventPool { uint32 Alloc() { if (m_FreeHead == NO_INDEX) { uint32 Idx = uint32(m_Pool.size()); m_Pool.push_back({.Index = Idx}); return Idx; } uint32 Idx = m_FreeHead; m_FreeHead = m_Pool[Idx].Index; return Idx; } void Free(uint32 Idx) { m_Pool[Idx].Index = m_FreeHead; m_FreeHead = Idx; } ScopeEvent& Get(uint32 Idx) { return m_Pool[Idx]; } eastl::vector m_Pool; uint32 m_FreeHead = NO_INDEX; }; void OnNewTrace(const $Trace_NewTrace& NewTrace) { m_Freq = NewTrace.CycleFrequency(); m_Base = NewTrace.StartCycle(); m_UsDiv = m_Freq / 1'000'000; if (m_UsDiv == 0) { m_UsDiv = 1; } m_UsDivRecip = ReciprocalU64(m_UsDiv); } void OnCpuSpec(const CpuProfiler_EventSpec& Spec) { uint32 SpecId = Spec.Id(); FieldStr SpecName = Spec.Name(); StringView NameView = SpecName.as_view(); if (NameView.starts_with("Frame ")) { NameView = "Frame"; } if (size_t Pos = NameView.find("\""); Pos != StringView::npos) { NameView = NameView.substr(0, Pos); } if (size_t Pos = NameView.find("\\"); Pos != StringView::npos) { NameView = NameView.substr(0, Pos); } m_Specs[SpecId] = m_Names.Add(NameView); } void OnCpuBatch(const CpuProfiler_EventBatch& Batch) { uint32 ThreadId = Batch.get_thread_id(); Array Data = Batch.Data(); AbsorbBatch(/*Version=*/1, ThreadId, Data); } void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) { uint32 ThreadId = Batch.get_thread_id(); Array Data = Batch.Data(); AbsorbBatch(/*Version=*/2, ThreadId, Data); } void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) { uint32 ThreadId = Batch.get_thread_id(); Array Data = Batch.Data(); AbsorbBatch(/*Version=*/3, ThreadId, Data); } // Decodes a CpuProfiler scope batch. Mirrors UE's reference // TraceServices/.../CpuProfilerTraceAnalysis.cpp ProcessBuffer / // ProcessBufferV2. // // Version 1 (`CpuProfiler.EventBatch`): cycle is `value >> 1`; bit 0 is // IsEnter; IsEnter events carry a SpecId varint. // // Version 2 (`CpuProfiler.EventBatchV2`, UE 5.1..5.5) and Version 3 // (`CpuProfiler.EventBatchV3`, UE 5.6+): cycle is `value >> 2`; bit 0 is // IsEnter, bit 1 is IsCoroutine. Coroutine begin events carry CoroutineId // and TimerScopeDepth varints; coroutine end events carry a single // TimerScopeDepth varint. V3 additionally reserves the low bit of the // SpecId to mark metadata-bearing timers, so SpecId must be shifted // right by 1 to recover the actual spec id. void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array& Data) { const uint8* Cursor = Data.get(); const uint8* End = Cursor + Data.get_size(); auto Decode = [&]() { uint64 Value = 0; for (uint32 I = 1, J = 0; I; J += 7) { I = *Cursor++; Value |= uint64(I & 0x7f) << J; I &= 0x80; } return Value; }; if (ThreadId >= m_Threads.size()) { m_Threads.resize(ThreadId + 1); } EventStack& Stack = m_Threads[ThreadId]; const uint32 CycleShift = (Version == 1) ? 1u : 2u; uint64 Base = m_Base; uint64 Cycle = ~Base + 1; while (Cursor < End) { uint64 Value = Decode(); uint32 IsEnter = (Value & 0b01); if (Version > 1 && (Value & 0b10)) { // Coroutine event -- not visualised, but the trailing varints // still need to be consumed so we stay in sync with the // stream. if (IsEnter) { (void)Decode(); // CoroutineId (void)Decode(); // TimerScopeDepth } else { (void)Decode(); // TimerScopeDepth } continue; } uint64 EventId = IsEnter ? Decode() : ~0ull; Cycle += (Value >> CycleShift); uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); if (IsEnter) { uint32 ScopeId = uint32(EventId); bool IsMetadata = false; if (Version == 3) { IsMetadata = (ScopeId & 1u) != 0; ScopeId >>= 1; } uint32 EvIdx = m_Events.Alloc(); ScopeEvent& Ev = m_Events.Get(EvIdx); // Pack the metadata flag in the high bit so the close path // can distinguish metadata-id scopes from regular ones without // an extra field. Ev.Id = IsMetadata ? (ScopeId | 0x8000'0000u) : ScopeId; Ev.TimeUs = TimeUs; Ev.Next = Stack.Tail; Stack.Tail = EvIdx; continue; } if (Stack.Tail == NO_INDEX) { continue; } ScopeEvent& Ev = m_Events.Get(Stack.Tail); uint32 DurationUs = TimeUs - Ev.TimeUs; uint64 NameHash = ResolveNameHash(Ev.Id); m_Stats.Record(NameHash, DurationUs); uint32 NextIdx = Ev.Next; m_Events.Free(Stack.Tail); Stack.Tail = NextIdx; } } uint64 m_Freq = 0; uint64 m_Base = 0; uint64 m_UsDiv = 1; ReciprocalU64 m_UsDivRecip; eastl::hash_map m_Specs; NameDepot& m_Names; EventPool m_Events; eastl::vector m_Threads; EventStats& m_Stats; const MetadataRegistry* m_Metadata = nullptr; // Caches the resolved name hash for each MetadataId so we don't // re-format the same CBOR payload on every scope-close. eastl::hash_map m_MetadataNames; }; ////////////////////////////////////////////////////////////////////////////// // Per-event CPU scope capture for the interactive trace viewer. // // Mirrors CpuAnalyzer's decode loop but instead of aggregating statistics, // it records one TimelineScope per closed CPU scope so the viewer can draw a // flame graph. Scope names are interned into a flat vector so each event only // stores a compact uint32 NameId. class TimelineAnalyzer : public Analyzer { public: explicit TimelineAnalyzer(const MetadataRegistry* Metadata = nullptr, TraceTiming* SharedTiming = nullptr) : m_SharedTiming(SharedTiming) , m_Metadata(Metadata) { } void subscribe(Vector& Subs) override { Subs.emplace_back(this, &TimelineAnalyzer::OnNewTrace); Subs.emplace_back(this, &TimelineAnalyzer::OnCpuSpec); Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatch); Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV2); Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV3); } struct ThreadData { eastl::vector Scopes; // Open-scope stack: parallel arrays keeping begin time and name id. eastl::vector OpenBeginUs; eastl::vector OpenNameIds; }; const eastl::vector& ScopeNames() const { return m_ScopeNames; } const eastl::map& Threads() const { return m_Threads; } uint32_t MinBeginUs() const { return m_MinBeginUs; } uint32_t MaxEndUs() const { return m_MaxEndUs; } private: static constexpr uint32_t INVALID_NAME = ~0u; uint32_t InternName(StringView Name) { String Key(Name); auto [It, Inserted] = m_NameIndex.try_emplace(std::move(Key), 0); if (Inserted) { It->second = uint32_t(m_ScopeNames.size()); m_ScopeNames.emplace_back(Name); } return It->second; } void OnNewTrace(const $Trace_NewTrace& NewTrace) { m_Freq = NewTrace.CycleFrequency(); m_Base = NewTrace.StartCycle(); m_UsDiv = m_Freq / 1'000'000; if (m_UsDiv == 0) { m_UsDiv = 1; } m_UsDivRecip = ReciprocalU64(m_UsDiv); if (m_SharedTiming) { m_SharedTiming->Freq = m_Freq; m_SharedTiming->Base = m_Base; m_SharedTiming->UsDiv = m_UsDiv; } } void OnCpuSpec(const CpuProfiler_EventSpec& Spec) { uint32 SpecId = Spec.Id(); FieldStr SpecName = Spec.Name(); StringView NameView = SpecName.as_view(); if (NameView.starts_with("Frame ")) { NameView = "Frame"; } if (size_t Pos = NameView.find("\""); Pos != StringView::npos) { NameView = NameView.substr(0, Pos); } if (size_t Pos = NameView.find("\\"); Pos != StringView::npos) { NameView = NameView.substr(0, Pos); } m_Specs[SpecId] = InternName(NameView); } void OnCpuBatch(const CpuProfiler_EventBatch& Batch) { uint32 ThreadId = Batch.get_thread_id(); Array Data = Batch.Data(); AbsorbBatch(/*Version=*/1, ThreadId, Data); } void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) { uint32 ThreadId = Batch.get_thread_id(); Array Data = Batch.Data(); AbsorbBatch(/*Version=*/2, ThreadId, Data); } void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) { uint32 ThreadId = Batch.get_thread_id(); Array Data = Batch.Data(); AbsorbBatch(/*Version=*/3, ThreadId, Data); } TraceTiming* m_SharedTiming = nullptr; // See CpuAnalyzer::AbsorbBatch for a detailed description of the wire // format for each version. void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array& Data) { const uint8* Cursor = Data.get(); const uint8* End = Cursor + Data.get_size(); auto Decode = [&]() { uint64 Value = 0; for (uint32 I = 1, J = 0; I; J += 7) { I = *Cursor++; Value |= uint64(I & 0x7f) << J; I &= 0x80; } return Value; }; ThreadData& Thread = m_Threads[ThreadId]; const uint32 CycleShift = (Version == 1) ? 1u : 2u; uint64 Base = m_Base; uint64 Cycle = ~Base + 1; while (Cursor < End) { uint64 Value = Decode(); uint32 IsEnter = (Value & 0b01); if (Version > 1 && (Value & 0b10)) { // Coroutine event -- consume the trailing varints so we stay // in sync with the stream, but drop the event on the floor. if (IsEnter) { (void)Decode(); // CoroutineId (void)Decode(); // TimerScopeDepth } else { (void)Decode(); // TimerScopeDepth } continue; } uint64 EventId = IsEnter ? Decode() : ~0ull; Cycle += (Value >> CycleShift); uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); if (IsEnter) { uint32 ScopeId = uint32(EventId); bool IsMetadata = false; if (Version == 3) { IsMetadata = (ScopeId & 1u) != 0; ScopeId >>= 1; } uint32_t NameId = IsMetadata ? ResolveMetadataNameId(ScopeId) : LookupSpecNameId(ScopeId); Thread.OpenBeginUs.push_back(TimeUs); Thread.OpenNameIds.push_back(NameId); continue; } if (Thread.OpenBeginUs.empty()) { continue; } uint32_t BeginUs = Thread.OpenBeginUs.back(); uint32_t NameId = Thread.OpenNameIds.back(); Thread.OpenBeginUs.pop_back(); Thread.OpenNameIds.pop_back(); if (NameId == INVALID_NAME) { continue; } uint16_t Depth = uint16_t(Thread.OpenBeginUs.size()); Thread.Scopes.push_back(zen::trace_detail::TimelineScope{ .BeginUs = BeginUs, .DurationUs = TimeUs - BeginUs, .NameId = NameId, .Depth = Depth, .MergeCount = 0, }); if (BeginUs < m_MinBeginUs) { m_MinBeginUs = BeginUs; } if (TimeUs > m_MaxEndUs) { m_MaxEndUs = TimeUs; } } } uint32_t LookupSpecNameId(uint32 SpecId) const { auto It = m_Specs.find(SpecId); return (It != m_Specs.end()) ? It->second : INVALID_NAME; } uint32_t ResolveMetadataNameId(uint32 MetadataId) { if (!m_Metadata) { return INVALID_NAME; } auto CachedIt = m_MetadataNameIds.find(MetadataId); if (CachedIt != m_MetadataNameIds.end()) { return CachedIt->second; } const MetadataEntry* Entry = m_Metadata->Lookup(MetadataId); if (!Entry) { return INVALID_NAME; } auto BaseIt = m_Specs.find(Entry->SpecId); const std::string& BaseName = (BaseIt != m_Specs.end()) ? m_ScopeNames[BaseIt->second] : kUnknownName; std::string Formatted = BaseName; std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size()); if (!Values.empty()) { Formatted += " - "; Formatted += Values; } uint32_t NameId = InternName(StringView(Formatted.data(), Formatted.size())); m_MetadataNameIds[MetadataId] = NameId; return NameId; } static inline const std::string kUnknownName{"???"}; uint64 m_Freq = 0; uint64 m_Base = 0; uint64 m_UsDiv = 1; ReciprocalU64 m_UsDivRecip; uint32_t m_MinBeginUs = ~0u; uint32_t m_MaxEndUs = 0; eastl::hash_map m_Specs; eastl::hash_map m_NameIndex; eastl::vector m_ScopeNames; eastl::map m_Threads; const MetadataRegistry* m_Metadata = nullptr; eastl::hash_map m_MetadataNameIds; }; ////////////////////////////////////////////////////////////////////////////// } // anonymous namespace std::string zen::trace_detail::SafeFieldStr(FieldStr&& Field) { try { std::string_view View = Field.as_view(); // Some trace writers include the NUL terminator in the field length // (see UE trace ToAnsiCheap / ThreadRegister). Strip any trailing NULs // so downstream consumers don't see garbage. while (!View.empty() && View.back() == '\0') { View.remove_suffix(1); } return std::string(View); } catch (const std::exception& E) { ZEN_DEBUG("Failed to decode trace string field: {}", E.what()); return {}; } } namespace { // Derive a thread group name from a thread name by stripping a trailing // integer suffix (optionally preceded by a separator). E.g. "IoPool Worker 3" // -> "IoPool Worker", "DbWorker_12" -> "DbWorker", "HttpThread42" -> // "HttpThread". Returns an empty string if no suffix is present or the // resulting prefix would be empty. static std::string SynthesizeThreadGroupFromName(std::string_view Name) { size_t I = Name.size(); while (I > 0 && Name[I - 1] >= '0' && Name[I - 1] <= '9') { --I; } if (I == Name.size()) { return {}; // no trailing digits } if (I > 0) { char C = Name[I - 1]; if (C == '_' || C == '-' || C == '.' || C == ':' || C == '#' || C == '/' || C == ' ' || C == '\t') { --I; } } while (I > 0 && (Name[I - 1] == ' ' || Name[I - 1] == '\t')) { --I; } if (I == 0) { return {}; // pure-numeric name } return std::string(Name.substr(0, I)); } class SessionAnalyzer : public Analyzer { public: zen::trace_detail::SessionInfo Session; eastl::map ThreadNames; eastl::map Channels; void subscribe(Vector& Subs) override { Subs.emplace_back(this, &SessionAnalyzer::OnSession); Subs.emplace_back(this, &SessionAnalyzer::OnThreadGroupBegin); Subs.emplace_back(this, &SessionAnalyzer::OnThreadGroupEnd); Subs.emplace_back(this, &SessionAnalyzer::OnThreadInfo); Subs.emplace_back(this, &SessionAnalyzer::OnChannelAnnounce); Subs.emplace_back(this, &SessionAnalyzer::OnChannelToggle); } private: eastl::vector m_GroupStack; void OnSession(const Diagnostics_Session2& Ev) { Session.Platform = SafeFieldStr(Ev.Platform()); Session.AppName = SafeFieldStr(Ev.AppName()); Session.ProjectName = SafeFieldStr(Ev.ProjectName()); Session.CommandLine = SafeFieldStr(Ev.CommandLine()); Session.Branch = SafeFieldStr(Ev.Branch()); Session.BuildVersion = SafeFieldStr(Ev.BuildVersion()); Session.Changelist = Ev.Changelist(); Session.ConfigurationType = Ev.ConfigurationType(); Session.HasSession = true; } void OnThreadGroupBegin(const $Trace_ThreadGroupBegin& Ev) { m_GroupStack.push_back(SafeFieldStr(Ev.Name())); } void OnThreadGroupEnd(const $Trace_ThreadGroupEnd&) { if (!m_GroupStack.empty()) { m_GroupStack.pop_back(); } } void OnThreadInfo(const $Trace_ThreadInfo& Ev) { uint32_t Tid = Ev.ThreadId(); zen::trace_detail::ThreadInfoEntry& Info = ThreadNames[Tid]; Info.ThreadId = Tid; Info.Name = SafeFieldStr(Ev.Name()); Info.GroupName = m_GroupStack.empty() ? "" : m_GroupStack.back(); if (Info.GroupName.empty()) { Info.GroupName = SynthesizeThreadGroupFromName(Info.Name); } Info.SystemId = Ev.SystemId(); Info.SortHint = Ev.SortHint(); } void OnChannelAnnounce(const Trace_ChannelAnnounce& Ev) { uint32_t Id = Ev.Id(); zen::trace_detail::ChannelInfo& Info = Channels[Id]; Info.Name = SafeFieldStr(Ev.Name()); Info.Enabled = Ev.IsEnabled() != 0; Info.ReadOnly = Ev.ReadOnly() != 0; } void OnChannelToggle(const Trace_ChannelToggle& Ev) { uint32_t Id = Ev.Id(); auto It = Channels.find(Id); if (It != Channels.end()) { It->second.Enabled = Ev.IsEnabled() != 0; } } }; ////////////////////////////////////////////////////////////////////////////// // Module analyzer // // Captures Diagnostics.Module{Init,Load,Unload} so TraceModel::Modules has a // populated list of loaded DLLs. These events are NoSync+Important so they // don't carry a Cycle field (no load/unload timestamps available) but they // do survive reconnects and the trim filter. The analyzer is intentionally // passive -- we stash the raw data here and leave symbolication and memory // attribution to whatever consumes TraceModel::Modules later. class ModuleAnalyzer : public Analyzer { public: eastl::map ModulesByBase; std::string SymbolFormat; uint8_t BaseShift = 0; void subscribe(Vector& Subs) override { Subs.emplace_back(this, &ModuleAnalyzer::OnModuleInit); Subs.emplace_back(this, &ModuleAnalyzer::OnModuleLoad); Subs.emplace_back(this, &ModuleAnalyzer::OnModuleUnload); } private: void OnModuleInit(const Diagnostics_ModuleInit& Ev) { SymbolFormat = SafeFieldStr(Ev.SymbolFormat()); BaseShift = Ev.ModuleBaseShift(); } void OnModuleLoad(const Diagnostics_ModuleLoad& Ev) { // Older traces stored Base as a 32-bit value shifted right by // ModuleBaseShift to fit. Modern traces set BaseShift to zero and // Base is a full 64-bit address; applying the shift then is a // harmless no-op. uint64_t Base = uint64_t(Ev.Base()) << BaseShift; zen::trace_detail::ModuleInfo& Info = ModulesByBase[Base]; Info.FullPath = SafeFieldStr(Ev.Name()); Info.Base = Base; Info.Size = Ev.Size(); Info.Unloaded = false; // Extract the basename without pulling in the whole filesystem // library for a single operation. UE emits forward- or backslashes // depending on platform, so handle both. const std::string& Path = Info.FullPath; size_t Cut = Path.find_last_of("/\\"); Info.Name = (Cut == std::string::npos) ? Path : Path.substr(Cut + 1); ::Array ImageId = Ev.ImageId(); const uint8* IdPtr = ImageId.get(); const uint32 IdSize = ImageId.get_size(); Info.ImageId.assign(IdPtr, IdPtr + IdSize); } void OnModuleUnload(const Diagnostics_ModuleUnload& Ev) { uint64_t Base = uint64_t(Ev.Base()) << BaseShift; auto It = ModulesByBase.find(Base); if (It != ModulesByBase.end()) { It->second.Unloaded = true; } } }; ////////////////////////////////////////////////////////////////////////////// // Trim analyzer // // Decodes CpuProfiler batch events to extract per-batch timestamp ranges AND // to track open/close scope bracketing per thread. The scope tracker lets us // identify "must-keep" packets: any packet containing the Leave event for a // scope whose Enter was at or before the user's trim EndUs. Preserving those // Leaves is what lets the downstream TimelineAnalyzer (and Unreal Insights) // still render long-running scopes that span the window end -- if we dropped // their closing event the scope would sit unmatched on the open-scope stack // and not render at all. // // Attribution to raw packet indices is approximate due to Tourist's internal // per-thread packet buffering; the trim driver processes the trace one packet // at a time (bundle of size 1) to keep it as tight as possible. Packets that // never get an attributed time range are conservatively retained by the // caller. class TrimAnalyzer : public Analyzer { public: // Maps packet index (matching both Tourist's Packet::get_index() and our // raw walker's vector position) -> (MinUs, MaxUs) of all events attributed // to that packet. struct Range { uint32_t MinUs = ~0u; uint32_t MaxUs = 0; }; eastl::hash_map PacketRanges; // Maps thread id -> the maximum packet index that contains a Leave event // for a scope whose matching Enter was at or before EndUs. These packets // must be retained so the downstream analyzer can close the scope. eastl::hash_map MustKeepPacketByThread; // Set by the trim driver from TraceTrimArgs::EndSec before the analysis // pass begins. Used by the scope tracker to decide which leaves are // "must keep". uint32_t EndUs = ~0u; // Updated by the trim driver before each Proto.read call when the next // packet is on a normal thread. Maps normal-thread id -> the most // recently scattered packet's index. eastl::hash_map LastPacketIndexByThread; void subscribe(Vector& Subs) override { Subs.emplace_back(this, &TrimAnalyzer::OnNewTrace); Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatch); Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV2); Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV3); } bool HasTimeBase() const { return m_Freq != 0; } private: void OnNewTrace(const $Trace_NewTrace& NewTrace) { m_Freq = NewTrace.CycleFrequency(); m_Base = NewTrace.StartCycle(); m_UsDiv = (m_Freq > 0) ? (m_Freq / 1'000'000) : 1; if (m_UsDiv == 0) { m_UsDiv = 1; } m_UsDivRecip = ReciprocalU64(m_UsDiv); } void OnCpuBatch(const CpuProfiler_EventBatch& Batch) { AbsorbBatchTimes(/*Version=*/1, Batch.get_thread_id(), Batch.Data()); } void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) { AbsorbBatchTimes(/*Version=*/2, Batch.get_thread_id(), Batch.Data()); } void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) { AbsorbBatchTimes(/*Version=*/3, Batch.get_thread_id(), Batch.Data()); } // Decodes cycle deltas in a CpuProfiler batch to find the timestamp range // AND to maintain a per-thread open-scope stack. Mirrors the wire format // documented in CpuAnalyzer::AbsorbBatch. Scope ids are decoded just far // enough to keep the varint cursor in sync; we don't store them. void AbsorbBatchTimes(uint32 Version, uint32 ThreadId, const Array& Data) { if (m_Freq == 0) { return; } auto It = LastPacketIndexByThread.find(ThreadId); if (It == LastPacketIndexByThread.end()) { return; } const uint32_t PacketIndex = It->second; // The open-scope stack is maintained across every batch on a thread. // Each entry stores the Enter time in microseconds from trace start. eastl::vector& OpenStack = m_OpenScopes[ThreadId]; const uint8* Cursor = Data.get(); const uint8* End = Cursor + Data.get_size(); auto Decode = [&]() { uint64 Value = 0; for (uint32 I = 1, J = 0; I; J += 7) { I = *Cursor++; Value |= uint64(I & 0x7f) << J; I &= 0x80; } return Value; }; const uint32 CycleShift = (Version == 1) ? 1u : 2u; const uint64 Base = m_Base; uint32_t BatchMinUs = ~0u; uint32_t BatchMaxUs = 0; bool HasAny = false; uint64 Cycle = ~Base + 1; while (Cursor < End) { uint64 Value = Decode(); uint32 IsEnter = (Value & 0b01); if (Version > 1 && (Value & 0b10)) { // Coroutine event -- consume the trailing varints and skip. // These don't participate in the scope bracket tracking; the // existing TimelineAnalyzer ignores them for the same reason. if (IsEnter) { (void)Decode(); // CoroutineId (void)Decode(); // TimerScopeDepth } else { (void)Decode(); // TimerScopeDepth } continue; } if (IsEnter) { (void)Decode(); // EventId / SpecId } Cycle += (Value >> CycleShift); uint32_t TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); if (!HasAny || TimeUs < BatchMinUs) { BatchMinUs = TimeUs; } if (!HasAny || TimeUs > BatchMaxUs) { BatchMaxUs = TimeUs; } HasAny = true; if (IsEnter) { OpenStack.push_back(TimeUs); } else if (!OpenStack.empty()) { uint32_t EnterTimeUs = OpenStack.back(); OpenStack.pop_back(); // If the scope started at or before the window end, we need // its closing Leave event to survive so the downstream // analyzer can render it. Mark the current packet (the one // holding this Leave) as must-keep for the thread. if (EnterTimeUs <= EndUs) { uint32_t& MustKeep = MustKeepPacketByThread[ThreadId]; if (PacketIndex > MustKeep) { MustKeep = PacketIndex; } } } } if (!HasAny) { return; } Range& R = PacketRanges[PacketIndex]; R.MinUs = std::min(R.MinUs, BatchMinUs); R.MaxUs = std::max(R.MaxUs, BatchMaxUs); } uint64 m_Freq = 0; uint64 m_Base = 0; uint64 m_UsDiv = 1; ReciprocalU64 m_UsDivRecip; // Per-thread open scope stack, carrying the Enter times in microseconds // from trace start. Entries are pushed on Enter and popped on Leave; the // stack may contain unclosed entries when decoding ends (scopes that // outlive the captured trace). eastl::hash_map> m_OpenScopes; }; ////////////////////////////////////////////////////////////////////////////// // Common trace iteration struct TraceSummary { eastl::map> TypeInfo; eastl::set Threads; uint64_t TotalEvents = 0; }; template static TraceSummary IterateTrace(::DataSource& Source, ParcelCallback OnParcel, const zen::trace_detail::ProgressCallback& OnProgress = {}) { TraceSummary Summary; try { uint64_t TotalFileBytes = uint64_t(std::max(Source.get_size(), int64(0))); ::Allocator TraceAllocator; ::Preamble Pream(Source, TraceAllocator); ::Transport Xport = Pream.get_transport(); ::Protocol Proto = Pream.get_protocol(); ::Packet Packets[128]; ::EventParcel Parcel; while (::Bundle Bndl = Xport.read_packets(Packets)) { Parcel.reset(); Proto.read(Parcel, Bndl); OnParcel(Parcel); for (const ::Type* TraceType : Parcel.new_types) { auto [LoggerName, EventName] = TraceType->get_name(); std::string TypeName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName)); Summary.TypeInfo[TraceType->get_uid()] = {std::move(TypeName), 0}; } for (const ::Event& Ev : Parcel.events) { Summary.TotalEvents++; Summary.Threads.insert(Ev.thread_id); auto It = Summary.TypeInfo.find(Ev.uid); if (It != Summary.TypeInfo.end()) { It->second.second++; } } if (OnProgress) { OnProgress(Xport.tell(), TotalFileBytes, Summary.TotalEvents); } } } catch (const DataStream::Eof&) { } catch (const Exception::StreamError& E) { throw std::runtime_error(fmt::format("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value)); } return Summary; } // Print session metadata static void PrintSessionInfo(const SessionAnalyzer& SessionAn) { const zen::trace_detail::SessionInfo& Sess = SessionAn.Session; if (!Sess.HasSession) { return; } ZEN_CONSOLE("Platform: {}", Sess.Platform); ZEN_CONSOLE("App: {}", Sess.AppName); if (!Sess.ProjectName.empty()) { ZEN_CONSOLE("Project: {}", Sess.ProjectName); } if (!Sess.Branch.empty()) { ZEN_CONSOLE("Branch: {}", Sess.Branch); } if (!Sess.BuildVersion.empty()) { ZEN_CONSOLE("Build: {}", Sess.BuildVersion); } if (Sess.Changelist) { ZEN_CONSOLE("Changelist: {}", Sess.Changelist); } if (!Sess.CommandLine.empty()) { ZEN_CONSOLE("CommandLine: {}", Sess.CommandLine); } ZEN_CONSOLE(""); } // Print thread names static void PrintThreadInfo(const SessionAnalyzer& SessionAn) { if (SessionAn.ThreadNames.empty()) { return; } eastl::vector> ThreadsSorted; for (const auto& [Tid, Info] : SessionAn.ThreadNames) { ThreadsSorted.emplace_back(Tid, &Info); } eastl::sort(ThreadsSorted.begin(), ThreadsSorted.end(), [](const auto& A, const auto& B) { return A.second->SortHint < B.second->SortHint; }); ZEN_CONSOLE(""); ZEN_CONSOLE("Threads:"); ZEN_CONSOLE(""); ZEN_CONSOLE("{:>6} {:>10} {}", "TID", "SystemID", "Name"); ZEN_CONSOLE("{:-<{}}", "", 6 + 10 + 40 + 4); for (const auto& [Tid, Info] : ThreadsSorted) { ZEN_CONSOLE("{:>6} {:>10} {}", Tid, Info->SystemId, Info->Name); } } // Print trace channel info static void PrintChannelInfo(const SessionAnalyzer& SessionAn) { if (SessionAn.Channels.empty()) { return; } eastl::vector ChannelsSorted; for (const auto& [Id, Info] : SessionAn.Channels) { ChannelsSorted.push_back(&Info); } eastl::sort(ChannelsSorted.begin(), ChannelsSorted.end(), [](const auto* A, const auto* B) { return A->Name < B->Name; }); ZEN_CONSOLE(""); ZEN_CONSOLE("Trace Channels:"); ZEN_CONSOLE(""); for (const zen::trace_detail::ChannelInfo* Ch : ChannelsSorted) { std::string_view State = Ch->Enabled ? "enabled" : "disabled"; if (Ch->ReadOnly) { ZEN_CONSOLE(" {} ({}, read-only)", Ch->Name, State); } else { ZEN_CONSOLE(" {} ({})", Ch->Name, State); } } } } // namespace ////////////////////////////////////////////////////////////////////////////// namespace zen::trace_detail { std::filesystem::path ResolveTraceFile(const std::filesystem::path& Input, cxxopts::Options& HelpOptions) { if (Input.empty()) { throw zen::OptionParseException("File path is required", HelpOptions.help()); } std::filesystem::path FilePath = std::filesystem::absolute(Input); if (!std::filesystem::exists(FilePath)) { throw std::runtime_error(fmt::format("File not found: {}", FilePath)); } return FilePath; } void RunInspect(const std::filesystem::path& FilePath) { ::DataSource Source(FilePath); SessionAnalyzer SessionAn; ::Dispatcher Dispatch; Dispatch.add_analyzer(SessionAn); // Collect type schemas struct TypeSchema { std::string FullName; uint32_t Uid = 0; uint32_t FieldCount = 0; uint32_t Flags = 0; uint64_t EventCount = 0; eastl::vector FieldNames; eastl::vector FieldSizes; eastl::vector FieldTypeInfos; }; eastl::map Schemas; TraceSummary Summary = IterateTrace(Source, [&](const ::EventParcel& Parcel) { Dispatch.on_parcel(Parcel); for (const ::Type* TraceType : Parcel.new_types) { auto [LoggerName, EventName] = TraceType->get_name(); uint32_t Uid = TraceType->get_uid(); TypeSchema& Schema = Schemas[Uid]; Schema.FullName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName)); Schema.Uid = Uid; Schema.FieldCount = TraceType->get_field_count(); Schema.Flags = 0; if (TraceType->has_flag(TYPE_FLAG_IMPORTANT)) { Schema.Flags |= TYPE_FLAG_IMPORTANT; } if (TraceType->has_flag(TYPE_FLAG_AUX)) { Schema.Flags |= TYPE_FLAG_AUX; } for (uint32_t I = 0; I < Schema.FieldCount; I++) { auto [FieldName, Field] = TraceType->get_field_info(I); Schema.FieldNames.emplace_back(FieldName); Schema.FieldSizes.push_back(Field.get_size()); Schema.FieldTypeInfos.push_back(Field.get_type_info()); } } for (const ::Event& Ev : Parcel.events) { auto It = Schemas.find(Ev.uid); if (It != Schemas.end()) { It->second.EventCount++; } } }); // -- Session info -- PrintSessionInfo(SessionAn); ZEN_CONSOLE("Trace: {}", FilePath); ZEN_CONSOLE("Size: {}", zen::NiceBytes(uint64_t(std::filesystem::file_size(FilePath)))); ZEN_CONSOLE("Events: {}", zen::ThousandsNum(Summary.TotalEvents)); ZEN_CONSOLE("Threads: {}", Summary.Threads.size()); ZEN_CONSOLE("Types: {}", Schemas.size()); // -- Thread names -- PrintThreadInfo(SessionAn); // -- Trace channels -- PrintChannelInfo(SessionAn); // -- Event schemas -- ZEN_CONSOLE(""); ZEN_CONSOLE("Event Schemas:"); ZEN_CONSOLE(""); eastl::vector SortedSchemas; SortedSchemas.reserve(Schemas.size()); for (const auto& [Uid, Schema] : Schemas) { SortedSchemas.push_back(&Schema); } eastl::sort(SortedSchemas.begin(), SortedSchemas.end(), [](const auto* A, const auto* B) { return A->FullName < B->FullName; }); auto FieldTypeStr = [](uint32_t TypeInfo, uint32_t Size) -> std::string_view { uint32_t Cat = TypeInfo & TYPE_INFO_CAT_MASK; if (Cat == TYPE_INFO_CAT_ARRAY) { return "array"; } if (Cat == TYPE_INFO_CAT_FLOAT) { return (Size == 8) ? "float64" : "float32"; } bool IsSigned = (TypeInfo & TYPE_INFO_SPECIAL_MASK) == TYPE_INFO_SPECIAL_SIGNED; switch (Size) { case 1: return IsSigned ? "int8" : "uint8"; case 2: return IsSigned ? "int16" : "uint16"; case 4: return IsSigned ? "int32" : "uint32"; case 8: return IsSigned ? "int64" : "uint64"; default: return "unknown"; } }; for (const TypeSchema* Schema : SortedSchemas) { std::string Flags; if (Schema->Flags & TYPE_FLAG_IMPORTANT) { Flags += " [important]"; } if (Schema->Flags & TYPE_FLAG_AUX) { Flags += " [aux]"; } ZEN_CONSOLE("{} (uid={}, events={}){}", Schema->FullName, Schema->Uid, zen::ThousandsNum(Schema->EventCount), Flags); for (uint32_t I = 0; I < Schema->FieldCount; I++) { ZEN_CONSOLE(" {} {}", FieldTypeStr(Schema->FieldTypeInfos[I], Schema->FieldSizes[I]), Schema->FieldNames[I]); } if (Schema->FieldCount > 0) { ZEN_CONSOLE(""); } } } // Build a single LOD level by merging Lod0 scopes below the given resolution. // Lod0 must already be sorted by BeginUs. Safe to call concurrently for // different (Level, Resolution) pairs sharing the same Lod0. static void BuildSingleLod(const eastl::vector& Lod0, TimelineDetailLevel& Level, uint32_t Resolution) { Level.ResolutionUs = Resolution; // Per-depth merge accumulators. Since depths are typically small (< 64), // a flat array indexed by depth is more cache-friendly than a hash map. struct PendingMerge { uint32_t BeginUs = 0; uint32_t EndUs = 0; uint32_t NameId = 0; uint32_t MaxChildDur = 0; uint16_t Depth = 0; uint16_t Count = 0; bool Active = false; }; eastl::vector Pending(64); // grows if needed auto FlushPending = [&Level](PendingMerge& P) { if (!P.Active) { return; } Level.Scopes.push_back(TimelineScope{ .BeginUs = P.BeginUs, .DurationUs = P.EndUs - P.BeginUs, .NameId = P.NameId, .Depth = P.Depth, .MergeCount = P.Count, }); P.Active = false; }; // Single O(n) sweep over LOD 0 scopes (sorted by BeginUs). For each // depth, merge adjacent small scopes that fall within one resolution // bucket of each other. Large scopes (>= Resolution) pass through. for (const TimelineScope& Scope : Lod0) { uint16_t Depth = Scope.Depth; if (Depth >= Pending.size()) { Pending.resize(Depth + 1); } if (Scope.DurationUs >= Resolution) { // Large scope -- flush any pending merge for this depth, // then emit the scope un-merged. FlushPending(Pending[Depth]); Level.Scopes.push_back(TimelineScope{ .BeginUs = Scope.BeginUs, .DurationUs = Scope.DurationUs, .NameId = Scope.NameId, .Depth = Scope.Depth, .MergeCount = 1, }); continue; } PendingMerge& P = Pending[Depth]; uint32_t EndUs = Scope.BeginUs + Scope.DurationUs; if (P.Active && Scope.BeginUs < P.EndUs + Resolution) { // Extend the pending merge. if (EndUs > P.EndUs) { P.EndUs = EndUs; } ++P.Count; if (Scope.DurationUs > P.MaxChildDur) { P.MaxChildDur = Scope.DurationUs; P.NameId = Scope.NameId; } } else { // Start a new pending merge (flush previous if any). FlushPending(P); P.BeginUs = Scope.BeginUs; P.EndUs = EndUs; P.NameId = Scope.NameId; P.MaxChildDur = Scope.DurationUs; P.Depth = Scope.Depth; P.Count = 1; P.Active = true; } } // Flush remaining per-depth accumulators. for (PendingMerge& P : Pending) { FlushPending(P); } // Sort by (BeginUs, Depth) -- the per-depth flush may have interleaved // entries from different depths. Tie-breaking on depth keeps the // ordering consistent with LOD 0 (parents before nested children) so // the front-end never sees a child rendered before its parent. eastl::sort(Level.Scopes.begin(), Level.Scopes.end(), [](const TimelineScope& A, const TimelineScope& B) { if (A.BeginUs != B.BeginUs) { return A.BeginUs < B.BeginUs; } return A.Depth < B.Depth; }); } void BuildTimelineLods(ThreadTimeline& Timeline) { if (Timeline.Scopes.empty()) { return; } for (size_t LodIdx = 0; LodIdx < kTimelineLodCount; ++LodIdx) { BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[LodIdx], kTimelineLodResolutions[LodIdx]); } } namespace { // Post-iteration phases, extracted from BuildTraceModel for clarity. Each one // runs after the event-iteration pass has populated the analyzers and mutates // only the pieces of TraceModel it owns. void ComputeScopeStats(const TimelineAnalyzer& TimelineAn, TraceModel& Model) { const eastl::vector& ScopeNames = TimelineAn.ScopeNames(); eastl::vector Dists(ScopeNames.size()); eastl::vector Mins(ScopeNames.size(), ~0u); eastl::vector Maxs(ScopeNames.size(), 0u); for (const auto& [Tid, Thread] : TimelineAn.Threads()) { for (const TimelineScope& Scope : Thread.Scopes) { if (Scope.NameId >= Dists.size()) { continue; } Dists[Scope.NameId].add(double(Scope.DurationUs)); Mins[Scope.NameId] = std::min(Mins[Scope.NameId], Scope.DurationUs); Maxs[Scope.NameId] = std::max(Maxs[Scope.NameId], Scope.DurationUs); } } Model.ScopeStats.reserve(ScopeNames.size()); for (size_t I = 0; I < ScopeNames.size(); ++I) { if (Dists[I].Count() == 0) { continue; } CpuScopeStat Entry; Entry.Name = ScopeNames[I]; Entry.Count = Dists[I].Count(); Entry.MinUs = Mins[I]; Entry.MaxUs = Maxs[I]; Entry.MeanUs = Dists[I].Mean(); Entry.StdDevUs = Dists[I].StdDev(); Model.ScopeStats.push_back(std::move(Entry)); } eastl::sort(Model.ScopeStats.begin(), Model.ScopeStats.end(), [](const CpuScopeStat& A, const CpuScopeStat& B) { return A.Count > B.Count; }); } // Translate each LogEntry's captured CategoryIndex (a sequential id keyed on // the source category pointer) into the flat LogCategories index the frontend // consumes. Entries whose category pointer never got a matching LogCategory // event are bucketed into a synthetic "(unknown)" category. void ResolveLogCategories(LogAnalyzer& LogAn, TraceModel& Model) { const eastl::hash_map& CategoryPtrToSeqIdx = LogAn.CategoryPointerIndex(); eastl::hash_map RealPtrToFlatIdx; Model.LogCategories = LogAn.BuildCategories(RealPtrToFlatIdx); const uint32_t UnknownIdx = uint32_t(Model.LogCategories.size()); Model.LogCategories.push_back(LogCategoryInfo{.Name = "(unknown)", .DefaultVerbosity = 0}); eastl::vector SeqToFlat(CategoryPtrToSeqIdx.size(), UnknownIdx); for (const auto& [Ptr, SeqIdx] : CategoryPtrToSeqIdx) { auto It = RealPtrToFlatIdx.find(Ptr); if (It != RealPtrToFlatIdx.end()) { SeqToFlat[SeqIdx] = It->second; } } Model.LogEntries = LogAn.MutableEntries(); for (LogEntry& E : Model.LogEntries) { E.CategoryIndex = (E.CategoryIndex < SeqToFlat.size()) ? SeqToFlat[E.CategoryIndex] : UnknownIdx; } eastl::sort(Model.LogEntries.begin(), Model.LogEntries.end(), [](const LogEntry& A, const LogEntry& B) { return A.TimeUs < B.TimeUs; }); } // Finalize any still-open regions, group by category, and greedily pack each // category's regions into non-overlapping lanes so the frontend can stack them // without re-running collision detection. void BuildRegionCategories(eastl::vector&& AllRegions, uint32_t TraceEndUs, TraceModel& Model) { for (RegionEntry& R : AllRegions) { if (R.EndUs == ~uint32_t(0)) { R.EndUs = TraceEndUs; } if (R.EndUs < R.BeginUs) { R.EndUs = R.BeginUs; } } eastl::map> ByCategory; for (RegionEntry& R : AllRegions) { ByCategory[R.Category].push_back(std::move(R)); } for (auto& [CatName, Regions] : ByCategory) { eastl::sort(Regions.begin(), Regions.end(), [](const RegionEntry& A, const RegionEntry& B) { if (A.BeginUs != B.BeginUs) { return A.BeginUs < B.BeginUs; } return A.EndUs < B.EndUs; }); eastl::vector LaneEndUs; uint32_t MaxLane = 0; for (RegionEntry& R : Regions) { uint16_t Depth = 0; bool Assigned = false; for (size_t I = 0; I < LaneEndUs.size(); ++I) { if (LaneEndUs[I] <= R.BeginUs) { Depth = uint16_t(I); LaneEndUs[I] = R.EndUs; Assigned = true; break; } } if (!Assigned) { Depth = uint16_t(LaneEndUs.size()); LaneEndUs.push_back(R.EndUs); } R.Depth = Depth; if (Depth + 1u > MaxLane) { MaxLane = Depth + 1u; } } RegionCategory Cat; Cat.Name = CatName; Cat.LaneCount = MaxLane; Cat.Regions = std::move(Regions); Model.RegionCategories.push_back(std::move(Cat)); } // Sort: uncategorized (empty name) first, then alphabetical. eastl::sort(Model.RegionCategories.begin(), Model.RegionCategories.end(), [](const RegionCategory& A, const RegionCategory& B) { if (A.Name.empty() != B.Name.empty()) { return A.Name.empty(); } return A.Name < B.Name; }); } // Map callstack frame addresses to (module, offset) pairs using a sorted // (Base, End) lookup over the already-populated Model.Modules. void ResolveCallstacks(const ModuleAnalyzer& ModuleAn, const CallstackAnalyzer& CallstackAn, AllocationAnalyzer& AllocAn, TraceModel& Model) { const auto& RawCallstacks = CallstackAn.RawCallstacks(); struct ModuleLookup { uint64_t Base; uint64_t End; uint32_t ModelIndex; }; eastl::vector Lookup; Lookup.reserve(ModuleAn.ModulesByBase.size()); for (const auto& [Base, Info] : ModuleAn.ModulesByBase) { for (uint32_t I = 0; I < Model.Modules.size(); ++I) { if (Model.Modules[I].Base == Base) { Lookup.push_back({Base, Base + Info.Size, I}); break; } } } eastl::sort(Lookup.begin(), Lookup.end(), [](const ModuleLookup& A, const ModuleLookup& B) { return A.Base < B.Base; }); auto ResolveFrame = [&Lookup](uint64_t Address) -> ResolvedFrame { ResolvedFrame F; F.Address = Address; auto It = eastl::upper_bound(Lookup.begin(), Lookup.end(), Address, [](uint64_t Addr, const ModuleLookup& M) { return Addr < M.Base; }); if (It != Lookup.begin()) { --It; if (Address < It->End) { F.ModuleIndex = It->ModelIndex; F.Offset = Address - It->Base; } } return F; }; eastl::vector SortedCallstackIds; SortedCallstackIds.reserve(RawCallstacks.size()); for (const auto& [Id, RawFrames] : RawCallstacks) { ZEN_UNUSED(RawFrames); SortedCallstackIds.push_back(Id); } eastl::sort(SortedCallstackIds.begin(), SortedCallstackIds.end()); Model.Callstacks.reserve(RawCallstacks.size()); for (uint32_t Id : SortedCallstackIds) { auto RawIt = RawCallstacks.find(Id); ZEN_ASSERT(RawIt != RawCallstacks.end()); const eastl::vector& RawFrames = RawIt->second; CallstackEntry Entry; Entry.Id = Id; Entry.Frames.reserve(RawFrames.size()); for (uint64_t Addr : RawFrames) { Entry.Frames.push_back(ResolveFrame(Addr)); } Model.Callstacks.push_back(std::move(Entry)); } Model.CallstackStats = AllocAn.BuildCallstackStats(); Model.ChurnStats = AllocAn.BuildChurnStats(~uint64_t(0)); Model.AllocSizeHistogram = AllocAn.BuildSizeHistogram(); } } // namespace TraceModel BuildTraceModel(const std::filesystem::path& FilePath, WorkerThreadPool& ThreadPool, const ProgressCallback& OnProgress) { ::DataSource Source(FilePath); TraceTiming Timing; SessionAnalyzer SessionAn; ModuleAnalyzer ModuleAn; MetadataRegistry MetadataReg; TimelineAnalyzer TimelineAn(&MetadataReg, &Timing); LogAnalyzer LogAn(&Timing); BookmarksAnalyzer BookmarkAn(&Timing); CsvProfilerAnalyzer CsvAn(&Timing); AllocationAnalyzer AllocAn(&Timing); CallstackAnalyzer CallstackAn; // Tourist's Dispatcher only supports one subscription per event type, so we // cannot run CpuAnalyzer alongside TimelineAnalyzer -- CpuAnalyzer would // claim the CpuProfiler.Event* types first and TimelineAnalyzer would // never receive any events. Instead, TimelineAnalyzer captures every // scope interval and we derive the aggregate statistics from those // intervals in a cheap post-pass below. ::Dispatcher Dispatch; Dispatch.add_analyzer(SessionAn); Dispatch.add_analyzer(ModuleAn); Dispatch.add_analyzer(MetadataReg); Dispatch.add_analyzer(TimelineAn); Dispatch.add_analyzer(LogAn); Dispatch.add_analyzer(BookmarkAn); Dispatch.add_analyzer(CsvAn); Dispatch.add_analyzer(AllocAn); Dispatch.add_analyzer(CallstackAn); zen::Stopwatch Timer; TraceSummary Summary = IterateTrace( Source, [&](const ::EventParcel& Parcel) { Dispatch.on_parcel(Parcel); }, OnProgress); ZEN_INFO("Trace iteration complete: {} events in {}", zen::ThousandsNum(Summary.TotalEvents), zen::NiceTimeSpanMs(Timer.GetElapsedTimeMs())); { uint32_t StartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs(); uint32_t EndUs = TimelineAn.MaxEndUs(); uint64_t DurationMs = (EndUs > StartUs) ? (uint64_t(EndUs - StartUs) + 500) / 1000 : 0; ZEN_INFO("Trace duration: {}", zen::NiceTimeSpanMs(DurationMs)); } TraceModel Model; Model.FilePath = FilePath; Model.FileSize = uint64_t(std::filesystem::file_size(FilePath)); Model.TotalEvents = Summary.TotalEvents; Model.ParseTimeMs = Timer.GetElapsedTimeMs(); Model.Session = SessionAn.Session; // Event type counts (sorted by count descending) Model.EventTypeCounts.reserve(Summary.TypeInfo.size()); for (auto& [Uid, Info] : Summary.TypeInfo) { Model.EventTypeCounts.push_back({std::move(Info.first), Info.second}); } eastl::sort(Model.EventTypeCounts.begin(), Model.EventTypeCounts.end(), [](const auto& A, const auto& B) { return A.Count > B.Count; }); // Flatten and sort threads by sort hint Model.Threads.reserve(SessionAn.ThreadNames.size()); for (const auto& [Tid, Info] : SessionAn.ThreadNames) { Model.Threads.push_back(Info); } eastl::sort(Model.Threads.begin(), Model.Threads.end(), [](const ThreadInfoEntry& A, const ThreadInfoEntry& B) { return A.SortHint < B.SortHint; }); // Flatten and sort channels by name Model.Channels.reserve(SessionAn.Channels.size()); for (const auto& [Id, Info] : SessionAn.Channels) { Model.Channels.push_back(Info); } eastl::sort(Model.Channels.begin(), Model.Channels.end(), [](const ChannelInfo& A, const ChannelInfo& B) { return A.Name < B.Name; }); { ExtendableStringBuilder<512> Enabled; for (const ChannelInfo& Ch : Model.Channels) { if (Ch.Enabled) { if (Enabled.Size() > 0) { Enabled.Append(", "); } Enabled.Append(Ch.Name); } } if (Enabled.Size() > 0) { ZEN_INFO("Enabled channels: {}", Enabled); } } // Flatten and sort modules by name Model.Modules.reserve(ModuleAn.ModulesByBase.size()); for (const auto& [Base, Info] : ModuleAn.ModulesByBase) { Model.Modules.push_back(Info); } eastl::sort(Model.Modules.begin(), Model.Modules.end(), [](const ModuleInfo& A, const ModuleInfo& B) { return A.Name < B.Name; }); // CPU scope statistics and timeline building read from TimelineAn // independently and write to separate Model fields, so overlap them. Model.ScopeNames = TimelineAn.ScopeNames(); ZEN_INFO("Computing CPU scope statistics ({} scope names)", TimelineAn.ScopeNames().size()); // Kick off scope stats on a worker -- runs concurrently with the // timeline copy + sort below. Latch StatsLatch(1); ThreadPool.ScheduleWork( [&StatsLatch, &Model, &TimelineAn]() { auto _ = MakeGuard([&StatsLatch]() { StatsLatch.CountDown(); }); ComputeScopeStats(TimelineAn, Model); }, WorkerThreadPool::EMode::EnableBacklog); // Timelines -- build per-thread sort + LODs in parallel. { const auto& Threads = TimelineAn.Threads(); size_t TotalScopes = 0; for (const auto& [Tid, Thread] : Threads) { TotalScopes += Thread.Scopes.size(); } ZEN_INFO("Building timelines: {} threads, {} scopes (sort + LODs)", Threads.size(), zen::ThousandsNum(TotalScopes)); Model.Timelines.resize(Threads.size()); // Populate timeline metadata on the main thread (cheap lookups). size_t Idx = 0; for (const auto& [Tid, Thread] : Threads) { ThreadTimeline& Timeline = Model.Timelines[Idx++]; Timeline.ThreadId = Tid; auto It = SessionAn.ThreadNames.find(Tid); if (It != SessionAn.ThreadNames.end()) { Timeline.Name = It->second.Name; Timeline.SortHint = It->second.SortHint; } Timeline.Scopes = Thread.Scopes; } // Phase 1: Sort LOD 0 scopes per thread. // ParallelSort fans out internally using the pool, so it must be // called from the main thread to avoid nested fan-out deadlocks. // Small timelines are dispatched to workers first (they just call // eastl::sort -- no nesting). Then large ones are sorted one at a // time from the main thread with full pool utilisation each. // // Tie-break on Depth so that scopes which start at the same micro // timestamp come out parent-first (lower depth wins). This keeps // the scope ordering well-defined and lets the front-end rely on // outer scopes appearing before their nested children regardless // of the order the analyzer happened to emit them. { auto Cmp = [](const TimelineScope& A, const TimelineScope& B) { if (A.BeginUs != B.BeginUs) { return A.BeginUs < B.BeginUs; } return A.Depth < B.Depth; }; if constexpr (kUseParallelSort) { constexpr size_t kParallelThreshold = 65536; // Dispatch small timelines to workers. Latch SmallLatch(1); for (size_t I = 0; I < Model.Timelines.size(); ++I) { if (Model.Timelines[I].Scopes.size() >= kParallelThreshold) { continue; } SmallLatch.AddCount(1); ThreadPool.ScheduleWork( [&SmallLatch, &Cmp, &Timeline = Model.Timelines[I]]() { auto _ = MakeGuard([&SmallLatch]() { SmallLatch.CountDown(); }); eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); }, WorkerThreadPool::EMode::EnableBacklog); } SmallLatch.CountDown(); SmallLatch.Wait(); // Sort large timelines from the main thread so ParallelSort // can fan out across the (now idle) pool without deadlocking. for (ThreadTimeline& Timeline : Model.Timelines) { if (Timeline.Scopes.size() >= kParallelThreshold) { zen::ParallelSort(ThreadPool, Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); } } } else { Latch SortLatch(1); for (size_t I = 0; I < Model.Timelines.size(); ++I) { SortLatch.AddCount(1); ThreadPool.ScheduleWork( [&SortLatch, &Cmp, &Timeline = Model.Timelines[I]]() { auto _ = MakeGuard([&SortLatch]() { SortLatch.CountDown(); }); eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); }, WorkerThreadPool::EMode::EnableBacklog); } SortLatch.CountDown(); SortLatch.Wait(); } } // Phase 2: Build LOD levels -- one task per (thread, LOD) pair. // Flat dispatch avoids nested fan-out which could deadlock the pool. Latch LodLatch(1); for (size_t I = 0; I < Model.Timelines.size(); ++I) { if (Model.Timelines[I].Scopes.empty()) { continue; } for (size_t L = 0; L < kTimelineLodCount; ++L) { LodLatch.AddCount(1); ThreadPool.ScheduleWork( [&LodLatch, &Timeline = Model.Timelines[I], L]() { auto _ = MakeGuard([&LodLatch]() { LodLatch.CountDown(); }); BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[L], kTimelineLodResolutions[L]); }, WorkerThreadPool::EMode::EnableBacklog); } } LodLatch.CountDown(); LodLatch.Wait(); } eastl::sort(Model.Timelines.begin(), Model.Timelines.end(), [](const ThreadTimeline& A, const ThreadTimeline& B) { return A.SortHint < B.SortHint; }); Model.TraceStartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs(); Model.TraceEndUs = TimelineAn.MaxEndUs(); // Ensure scope stats computation (kicked off earlier) has finished. StatsLatch.Wait(); ZEN_INFO("Processing {} log entries", zen::ThousandsNum(LogAn.Entries().size())); ResolveLogCategories(LogAn, Model); ZEN_INFO("Sorting {} bookmarks, {} regions", BookmarkAn.MutableBookmarks().size(), BookmarkAn.MutableRegions().size()); // Bookmarks: move and sort by TimeUs. Model.Bookmarks = std::move(BookmarkAn.MutableBookmarks()); eastl::sort(Model.Bookmarks.begin(), Model.Bookmarks.end(), [](const Bookmark& A, const Bookmark& B) { return A.TimeUs < B.TimeUs; }); BuildRegionCategories(std::move(BookmarkAn.MutableRegions()), Model.TraceEndUs, Model); // CsvProfiler data { Model.CsvCategories = std::move(CsvAn.MutableCategories()); Model.CsvStatDefs = std::move(CsvAn.MutableStatDefs()); Model.CsvTimeSeries = CsvAn.BuildTimeSeries(); Model.CsvEvents = std::move(CsvAn.MutableEvents()); eastl::sort(Model.CsvEvents.begin(), Model.CsvEvents.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; }); Model.CsvMetadata = std::move(CsvAn.MutableMetadata()); ZEN_INFO("CSV profiler: {} categories, {} stats, {} series, {} events", Model.CsvCategories.size(), Model.CsvStatDefs.size(), Model.CsvTimeSeries.size(), Model.CsvEvents.size()); } // Memory allocation data { AllocAn.EmitFinalSample(Model.TraceEndUs); Model.AllocSummary = AllocAn.Summary(); // Flatten heaps map into sorted vector Model.Heaps.reserve(AllocAn.Heaps().size()); for (const auto& [Id, Info] : AllocAn.Heaps()) { Model.Heaps.push_back(Info); } eastl::sort(Model.Heaps.begin(), Model.Heaps.end(), [](const HeapInfo& A, const HeapInfo& B) { return A.Id < B.Id; }); // Flatten tags map into sorted vector Model.Tags.reserve(AllocAn.Tags().size()); for (const auto& [Tag, Info] : AllocAn.Tags()) { Model.Tags.push_back(Info); } eastl::sort(Model.Tags.begin(), Model.Tags.end(), [](const TagInfo& A, const TagInfo& B) { return A.Tag < B.Tag; }); // Move timeline (already time-ordered from Marker events) Model.MemoryTimeline = std::move(AllocAn.MutableTimeline()); // Flatten per-root-heap stats into sorted vector Model.HeapStats.reserve(AllocAn.RootHeapStats().size()); for (const auto& [HeapId, Stat] : AllocAn.RootHeapStats()) { Model.HeapStats.push_back(Stat); } eastl::sort(Model.HeapStats.begin(), Model.HeapStats.end(), [](const HeapStat& A, const HeapStat& B) { return A.HeapId < B.HeapId; }); if (Model.AllocSummary.HasMemoryData) { ZEN_INFO("Memory: {} allocs, {} frees, peak {}, {} live, {} timeline samples", zen::ThousandsNum(Model.AllocSummary.TotalAllocs + Model.AllocSummary.TotalReallocAllocs), zen::ThousandsNum(Model.AllocSummary.TotalFrees + Model.AllocSummary.TotalReallocFrees), zen::NiceBytes(uint64_t(Model.AllocSummary.PeakBytes)), zen::ThousandsNum(Model.AllocSummary.LiveAllocations), zen::ThousandsNum(Model.MemoryTimeline.size())); } } ResolveCallstacks(ModuleAn, CallstackAn, AllocAn, Model); ZEN_INFO("Callstacks: {} unique, {} with live allocations", zen::ThousandsNum(Model.Callstacks.size()), zen::ThousandsNum(Model.CallstackStats.size())); return Model; } ////////////////////////////////////////////////////////////////////////////// // Trace trim // // The trim pipeline operates entirely at the raw packet level: a .utrace on // disk is identical to the wire format (see src/zenserver/trace/tracerecorder.cpp // for the capture-side passthrough), so trimming reduces to "copy the preamble, // then copy only the packets we want to keep". We never re-encode or re-emit // any events, which sidesteps the fact that Tourist has no writer path. // // The algorithm: // // 1. Slurp the input file into memory and walk raw packets using the // [size:uint16][thread_id:uint16][payload] framing. This gives an ordered // list of packet descriptors keyed by file offset. // // 2. Classify packets by their on-disk thread_id: // TID_TYPE -> always keep (type definitions) // TID_IMPORTANT -> always keep (events of types marked TYPE_FLAG_IMPORTANT, // i.e. session info, thread names, channel state, // log categories, CPU specs, etc.) // TID_SYNC -> always keep (transport barriers) // TID_NORMAL+ -> keep only if the packet's events overlap the window // // 3. For normal-thread packets, run Tourist's reader with a bundle of size 1 // so each Proto.read() scatters exactly one raw packet before emitting any // events. Before the read call, we record the file offset of the current // packet as the "latest packet" for its thread. TrimAnalyzer then decodes // CpuProfiler batch events and attributes their timestamp ranges back to // that thread's latest packet. The attribution can drift if Tourist // buffers multiple packets on one thread, but the failure mode is that // earlier packets lose attribution and are conservatively retained. // // 4. Write the output: the preamble bytes verbatim, followed by the raw // bytes of each kept packet in original order. There is no trailer; the // Tourist reader catches DataStream::Eof at the end of the stream. // // Coarse per-packet precision is accepted by design: a packet straddling a // window edge is kept in full. CpuProfiler batches are self-contained per // packet (each re-derives cycles from the trace-wide StartCycle), so dropping // packets does not desync delta decoding on surviving ones, and orphaned leave // events from half-open scopes are silently ignored by decoders. namespace { struct TrimPacketDesc { uint64_t FileOffset = 0; // offset of the [size:uint16] header in the file uint32_t Size = 0; // total size including the 4-byte header uint16_t ThreadIdRaw = 0; // thread_id as stored on disk, including PACKET_FLAG_COMPRESSED }; // Parses the .utrace preamble in place to determine the byte offset where // packets begin. Mirrors Preamble::parse_header in Tourist so we can run the // raw walker without spinning up a second DataSource. Throws on a malformed // preamble. static uint64_t ParsePreambleLength(const uint8_t* Data, uint64_t Size) { if (Size < 8) { throw zen::runtime_error("Trace file too small to contain a preamble ({} bytes)", Size); } uint32_t Magic = 0; std::memcpy(&Magic, Data, sizeof(uint32_t)); if (Magic != 'TRC2') { throw zen::runtime_error("Unexpected trace file magic value 0x{:08x}", Magic); } uint16_t MetaSize = 0; std::memcpy(&MetaSize, Data + 4, sizeof(uint16_t)); // magic(4) + meta_size(2) + metadata + transport(1) + protocol(1) uint64_t PreambleLen = uint64_t(4) + 2 + MetaSize + 1 + 1; if (PreambleLen > Size) { throw zen::runtime_error("Trace preamble extends past end of file ({} > {})", PreambleLen, Size); } return PreambleLen; } // Walks raw packets starting at PreambleLen. Returns one TrimPacketDesc per // packet in original stream order. The walker stops gracefully on truncated // data so partial traces still produce a usable packet list. static eastl::vector WalkRawPackets(const uint8_t* Data, uint64_t Size, uint64_t PreambleLen) { eastl::vector Packets; uint64_t Offset = PreambleLen; while (Offset + 4 <= Size) { uint16_t PacketSize = 0; uint16_t ThreadIdRaw = 0; std::memcpy(&PacketSize, Data + Offset, sizeof(uint16_t)); std::memcpy(&ThreadIdRaw, Data + Offset + 2, sizeof(uint16_t)); if (PacketSize < 4) { // Malformed size; stop walking and accept whatever we have. break; } if (Offset + PacketSize > Size) { // Truncated tail -- drop it. break; } TrimPacketDesc Desc; Desc.FileOffset = Offset; Desc.Size = PacketSize; Desc.ThreadIdRaw = ThreadIdRaw; Packets.push_back(Desc); Offset += PacketSize; } return Packets; } } // namespace void RunTraceTrim(const TraceTrimArgs& Args) { if (!(Args.EndSec > Args.StartSec)) { throw zen::runtime_error("Invalid trim range: start={} end={}", Args.StartSec, Args.EndSec); } // --- Read the input file --- zen::BasicFile InputFile(Args.InputPath, zen::BasicFile::Mode::kRead); zen::IoBuffer InputBuffer = InputFile.ReadAll(); InputFile.Close(); const uint8_t* FileBytes = static_cast(InputBuffer.GetData()); const uint64_t FileSize = InputBuffer.GetSize(); const uint64_t PreambleLen = ParsePreambleLength(FileBytes, FileSize); // --- Raw packet walk --- eastl::vector Packets = WalkRawPackets(FileBytes, FileSize, PreambleLen); if (Packets.empty()) { throw zen::runtime_error("Trace file contains no packets"); } // Initial keep classification: definitions, important events, sync are // always retained. Normal-thread packets start as drop candidates and get // promoted if their decoded time range overlaps the window. eastl::vector Keep(Packets.size(), 0); size_t NumAlwaysKept = 0; for (size_t I = 0; I < Packets.size(); ++I) { uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; if (Tid == TID_TYPE || Tid == TID_IMPORTANT || Tid == TID_SYNC) { Keep[I] = 1; ++NumAlwaysKept; } } // --- Time-range classification via Tourist (bundle of 1) --- TrimAnalyzer TrimAn; TrimAn.EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6); ::Dispatcher Dispatch; Dispatch.add_analyzer(TrimAn); { ::DataSource Source(Args.InputPath); ::Allocator TraceAllocator; ::Preamble Pream(Source, TraceAllocator); ::Transport Xport = Pream.get_transport(); ::Protocol Proto = Pream.get_protocol(); ::Packet OnePacket[1]; ::EventParcel Parcel; try { while (::Bundle Bndl = Xport.read_packets(OnePacket)) { if (Bndl.empty()) { break; } const ::Packet& P = Bndl[0]; uint32_t Tid = P.get_thread_id(); if (Tid >= TID_NORMAL && Tid != TID_SYNC) { // Tourist's Packet::get_index() is the same sequential // packet counter as our raw walker's vector position, // since both read the stream from the start in order. TrimAn.LastPacketIndexByThread[Tid] = P.get_index(); } Parcel.reset(); Proto.read(Parcel, Bndl); Dispatch.on_parcel(Parcel); } } catch (const DataStream::Eof&) { } catch (const Exception::StreamError& E) { throw zen::runtime_error("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value); } } // --- Apply the window filter --- // // Per-packet filtering in the middle of a thread's stream is unsafe: // Tourist's event parser holds per-thread continuation state (see // EventParser::_fragment / _missing in // thirdparty/tourist/trace/src/protocol.cpp) so an event can straddle a // packet boundary on a normal thread. Removing a packet from the middle // leaves subsequent packets on the same thread decoded against the wrong // position in an in-flight event and Tourist crashes. We therefore only // drop packets in two safe ways: // // 1. Whole-thread drop: a thread whose attributed packets are all // outside the window has every one of its packets dropped. No // surviving packet references that thread, so there is no state // machine to corrupt. // // 2. Per-thread tail truncation: for a thread that does have in-window // activity, drop every packet AFTER the latest in-window packet on // that thread. Tail drops are safe because no later packet on the // same thread can be looking forward to the dropped bytes; the // parser just ends its stream for that thread at the truncation // point, exactly like a trace that naturally stopped recording. // // Threads for which we never attributed any CpuProfiler batch events are // retained in full; we have no evidence about their time range and // can't safely drop them. const uint32_t StartUs = uint32_t(std::max(0.0, Args.StartSec) * 1e6); const uint32_t EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6); struct ThreadInfo { bool HasAnyBatch = false; bool HasInWindowBatch = false; // First packet index on this thread whose attributed CPU batches are // *entirely* past EndUs. Every packet on this thread with an index // >= this value is safe to tail-drop. Defaults to size_t(-1) (no cut // point) when the thread has no such packet. size_t FirstPastWindowIdx = size_t(-1); }; eastl::hash_map ThreadInfos; for (size_t I = 0; I < Packets.size(); ++I) { uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; if (Tid < TID_NORMAL || Tid == TID_SYNC) { continue; } auto RangeIt = TrimAn.PacketRanges.find(uint32_t(I)); if (RangeIt == TrimAn.PacketRanges.end()) { continue; } ThreadInfo& Info = ThreadInfos[Tid]; Info.HasAnyBatch = true; const auto& Range = RangeIt->second; if (Range.MaxUs >= StartUs && Range.MinUs <= EndUs) { Info.HasInWindowBatch = true; } if (Range.MinUs > EndUs && I < Info.FirstPastWindowIdx) { Info.FirstPastWindowIdx = I; } } size_t NumThreadsKept = 0; size_t NumThreadsDropped = 0; for (const auto& [Tid, Info] : ThreadInfos) { if (Info.HasInWindowBatch) { ++NumThreadsKept; } else { ++NumThreadsDropped; } } size_t NumInWindow = 0; size_t NumTailDropped = 0; size_t NumUnattributed = 0; size_t NumDropped = 0; for (size_t I = 0; I < Packets.size(); ++I) { if (Keep[I]) { continue; } uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; auto It = ThreadInfos.find(Tid); if (It == ThreadInfos.end() || !It->second.HasAnyBatch) { // We have no evidence for this thread's time range. Retain all // its packets conservatively to avoid breaking Tourist's per- // thread parser state. Keep[I] = 1; ++NumUnattributed; continue; } if (!It->second.HasInWindowBatch) { // Thread's attributed packets are all outside the window -- drop // every packet on this thread. ++NumDropped; continue; } if (I >= It->second.FirstPastWindowIdx) { // Past the first entirely-after-window packet on this thread -- // candidate for tail truncation. Before dropping, check whether // this packet carries a Leave event that closes a scope whose // Enter was at or before the window end. If so, we MUST keep it // so the downstream analyzer can render the long-running scope; // otherwise the scope would sit unmatched on the open stack. auto MustKeepIt = TrimAn.MustKeepPacketByThread.find(Tid); if (MustKeepIt != TrimAn.MustKeepPacketByThread.end() && I <= MustKeepIt->second) { Keep[I] = 1; ++NumInWindow; continue; } ++NumTailDropped; continue; } Keep[I] = 1; ++NumInWindow; } // --- Write output --- std::error_code Ec; std::filesystem::create_directories(Args.OutputPath.parent_path(), Ec); zen::BasicFile OutputFile(Args.OutputPath, zen::BasicFile::Mode::kTruncate); uint64_t OutOffset = 0; OutputFile.Write(FileBytes, PreambleLen, OutOffset); OutOffset += PreambleLen; uint64_t KeptBytes = 0; for (size_t I = 0; I < Packets.size(); ++I) { if (!Keep[I]) { continue; } OutputFile.Write(FileBytes + Packets[I].FileOffset, Packets[I].Size, OutOffset); OutOffset += Packets[I].Size; KeptBytes += Packets[I].Size; } OutputFile.Flush(); OutputFile.Close(); ZEN_CONSOLE("Trimmed trace written to {}", Args.OutputPath); ZEN_CONSOLE(" Input: {} ({} packets)", zen::NiceBytes(FileSize), zen::ThousandsNum(Packets.size())); ZEN_CONSOLE(" Output: {} ({} packets)", zen::NiceBytes(OutOffset), zen::ThousandsNum(NumAlwaysKept + NumInWindow + NumUnattributed)); ZEN_CONSOLE(" Always kept: {} packets (types / important / sync)", zen::ThousandsNum(NumAlwaysKept)); ZEN_CONSOLE(" Thread kept: {} packets from {} threads with in-window activity", zen::ThousandsNum(NumInWindow), zen::ThousandsNum(NumThreadsKept)); ZEN_CONSOLE(" Thread dropped: {} packets from {} threads with no in-window activity", zen::ThousandsNum(NumDropped), zen::ThousandsNum(NumThreadsDropped)); ZEN_CONSOLE(" Tail dropped: {} packets past the latest in-window packet on their thread", zen::ThousandsNum(NumTailDropped)); ZEN_CONSOLE(" Unattributed: {} packets (retained conservatively)", zen::ThousandsNum(NumUnattributed)); ZEN_UNUSED(KeptBytes); // --- Diagnostic: summarise the attributed time range distribution --- { uint32_t GlobalMin = ~0u; uint32_t GlobalMax = 0; for (const auto& [Idx, R] : TrimAn.PacketRanges) { GlobalMin = std::min(GlobalMin, R.MinUs); GlobalMax = std::max(GlobalMax, R.MaxUs); } ZEN_CONSOLE(" Attributed: {} packets, window {:.3f}s .. {:.3f}s", zen::ThousandsNum(TrimAn.PacketRanges.size()), double(GlobalMin) / 1e6, double(GlobalMax) / 1e6); } } } // namespace zen::trace_detail