diff options
Diffstat (limited to 'src/zen/trace/trace_model.cpp')
| -rw-r--r-- | src/zen/trace/trace_model.cpp | 3847 |
1 files changed, 3847 insertions, 0 deletions
diff --git a/src/zen/trace/trace_model.cpp b/src/zen/trace/trace_model.cpp new file mode 100644 index 000000000..f92b0c04a --- /dev/null +++ b/src/zen/trace/trace_model.cpp @@ -0,0 +1,3847 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "trace_model.h" + +#include <zencore/basicfile.h> +#include <zencore/except_fmt.h> +#include <zencore/fmtutils.h> +#include <zencore/intmath.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/timer.h> +#include <zenutil/parallelsort.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/hash_map.h> +#include <EASTL/map.h> +#include <EASTL/set.h> +#include <EASTL/sort.h> +#include <EASTL/vector.h> +#include <analysis/analyzer.h> +#include <analysis/dispatcher.h> +#include <trace/trace.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <algorithm> +#include <cmath> +#include <cstring> + +using namespace std::literals; + +// Toggle to A/B test cross-platform parallel sort vs sequential eastl::sort. +constexpr bool kUseParallelSort = true; + +namespace eastl { + +template<> +struct hash<std::string> +{ + size_t operator()(const std::string& S) const { return eastl::hash<const char*>()(S.c_str()); } +}; + +} // namespace eastl + +////////////////////////////////////////////////////////////////////////////// +// Trace analysis types (global namespace alongside tourist types) + +namespace { + +using zen::ReciprocalU64; + +// Welford's online algorithm for computing mean and standard deviation +class Distribution +{ +public: + void add(double X) + { + m_Count++; + if (m_Count == 1) + { + m_OldM = m_NewM = X; + m_OldS = 0.0; + } + else + { + m_NewM = m_OldM + (X - m_OldM) / double(m_Count); + m_NewS = m_OldS + (X - m_OldM) * (X - m_NewM); + m_OldM = m_NewM; + m_OldS = m_NewS; + } + } + + uint32_t Count() const { return m_Count; } + double Mean() const { return (m_Count > 0) ? m_NewM : 0.0; } + double Variance() const { return (m_Count > 1) ? m_NewS / double(m_Count - 1) : 0.0; } + double StdDev() const { return std::sqrt(Variance()); } + +private: + double m_OldM = 0.0; + double m_NewM = 0.0; + double m_OldS = 0.0; + double m_NewS = 0.0; + uint32_t m_Count = 0; +}; + +class NameDepot +{ +public: + uint64 Add(StringView Name) + { + uint64 NameHash = Hash(Name); + Add(NameHash, Name); + return NameHash; + } + + void Add(uint64 NameHash, StringView Name) + { + if (auto It = m_Names.insert({NameHash, String()}); It.second) + { + It.first->second = Name; + } + } + + StringView Get(uint64 NameHash) const + { + auto Iter = m_Names.find(NameHash); + if (Iter == m_Names.end()) + { + return "???"; + } + return Iter->second; + } + +private: + eastl::hash_map<uint64, String> m_Names; +}; + +struct CpuEventStat +{ + Distribution Dist; + uint32_t Min = ~0u; + uint32_t Max = 0; +}; + +class EventStats +{ +public: + void Record(uint64 NameHash, uint32 DurationUs) + { + CpuEventStat& Stat = m_Stats[NameHash]; + Stat.Min = std::min(Stat.Min, DurationUs); + Stat.Max = std::max(Stat.Max, DurationUs); + Stat.Dist.add(double(DurationUs)); + } + + auto begin() const { return m_Stats.begin(); } + auto end() const { return m_Stats.end(); } + bool empty() const { return m_Stats.empty(); } + +private: + eastl::hash_map<uint64, CpuEventStat> m_Stats; +}; + +////////////////////////////////////////////////////////////////////////////// +// Event outlines + +// clang-format off +begin_outline($Trace, NewTrace) + field(uint64, CycleFrequency) + field(uint64, StartCycle) +end_outline() + +begin_outline(CpuProfiler, EventSpec) + field(uint32, Id) + field(FieldStr, Name) +end_outline() + +begin_outline(CpuProfiler, EventBatch) + field(uint32, ThreadId) + field(uint8[], Data) +end_outline() + +begin_outline(CpuProfiler, EventBatchV2) + field(uint8[], Data) +end_outline() + +begin_outline(CpuProfiler, EventBatchV3) + field(uint8[], Data) +end_outline() + +begin_outline(CpuProfiler, Metadata) + field(uint32, Id) + field(uint32, SpecId) + field(uint8[], Metadata) +end_outline() + +begin_outline($Trace, ThreadInfo) + field(FieldStr, Name) + field(int32, SortHint) + field(uint32, ThreadId) + field(uint32, SystemId) +end_outline() + +begin_outline($Trace, ThreadGroupBegin) + field(FieldStr, Name) +end_outline() + +begin_outline($Trace, ThreadGroupEnd) +end_outline() + +begin_outline(Diagnostics, Session2) + field(FieldStr, Platform) + field(FieldStr, AppName) + field(FieldStr, ProjectName) + field(FieldStr, CommandLine) + field(FieldStr, Branch) + field(FieldStr, BuildVersion) + field(uint32, Changelist) + field(uint8, ConfigurationType) + field(uint8, TargetType) +end_outline() + +begin_outline(Diagnostics, ModuleInit) + field(FieldStr, SymbolFormat) + field(uint8, ModuleBaseShift) +end_outline() + +begin_outline(Diagnostics, ModuleLoad) + field(FieldStr, Name) + field(uint64, Base) + field(uint32, Size) + field(uint8[], ImageId) +end_outline() + +begin_outline(Diagnostics, ModuleUnload) + field(uint64, Base) +end_outline() + +begin_outline(Trace, ChannelAnnounce) + field(uint32, Id) + field(uint8, IsEnabled) + field(uint8, ReadOnly) + field(FieldStr, Name) +end_outline() + +begin_outline(Trace, ChannelToggle) + field(uint32, Id) + field(uint8, IsEnabled) +end_outline() + +begin_outline(Logging, LogCategory) + field(uint64, CategoryPointer) + field(uint8, DefaultVerbosity) + field(FieldStr, Name) +end_outline() + +begin_outline(Logging, LogMessageSpec) + field(uint64, LogPoint) + field(uint64, CategoryPointer) + field(int32, Line) + field(uint8, Verbosity) + field(FieldStr, FileName) + field(FieldStr, FormatString) +end_outline() + +begin_outline(Logging, LogMessage) + field(uint64, LogPoint) + field(uint64, Cycle) + field(uint8[], FormatArgs) +end_outline() + +begin_outline(Misc, BookmarkSpec) + field(uint64, BookmarkPoint) + field(int32, Line) + field(FieldStr, FormatString) + field(FieldStr, FileName) +end_outline() + +begin_outline(Misc, Bookmark) + field(uint64, Cycle) + field(uint64, BookmarkPoint) + field(uint8[], FormatArgs) +end_outline() + +begin_outline(Misc, RegionBegin) + field(uint64, Cycle) + field(uint8[], RegionName) + field(uint8[], Category) +end_outline() + +begin_outline(Misc, RegionBeginWithId) + field(uint64, CycleAndId) + field(uint8[], RegionName) + field(uint8[], Category) +end_outline() + +begin_outline(Misc, RegionEnd) + field(uint64, Cycle) + field(uint8[], RegionName) +end_outline() + +begin_outline(Misc, RegionEndWithId) + field(uint64, Cycle) + field(uint64, RegionId) +end_outline() + +// CsvProfiler events +begin_outline(CsvProfiler, RegisterCategory) + field(int32, Index) + field(uint8[], Name) +end_outline() + +begin_outline(CsvProfiler, DefineInlineStat) + field(uint64, StatId) + field(int32, CategoryIndex) + field(uint8[], Name) +end_outline() + +begin_outline(CsvProfiler, DefineDeclaredStat) + field(uint64, StatId) + field(int32, CategoryIndex) + field(uint8[], Name) +end_outline() + +begin_outline(CsvProfiler, BeginStat) + field(uint64, StatId) + field(uint64, Cycle) +end_outline() + +begin_outline(CsvProfiler, EndStat) + field(uint64, StatId) + field(uint64, Cycle) +end_outline() + +begin_outline(CsvProfiler, BeginExclusiveStat) + field(uint64, StatId) + field(uint64, Cycle) +end_outline() + +begin_outline(CsvProfiler, EndExclusiveStat) + field(uint64, StatId) + field(uint64, Cycle) +end_outline() + +begin_outline(CsvProfiler, CustomStatInt) + field(uint64, StatId) + field(uint64, Cycle) + field(int32, Value) + field(uint8, OpType) +end_outline() + +begin_outline(CsvProfiler, CustomStatFloat) + field(uint64, StatId) + field(uint64, Cycle) + field(float, Value) + field(uint8, OpType) +end_outline() + +begin_outline(CsvProfiler, Event) + field(uint64, Cycle) + field(int32, CategoryIndex) + field(uint8[], Text) +end_outline() + +begin_outline(CsvProfiler, BeginCapture) + field(uint64, Cycle) + field(uint32, RenderThreadId) + field(uint32, RHIThreadId) + field(uint8, EnableCounts) + field(uint8[], FileName) +end_outline() + +begin_outline(CsvProfiler, EndCapture) + field(uint64, Cycle) +end_outline() + +begin_outline(CsvProfiler, Metadata) + field(uint8[], Key) + field(uint8[], Value) +end_outline() + // clang-format on + + ////////////////////////////////////////////////////////////////////////////// + // Forward declarations needed by the helper analyzers below. + + using zen::trace_detail::SafeFieldStr; + +////////////////////////////////////////////////////////////////////////////// +// Minimal CBOR formatter +// +// CpuProfiler.Metadata payloads are CBOR-encoded (RFC 7049) blobs produced +// by UE's FCborWriter. We don't need a full decoder -- we just walk the +// bytes and append human-readable values to an output string. Handles the +// subset actually emitted by UE's metadata scopes: unsigned / negative +// integers, byte / text strings, arrays, maps, floats, and the boolean / +// null simple values. + +static bool CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth); + +static bool +CborReadArg(const uint8*& p, const uint8* end, uint8 info, uint64& value) +{ + if (info < 24) + { + value = info; + return true; + } + if (info == 24) + { + if (end - p < 1) + return false; + value = *p++; + return true; + } + if (info == 25) + { + if (end - p < 2) + return false; + value = (uint64(p[0]) << 8) | p[1]; + p += 2; + return true; + } + if (info == 26) + { + if (end - p < 4) + return false; + value = (uint64(p[0]) << 24) | (uint64(p[1]) << 16) | (uint64(p[2]) << 8) | p[3]; + p += 4; + return true; + } + if (info == 27) + { + if (end - p < 8) + return false; + value = 0; + for (int i = 0; i < 8; ++i) + { + value = (value << 8) | p[i]; + } + p += 8; + return true; + } + return false; +} + +static bool +CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth) +{ + if (depth > 4 || p >= end) + { + return false; + } + + const uint8 ib = *p++; + const uint8 major = ib >> 5; + const uint8 info = ib & 0x1f; + + switch (major) + { + case 0: // unsigned integer + { + uint64 v; + if (!CborReadArg(p, end, info, v)) + return false; + out += fmt::format("{}", v); + return true; + } + case 1: // negative integer: -1 - value + { + uint64 v; + if (!CborReadArg(p, end, info, v)) + return false; + out += fmt::format("{}", -1 - int64_t(v)); + return true; + } + case 2: // byte string + case 3: // text string + { + uint64 len; + if (!CborReadArg(p, end, info, len)) + return false; + if (len > uint64(end - p)) + return false; + out.append(reinterpret_cast<const char*>(p), size_t(len)); + p += len; + return true; + } + case 4: // array + { + uint64 count; + if (!CborReadArg(p, end, info, count)) + return false; + for (uint64 i = 0; i < count; ++i) + { + if (i > 0) + out += ", "; + if (!CborAppendValue(p, end, out, depth + 1)) + return false; + } + return true; + } + case 5: // map + { + uint64 count; + if (!CborReadArg(p, end, info, count)) + return false; + for (uint64 i = 0; i < count; ++i) + { + if (i > 0) + out += ", "; + if (!CborAppendValue(p, end, out, depth + 1)) + return false; + out += "="; + if (!CborAppendValue(p, end, out, depth + 1)) + return false; + } + return true; + } + case 7: // simple values / floats + { + if (info == 20) + { + out += "false"; + return true; + } + if (info == 21) + { + out += "true"; + return true; + } + if (info == 22) + { + out += "null"; + return true; + } + if (info == 26) + { + if (end - p < 4) + return false; + uint32 bits = (uint32(p[0]) << 24) | (uint32(p[1]) << 16) | (uint32(p[2]) << 8) | p[3]; + float v; + std::memcpy(&v, &bits, 4); + out += fmt::format("{}", v); + p += 4; + return true; + } + if (info == 27) + { + if (end - p < 8) + return false; + uint64 bits = 0; + for (int i = 0; i < 8; ++i) + { + bits = (bits << 8) | p[i]; + } + p += 8; + double v; + std::memcpy(&v, &bits, 8); + out += fmt::format("{}", v); + return true; + } + return false; + } + default: + return false; + } +} + +static std::string +FormatMetadataValues(const uint8_t* Bytes, size_t Size) +{ + std::string out; + if (Size == 0) + { + return out; + } + const uint8* p = Bytes; + CborAppendValue(p, Bytes + Size, out, 0); + return out; +} + +using zen::trace_detail::TraceTiming; + +////////////////////////////////////////////////////////////////////////////// +// Metadata registry +// +// Subscribes to CpuProfiler.Metadata events and stores each payload's +// CBOR-encoded bytes keyed by MetadataId, along with the SpecId they +// reference. Both CpuAnalyzer and TimelineAnalyzer query the registry when +// they encounter a V3 scope with the metadata bit set so the scope can be +// rendered as `{base name} - {formatted values}`. + +struct MetadataEntry +{ + uint32_t SpecId = 0; + eastl::vector<uint8_t> Bytes; +}; + +class MetadataRegistry : public Analyzer +{ +public: + void subscribe(Vector<Subscription>& Subs) override { Subs.emplace_back(this, &MetadataRegistry::OnMetadata); } + + const MetadataEntry* Lookup(uint32_t MetadataId) const + { + auto It = m_Entries.find(MetadataId); + return (It != m_Entries.end()) ? &It->second : nullptr; + } + +private: + void OnMetadata(const CpuProfiler_Metadata& Ev) + { + uint32_t MetadataId = Ev.Id(); + uint32_t SpecId = Ev.SpecId(); + Array<uint8[]> Data = Ev.Metadata(); + + MetadataEntry& Entry = m_Entries[MetadataId]; + Entry.SpecId = SpecId; + Entry.Bytes.assign(Data.get(), Data.get() + Data.get_size()); + } + + eastl::hash_map<uint32_t, MetadataEntry> m_Entries; +}; + +////////////////////////////////////////////////////////////////////////////// +// Log message formatting +// +// UE's trace emits log messages as a sequence of typed arguments that need +// to be substituted into a printf-style format string. The wire format is: +// +// [ArgumentCount: uint8] +// [Descriptors: uint8 * ArgumentCount] // each byte = category | size +// [Payload: bytes] +// +// Category bits live in the upper 2 bits (shifted by FormatArgTypeCode_- +// CategoryBitShift == 6): Integer=1, Float=2, String=3. The low 6 bits are +// the argument size in bytes; for strings the size is the per-character +// width (1 == ANSI, 2 == UTF-16). +// +// We walk the format string, extract each specifier, pull the matching arg +// from the stream and hand both to std::snprintf. Width/precision stars +// (e.g. "%*.*f") are not supported; they're rare in log formats. + +struct FormatArgStream +{ + const uint8_t* Descriptors; + const uint8_t* Payload; + uint8_t Remaining; + + bool HasNext() const { return Remaining > 0; } + + uint8_t PeekCategory() const { return (*Descriptors) & 0xC0; } + uint8_t PeekSize() const { return (*Descriptors) & 0x3F; } + + void Advance(size_t PayloadBytes) + { + Payload += PayloadBytes; + ++Descriptors; + --Remaining; + } +}; + +static bool +InitFormatArgStream(FormatArgStream& Ctx, const uint8_t* Data, size_t Size) +{ + if (!Data || Size == 0) + { + Ctx.Remaining = 0; + return false; + } + uint8_t Count = Data[0]; + if (size_t(1) + Count > Size) + { + Ctx.Remaining = 0; + return false; + } + Ctx.Descriptors = Data + 1; + Ctx.Payload = Data + 1 + Count; + Ctx.Remaining = Count; + return true; +} + +static bool +IsPrintfSpecifierChar(char c) +{ + switch (c) + { + case 'd': + case 'i': + case 'u': + case 'o': + case 'x': + case 'X': + case 'c': + case 'p': + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': + case 's': + case 'S': + case 'n': + return true; + default: + return false; + } +} + +static std::string +FormatLogMessage(std::string_view Format, const uint8_t* ArgsData, size_t ArgsSize) +{ + FormatArgStream Stream{}; + InitFormatArgStream(Stream, ArgsData, ArgsSize); + + std::string Out; + Out.reserve(Format.size() + 32); + + size_t i = 0; + while (i < Format.size()) + { + char c = Format[i]; + if (c != '%') + { + Out.push_back(c); + ++i; + continue; + } + + // Handle "%%" -> literal percent. + if (i + 1 < Format.size() && Format[i + 1] == '%') + { + Out.push_back('%'); + i += 2; + continue; + } + + // Walk the specifier until we find a terminating character. + size_t SpecStart = i++; + while (i < Format.size() && !IsPrintfSpecifierChar(Format[i])) + { + ++i; + } + if (i >= Format.size()) + { + // Truncated specifier -- copy the remainder literally. + Out.append(Format.substr(SpecStart)); + break; + } + + char Specifier = Format[i++]; + std::string Spec(Format.substr(SpecStart, i - SpecStart)); + + if (!Stream.HasNext()) + { + // Not enough arguments: emit the raw specifier so the user can + // at least tell something is missing. + Out.append(Spec); + continue; + } + + const uint8_t Category = Stream.PeekCategory(); + const uint8_t Size = Stream.PeekSize(); + + char Buf[512]; + Buf[0] = '\0'; + + if (Category == 0x40) // integer + { + uint64_t Raw = 0; + if (Size <= sizeof(Raw) && Size > 0) + { + std::memcpy(&Raw, Stream.Payload, Size); + } + + // Route through the correct snprintf type based on the + // specifier. Cast to int64_t for signed integer specifiers. + switch (Specifier) + { + case 'd': + case 'i': + { + // Sign-extend based on Size. + int64_t Signed = 0; + switch (Size) + { + case 1: + Signed = int8_t(Raw & 0xff); + break; + case 2: + Signed = int16_t(Raw & 0xffff); + break; + case 4: + Signed = int32_t(Raw & 0xffffffff); + break; + case 8: + Signed = int64_t(Raw); + break; + default: + Signed = int64_t(Raw); + break; + } + // Replace length modifier so snprintf interprets the + // correctly-sized value. Simplest: append "ll". + std::string AdjustedSpec = Spec; + AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll"); + std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast<long long>(Signed)); + break; + } + case 'u': + case 'o': + case 'x': + case 'X': + case 'p': + { + std::string AdjustedSpec = Spec; + AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll"); + std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast<unsigned long long>(Raw)); + break; + } + case 'c': + { + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), int(Raw & 0xff)); + break; + } + default: + std::snprintf(Buf, sizeof(Buf), "%llu", static_cast<unsigned long long>(Raw)); + break; + } + Stream.Advance(Size); + } + else if (Category == 0x80) // floating point + { + double Value = 0.0; + if (Size == 4) + { + float F; + std::memcpy(&F, Stream.Payload, 4); + Value = double(F); + } + else if (Size == 8) + { + std::memcpy(&Value, Stream.Payload, 8); + } + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Value); + Stream.Advance(Size); + } + else if (Category == 0xC0) // string + { + std::string Tmp; + if (Size == 1) + { + const char* S = reinterpret_cast<const char*>(Stream.Payload); + size_t Len = std::strlen(S); + Tmp.assign(S, Len); + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str()); + Stream.Advance(Len + 1); + } + else if (Size == 2) + { + const char16_t* W = reinterpret_cast<const char16_t*>(Stream.Payload); + size_t Len = 0; + while (W[Len] != 0) + ++Len; + Tmp.reserve(Len); + for (size_t k = 0; k < Len; ++k) + { + char16_t ch = W[k]; + Tmp.push_back(ch < 0x80 ? char(ch) : '?'); + } + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str()); + Stream.Advance((Len + 1) * 2); + } + else + { + std::snprintf(Buf, sizeof(Buf), "<unsupported string width %u>", unsigned(Size)); + Stream.Advance(0); + ++Stream.Descriptors; + --Stream.Remaining; + } + } + else + { + std::snprintf(Buf, sizeof(Buf), "<arg>"); + Stream.Advance(Size); + } + + Out.append(Buf); + } + + return Out; +} + +////////////////////////////////////////////////////////////////////////////// +// Log analyzer + +class LogAnalyzer : public Analyzer +{ +public: + explicit LogAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &LogAnalyzer::OnLogCategory); + Subs.emplace_back(this, &LogAnalyzer::OnLogMessageSpec); + Subs.emplace_back(this, &LogAnalyzer::OnLogMessage); + } + + eastl::vector<zen::trace_detail::LogCategoryInfo> BuildCategories(eastl::hash_map<uint64_t, uint32_t>& OutPointerToIndex) const + { + eastl::vector<zen::trace_detail::LogCategoryInfo> Cats; + Cats.reserve(m_Categories.size()); + OutPointerToIndex.clear(); + for (const auto& [Ptr, Info] : m_Categories) + { + OutPointerToIndex[Ptr] = uint32_t(Cats.size()); + Cats.push_back(Info); + } + return Cats; + } + + const eastl::vector<zen::trace_detail::LogEntry>& Entries() const { return m_Entries; } + eastl::vector<zen::trace_detail::LogEntry>& MutableEntries() { return m_Entries; } + + // The shared TraceTiming pointer lets external callers read the trace's + // cycle base / frequency without each analyzer having to own a copy. + const TraceTiming* Timing() const { return m_Timing; } + + struct MessageSpec + { + uint64_t CategoryPointer = 0; + int32_t Line = 0; + uint8_t Verbosity = 0; + std::string File; + std::string FormatString; + }; + + const eastl::hash_map<uint64_t, MessageSpec>& MessageSpecs() const { return m_Specs; } + +private: + void OnLogCategory(const Logging_LogCategory& Ev) + { + uint64_t Ptr = Ev.CategoryPointer(); + zen::trace_detail::LogCategoryInfo& Info = m_Categories[Ptr]; + Info.Name = SafeFieldStr(Ev.Name()); + Info.DefaultVerbosity = Ev.DefaultVerbosity(); + } + + void OnLogMessageSpec(const Logging_LogMessageSpec& Ev) + { + MessageSpec& Spec = m_Specs[Ev.LogPoint()]; + Spec.CategoryPointer = Ev.CategoryPointer(); + Spec.Line = Ev.Line(); + Spec.Verbosity = Ev.Verbosity(); + Spec.File = SafeFieldStr(Ev.FileName()); + Spec.FormatString = SafeFieldStr(Ev.FormatString()); + } + + void OnLogMessage(const Logging_LogMessage& Ev) + { + uint64_t LogPoint = Ev.LogPoint(); + auto SpecIt = m_Specs.find(LogPoint); + if (SpecIt == m_Specs.end()) + { + return; + } + const MessageSpec& Spec = SpecIt->second; + + uint32_t TimeUs = m_Timing ? m_Timing->CycleToTimeUs(Ev.Cycle()) : 0; + + Array<uint8[]> Args = Ev.FormatArgs(); + std::string Msg = FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size()); + + zen::trace_detail::LogEntry Entry; + Entry.TimeUs = TimeUs; + Entry.CategoryIndex = ~0u; // resolved in BuildTraceModel + Entry.Verbosity = Spec.Verbosity; + Entry.Line = Spec.Line; + Entry.File = Spec.File; + Entry.Message = std::move(Msg); + // Use the category pointer temporarily so BuildTraceModel can resolve + // it against the categories table. + Entry.CategoryIndex = SpecToCategoryIndex(Spec.CategoryPointer); + m_Entries.push_back(std::move(Entry)); + } + + uint32_t SpecToCategoryIndex(uint64_t Ptr) + { + // Encoded pointer stuffed into uint32_t so BuildTraceModel can remap. + // Lossy but deterministic: use a stable sequential index per unique + // pointer so we never need the full 64-bit value beyond build time. + auto It = m_CategoryIndex.find(Ptr); + if (It != m_CategoryIndex.end()) + { + return It->second; + } + uint32_t Idx = uint32_t(m_CategoryIndex.size()); + m_CategoryIndex[Ptr] = Idx; + return Idx; + } + +public: + // Mapping from the intermediate index stored in LogEntry::CategoryIndex + // during capture to the real category pointer; BuildTraceModel uses + // this to remap entries against the flattened LogCategories array. + const eastl::hash_map<uint64_t, uint32_t>& CategoryPointerIndex() const { return m_CategoryIndex; } + +private: + const TraceTiming* m_Timing = nullptr; + eastl::hash_map<uint64_t, zen::trace_detail::LogCategoryInfo> m_Categories; + eastl::hash_map<uint64_t, MessageSpec> m_Specs; + eastl::hash_map<uint64_t, uint32_t> m_CategoryIndex; + eastl::vector<zen::trace_detail::LogEntry> m_Entries; +}; + +////////////////////////////////////////////////////////////////////////////// +// Bookmarks and regions +// +// UE's bookmark wire format mirrors LogMessage: a BookmarkSpec introduces +// a (FileName, Line, FormatString) triple keyed by a BookmarkPoint pointer, +// and each Misc.Bookmark event carries that pointer, a cycle, and the same +// FFormatArgsTrace payload the log pipeline already knows how to decode. +// Region events come in two flavours: the legacy name-paired +// RegionBegin/RegionEnd and the newer *WithId variants that pack a unique +// id into the begin event's cycle. + +class BookmarksAnalyzer : public Analyzer +{ +public: + explicit BookmarksAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmarkSpec); + Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmark); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBegin); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBeginWithId); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEnd); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEndWithId); + } + + eastl::vector<zen::trace_detail::Bookmark>& MutableBookmarks() { return m_Bookmarks; } + eastl::vector<zen::trace_detail::RegionEntry>& MutableRegions() { return m_Regions; } + +private: + struct BookmarkSpec + { + int32_t Line = 0; + std::string File; + std::string FormatString; + }; + + uint32_t CycleToTimeUs(uint64_t Cycle) const { return m_Timing ? m_Timing->CycleToTimeUs(Cycle) : 0; } + + void OnBookmarkSpec(const Misc_BookmarkSpec& Ev) + { + BookmarkSpec& Spec = m_Specs[Ev.BookmarkPoint()]; + Spec.Line = Ev.Line(); + Spec.File = SafeFieldStr(Ev.FileName()); + Spec.FormatString = SafeFieldStr(Ev.FormatString()); + } + + void OnBookmark(const Misc_Bookmark& Ev) + { + auto SpecIt = m_Specs.find(Ev.BookmarkPoint()); + if (SpecIt == m_Specs.end()) + { + return; + } + const BookmarkSpec& Spec = SpecIt->second; + + Array<uint8[]> Args = Ev.FormatArgs(); + std::string Text = FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size()); + + zen::trace_detail::Bookmark Out; + Out.TimeUs = CycleToTimeUs(Ev.Cycle()); + Out.Line = Spec.Line; + Out.File = Spec.File; + Out.Text = std::move(Text); + m_Bookmarks.push_back(std::move(Out)); + } + + uint32_t CreatePartialRegion(uint32_t TimeUs, std::string Name, std::string Category) + { + zen::trace_detail::RegionEntry Entry; + Entry.BeginUs = TimeUs; + Entry.EndUs = ~uint32_t(0); // sentinel: still open + Entry.Depth = 0; + Entry.Reserved = 0; + Entry.Name = std::move(Name); + Entry.Category = std::move(Category); + uint32_t Idx = uint32_t(m_Regions.size()); + m_Regions.push_back(std::move(Entry)); + return Idx; + } + + // Decodes the raw array bytes of a RegionName field into a std::string. + // UE emits RegionName as either AnsiString (1-byte) or WideString (2-byte) + // depending on the trace's age -- for the 2-byte case we do the same + // lossy ASCII fold tourist's FieldStr does, which is all we need for + // display. + static std::string DecodeRegionName(const Array<uint8[]>& Data) + { + const uint8_t* p = Data.get(); + size_t size = Data.get_size(); + uint32_t count = Data.get_count(); + if (!p || size == 0 || count == 0) + { + return {}; + } + if (size == count) + { + // 1 byte per element -- AnsiString. + return std::string(reinterpret_cast<const char*>(p), count); + } + if (size == count * 2) + { + // 2 bytes per element -- WideString. Lossy ASCII fold. + std::string out; + out.reserve(count); + const char16_t* w = reinterpret_cast<const char16_t*>(p); + for (uint32_t i = 0; i < count; ++i) + { + out.push_back(w[i] < 0x80 ? char(w[i]) : '?'); + } + return out; + } + return {}; + } + + void OnRegionBegin(const Misc_RegionBegin& Ev) + { + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + Array<uint8[]> NameArr = Ev.RegionName(); + std::string Name = DecodeRegionName(NameArr); + std::string Category = DecodeRegionName(Ev.Category()); + uint32_t Idx = CreatePartialRegion(TimeUs, Name, std::move(Category)); + m_OpenByName[Name].push_back(Idx); + } + + void OnRegionBeginWithId(const Misc_RegionBeginWithId& Ev) + { + // Despite its name, CycleAndId is just Cycles64() -- a plain 64-bit + // cycle count that doubles as a unique region identifier. The caller + // keeps the returned value and passes it back as RegionId at end. + uint64_t CycleAndId = Ev.CycleAndId(); + uint32_t TimeUs = CycleToTimeUs(CycleAndId); + Array<uint8[]> NameArr = Ev.RegionName(); + std::string Name = DecodeRegionName(NameArr); + std::string Category = DecodeRegionName(Ev.Category()); + uint32_t Idx = CreatePartialRegion(TimeUs, std::move(Name), std::move(Category)); + m_OpenById[CycleAndId] = Idx; + } + + void OnRegionEnd(const Misc_RegionEnd& Ev) + { + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + Array<uint8[]> NameArr = Ev.RegionName(); + std::string Name = DecodeRegionName(NameArr); + auto It = m_OpenByName.find(Name); + if (It == m_OpenByName.end() || It->second.empty()) + { + return; + } + uint32_t Idx = It->second.back(); + It->second.pop_back(); + m_Regions[Idx].EndUs = TimeUs; + } + + void OnRegionEndWithId(const Misc_RegionEndWithId& Ev) + { + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + uint64_t Id = Ev.RegionId(); + auto It = m_OpenById.find(Id); + if (It == m_OpenById.end()) + { + return; + } + m_Regions[It->second].EndUs = TimeUs; + m_OpenById.erase(It); + } + + const TraceTiming* m_Timing = nullptr; + eastl::hash_map<uint64_t, BookmarkSpec> m_Specs; + eastl::vector<zen::trace_detail::Bookmark> m_Bookmarks; + eastl::vector<zen::trace_detail::RegionEntry> m_Regions; + eastl::hash_map<std::string, eastl::vector<uint32_t>> m_OpenByName; + eastl::hash_map<uint64_t, uint32_t> m_OpenById; +}; + +////////////////////////////////////////////////////////////////////////////// +// CsvProfiler analyzer -- parses CSV stat categories, definitions, timing, +// custom values, events, capture markers, and metadata. + +class CsvProfilerAnalyzer : public Analyzer +{ +public: + explicit CsvProfilerAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnRegisterCategory); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineInlineStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineDeclaredStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginExclusiveStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndExclusiveStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatInt); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatFloat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEvent); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginCapture); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndCapture); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnMetadata); + } + + eastl::vector<zen::trace_detail::TraceModel::CsvCategory>& MutableCategories() { return m_Categories; } + eastl::vector<zen::trace_detail::TraceModel::CsvStatDef>& MutableStatDefs() { return m_StatDefs; } + eastl::vector<zen::trace_detail::TraceModel::CsvEvent>& MutableEvents() { return m_Events; } + eastl::vector<zen::trace_detail::TraceModel::CsvMeta>& MutableMetadata() { return m_Metadata; } + + // Build the per-stat+thread time series from the accumulated samples. + eastl::vector<zen::trace_detail::TraceModel::CsvSeries> BuildTimeSeries() + { + eastl::vector<zen::trace_detail::TraceModel::CsvSeries> Result; + for (auto& [Key, Samples] : m_SeriesMap) + { + eastl::sort(Samples.begin(), Samples.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; }); + zen::trace_detail::TraceModel::CsvSeries S; + S.StatId = Key.StatId; + S.ThreadId = Key.ThreadId; + S.Samples = std::move(Samples); + Result.push_back(std::move(S)); + } + return Result; + } + +private: + uint32_t CycleToTimeUs(uint64_t Cycle) const + { + if (!m_Timing || m_Timing->Freq == 0) + { + return 0; + } + uint64_t Elapsed = (Cycle >= m_Timing->Base) ? (Cycle - m_Timing->Base) : 0; + return uint32_t(Elapsed * 1'000'000 / m_Timing->Freq); + } + + static std::string DecodeAnsiName(const Array<uint8[]>& Data) + { + const uint8_t* P = Data.get(); + size_t Size = Data.get_size(); + if (!P || Size == 0) + { + return {}; + } + return std::string(reinterpret_cast<const char*>(P), Size); + } + + static std::string DecodeWideName(const Array<uint8[]>& Data) + { + const uint8_t* P = Data.get(); + size_t Size = Data.get_size(); + uint32_t Count = Data.get_count(); + if (!P || Size == 0 || Count == 0) + { + return {}; + } + uint32_t ElemSize = Data.get_element_size(); + if (ElemSize == 2) + { + std::string Out; + Out.reserve(Count); + const char16_t* W = reinterpret_cast<const char16_t*>(P); + for (uint32_t I = 0; I < Count; ++I) + { + Out.push_back(W[I] < 0x80 ? char(W[I]) : '?'); + } + return Out; + } + return std::string(reinterpret_cast<const char*>(P), Size); + } + + struct SeriesKey + { + uint64_t StatId; + uint32_t ThreadId; + bool operator==(const SeriesKey& O) const { return StatId == O.StatId && ThreadId == O.ThreadId; } + }; + struct SeriesKeyHash + { + size_t operator()(const SeriesKey& K) const + { + return eastl::hash<uint64_t>{}(K.StatId) ^ (eastl::hash<uint32_t>{}(K.ThreadId) * 2654435761u); + } + }; + + void AddSample(uint64_t StatId, uint32_t ThreadId, uint32_t TimeUs, float Value) + { + m_SeriesMap[SeriesKey{StatId, ThreadId}].push_back({TimeUs, Value}); + } + + // -- Event handlers ----------------------------------------------- + + void OnRegisterCategory(const CsvProfiler_RegisterCategory& Ev) + { + zen::trace_detail::TraceModel::CsvCategory Cat; + Cat.Index = Ev.Index(); + Cat.Name = DecodeAnsiName(Ev.Name()); + m_Categories.push_back(std::move(Cat)); + } + + void OnDefineInlineStat(const CsvProfiler_DefineInlineStat& Ev) + { + DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name())); + } + + void OnDefineDeclaredStat(const CsvProfiler_DefineDeclaredStat& Ev) + { + DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name())); + } + + void DefineStat(uint64_t StatId, int32_t CategoryIndex, std::string Name) + { + if (m_StatIdToIndex.count(StatId)) + { + return; // already defined + } + m_StatIdToIndex[StatId] = uint32_t(m_StatDefs.size()); + zen::trace_detail::TraceModel::CsvStatDef Def; + Def.StatId = StatId; + Def.CategoryIndex = CategoryIndex; + Def.Name = std::move(Name); + m_StatDefs.push_back(std::move(Def)); + } + + void OnBeginStat(const CsvProfiler_BeginStat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs); + } + + void OnEndStat(const CsvProfiler_EndStat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + auto Key = SeriesKey{Ev.StatId(), ThreadId}; + auto It = m_OpenStacks.find(Key); + if (It == m_OpenStacks.end() || It->second.empty()) + { + return; + } + uint32_t BeginUs = It->second.back(); + It->second.pop_back(); + float DurationMs = float(TimeUs - BeginUs) / 1000.0f; + AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs); + } + + void OnBeginExclusiveStat(const CsvProfiler_BeginExclusiveStat& Ev) + { + // For basic support, treat exclusive stats like regular stats. + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs); + } + + void OnEndExclusiveStat(const CsvProfiler_EndExclusiveStat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + auto Key = SeriesKey{Ev.StatId(), ThreadId}; + auto It = m_OpenStacks.find(Key); + if (It == m_OpenStacks.end() || It->second.empty()) + { + return; + } + uint32_t BeginUs = It->second.back(); + It->second.pop_back(); + float DurationMs = float(TimeUs - BeginUs) / 1000.0f; + AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs); + } + + void OnCustomStatInt(const CsvProfiler_CustomStatInt& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + AddSample(Ev.StatId(), ThreadId, TimeUs, float(Ev.Value())); + } + + void OnCustomStatFloat(const CsvProfiler_CustomStatFloat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + AddSample(Ev.StatId(), ThreadId, TimeUs, Ev.Value()); + } + + void OnEvent(const CsvProfiler_Event& Ev) + { + zen::trace_detail::TraceModel::CsvEvent E; + E.TimeUs = CycleToTimeUs(Ev.Cycle()); + E.CategoryIndex = Ev.CategoryIndex(); + E.Text = DecodeWideName(Ev.Text()); + m_Events.push_back(std::move(E)); + } + + void OnBeginCapture(const CsvProfiler_BeginCapture& Ev) { m_CaptureStartUs = CycleToTimeUs(Ev.Cycle()); } + + void OnEndCapture(const CsvProfiler_EndCapture& Ev) { m_CaptureEndUs = CycleToTimeUs(Ev.Cycle()); } + + void OnMetadata(const CsvProfiler_Metadata& Ev) + { + zen::trace_detail::TraceModel::CsvMeta M; + M.Key = DecodeWideName(Ev.Key()); + M.Value = DecodeWideName(Ev.Value()); + m_Metadata.push_back(std::move(M)); + } + + const TraceTiming* m_Timing = nullptr; + + eastl::vector<zen::trace_detail::TraceModel::CsvCategory> m_Categories; + eastl::vector<zen::trace_detail::TraceModel::CsvStatDef> m_StatDefs; + eastl::hash_map<uint64_t, uint32_t> m_StatIdToIndex; + + // Timing stacks: (StatId, ThreadId) -> stack of begin times + eastl::hash_map<SeriesKey, eastl::vector<uint32_t>, SeriesKeyHash> m_OpenStacks; + + // Accumulated samples: (StatId, ThreadId) -> samples + eastl::hash_map<SeriesKey, eastl::vector<zen::trace_detail::TraceModel::CsvSample>, SeriesKeyHash> m_SeriesMap; + + eastl::vector<zen::trace_detail::TraceModel::CsvEvent> m_Events; + eastl::vector<zen::trace_detail::TraceModel::CsvMeta> m_Metadata; + + uint32_t m_CaptureStartUs = 0; + uint32_t m_CaptureEndUs = 0; +}; + +////////////////////////////////////////////////////////////////////////////// +// Analyzers + +class CpuAnalyzer : public Analyzer +{ +public: + CpuAnalyzer(EventStats& Stats, NameDepot& Names, const MetadataRegistry* Metadata) + : m_Names(Names) + , m_Stats(Stats) + , m_Metadata(Metadata) + { + Names.Add(NO_NAME, "???"); + } + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &CpuAnalyzer::OnNewTrace); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuSpec); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatch); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV2); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV3); + } + +private: + static constexpr uint32 NO_INDEX = ~0u; + static constexpr uint64 NO_NAME = ~0ull; + static constexpr uint32 METADATA_BIT = 0x8000'0000u; + + uint64 ResolveNameHash(uint32 PackedId) + { + const bool IsMetadata = (PackedId & METADATA_BIT) != 0; + const uint32 Id = PackedId & ~METADATA_BIT; + + if (IsMetadata && m_Metadata) + { + auto CachedIt = m_MetadataNames.find(Id); + if (CachedIt != m_MetadataNames.end()) + { + return CachedIt->second; + } + + const MetadataEntry* Entry = m_Metadata->Lookup(Id); + if (Entry) + { + auto BaseIt = m_Specs.find(Entry->SpecId); + StringView BaseName = (BaseIt != m_Specs.end()) ? m_Names.Get(BaseIt->second) : StringView("???"); + std::string Formatted(BaseName); + std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size()); + if (!Values.empty()) + { + Formatted += " - "; + Formatted += Values; + } + uint64 Hash = m_Names.Add(StringView(Formatted)); + m_MetadataNames[Id] = Hash; + return Hash; + } + return NO_NAME; + } + + auto It = m_Specs.find(Id); + return (It != m_Specs.end()) ? It->second : NO_NAME; + } + + struct EventStack + { + uint32 Tail = NO_INDEX; + }; + + struct ScopeEvent + { + uint32 Id; + uint32 TimeUs; + union + { + uint32 Next; + uint32 Index; + }; + }; + + struct EventPool + { + uint32 Alloc() + { + if (m_FreeHead == NO_INDEX) + { + uint32 Idx = uint32(m_Pool.size()); + m_Pool.push_back({.Index = Idx}); + return Idx; + } + uint32 Idx = m_FreeHead; + m_FreeHead = m_Pool[Idx].Index; + return Idx; + } + + void Free(uint32 Idx) + { + m_Pool[Idx].Index = m_FreeHead; + m_FreeHead = Idx; + } + + ScopeEvent& Get(uint32 Idx) { return m_Pool[Idx]; } + + eastl::vector<ScopeEvent> m_Pool; + uint32 m_FreeHead = NO_INDEX; + }; + + void OnNewTrace(const $Trace_NewTrace& NewTrace) + { + m_Freq = NewTrace.CycleFrequency(); + m_Base = NewTrace.StartCycle(); + m_UsDiv = m_Freq / 1'000'000; + if (m_UsDiv == 0) + { + m_UsDiv = 1; + } + m_UsDivRecip = ReciprocalU64(m_UsDiv); + } + + void OnCpuSpec(const CpuProfiler_EventSpec& Spec) + { + uint32 SpecId = Spec.Id(); + FieldStr SpecName = Spec.Name(); + + StringView NameView = SpecName.as_view(); + + if (NameView.starts_with("Frame ")) + { + NameView = "Frame"; + } + if (size_t Pos = NameView.find("\""); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + if (size_t Pos = NameView.find("\\"); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + + m_Specs[SpecId] = m_Names.Add(NameView); + } + + void OnCpuBatch(const CpuProfiler_EventBatch& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/1, ThreadId, Data); + } + + void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/2, ThreadId, Data); + } + + void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/3, ThreadId, Data); + } + + // Decodes a CpuProfiler scope batch. Mirrors UE's reference + // TraceServices/.../CpuProfilerTraceAnalysis.cpp ProcessBuffer / + // ProcessBufferV2. + // + // Version 1 (`CpuProfiler.EventBatch`): cycle is `value >> 1`; bit 0 is + // IsEnter; IsEnter events carry a SpecId varint. + // + // Version 2 (`CpuProfiler.EventBatchV2`, UE 5.1..5.5) and Version 3 + // (`CpuProfiler.EventBatchV3`, UE 5.6+): cycle is `value >> 2`; bit 0 is + // IsEnter, bit 1 is IsCoroutine. Coroutine begin events carry CoroutineId + // and TimerScopeDepth varints; coroutine end events carry a single + // TimerScopeDepth varint. V3 additionally reserves the low bit of the + // SpecId to mark metadata-bearing timers, so SpecId must be shifted + // right by 1 to recover the actual spec id. + void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data) + { + const uint8* Cursor = Data.get(); + const uint8* End = Cursor + Data.get_size(); + + auto Decode = [&]() { + uint64 Value = 0; + for (uint32 I = 1, J = 0; I; J += 7) + { + I = *Cursor++; + Value |= uint64(I & 0x7f) << J; + I &= 0x80; + } + return Value; + }; + + if (ThreadId >= m_Threads.size()) + { + m_Threads.resize(ThreadId + 1); + } + EventStack& Stack = m_Threads[ThreadId]; + + const uint32 CycleShift = (Version == 1) ? 1u : 2u; + + uint64 Base = m_Base; + + uint64 Cycle = ~Base + 1; + while (Cursor < End) + { + uint64 Value = Decode(); + uint32 IsEnter = (Value & 0b01); + + if (Version > 1 && (Value & 0b10)) + { + // Coroutine event -- not visualised, but the trailing varints + // still need to be consumed so we stay in sync with the + // stream. + if (IsEnter) + { + (void)Decode(); // CoroutineId + (void)Decode(); // TimerScopeDepth + } + else + { + (void)Decode(); // TimerScopeDepth + } + continue; + } + + uint64 EventId = IsEnter ? Decode() : ~0ull; + + Cycle += (Value >> CycleShift); + uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); + + if (IsEnter) + { + uint32 ScopeId = uint32(EventId); + bool IsMetadata = false; + if (Version == 3) + { + IsMetadata = (ScopeId & 1u) != 0; + ScopeId >>= 1; + } + uint32 EvIdx = m_Events.Alloc(); + ScopeEvent& Ev = m_Events.Get(EvIdx); + // Pack the metadata flag in the high bit so the close path + // can distinguish metadata-id scopes from regular ones without + // an extra field. + Ev.Id = IsMetadata ? (ScopeId | 0x8000'0000u) : ScopeId; + Ev.TimeUs = TimeUs; + Ev.Next = Stack.Tail; + Stack.Tail = EvIdx; + continue; + } + + if (Stack.Tail == NO_INDEX) + { + continue; + } + + ScopeEvent& Ev = m_Events.Get(Stack.Tail); + uint32 DurationUs = TimeUs - Ev.TimeUs; + uint64 NameHash = ResolveNameHash(Ev.Id); + m_Stats.Record(NameHash, DurationUs); + + uint32 NextIdx = Ev.Next; + m_Events.Free(Stack.Tail); + Stack.Tail = NextIdx; + } + } + + uint64 m_Freq = 0; + uint64 m_Base = 0; + uint64 m_UsDiv = 1; + ReciprocalU64 m_UsDivRecip; + eastl::hash_map<uint32, uint64> m_Specs; + NameDepot& m_Names; + EventPool m_Events; + eastl::vector<EventStack> m_Threads; + EventStats& m_Stats; + const MetadataRegistry* m_Metadata = nullptr; + // Caches the resolved name hash for each MetadataId so we don't + // re-format the same CBOR payload on every scope-close. + eastl::hash_map<uint32, uint64> m_MetadataNames; +}; + +////////////////////////////////////////////////////////////////////////////// +// Per-event CPU scope capture for the interactive trace viewer. +// +// Mirrors CpuAnalyzer's decode loop but instead of aggregating statistics, +// it records one TimelineScope per closed CPU scope so the viewer can draw a +// flame graph. Scope names are interned into a flat vector so each event only +// stores a compact uint32 NameId. + +class TimelineAnalyzer : public Analyzer +{ +public: + explicit TimelineAnalyzer(const MetadataRegistry* Metadata = nullptr, TraceTiming* SharedTiming = nullptr) + : m_SharedTiming(SharedTiming) + , m_Metadata(Metadata) + { + } + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &TimelineAnalyzer::OnNewTrace); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuSpec); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatch); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV2); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV3); + } + + struct ThreadData + { + eastl::vector<zen::trace_detail::TimelineScope> Scopes; + // Open-scope stack: parallel arrays keeping begin time and name id. + eastl::vector<uint32_t> OpenBeginUs; + eastl::vector<uint32_t> OpenNameIds; + }; + + const eastl::vector<std::string>& ScopeNames() const { return m_ScopeNames; } + const eastl::map<uint32_t, ThreadData>& Threads() const { return m_Threads; } + uint32_t MinBeginUs() const { return m_MinBeginUs; } + uint32_t MaxEndUs() const { return m_MaxEndUs; } + +private: + static constexpr uint32_t INVALID_NAME = ~0u; + + uint32_t InternName(StringView Name) + { + String Key(Name); + auto [It, Inserted] = m_NameIndex.try_emplace(std::move(Key), 0); + if (Inserted) + { + It->second = uint32_t(m_ScopeNames.size()); + m_ScopeNames.emplace_back(Name); + } + return It->second; + } + + void OnNewTrace(const $Trace_NewTrace& NewTrace) + { + m_Freq = NewTrace.CycleFrequency(); + m_Base = NewTrace.StartCycle(); + m_UsDiv = m_Freq / 1'000'000; + if (m_UsDiv == 0) + { + m_UsDiv = 1; + } + m_UsDivRecip = ReciprocalU64(m_UsDiv); + if (m_SharedTiming) + { + m_SharedTiming->Freq = m_Freq; + m_SharedTiming->Base = m_Base; + m_SharedTiming->UsDiv = m_UsDiv; + } + } + + void OnCpuSpec(const CpuProfiler_EventSpec& Spec) + { + uint32 SpecId = Spec.Id(); + FieldStr SpecName = Spec.Name(); + + StringView NameView = SpecName.as_view(); + + if (NameView.starts_with("Frame ")) + { + NameView = "Frame"; + } + if (size_t Pos = NameView.find("\""); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + if (size_t Pos = NameView.find("\\"); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + + m_Specs[SpecId] = InternName(NameView); + } + + void OnCpuBatch(const CpuProfiler_EventBatch& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/1, ThreadId, Data); + } + + void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/2, ThreadId, Data); + } + + void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/3, ThreadId, Data); + } + + TraceTiming* m_SharedTiming = nullptr; + + // See CpuAnalyzer::AbsorbBatch for a detailed description of the wire + // format for each version. + void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data) + { + const uint8* Cursor = Data.get(); + const uint8* End = Cursor + Data.get_size(); + + auto Decode = [&]() { + uint64 Value = 0; + for (uint32 I = 1, J = 0; I; J += 7) + { + I = *Cursor++; + Value |= uint64(I & 0x7f) << J; + I &= 0x80; + } + return Value; + }; + + ThreadData& Thread = m_Threads[ThreadId]; + + const uint32 CycleShift = (Version == 1) ? 1u : 2u; + + uint64 Base = m_Base; + + uint64 Cycle = ~Base + 1; + while (Cursor < End) + { + uint64 Value = Decode(); + uint32 IsEnter = (Value & 0b01); + + if (Version > 1 && (Value & 0b10)) + { + // Coroutine event -- consume the trailing varints so we stay + // in sync with the stream, but drop the event on the floor. + if (IsEnter) + { + (void)Decode(); // CoroutineId + (void)Decode(); // TimerScopeDepth + } + else + { + (void)Decode(); // TimerScopeDepth + } + continue; + } + + uint64 EventId = IsEnter ? Decode() : ~0ull; + + Cycle += (Value >> CycleShift); + uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); + + if (IsEnter) + { + uint32 ScopeId = uint32(EventId); + bool IsMetadata = false; + if (Version == 3) + { + IsMetadata = (ScopeId & 1u) != 0; + ScopeId >>= 1; + } + uint32_t NameId = IsMetadata ? ResolveMetadataNameId(ScopeId) : LookupSpecNameId(ScopeId); + Thread.OpenBeginUs.push_back(TimeUs); + Thread.OpenNameIds.push_back(NameId); + continue; + } + + if (Thread.OpenBeginUs.empty()) + { + continue; + } + + uint32_t BeginUs = Thread.OpenBeginUs.back(); + uint32_t NameId = Thread.OpenNameIds.back(); + Thread.OpenBeginUs.pop_back(); + Thread.OpenNameIds.pop_back(); + + if (NameId == INVALID_NAME) + { + continue; + } + + uint16_t Depth = uint16_t(Thread.OpenBeginUs.size()); + Thread.Scopes.push_back(zen::trace_detail::TimelineScope{ + .BeginUs = BeginUs, + .DurationUs = TimeUs - BeginUs, + .NameId = NameId, + .Depth = Depth, + .MergeCount = 0, + }); + + if (BeginUs < m_MinBeginUs) + { + m_MinBeginUs = BeginUs; + } + if (TimeUs > m_MaxEndUs) + { + m_MaxEndUs = TimeUs; + } + } + } + + uint32_t LookupSpecNameId(uint32 SpecId) const + { + auto It = m_Specs.find(SpecId); + return (It != m_Specs.end()) ? It->second : INVALID_NAME; + } + + uint32_t ResolveMetadataNameId(uint32 MetadataId) + { + if (!m_Metadata) + { + return INVALID_NAME; + } + + auto CachedIt = m_MetadataNameIds.find(MetadataId); + if (CachedIt != m_MetadataNameIds.end()) + { + return CachedIt->second; + } + + const MetadataEntry* Entry = m_Metadata->Lookup(MetadataId); + if (!Entry) + { + return INVALID_NAME; + } + + auto BaseIt = m_Specs.find(Entry->SpecId); + const std::string& BaseName = (BaseIt != m_Specs.end()) ? m_ScopeNames[BaseIt->second] : kUnknownName; + std::string Formatted = BaseName; + std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size()); + if (!Values.empty()) + { + Formatted += " - "; + Formatted += Values; + } + + uint32_t NameId = InternName(StringView(Formatted.data(), Formatted.size())); + m_MetadataNameIds[MetadataId] = NameId; + return NameId; + } + + static inline const std::string kUnknownName{"???"}; + + uint64 m_Freq = 0; + uint64 m_Base = 0; + uint64 m_UsDiv = 1; + ReciprocalU64 m_UsDivRecip; + uint32_t m_MinBeginUs = ~0u; + uint32_t m_MaxEndUs = 0; + eastl::hash_map<uint32, uint32_t> m_Specs; + eastl::hash_map<String, uint32_t> m_NameIndex; + eastl::vector<std::string> m_ScopeNames; + eastl::map<uint32_t, ThreadData> m_Threads; + const MetadataRegistry* m_Metadata = nullptr; + eastl::hash_map<uint32_t, uint32_t> m_MetadataNameIds; +}; + +////////////////////////////////////////////////////////////////////////////// + +} // anonymous namespace + +std::string +zen::trace_detail::SafeFieldStr(FieldStr&& Field) +{ + try + { + std::string_view View = Field.as_view(); + // Some trace writers include the NUL terminator in the field length + // (see UE trace ToAnsiCheap / ThreadRegister). Strip any trailing NULs + // so downstream consumers don't see garbage. + while (!View.empty() && View.back() == '\0') + { + View.remove_suffix(1); + } + return std::string(View); + } + catch (const std::exception& E) + { + ZEN_DEBUG("Failed to decode trace string field: {}", E.what()); + return {}; + } +} + +namespace { + +// Derive a thread group name from a thread name by stripping a trailing +// integer suffix (optionally preceded by a separator). E.g. "IoPool Worker 3" +// -> "IoPool Worker", "DbWorker_12" -> "DbWorker", "HttpThread42" -> +// "HttpThread". Returns an empty string if no suffix is present or the +// resulting prefix would be empty. +static std::string +SynthesizeThreadGroupFromName(std::string_view Name) +{ + size_t I = Name.size(); + while (I > 0 && Name[I - 1] >= '0' && Name[I - 1] <= '9') + { + --I; + } + if (I == Name.size()) + { + return {}; // no trailing digits + } + if (I > 0) + { + char C = Name[I - 1]; + if (C == '_' || C == '-' || C == '.' || C == ':' || C == '#' || C == '/' || C == ' ' || C == '\t') + { + --I; + } + } + while (I > 0 && (Name[I - 1] == ' ' || Name[I - 1] == '\t')) + { + --I; + } + if (I == 0) + { + return {}; // pure-numeric name + } + return std::string(Name.substr(0, I)); +} + +class SessionAnalyzer : public Analyzer +{ +public: + zen::trace_detail::SessionInfo Session; + eastl::map<uint32_t, zen::trace_detail::ThreadInfoEntry> ThreadNames; + eastl::map<uint32_t, zen::trace_detail::ChannelInfo> Channels; + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &SessionAnalyzer::OnSession); + Subs.emplace_back(this, &SessionAnalyzer::OnThreadGroupBegin); + Subs.emplace_back(this, &SessionAnalyzer::OnThreadGroupEnd); + Subs.emplace_back(this, &SessionAnalyzer::OnThreadInfo); + Subs.emplace_back(this, &SessionAnalyzer::OnChannelAnnounce); + Subs.emplace_back(this, &SessionAnalyzer::OnChannelToggle); + } + +private: + eastl::vector<String> m_GroupStack; + + void OnSession(const Diagnostics_Session2& Ev) + { + Session.Platform = SafeFieldStr(Ev.Platform()); + Session.AppName = SafeFieldStr(Ev.AppName()); + Session.ProjectName = SafeFieldStr(Ev.ProjectName()); + Session.CommandLine = SafeFieldStr(Ev.CommandLine()); + Session.Branch = SafeFieldStr(Ev.Branch()); + Session.BuildVersion = SafeFieldStr(Ev.BuildVersion()); + Session.Changelist = Ev.Changelist(); + Session.ConfigurationType = Ev.ConfigurationType(); + Session.HasSession = true; + } + + void OnThreadGroupBegin(const $Trace_ThreadGroupBegin& Ev) { m_GroupStack.push_back(SafeFieldStr(Ev.Name())); } + + void OnThreadGroupEnd(const $Trace_ThreadGroupEnd&) + { + if (!m_GroupStack.empty()) + { + m_GroupStack.pop_back(); + } + } + + void OnThreadInfo(const $Trace_ThreadInfo& Ev) + { + uint32_t Tid = Ev.ThreadId(); + zen::trace_detail::ThreadInfoEntry& Info = ThreadNames[Tid]; + Info.ThreadId = Tid; + Info.Name = SafeFieldStr(Ev.Name()); + Info.GroupName = m_GroupStack.empty() ? "" : m_GroupStack.back(); + if (Info.GroupName.empty()) + { + Info.GroupName = SynthesizeThreadGroupFromName(Info.Name); + } + Info.SystemId = Ev.SystemId(); + Info.SortHint = Ev.SortHint(); + } + + void OnChannelAnnounce(const Trace_ChannelAnnounce& Ev) + { + uint32_t Id = Ev.Id(); + zen::trace_detail::ChannelInfo& Info = Channels[Id]; + Info.Name = SafeFieldStr(Ev.Name()); + Info.Enabled = Ev.IsEnabled() != 0; + Info.ReadOnly = Ev.ReadOnly() != 0; + } + + void OnChannelToggle(const Trace_ChannelToggle& Ev) + { + uint32_t Id = Ev.Id(); + auto It = Channels.find(Id); + if (It != Channels.end()) + { + It->second.Enabled = Ev.IsEnabled() != 0; + } + } +}; + +////////////////////////////////////////////////////////////////////////////// +// Module analyzer +// +// Captures Diagnostics.Module{Init,Load,Unload} so TraceModel::Modules has a +// populated list of loaded DLLs. These events are NoSync+Important so they +// don't carry a Cycle field (no load/unload timestamps available) but they +// do survive reconnects and the trim filter. The analyzer is intentionally +// passive -- we stash the raw data here and leave symbolication and memory +// attribution to whatever consumes TraceModel::Modules later. + +class ModuleAnalyzer : public Analyzer +{ +public: + eastl::map<uint64_t, zen::trace_detail::ModuleInfo> ModulesByBase; + std::string SymbolFormat; + uint8_t BaseShift = 0; + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &ModuleAnalyzer::OnModuleInit); + Subs.emplace_back(this, &ModuleAnalyzer::OnModuleLoad); + Subs.emplace_back(this, &ModuleAnalyzer::OnModuleUnload); + } + +private: + void OnModuleInit(const Diagnostics_ModuleInit& Ev) + { + SymbolFormat = SafeFieldStr(Ev.SymbolFormat()); + BaseShift = Ev.ModuleBaseShift(); + } + + void OnModuleLoad(const Diagnostics_ModuleLoad& Ev) + { + // Older traces stored Base as a 32-bit value shifted right by + // ModuleBaseShift to fit. Modern traces set BaseShift to zero and + // Base is a full 64-bit address; applying the shift then is a + // harmless no-op. + uint64_t Base = uint64_t(Ev.Base()) << BaseShift; + + zen::trace_detail::ModuleInfo& Info = ModulesByBase[Base]; + Info.FullPath = SafeFieldStr(Ev.Name()); + Info.Base = Base; + Info.Size = Ev.Size(); + Info.Unloaded = false; + + // Extract the basename without pulling in the whole filesystem + // library for a single operation. UE emits forward- or backslashes + // depending on platform, so handle both. + const std::string& Path = Info.FullPath; + size_t Cut = Path.find_last_of("/\\"); + Info.Name = (Cut == std::string::npos) ? Path : Path.substr(Cut + 1); + + ::Array<uint8[]> ImageId = Ev.ImageId(); + const uint8* IdPtr = ImageId.get(); + const uint32 IdSize = ImageId.get_size(); + Info.ImageId.assign(IdPtr, IdPtr + IdSize); + } + + void OnModuleUnload(const Diagnostics_ModuleUnload& Ev) + { + uint64_t Base = uint64_t(Ev.Base()) << BaseShift; + auto It = ModulesByBase.find(Base); + if (It != ModulesByBase.end()) + { + It->second.Unloaded = true; + } + } +}; + +////////////////////////////////////////////////////////////////////////////// +// Trim analyzer +// +// Decodes CpuProfiler batch events to extract per-batch timestamp ranges AND +// to track open/close scope bracketing per thread. The scope tracker lets us +// identify "must-keep" packets: any packet containing the Leave event for a +// scope whose Enter was at or before the user's trim EndUs. Preserving those +// Leaves is what lets the downstream TimelineAnalyzer (and Unreal Insights) +// still render long-running scopes that span the window end -- if we dropped +// their closing event the scope would sit unmatched on the open-scope stack +// and not render at all. +// +// Attribution to raw packet indices is approximate due to Tourist's internal +// per-thread packet buffering; the trim driver processes the trace one packet +// at a time (bundle of size 1) to keep it as tight as possible. Packets that +// never get an attributed time range are conservatively retained by the +// caller. + +class TrimAnalyzer : public Analyzer +{ +public: + // Maps packet index (matching both Tourist's Packet::get_index() and our + // raw walker's vector position) -> (MinUs, MaxUs) of all events attributed + // to that packet. + struct Range + { + uint32_t MinUs = ~0u; + uint32_t MaxUs = 0; + }; + + eastl::hash_map<uint32_t, Range> PacketRanges; + + // Maps thread id -> the maximum packet index that contains a Leave event + // for a scope whose matching Enter was at or before EndUs. These packets + // must be retained so the downstream analyzer can close the scope. + eastl::hash_map<uint32_t, uint32_t> MustKeepPacketByThread; + + // Set by the trim driver from TraceTrimArgs::EndSec before the analysis + // pass begins. Used by the scope tracker to decide which leaves are + // "must keep". + uint32_t EndUs = ~0u; + + // Updated by the trim driver before each Proto.read call when the next + // packet is on a normal thread. Maps normal-thread id -> the most + // recently scattered packet's index. + eastl::hash_map<uint32_t, uint32_t> LastPacketIndexByThread; + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &TrimAnalyzer::OnNewTrace); + Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatch); + Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV2); + Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV3); + } + + bool HasTimeBase() const { return m_Freq != 0; } + +private: + void OnNewTrace(const $Trace_NewTrace& NewTrace) + { + m_Freq = NewTrace.CycleFrequency(); + m_Base = NewTrace.StartCycle(); + m_UsDiv = (m_Freq > 0) ? (m_Freq / 1'000'000) : 1; + if (m_UsDiv == 0) + { + m_UsDiv = 1; + } + m_UsDivRecip = ReciprocalU64(m_UsDiv); + } + + void OnCpuBatch(const CpuProfiler_EventBatch& Batch) { AbsorbBatchTimes(/*Version=*/1, Batch.get_thread_id(), Batch.Data()); } + + void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) { AbsorbBatchTimes(/*Version=*/2, Batch.get_thread_id(), Batch.Data()); } + + void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) { AbsorbBatchTimes(/*Version=*/3, Batch.get_thread_id(), Batch.Data()); } + + // Decodes cycle deltas in a CpuProfiler batch to find the timestamp range + // AND to maintain a per-thread open-scope stack. Mirrors the wire format + // documented in CpuAnalyzer::AbsorbBatch. Scope ids are decoded just far + // enough to keep the varint cursor in sync; we don't store them. + void AbsorbBatchTimes(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data) + { + if (m_Freq == 0) + { + return; + } + + auto It = LastPacketIndexByThread.find(ThreadId); + if (It == LastPacketIndexByThread.end()) + { + return; + } + const uint32_t PacketIndex = It->second; + + // The open-scope stack is maintained across every batch on a thread. + // Each entry stores the Enter time in microseconds from trace start. + eastl::vector<uint32_t>& OpenStack = m_OpenScopes[ThreadId]; + + const uint8* Cursor = Data.get(); + const uint8* End = Cursor + Data.get_size(); + + auto Decode = [&]() { + uint64 Value = 0; + for (uint32 I = 1, J = 0; I; J += 7) + { + I = *Cursor++; + Value |= uint64(I & 0x7f) << J; + I &= 0x80; + } + return Value; + }; + + const uint32 CycleShift = (Version == 1) ? 1u : 2u; + const uint64 Base = m_Base; + + uint32_t BatchMinUs = ~0u; + uint32_t BatchMaxUs = 0; + bool HasAny = false; + + uint64 Cycle = ~Base + 1; + while (Cursor < End) + { + uint64 Value = Decode(); + uint32 IsEnter = (Value & 0b01); + + if (Version > 1 && (Value & 0b10)) + { + // Coroutine event -- consume the trailing varints and skip. + // These don't participate in the scope bracket tracking; the + // existing TimelineAnalyzer ignores them for the same reason. + if (IsEnter) + { + (void)Decode(); // CoroutineId + (void)Decode(); // TimerScopeDepth + } + else + { + (void)Decode(); // TimerScopeDepth + } + continue; + } + + if (IsEnter) + { + (void)Decode(); // EventId / SpecId + } + + Cycle += (Value >> CycleShift); + uint32_t TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); + + if (!HasAny || TimeUs < BatchMinUs) + { + BatchMinUs = TimeUs; + } + if (!HasAny || TimeUs > BatchMaxUs) + { + BatchMaxUs = TimeUs; + } + HasAny = true; + + if (IsEnter) + { + OpenStack.push_back(TimeUs); + } + else if (!OpenStack.empty()) + { + uint32_t EnterTimeUs = OpenStack.back(); + OpenStack.pop_back(); + + // If the scope started at or before the window end, we need + // its closing Leave event to survive so the downstream + // analyzer can render it. Mark the current packet (the one + // holding this Leave) as must-keep for the thread. + if (EnterTimeUs <= EndUs) + { + uint32_t& MustKeep = MustKeepPacketByThread[ThreadId]; + if (PacketIndex > MustKeep) + { + MustKeep = PacketIndex; + } + } + } + } + + if (!HasAny) + { + return; + } + + Range& R = PacketRanges[PacketIndex]; + R.MinUs = std::min(R.MinUs, BatchMinUs); + R.MaxUs = std::max(R.MaxUs, BatchMaxUs); + } + + uint64 m_Freq = 0; + uint64 m_Base = 0; + uint64 m_UsDiv = 1; + ReciprocalU64 m_UsDivRecip; + + // Per-thread open scope stack, carrying the Enter times in microseconds + // from trace start. Entries are pushed on Enter and popped on Leave; the + // stack may contain unclosed entries when decoding ends (scopes that + // outlive the captured trace). + eastl::hash_map<uint32_t, eastl::vector<uint32_t>> m_OpenScopes; +}; + +////////////////////////////////////////////////////////////////////////////// +// Common trace iteration + +struct TraceSummary +{ + eastl::map<uint32_t, std::pair<std::string, uint64_t>> TypeInfo; + eastl::set<uint16_t> Threads; + uint64_t TotalEvents = 0; +}; + +template<typename ParcelCallback> +static TraceSummary +IterateTrace(::DataSource& Source, ParcelCallback OnParcel, const zen::trace_detail::ProgressCallback& OnProgress = {}) +{ + TraceSummary Summary; + + try + { + uint64_t TotalFileBytes = uint64_t(std::max(Source.get_size(), int64(0))); + + ::Allocator TraceAllocator; + ::Preamble Pream(Source, TraceAllocator); + ::Transport Xport = Pream.get_transport(); + ::Protocol Proto = Pream.get_protocol(); + + ::Packet Packets[128]; + ::EventParcel Parcel; + + while (::Bundle Bndl = Xport.read_packets(Packets)) + { + Parcel.reset(); + Proto.read(Parcel, Bndl); + + OnParcel(Parcel); + + for (const ::Type* TraceType : Parcel.new_types) + { + auto [LoggerName, EventName] = TraceType->get_name(); + std::string TypeName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName)); + Summary.TypeInfo[TraceType->get_uid()] = {std::move(TypeName), 0}; + } + + for (const ::Event& Ev : Parcel.events) + { + Summary.TotalEvents++; + Summary.Threads.insert(Ev.thread_id); + + auto It = Summary.TypeInfo.find(Ev.uid); + if (It != Summary.TypeInfo.end()) + { + It->second.second++; + } + } + + if (OnProgress) + { + OnProgress(Xport.tell(), TotalFileBytes, Summary.TotalEvents); + } + } + } + catch (const DataStream::Eof&) + { + } + catch (const Exception::StreamError& E) + { + throw std::runtime_error(fmt::format("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value)); + } + + return Summary; +} + +// Print session metadata +static void +PrintSessionInfo(const SessionAnalyzer& SessionAn) +{ + const zen::trace_detail::SessionInfo& Sess = SessionAn.Session; + if (!Sess.HasSession) + { + return; + } + + ZEN_CONSOLE("Platform: {}", Sess.Platform); + ZEN_CONSOLE("App: {}", Sess.AppName); + if (!Sess.ProjectName.empty()) + { + ZEN_CONSOLE("Project: {}", Sess.ProjectName); + } + if (!Sess.Branch.empty()) + { + ZEN_CONSOLE("Branch: {}", Sess.Branch); + } + if (!Sess.BuildVersion.empty()) + { + ZEN_CONSOLE("Build: {}", Sess.BuildVersion); + } + if (Sess.Changelist) + { + ZEN_CONSOLE("Changelist: {}", Sess.Changelist); + } + if (!Sess.CommandLine.empty()) + { + ZEN_CONSOLE("CommandLine: {}", Sess.CommandLine); + } + ZEN_CONSOLE(""); +} + +// Print thread names +static void +PrintThreadInfo(const SessionAnalyzer& SessionAn) +{ + if (SessionAn.ThreadNames.empty()) + { + return; + } + + eastl::vector<std::pair<uint32_t, const zen::trace_detail::ThreadInfoEntry*>> ThreadsSorted; + for (const auto& [Tid, Info] : SessionAn.ThreadNames) + { + ThreadsSorted.emplace_back(Tid, &Info); + } + eastl::sort(ThreadsSorted.begin(), ThreadsSorted.end(), [](const auto& A, const auto& B) { + return A.second->SortHint < B.second->SortHint; + }); + + ZEN_CONSOLE(""); + ZEN_CONSOLE("Threads:"); + ZEN_CONSOLE(""); + ZEN_CONSOLE("{:>6} {:>10} {}", "TID", "SystemID", "Name"); + ZEN_CONSOLE("{:-<{}}", "", 6 + 10 + 40 + 4); + for (const auto& [Tid, Info] : ThreadsSorted) + { + ZEN_CONSOLE("{:>6} {:>10} {}", Tid, Info->SystemId, Info->Name); + } +} + +// Print trace channel info +static void +PrintChannelInfo(const SessionAnalyzer& SessionAn) +{ + if (SessionAn.Channels.empty()) + { + return; + } + + eastl::vector<const zen::trace_detail::ChannelInfo*> ChannelsSorted; + for (const auto& [Id, Info] : SessionAn.Channels) + { + ChannelsSorted.push_back(&Info); + } + eastl::sort(ChannelsSorted.begin(), ChannelsSorted.end(), [](const auto* A, const auto* B) { return A->Name < B->Name; }); + + ZEN_CONSOLE(""); + ZEN_CONSOLE("Trace Channels:"); + ZEN_CONSOLE(""); + for (const zen::trace_detail::ChannelInfo* Ch : ChannelsSorted) + { + std::string_view State = Ch->Enabled ? "enabled" : "disabled"; + if (Ch->ReadOnly) + { + ZEN_CONSOLE(" {} ({}, read-only)", Ch->Name, State); + } + else + { + ZEN_CONSOLE(" {} ({})", Ch->Name, State); + } + } +} + +} // namespace + +////////////////////////////////////////////////////////////////////////////// + +namespace zen::trace_detail { + +std::filesystem::path +ResolveTraceFile(const std::filesystem::path& Input, cxxopts::Options& HelpOptions) +{ + if (Input.empty()) + { + throw zen::OptionParseException("File path is required", HelpOptions.help()); + } + + std::filesystem::path FilePath = std::filesystem::absolute(Input); + if (!std::filesystem::exists(FilePath)) + { + throw std::runtime_error(fmt::format("File not found: {}", FilePath)); + } + + return FilePath; +} + +void +RunInspect(const std::filesystem::path& FilePath) +{ + ::DataSource Source(FilePath); + + SessionAnalyzer SessionAn; + ::Dispatcher Dispatch; + Dispatch.add_analyzer(SessionAn); + + // Collect type schemas + struct TypeSchema + { + std::string FullName; + uint32_t Uid = 0; + uint32_t FieldCount = 0; + uint32_t Flags = 0; + uint64_t EventCount = 0; + eastl::vector<std::string> FieldNames; + eastl::vector<uint32_t> FieldSizes; + eastl::vector<uint32_t> FieldTypeInfos; + }; + + eastl::map<uint32_t, TypeSchema> Schemas; + + TraceSummary Summary = IterateTrace(Source, [&](const ::EventParcel& Parcel) { + Dispatch.on_parcel(Parcel); + + for (const ::Type* TraceType : Parcel.new_types) + { + auto [LoggerName, EventName] = TraceType->get_name(); + uint32_t Uid = TraceType->get_uid(); + + TypeSchema& Schema = Schemas[Uid]; + Schema.FullName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName)); + Schema.Uid = Uid; + Schema.FieldCount = TraceType->get_field_count(); + Schema.Flags = 0; + if (TraceType->has_flag(TYPE_FLAG_IMPORTANT)) + { + Schema.Flags |= TYPE_FLAG_IMPORTANT; + } + if (TraceType->has_flag(TYPE_FLAG_AUX)) + { + Schema.Flags |= TYPE_FLAG_AUX; + } + + for (uint32_t I = 0; I < Schema.FieldCount; I++) + { + auto [FieldName, Field] = TraceType->get_field_info(I); + Schema.FieldNames.emplace_back(FieldName); + Schema.FieldSizes.push_back(Field.get_size()); + Schema.FieldTypeInfos.push_back(Field.get_type_info()); + } + } + + for (const ::Event& Ev : Parcel.events) + { + auto It = Schemas.find(Ev.uid); + if (It != Schemas.end()) + { + It->second.EventCount++; + } + } + }); + + // -- Session info -- + PrintSessionInfo(SessionAn); + + ZEN_CONSOLE("Trace: {}", FilePath); + ZEN_CONSOLE("Size: {}", zen::NiceBytes(uint64_t(std::filesystem::file_size(FilePath)))); + ZEN_CONSOLE("Events: {}", zen::ThousandsNum(Summary.TotalEvents)); + ZEN_CONSOLE("Threads: {}", Summary.Threads.size()); + ZEN_CONSOLE("Types: {}", Schemas.size()); + + // -- Thread names -- + PrintThreadInfo(SessionAn); + + // -- Trace channels -- + PrintChannelInfo(SessionAn); + + // -- Event schemas -- + ZEN_CONSOLE(""); + ZEN_CONSOLE("Event Schemas:"); + ZEN_CONSOLE(""); + + eastl::vector<const TypeSchema*> SortedSchemas; + SortedSchemas.reserve(Schemas.size()); + for (const auto& [Uid, Schema] : Schemas) + { + SortedSchemas.push_back(&Schema); + } + eastl::sort(SortedSchemas.begin(), SortedSchemas.end(), [](const auto* A, const auto* B) { return A->FullName < B->FullName; }); + + auto FieldTypeStr = [](uint32_t TypeInfo, uint32_t Size) -> std::string_view { + uint32_t Cat = TypeInfo & TYPE_INFO_CAT_MASK; + if (Cat == TYPE_INFO_CAT_ARRAY) + { + return "array"; + } + if (Cat == TYPE_INFO_CAT_FLOAT) + { + return (Size == 8) ? "float64" : "float32"; + } + bool IsSigned = (TypeInfo & TYPE_INFO_SPECIAL_MASK) == TYPE_INFO_SPECIAL_SIGNED; + switch (Size) + { + case 1: + return IsSigned ? "int8" : "uint8"; + case 2: + return IsSigned ? "int16" : "uint16"; + case 4: + return IsSigned ? "int32" : "uint32"; + case 8: + return IsSigned ? "int64" : "uint64"; + default: + return "unknown"; + } + }; + + for (const TypeSchema* Schema : SortedSchemas) + { + std::string Flags; + if (Schema->Flags & TYPE_FLAG_IMPORTANT) + { + Flags += " [important]"; + } + if (Schema->Flags & TYPE_FLAG_AUX) + { + Flags += " [aux]"; + } + + ZEN_CONSOLE("{} (uid={}, events={}){}", Schema->FullName, Schema->Uid, zen::ThousandsNum(Schema->EventCount), Flags); + + for (uint32_t I = 0; I < Schema->FieldCount; I++) + { + ZEN_CONSOLE(" {} {}", FieldTypeStr(Schema->FieldTypeInfos[I], Schema->FieldSizes[I]), Schema->FieldNames[I]); + } + + if (Schema->FieldCount > 0) + { + ZEN_CONSOLE(""); + } + } +} + +// Build a single LOD level by merging Lod0 scopes below the given resolution. +// Lod0 must already be sorted by BeginUs. Safe to call concurrently for +// different (Level, Resolution) pairs sharing the same Lod0. +static void +BuildSingleLod(const eastl::vector<TimelineScope>& Lod0, TimelineDetailLevel& Level, uint32_t Resolution) +{ + Level.ResolutionUs = Resolution; + + // Per-depth merge accumulators. Since depths are typically small (< 64), + // a flat array indexed by depth is more cache-friendly than a hash map. + struct PendingMerge + { + uint32_t BeginUs = 0; + uint32_t EndUs = 0; + uint32_t NameId = 0; + uint32_t MaxChildDur = 0; + uint16_t Depth = 0; + uint16_t Count = 0; + bool Active = false; + }; + + eastl::vector<PendingMerge> Pending(64); // grows if needed + + auto FlushPending = [&Level](PendingMerge& P) { + if (!P.Active) + { + return; + } + Level.Scopes.push_back(TimelineScope{ + .BeginUs = P.BeginUs, + .DurationUs = P.EndUs - P.BeginUs, + .NameId = P.NameId, + .Depth = P.Depth, + .MergeCount = P.Count, + }); + P.Active = false; + }; + + // Single O(n) sweep over LOD 0 scopes (sorted by BeginUs). For each + // depth, merge adjacent small scopes that fall within one resolution + // bucket of each other. Large scopes (>= Resolution) pass through. + for (const TimelineScope& Scope : Lod0) + { + uint16_t Depth = Scope.Depth; + if (Depth >= Pending.size()) + { + Pending.resize(Depth + 1); + } + + if (Scope.DurationUs >= Resolution) + { + // Large scope -- flush any pending merge for this depth, + // then emit the scope un-merged. + FlushPending(Pending[Depth]); + Level.Scopes.push_back(TimelineScope{ + .BeginUs = Scope.BeginUs, + .DurationUs = Scope.DurationUs, + .NameId = Scope.NameId, + .Depth = Scope.Depth, + .MergeCount = 1, + }); + continue; + } + + PendingMerge& P = Pending[Depth]; + uint32_t EndUs = Scope.BeginUs + Scope.DurationUs; + + if (P.Active && Scope.BeginUs < P.EndUs + Resolution) + { + // Extend the pending merge. + if (EndUs > P.EndUs) + { + P.EndUs = EndUs; + } + ++P.Count; + if (Scope.DurationUs > P.MaxChildDur) + { + P.MaxChildDur = Scope.DurationUs; + P.NameId = Scope.NameId; + } + } + else + { + // Start a new pending merge (flush previous if any). + FlushPending(P); + P.BeginUs = Scope.BeginUs; + P.EndUs = EndUs; + P.NameId = Scope.NameId; + P.MaxChildDur = Scope.DurationUs; + P.Depth = Scope.Depth; + P.Count = 1; + P.Active = true; + } + } + + // Flush remaining per-depth accumulators. + for (PendingMerge& P : Pending) + { + FlushPending(P); + } + + // Sort by (BeginUs, Depth) -- the per-depth flush may have interleaved + // entries from different depths. Tie-breaking on depth keeps the + // ordering consistent with LOD 0 (parents before nested children) so + // the front-end never sees a child rendered before its parent. + eastl::sort(Level.Scopes.begin(), Level.Scopes.end(), [](const TimelineScope& A, const TimelineScope& B) { + if (A.BeginUs != B.BeginUs) + { + return A.BeginUs < B.BeginUs; + } + return A.Depth < B.Depth; + }); +} + +void +BuildTimelineLods(ThreadTimeline& Timeline) +{ + if (Timeline.Scopes.empty()) + { + return; + } + + for (size_t LodIdx = 0; LodIdx < kTimelineLodCount; ++LodIdx) + { + BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[LodIdx], kTimelineLodResolutions[LodIdx]); + } +} + +namespace { + + // Post-iteration phases, extracted from BuildTraceModel for clarity. Each one + // runs after the event-iteration pass has populated the analyzers and mutates + // only the pieces of TraceModel it owns. + + void ComputeScopeStats(const TimelineAnalyzer& TimelineAn, TraceModel& Model) + { + const eastl::vector<std::string>& ScopeNames = TimelineAn.ScopeNames(); + eastl::vector<Distribution> Dists(ScopeNames.size()); + eastl::vector<uint32_t> Mins(ScopeNames.size(), ~0u); + eastl::vector<uint32_t> Maxs(ScopeNames.size(), 0u); + + for (const auto& [Tid, Thread] : TimelineAn.Threads()) + { + for (const TimelineScope& Scope : Thread.Scopes) + { + if (Scope.NameId >= Dists.size()) + { + continue; + } + Dists[Scope.NameId].add(double(Scope.DurationUs)); + Mins[Scope.NameId] = std::min(Mins[Scope.NameId], Scope.DurationUs); + Maxs[Scope.NameId] = std::max(Maxs[Scope.NameId], Scope.DurationUs); + } + } + + Model.ScopeStats.reserve(ScopeNames.size()); + for (size_t I = 0; I < ScopeNames.size(); ++I) + { + if (Dists[I].Count() == 0) + { + continue; + } + CpuScopeStat Entry; + Entry.Name = ScopeNames[I]; + Entry.Count = Dists[I].Count(); + Entry.MinUs = Mins[I]; + Entry.MaxUs = Maxs[I]; + Entry.MeanUs = Dists[I].Mean(); + Entry.StdDevUs = Dists[I].StdDev(); + Model.ScopeStats.push_back(std::move(Entry)); + } + eastl::sort(Model.ScopeStats.begin(), Model.ScopeStats.end(), [](const CpuScopeStat& A, const CpuScopeStat& B) { + return A.Count > B.Count; + }); + } + + // Translate each LogEntry's captured CategoryIndex (a sequential id keyed on + // the source category pointer) into the flat LogCategories index the frontend + // consumes. Entries whose category pointer never got a matching LogCategory + // event are bucketed into a synthetic "(unknown)" category. + void ResolveLogCategories(LogAnalyzer& LogAn, TraceModel& Model) + { + const eastl::hash_map<uint64_t, uint32_t>& CategoryPtrToSeqIdx = LogAn.CategoryPointerIndex(); + + eastl::hash_map<uint64_t, uint32_t> RealPtrToFlatIdx; + Model.LogCategories = LogAn.BuildCategories(RealPtrToFlatIdx); + + const uint32_t UnknownIdx = uint32_t(Model.LogCategories.size()); + Model.LogCategories.push_back(LogCategoryInfo{.Name = "(unknown)", .DefaultVerbosity = 0}); + + eastl::vector<uint32_t> SeqToFlat(CategoryPtrToSeqIdx.size(), UnknownIdx); + for (const auto& [Ptr, SeqIdx] : CategoryPtrToSeqIdx) + { + auto It = RealPtrToFlatIdx.find(Ptr); + if (It != RealPtrToFlatIdx.end()) + { + SeqToFlat[SeqIdx] = It->second; + } + } + + Model.LogEntries = LogAn.MutableEntries(); + for (LogEntry& E : Model.LogEntries) + { + E.CategoryIndex = (E.CategoryIndex < SeqToFlat.size()) ? SeqToFlat[E.CategoryIndex] : UnknownIdx; + } + + eastl::sort(Model.LogEntries.begin(), Model.LogEntries.end(), [](const LogEntry& A, const LogEntry& B) { + return A.TimeUs < B.TimeUs; + }); + } + + // Finalize any still-open regions, group by category, and greedily pack each + // category's regions into non-overlapping lanes so the frontend can stack them + // without re-running collision detection. + void BuildRegionCategories(eastl::vector<RegionEntry>&& AllRegions, uint32_t TraceEndUs, TraceModel& Model) + { + for (RegionEntry& R : AllRegions) + { + if (R.EndUs == ~uint32_t(0)) + { + R.EndUs = TraceEndUs; + } + if (R.EndUs < R.BeginUs) + { + R.EndUs = R.BeginUs; + } + } + + eastl::map<std::string, eastl::vector<RegionEntry>> ByCategory; + for (RegionEntry& R : AllRegions) + { + ByCategory[R.Category].push_back(std::move(R)); + } + + for (auto& [CatName, Regions] : ByCategory) + { + eastl::sort(Regions.begin(), Regions.end(), [](const RegionEntry& A, const RegionEntry& B) { + if (A.BeginUs != B.BeginUs) + { + return A.BeginUs < B.BeginUs; + } + return A.EndUs < B.EndUs; + }); + + eastl::vector<uint32_t> LaneEndUs; + uint32_t MaxLane = 0; + for (RegionEntry& R : Regions) + { + uint16_t Depth = 0; + bool Assigned = false; + for (size_t I = 0; I < LaneEndUs.size(); ++I) + { + if (LaneEndUs[I] <= R.BeginUs) + { + Depth = uint16_t(I); + LaneEndUs[I] = R.EndUs; + Assigned = true; + break; + } + } + if (!Assigned) + { + Depth = uint16_t(LaneEndUs.size()); + LaneEndUs.push_back(R.EndUs); + } + R.Depth = Depth; + if (Depth + 1u > MaxLane) + { + MaxLane = Depth + 1u; + } + } + + RegionCategory Cat; + Cat.Name = CatName; + Cat.LaneCount = MaxLane; + Cat.Regions = std::move(Regions); + Model.RegionCategories.push_back(std::move(Cat)); + } + + // Sort: uncategorized (empty name) first, then alphabetical. + eastl::sort(Model.RegionCategories.begin(), Model.RegionCategories.end(), [](const RegionCategory& A, const RegionCategory& B) { + if (A.Name.empty() != B.Name.empty()) + { + return A.Name.empty(); + } + return A.Name < B.Name; + }); + } + + // Map callstack frame addresses to (module, offset) pairs using a sorted + // (Base, End) lookup over the already-populated Model.Modules. + void ResolveCallstacks(const ModuleAnalyzer& ModuleAn, + const CallstackAnalyzer& CallstackAn, + AllocationAnalyzer& AllocAn, + TraceModel& Model) + { + const auto& RawCallstacks = CallstackAn.RawCallstacks(); + + struct ModuleLookup + { + uint64_t Base; + uint64_t End; + uint32_t ModelIndex; + }; + eastl::vector<ModuleLookup> Lookup; + Lookup.reserve(ModuleAn.ModulesByBase.size()); + for (const auto& [Base, Info] : ModuleAn.ModulesByBase) + { + for (uint32_t I = 0; I < Model.Modules.size(); ++I) + { + if (Model.Modules[I].Base == Base) + { + Lookup.push_back({Base, Base + Info.Size, I}); + break; + } + } + } + eastl::sort(Lookup.begin(), Lookup.end(), [](const ModuleLookup& A, const ModuleLookup& B) { return A.Base < B.Base; }); + + auto ResolveFrame = [&Lookup](uint64_t Address) -> ResolvedFrame { + ResolvedFrame F; + F.Address = Address; + auto It = eastl::upper_bound(Lookup.begin(), Lookup.end(), Address, [](uint64_t Addr, const ModuleLookup& M) { + return Addr < M.Base; + }); + if (It != Lookup.begin()) + { + --It; + if (Address < It->End) + { + F.ModuleIndex = It->ModelIndex; + F.Offset = Address - It->Base; + } + } + return F; + }; + + eastl::vector<uint32_t> SortedCallstackIds; + SortedCallstackIds.reserve(RawCallstacks.size()); + for (const auto& [Id, RawFrames] : RawCallstacks) + { + ZEN_UNUSED(RawFrames); + SortedCallstackIds.push_back(Id); + } + eastl::sort(SortedCallstackIds.begin(), SortedCallstackIds.end()); + + Model.Callstacks.reserve(RawCallstacks.size()); + for (uint32_t Id : SortedCallstackIds) + { + auto RawIt = RawCallstacks.find(Id); + ZEN_ASSERT(RawIt != RawCallstacks.end()); + const eastl::vector<uint64_t>& RawFrames = RawIt->second; + + CallstackEntry Entry; + Entry.Id = Id; + Entry.Frames.reserve(RawFrames.size()); + for (uint64_t Addr : RawFrames) + { + Entry.Frames.push_back(ResolveFrame(Addr)); + } + Model.Callstacks.push_back(std::move(Entry)); + } + + Model.CallstackStats = AllocAn.BuildCallstackStats(); + Model.ChurnStats = AllocAn.BuildChurnStats(~uint64_t(0)); + Model.AllocSizeHistogram = AllocAn.BuildSizeHistogram(); + } + +} // namespace + +TraceModel +BuildTraceModel(const std::filesystem::path& FilePath, WorkerThreadPool& ThreadPool, const ProgressCallback& OnProgress) +{ + ::DataSource Source(FilePath); + + TraceTiming Timing; + + SessionAnalyzer SessionAn; + ModuleAnalyzer ModuleAn; + MetadataRegistry MetadataReg; + TimelineAnalyzer TimelineAn(&MetadataReg, &Timing); + LogAnalyzer LogAn(&Timing); + BookmarksAnalyzer BookmarkAn(&Timing); + CsvProfilerAnalyzer CsvAn(&Timing); + AllocationAnalyzer AllocAn(&Timing); + CallstackAnalyzer CallstackAn; + + // Tourist's Dispatcher only supports one subscription per event type, so we + // cannot run CpuAnalyzer alongside TimelineAnalyzer -- CpuAnalyzer would + // claim the CpuProfiler.Event* types first and TimelineAnalyzer would + // never receive any events. Instead, TimelineAnalyzer captures every + // scope interval and we derive the aggregate statistics from those + // intervals in a cheap post-pass below. + ::Dispatcher Dispatch; + Dispatch.add_analyzer(SessionAn); + Dispatch.add_analyzer(ModuleAn); + Dispatch.add_analyzer(MetadataReg); + Dispatch.add_analyzer(TimelineAn); + Dispatch.add_analyzer(LogAn); + Dispatch.add_analyzer(BookmarkAn); + Dispatch.add_analyzer(CsvAn); + Dispatch.add_analyzer(AllocAn); + Dispatch.add_analyzer(CallstackAn); + + zen::Stopwatch Timer; + TraceSummary Summary = IterateTrace( + Source, + [&](const ::EventParcel& Parcel) { Dispatch.on_parcel(Parcel); }, + OnProgress); + ZEN_INFO("Trace iteration complete: {} events in {}", + zen::ThousandsNum(Summary.TotalEvents), + zen::NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + + { + uint32_t StartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs(); + uint32_t EndUs = TimelineAn.MaxEndUs(); + uint64_t DurationMs = (EndUs > StartUs) ? (uint64_t(EndUs - StartUs) + 500) / 1000 : 0; + ZEN_INFO("Trace duration: {}", zen::NiceTimeSpanMs(DurationMs)); + } + + TraceModel Model; + Model.FilePath = FilePath; + Model.FileSize = uint64_t(std::filesystem::file_size(FilePath)); + Model.TotalEvents = Summary.TotalEvents; + Model.ParseTimeMs = Timer.GetElapsedTimeMs(); + Model.Session = SessionAn.Session; + + // Event type counts (sorted by count descending) + Model.EventTypeCounts.reserve(Summary.TypeInfo.size()); + for (auto& [Uid, Info] : Summary.TypeInfo) + { + Model.EventTypeCounts.push_back({std::move(Info.first), Info.second}); + } + eastl::sort(Model.EventTypeCounts.begin(), Model.EventTypeCounts.end(), [](const auto& A, const auto& B) { return A.Count > B.Count; }); + + // Flatten and sort threads by sort hint + Model.Threads.reserve(SessionAn.ThreadNames.size()); + for (const auto& [Tid, Info] : SessionAn.ThreadNames) + { + Model.Threads.push_back(Info); + } + eastl::sort(Model.Threads.begin(), Model.Threads.end(), [](const ThreadInfoEntry& A, const ThreadInfoEntry& B) { + return A.SortHint < B.SortHint; + }); + + // Flatten and sort channels by name + Model.Channels.reserve(SessionAn.Channels.size()); + for (const auto& [Id, Info] : SessionAn.Channels) + { + Model.Channels.push_back(Info); + } + eastl::sort(Model.Channels.begin(), Model.Channels.end(), [](const ChannelInfo& A, const ChannelInfo& B) { return A.Name < B.Name; }); + + { + ExtendableStringBuilder<512> Enabled; + for (const ChannelInfo& Ch : Model.Channels) + { + if (Ch.Enabled) + { + if (Enabled.Size() > 0) + { + Enabled.Append(", "); + } + Enabled.Append(Ch.Name); + } + } + if (Enabled.Size() > 0) + { + ZEN_INFO("Enabled channels: {}", Enabled); + } + } + + // Flatten and sort modules by name + Model.Modules.reserve(ModuleAn.ModulesByBase.size()); + for (const auto& [Base, Info] : ModuleAn.ModulesByBase) + { + Model.Modules.push_back(Info); + } + eastl::sort(Model.Modules.begin(), Model.Modules.end(), [](const ModuleInfo& A, const ModuleInfo& B) { return A.Name < B.Name; }); + + // CPU scope statistics and timeline building read from TimelineAn + // independently and write to separate Model fields, so overlap them. + Model.ScopeNames = TimelineAn.ScopeNames(); + + ZEN_INFO("Computing CPU scope statistics ({} scope names)", TimelineAn.ScopeNames().size()); + + // Kick off scope stats on a worker -- runs concurrently with the + // timeline copy + sort below. + Latch StatsLatch(1); + ThreadPool.ScheduleWork( + [&StatsLatch, &Model, &TimelineAn]() { + auto _ = MakeGuard([&StatsLatch]() { StatsLatch.CountDown(); }); + ComputeScopeStats(TimelineAn, Model); + }, + WorkerThreadPool::EMode::EnableBacklog); + + // Timelines -- build per-thread sort + LODs in parallel. + { + const auto& Threads = TimelineAn.Threads(); + size_t TotalScopes = 0; + for (const auto& [Tid, Thread] : Threads) + { + TotalScopes += Thread.Scopes.size(); + } + ZEN_INFO("Building timelines: {} threads, {} scopes (sort + LODs)", Threads.size(), zen::ThousandsNum(TotalScopes)); + Model.Timelines.resize(Threads.size()); + + // Populate timeline metadata on the main thread (cheap lookups). + size_t Idx = 0; + for (const auto& [Tid, Thread] : Threads) + { + ThreadTimeline& Timeline = Model.Timelines[Idx++]; + Timeline.ThreadId = Tid; + auto It = SessionAn.ThreadNames.find(Tid); + if (It != SessionAn.ThreadNames.end()) + { + Timeline.Name = It->second.Name; + Timeline.SortHint = It->second.SortHint; + } + Timeline.Scopes = Thread.Scopes; + } + + // Phase 1: Sort LOD 0 scopes per thread. + // ParallelSort fans out internally using the pool, so it must be + // called from the main thread to avoid nested fan-out deadlocks. + // Small timelines are dispatched to workers first (they just call + // eastl::sort -- no nesting). Then large ones are sorted one at a + // time from the main thread with full pool utilisation each. + // + // Tie-break on Depth so that scopes which start at the same micro + // timestamp come out parent-first (lower depth wins). This keeps + // the scope ordering well-defined and lets the front-end rely on + // outer scopes appearing before their nested children regardless + // of the order the analyzer happened to emit them. + { + auto Cmp = [](const TimelineScope& A, const TimelineScope& B) { + if (A.BeginUs != B.BeginUs) + { + return A.BeginUs < B.BeginUs; + } + return A.Depth < B.Depth; + }; + + if constexpr (kUseParallelSort) + { + constexpr size_t kParallelThreshold = 65536; + + // Dispatch small timelines to workers. + Latch SmallLatch(1); + for (size_t I = 0; I < Model.Timelines.size(); ++I) + { + if (Model.Timelines[I].Scopes.size() >= kParallelThreshold) + { + continue; + } + SmallLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&SmallLatch, &Cmp, &Timeline = Model.Timelines[I]]() { + auto _ = MakeGuard([&SmallLatch]() { SmallLatch.CountDown(); }); + eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + SmallLatch.CountDown(); + SmallLatch.Wait(); + + // Sort large timelines from the main thread so ParallelSort + // can fan out across the (now idle) pool without deadlocking. + for (ThreadTimeline& Timeline : Model.Timelines) + { + if (Timeline.Scopes.size() >= kParallelThreshold) + { + zen::ParallelSort(ThreadPool, Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); + } + } + } + else + { + Latch SortLatch(1); + for (size_t I = 0; I < Model.Timelines.size(); ++I) + { + SortLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&SortLatch, &Cmp, &Timeline = Model.Timelines[I]]() { + auto _ = MakeGuard([&SortLatch]() { SortLatch.CountDown(); }); + eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + SortLatch.CountDown(); + SortLatch.Wait(); + } + } + + // Phase 2: Build LOD levels -- one task per (thread, LOD) pair. + // Flat dispatch avoids nested fan-out which could deadlock the pool. + Latch LodLatch(1); + for (size_t I = 0; I < Model.Timelines.size(); ++I) + { + if (Model.Timelines[I].Scopes.empty()) + { + continue; + } + for (size_t L = 0; L < kTimelineLodCount; ++L) + { + LodLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&LodLatch, &Timeline = Model.Timelines[I], L]() { + auto _ = MakeGuard([&LodLatch]() { LodLatch.CountDown(); }); + BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[L], kTimelineLodResolutions[L]); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + } + LodLatch.CountDown(); + LodLatch.Wait(); + } + eastl::sort(Model.Timelines.begin(), Model.Timelines.end(), [](const ThreadTimeline& A, const ThreadTimeline& B) { + return A.SortHint < B.SortHint; + }); + + Model.TraceStartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs(); + Model.TraceEndUs = TimelineAn.MaxEndUs(); + + // Ensure scope stats computation (kicked off earlier) has finished. + StatsLatch.Wait(); + + ZEN_INFO("Processing {} log entries", zen::ThousandsNum(LogAn.Entries().size())); + ResolveLogCategories(LogAn, Model); + + ZEN_INFO("Sorting {} bookmarks, {} regions", BookmarkAn.MutableBookmarks().size(), BookmarkAn.MutableRegions().size()); + + // Bookmarks: move and sort by TimeUs. + Model.Bookmarks = std::move(BookmarkAn.MutableBookmarks()); + eastl::sort(Model.Bookmarks.begin(), Model.Bookmarks.end(), [](const Bookmark& A, const Bookmark& B) { return A.TimeUs < B.TimeUs; }); + + BuildRegionCategories(std::move(BookmarkAn.MutableRegions()), Model.TraceEndUs, Model); + + // CsvProfiler data + { + Model.CsvCategories = std::move(CsvAn.MutableCategories()); + Model.CsvStatDefs = std::move(CsvAn.MutableStatDefs()); + Model.CsvTimeSeries = CsvAn.BuildTimeSeries(); + Model.CsvEvents = std::move(CsvAn.MutableEvents()); + eastl::sort(Model.CsvEvents.begin(), Model.CsvEvents.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; }); + Model.CsvMetadata = std::move(CsvAn.MutableMetadata()); + ZEN_INFO("CSV profiler: {} categories, {} stats, {} series, {} events", + Model.CsvCategories.size(), + Model.CsvStatDefs.size(), + Model.CsvTimeSeries.size(), + Model.CsvEvents.size()); + } + + // Memory allocation data + { + AllocAn.EmitFinalSample(Model.TraceEndUs); + Model.AllocSummary = AllocAn.Summary(); + + // Flatten heaps map into sorted vector + Model.Heaps.reserve(AllocAn.Heaps().size()); + for (const auto& [Id, Info] : AllocAn.Heaps()) + { + Model.Heaps.push_back(Info); + } + eastl::sort(Model.Heaps.begin(), Model.Heaps.end(), [](const HeapInfo& A, const HeapInfo& B) { return A.Id < B.Id; }); + + // Flatten tags map into sorted vector + Model.Tags.reserve(AllocAn.Tags().size()); + for (const auto& [Tag, Info] : AllocAn.Tags()) + { + Model.Tags.push_back(Info); + } + eastl::sort(Model.Tags.begin(), Model.Tags.end(), [](const TagInfo& A, const TagInfo& B) { return A.Tag < B.Tag; }); + + // Move timeline (already time-ordered from Marker events) + Model.MemoryTimeline = std::move(AllocAn.MutableTimeline()); + + // Flatten per-root-heap stats into sorted vector + Model.HeapStats.reserve(AllocAn.RootHeapStats().size()); + for (const auto& [HeapId, Stat] : AllocAn.RootHeapStats()) + { + Model.HeapStats.push_back(Stat); + } + eastl::sort(Model.HeapStats.begin(), Model.HeapStats.end(), [](const HeapStat& A, const HeapStat& B) { + return A.HeapId < B.HeapId; + }); + + if (Model.AllocSummary.HasMemoryData) + { + ZEN_INFO("Memory: {} allocs, {} frees, peak {}, {} live, {} timeline samples", + zen::ThousandsNum(Model.AllocSummary.TotalAllocs + Model.AllocSummary.TotalReallocAllocs), + zen::ThousandsNum(Model.AllocSummary.TotalFrees + Model.AllocSummary.TotalReallocFrees), + zen::NiceBytes(uint64_t(Model.AllocSummary.PeakBytes)), + zen::ThousandsNum(Model.AllocSummary.LiveAllocations), + zen::ThousandsNum(Model.MemoryTimeline.size())); + } + } + + ResolveCallstacks(ModuleAn, CallstackAn, AllocAn, Model); + ZEN_INFO("Callstacks: {} unique, {} with live allocations", + zen::ThousandsNum(Model.Callstacks.size()), + zen::ThousandsNum(Model.CallstackStats.size())); + + return Model; +} + +////////////////////////////////////////////////////////////////////////////// +// Trace trim +// +// The trim pipeline operates entirely at the raw packet level: a .utrace on +// disk is identical to the wire format (see src/zenserver/trace/tracerecorder.cpp +// for the capture-side passthrough), so trimming reduces to "copy the preamble, +// then copy only the packets we want to keep". We never re-encode or re-emit +// any events, which sidesteps the fact that Tourist has no writer path. +// +// The algorithm: +// +// 1. Slurp the input file into memory and walk raw packets using the +// [size:uint16][thread_id:uint16][payload] framing. This gives an ordered +// list of packet descriptors keyed by file offset. +// +// 2. Classify packets by their on-disk thread_id: +// TID_TYPE -> always keep (type definitions) +// TID_IMPORTANT -> always keep (events of types marked TYPE_FLAG_IMPORTANT, +// i.e. session info, thread names, channel state, +// log categories, CPU specs, etc.) +// TID_SYNC -> always keep (transport barriers) +// TID_NORMAL+ -> keep only if the packet's events overlap the window +// +// 3. For normal-thread packets, run Tourist's reader with a bundle of size 1 +// so each Proto.read() scatters exactly one raw packet before emitting any +// events. Before the read call, we record the file offset of the current +// packet as the "latest packet" for its thread. TrimAnalyzer then decodes +// CpuProfiler batch events and attributes their timestamp ranges back to +// that thread's latest packet. The attribution can drift if Tourist +// buffers multiple packets on one thread, but the failure mode is that +// earlier packets lose attribution and are conservatively retained. +// +// 4. Write the output: the preamble bytes verbatim, followed by the raw +// bytes of each kept packet in original order. There is no trailer; the +// Tourist reader catches DataStream::Eof at the end of the stream. +// +// Coarse per-packet precision is accepted by design: a packet straddling a +// window edge is kept in full. CpuProfiler batches are self-contained per +// packet (each re-derives cycles from the trace-wide StartCycle), so dropping +// packets does not desync delta decoding on surviving ones, and orphaned leave +// events from half-open scopes are silently ignored by decoders. + +namespace { + + struct TrimPacketDesc + { + uint64_t FileOffset = 0; // offset of the [size:uint16] header in the file + uint32_t Size = 0; // total size including the 4-byte header + uint16_t ThreadIdRaw = 0; // thread_id as stored on disk, including PACKET_FLAG_COMPRESSED + }; + + // Parses the .utrace preamble in place to determine the byte offset where + // packets begin. Mirrors Preamble::parse_header in Tourist so we can run the + // raw walker without spinning up a second DataSource. Throws on a malformed + // preamble. + static uint64_t ParsePreambleLength(const uint8_t* Data, uint64_t Size) + { + if (Size < 8) + { + throw zen::runtime_error("Trace file too small to contain a preamble ({} bytes)", Size); + } + + uint32_t Magic = 0; + std::memcpy(&Magic, Data, sizeof(uint32_t)); + if (Magic != 'TRC2') + { + throw zen::runtime_error("Unexpected trace file magic value 0x{:08x}", Magic); + } + + uint16_t MetaSize = 0; + std::memcpy(&MetaSize, Data + 4, sizeof(uint16_t)); + + // magic(4) + meta_size(2) + metadata + transport(1) + protocol(1) + uint64_t PreambleLen = uint64_t(4) + 2 + MetaSize + 1 + 1; + if (PreambleLen > Size) + { + throw zen::runtime_error("Trace preamble extends past end of file ({} > {})", PreambleLen, Size); + } + + return PreambleLen; + } + + // Walks raw packets starting at PreambleLen. Returns one TrimPacketDesc per + // packet in original stream order. The walker stops gracefully on truncated + // data so partial traces still produce a usable packet list. + static eastl::vector<TrimPacketDesc> WalkRawPackets(const uint8_t* Data, uint64_t Size, uint64_t PreambleLen) + { + eastl::vector<TrimPacketDesc> Packets; + uint64_t Offset = PreambleLen; + + while (Offset + 4 <= Size) + { + uint16_t PacketSize = 0; + uint16_t ThreadIdRaw = 0; + std::memcpy(&PacketSize, Data + Offset, sizeof(uint16_t)); + std::memcpy(&ThreadIdRaw, Data + Offset + 2, sizeof(uint16_t)); + + if (PacketSize < 4) + { + // Malformed size; stop walking and accept whatever we have. + break; + } + + if (Offset + PacketSize > Size) + { + // Truncated tail -- drop it. + break; + } + + TrimPacketDesc Desc; + Desc.FileOffset = Offset; + Desc.Size = PacketSize; + Desc.ThreadIdRaw = ThreadIdRaw; + Packets.push_back(Desc); + + Offset += PacketSize; + } + + return Packets; + } + +} // namespace + +void +RunTraceTrim(const TraceTrimArgs& Args) +{ + if (!(Args.EndSec > Args.StartSec)) + { + throw zen::runtime_error("Invalid trim range: start={} end={}", Args.StartSec, Args.EndSec); + } + + // --- Read the input file --- + zen::BasicFile InputFile(Args.InputPath, zen::BasicFile::Mode::kRead); + zen::IoBuffer InputBuffer = InputFile.ReadAll(); + InputFile.Close(); + + const uint8_t* FileBytes = static_cast<const uint8_t*>(InputBuffer.GetData()); + const uint64_t FileSize = InputBuffer.GetSize(); + + const uint64_t PreambleLen = ParsePreambleLength(FileBytes, FileSize); + + // --- Raw packet walk --- + eastl::vector<TrimPacketDesc> Packets = WalkRawPackets(FileBytes, FileSize, PreambleLen); + if (Packets.empty()) + { + throw zen::runtime_error("Trace file contains no packets"); + } + + // Initial keep classification: definitions, important events, sync are + // always retained. Normal-thread packets start as drop candidates and get + // promoted if their decoded time range overlaps the window. + eastl::vector<uint8_t> Keep(Packets.size(), 0); + size_t NumAlwaysKept = 0; + for (size_t I = 0; I < Packets.size(); ++I) + { + uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; + if (Tid == TID_TYPE || Tid == TID_IMPORTANT || Tid == TID_SYNC) + { + Keep[I] = 1; + ++NumAlwaysKept; + } + } + + // --- Time-range classification via Tourist (bundle of 1) --- + TrimAnalyzer TrimAn; + TrimAn.EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6); + ::Dispatcher Dispatch; + Dispatch.add_analyzer(TrimAn); + + { + ::DataSource Source(Args.InputPath); + ::Allocator TraceAllocator; + ::Preamble Pream(Source, TraceAllocator); + ::Transport Xport = Pream.get_transport(); + ::Protocol Proto = Pream.get_protocol(); + + ::Packet OnePacket[1]; + ::EventParcel Parcel; + + try + { + while (::Bundle Bndl = Xport.read_packets(OnePacket)) + { + if (Bndl.empty()) + { + break; + } + + const ::Packet& P = Bndl[0]; + uint32_t Tid = P.get_thread_id(); + + if (Tid >= TID_NORMAL && Tid != TID_SYNC) + { + // Tourist's Packet::get_index() is the same sequential + // packet counter as our raw walker's vector position, + // since both read the stream from the start in order. + TrimAn.LastPacketIndexByThread[Tid] = P.get_index(); + } + + Parcel.reset(); + Proto.read(Parcel, Bndl); + Dispatch.on_parcel(Parcel); + } + } + catch (const DataStream::Eof&) + { + } + catch (const Exception::StreamError& E) + { + throw zen::runtime_error("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value); + } + } + + // --- Apply the window filter --- + // + // Per-packet filtering in the middle of a thread's stream is unsafe: + // Tourist's event parser holds per-thread continuation state (see + // EventParser::_fragment / _missing in + // thirdparty/tourist/trace/src/protocol.cpp) so an event can straddle a + // packet boundary on a normal thread. Removing a packet from the middle + // leaves subsequent packets on the same thread decoded against the wrong + // position in an in-flight event and Tourist crashes. We therefore only + // drop packets in two safe ways: + // + // 1. Whole-thread drop: a thread whose attributed packets are all + // outside the window has every one of its packets dropped. No + // surviving packet references that thread, so there is no state + // machine to corrupt. + // + // 2. Per-thread tail truncation: for a thread that does have in-window + // activity, drop every packet AFTER the latest in-window packet on + // that thread. Tail drops are safe because no later packet on the + // same thread can be looking forward to the dropped bytes; the + // parser just ends its stream for that thread at the truncation + // point, exactly like a trace that naturally stopped recording. + // + // Threads for which we never attributed any CpuProfiler batch events are + // retained in full; we have no evidence about their time range and + // can't safely drop them. + const uint32_t StartUs = uint32_t(std::max(0.0, Args.StartSec) * 1e6); + const uint32_t EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6); + + struct ThreadInfo + { + bool HasAnyBatch = false; + bool HasInWindowBatch = false; + // First packet index on this thread whose attributed CPU batches are + // *entirely* past EndUs. Every packet on this thread with an index + // >= this value is safe to tail-drop. Defaults to size_t(-1) (no cut + // point) when the thread has no such packet. + size_t FirstPastWindowIdx = size_t(-1); + }; + eastl::hash_map<uint32_t, ThreadInfo> ThreadInfos; + + for (size_t I = 0; I < Packets.size(); ++I) + { + uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; + if (Tid < TID_NORMAL || Tid == TID_SYNC) + { + continue; + } + + auto RangeIt = TrimAn.PacketRanges.find(uint32_t(I)); + if (RangeIt == TrimAn.PacketRanges.end()) + { + continue; + } + + ThreadInfo& Info = ThreadInfos[Tid]; + Info.HasAnyBatch = true; + const auto& Range = RangeIt->second; + if (Range.MaxUs >= StartUs && Range.MinUs <= EndUs) + { + Info.HasInWindowBatch = true; + } + if (Range.MinUs > EndUs && I < Info.FirstPastWindowIdx) + { + Info.FirstPastWindowIdx = I; + } + } + + size_t NumThreadsKept = 0; + size_t NumThreadsDropped = 0; + for (const auto& [Tid, Info] : ThreadInfos) + { + if (Info.HasInWindowBatch) + { + ++NumThreadsKept; + } + else + { + ++NumThreadsDropped; + } + } + + size_t NumInWindow = 0; + size_t NumTailDropped = 0; + size_t NumUnattributed = 0; + size_t NumDropped = 0; + + for (size_t I = 0; I < Packets.size(); ++I) + { + if (Keep[I]) + { + continue; + } + + uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; + auto It = ThreadInfos.find(Tid); + if (It == ThreadInfos.end() || !It->second.HasAnyBatch) + { + // We have no evidence for this thread's time range. Retain all + // its packets conservatively to avoid breaking Tourist's per- + // thread parser state. + Keep[I] = 1; + ++NumUnattributed; + continue; + } + + if (!It->second.HasInWindowBatch) + { + // Thread's attributed packets are all outside the window -- drop + // every packet on this thread. + ++NumDropped; + continue; + } + + if (I >= It->second.FirstPastWindowIdx) + { + // Past the first entirely-after-window packet on this thread -- + // candidate for tail truncation. Before dropping, check whether + // this packet carries a Leave event that closes a scope whose + // Enter was at or before the window end. If so, we MUST keep it + // so the downstream analyzer can render the long-running scope; + // otherwise the scope would sit unmatched on the open stack. + auto MustKeepIt = TrimAn.MustKeepPacketByThread.find(Tid); + if (MustKeepIt != TrimAn.MustKeepPacketByThread.end() && I <= MustKeepIt->second) + { + Keep[I] = 1; + ++NumInWindow; + continue; + } + + ++NumTailDropped; + continue; + } + + Keep[I] = 1; + ++NumInWindow; + } + + // --- Write output --- + std::error_code Ec; + std::filesystem::create_directories(Args.OutputPath.parent_path(), Ec); + + zen::BasicFile OutputFile(Args.OutputPath, zen::BasicFile::Mode::kTruncate); + + uint64_t OutOffset = 0; + OutputFile.Write(FileBytes, PreambleLen, OutOffset); + OutOffset += PreambleLen; + + uint64_t KeptBytes = 0; + for (size_t I = 0; I < Packets.size(); ++I) + { + if (!Keep[I]) + { + continue; + } + OutputFile.Write(FileBytes + Packets[I].FileOffset, Packets[I].Size, OutOffset); + OutOffset += Packets[I].Size; + KeptBytes += Packets[I].Size; + } + + OutputFile.Flush(); + OutputFile.Close(); + + ZEN_CONSOLE("Trimmed trace written to {}", Args.OutputPath); + ZEN_CONSOLE(" Input: {} ({} packets)", zen::NiceBytes(FileSize), zen::ThousandsNum(Packets.size())); + ZEN_CONSOLE(" Output: {} ({} packets)", + zen::NiceBytes(OutOffset), + zen::ThousandsNum(NumAlwaysKept + NumInWindow + NumUnattributed)); + ZEN_CONSOLE(" Always kept: {} packets (types / important / sync)", zen::ThousandsNum(NumAlwaysKept)); + ZEN_CONSOLE(" Thread kept: {} packets from {} threads with in-window activity", + zen::ThousandsNum(NumInWindow), + zen::ThousandsNum(NumThreadsKept)); + ZEN_CONSOLE(" Thread dropped: {} packets from {} threads with no in-window activity", + zen::ThousandsNum(NumDropped), + zen::ThousandsNum(NumThreadsDropped)); + ZEN_CONSOLE(" Tail dropped: {} packets past the latest in-window packet on their thread", zen::ThousandsNum(NumTailDropped)); + ZEN_CONSOLE(" Unattributed: {} packets (retained conservatively)", zen::ThousandsNum(NumUnattributed)); + ZEN_UNUSED(KeptBytes); + + // --- Diagnostic: summarise the attributed time range distribution --- + { + uint32_t GlobalMin = ~0u; + uint32_t GlobalMax = 0; + for (const auto& [Idx, R] : TrimAn.PacketRanges) + { + GlobalMin = std::min(GlobalMin, R.MinUs); + GlobalMax = std::max(GlobalMax, R.MaxUs); + } + ZEN_CONSOLE(" Attributed: {} packets, window {:.3f}s .. {:.3f}s", + zen::ThousandsNum(TrimAn.PacketRanges.size()), + double(GlobalMin) / 1e6, + double(GlobalMax) / 1e6); + } +} + +} // namespace zen::trace_detail |