aboutsummaryrefslogtreecommitdiff
path: root/src/zen/trace/trace_cache.cpp
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-04-20 21:50:41 +0200
committerGitHub Enterprise <[email protected]>2026-04-20 21:50:41 +0200
commit2dfb5da16b97a6c12e01977af5b5188522178a4e (patch)
tree428aa0aa8e6079c64438931e0fd4f828c613c94d /src/zen/trace/trace_cache.cpp
parentAdd CompactString utility type (#990) (diff)
downloadarchived-zen-2dfb5da16b97a6c12e01977af5b5188522178a4e.tar.xz
archived-zen-2dfb5da16b97a6c12e01977af5b5188522178a4e.zip
zen trace analysis support (#945)
Integrates the **tourist** trace analysis library and builds a full `zen trace` command suite for working with Unreal Engine `.utrace` files. ### Trace analysis library (`thirdparty/tourist/`) - Adds the tourist library as a third-party dependency with three modules: **foundation** (platform primitives, memory, scheduling), **trace** (UE Trace protocol decoding), and **analysis** (event dispatching and analyzer framework). - Cross-platform support for Windows, Linux, and macOS. ### `zen trace` CLI commands (`src/zen/cmds/`, `src/zen/trace/`) - **`zen trace analyze`** — Summarize a `.utrace` file: session metadata, thread inventory, command line + build configuration, CPU profiling scopes, timing, event rates, log messages, and (with symbols) memory allocation metrics including live-allocs dumps, callstack-keyed aggregation, and allocation churn. Optional HTML output for memory reports. - **`zen trace inspect`** — Dump the event schema (declared types, fields, sizes) from a trace file. - **`zen trace trim`** — Extract a time-window from a trace into a new `.utrace` file. - **`zen trace serve`** — Launch a local HTTP server hosting an interactive trace viewer; opens in the default browser. ### Symbolication (`src/zen/trace/symbol_resolver.*`, `thirdparty/raw_pdb/`) - Pluggable resolver with multiple backends: `pdb` (in-tree raw_pdb), `dbghelp` (Windows), `llvm-symbolizer` (all platforms), `atos` (macOS). An `auto` backend picks the best available tool per platform. - Microsoft Symbol Server support: downloads PDBs on demand using a redirect-aware HTTP client. - Local PDB cache keyed by image GUID preserves symbols across binary recompilation. - Callstack trimming heuristic strips UE internal noise from reports. - Binary analysis cache (`.ucache_z`) avoids re-resolving the same trace. 
### Interactive trace viewer (`src/zen/frontend/html/`, `src/zen/trace/trace_viewer_service.*`) - Timeline: scope-level detail, horizontal zoom/pan, vertical scrolling, viewport-driven loading with pre-computed LOD for responsive navigation of large traces. - Thread grouping (collapsible sidebar sections) synthesized from name suffixes, natural sort order, visual distinction between lane threads and OS threads. - Bookmark and region annotations; region categories with per-category toggles; bookmark marker toggle in the toolbar. - Filterable Logs tab showing captured `UE_LOG` output. - Stats tab with per-scope aggregate statistics. - Memory tab with interactive allocation analysis and an allocation size histogram. - CsvProfiler event parsing and chart UI. ### Other in-branch supporting changes - **Cross-platform browser launcher** (`browser_launcher.{h,cpp}`) used by `trace serve`. - **`ReciprocalU64`** fast 64-bit integer division (zencore/intmath) for trace analyzers. - **`parallelsort`** cross-platform parallel sort helper (zenutil). - Frontend zip build rule so the viewer's HTML assets are bundled into `zen.exe`. - `/Zo` flag for better optimized debug info on Windows release builds. - `trace-tests.cpp` in the `zen-test` harness (harness itself landed on main via #985).
Diffstat (limited to 'src/zen/trace/trace_cache.cpp')
-rw-r--r--src/zen/trace/trace_cache.cpp1104
1 files changed, 1104 insertions, 0 deletions
diff --git a/src/zen/trace/trace_cache.cpp b/src/zen/trace/trace_cache.cpp
new file mode 100644
index 000000000..165c1eecf
--- /dev/null
+++ b/src/zen/trace/trace_cache.cpp
@@ -0,0 +1,1104 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "trace_cache.h"
+
+#include <zencore/basicfile.h>
+#include <zencore/compress.h>
+#include <zencore/filesystem.h>
+#include <zencore/fmtutils.h>
+#include <zencore/iohash.h>
+#include <zencore/logging.h>
+#include <zencore/stream.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <EASTL/sort.h>
+#include <EASTL/vector.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#include <cstring>
+#include <filesystem>
+
+namespace zen::trace_detail {
+
+// ===========================================================================
+// StringTableBuilder — write-path helper that deduplicates and packs strings
+//
+// Strings are appended back-to-back (null-terminated) in a single contiguous
+// block. Deduplication is keyed by (offset, length) pairs into that block so
+// no separate string copies are made. To look up an incoming string_view we
+// speculatively append it, build a key, and look it up. On duplicate the
+// append is rolled back by truncating the buffer.
+// ===========================================================================
+
+class StringTableBuilder
+{
+public:
+    // The dedup map's hash/equality functors are constructed with the address
+    // of this object's m_Packed buffer.
+    StringTableBuilder() : m_IndexMap(0, StringHash{&m_Packed}, StringEq{&m_Packed}) { m_Packed.reserve(4096); }
+
+    // Not copyable (and therefore not movable): the functors stored inside
+    // m_IndexMap point at *this* object's m_Packed, so a memberwise copy would
+    // produce a map that hashes and compares against another object's buffer.
+    StringTableBuilder(const StringTableBuilder&) = delete;
+    StringTableBuilder& operator=(const StringTableBuilder&) = delete;
+
+    // Intern a string and return its index. Deduplicates across calls: the
+    // same string content always yields the same index.
+    // NOTE(review): offsets and lengths are stored as uint32_t, so the packed
+    // block is implicitly assumed to stay below 4 GiB.
+    uint32_t Intern(std::string_view Str)
+    {
+        // Speculatively append the string so that the hash/eq functors can
+        // read it from the packed buffer (avoids dangling string_view keys).
+        uint32_t SpecOffset = uint32_t(m_Packed.size());
+        uint32_t SpecLength = uint32_t(Str.size());
+
+        m_Packed.resize(m_Packed.size() + Str.size() + 1);
+        if (!Str.empty())
+        {
+            memcpy(m_Packed.data() + SpecOffset, Str.data(), Str.size());
+        }
+        m_Packed[SpecOffset + Str.size()] = '\0';
+
+        StringKey Key{SpecOffset, SpecLength};
+        auto It = m_IndexMap.find(Key);
+        if (It != m_IndexMap.end())
+        {
+            // Duplicate — roll back the speculative append.
+            m_Packed.resize(SpecOffset);
+            return It->second;
+        }
+
+        // New string — keep the append and record its index.
+        uint32_t Index = uint32_t(m_Offsets.size());
+        m_Offsets.push_back(SpecOffset);
+        m_IndexMap.emplace(Key, Index);
+        return Index;
+    }
+
+    // Serialize: [uint32_t count][uint32_t offsets[count]][packed strings]
+    // The returned buffer is a clone of the writer's data.
+    SharedBuffer Serialize() const
+    {
+        BinaryWriter W;
+        uint32_t Count = uint32_t(m_Offsets.size());
+        W.Write(&Count, sizeof(Count));
+        if (Count > 0)
+        {
+            W.Write(m_Offsets.data(), m_Offsets.size() * sizeof(uint32_t));
+        }
+        if (!m_Packed.empty())
+        {
+            W.Write(m_Packed.data(), m_Packed.size());
+        }
+        return SharedBuffer(IoBuffer(IoBuffer::Clone, W.Data(), W.Size()));
+    }
+
+private:
+    // Identifies a string by its position inside m_Packed.
+    struct StringKey
+    {
+        uint32_t Offset;
+        uint32_t Length;
+    };
+
+    // Hashes the bytes a StringKey refers to inside the packed buffer.
+    struct StringHash
+    {
+        const eastl::vector<uint8_t>* Packed;
+        size_t operator()(const StringKey& K) const
+        {
+            std::string_view Sv(reinterpret_cast<const char*>(Packed->data()) + K.Offset, K.Length);
+            return std::hash<std::string_view>{}(Sv);
+        }
+    };
+
+    // Two keys are equal when they refer to byte-identical ranges.
+    struct StringEq
+    {
+        const eastl::vector<uint8_t>* Packed;
+        bool operator()(const StringKey& A, const StringKey& B) const
+        {
+            if (A.Length != B.Length)
+            {
+                return false;
+            }
+            return memcmp(Packed->data() + A.Offset, Packed->data() + B.Offset, A.Length) == 0;
+        }
+    };
+
+    eastl::vector<uint8_t> m_Packed;    // null-terminated strings back-to-back
+    eastl::vector<uint32_t> m_Offsets;  // byte offset into m_Packed for each string
+
+    // Dedup map: StringKey (offset+length into m_Packed) → string index.
+    // Hash/eq functors hold a pointer to m_Packed (stable address) and read
+    // via data() at call time, so reallocation of m_Packed is safe.
+    eastl::hash_map<StringKey, uint32_t, StringHash, StringEq> m_IndexMap;
+};
+
+// ===========================================================================
+// StringTableReader — read-path helper for O(1) string lookup by index
+// ===========================================================================
+
+class StringTableReader
+{
+public:
+    // Bind the reader to a serialized string table laid out as
+    // [uint32_t count][uint32_t offsets[count]][packed strings].
+    // Returns false (leaving the reader empty) when the buffer is too small
+    // to hold the count and the offset array it announces.
+    bool Init(const SharedBuffer& Data)
+    {
+        if (Data.GetSize() < sizeof(uint32_t))
+        {
+            return false;
+        }
+
+        const uint8_t* Base = reinterpret_cast<const uint8_t*>(Data.GetData());
+
+        // Read the count into a local first so a failed Init never leaves a
+        // non-zero m_Count paired with a null m_Offsets.
+        uint32_t Count = 0;
+        memcpy(&Count, Base, sizeof(uint32_t));
+
+        size_t RequiredHeader = sizeof(uint32_t) + size_t(Count) * sizeof(uint32_t);
+        if (Data.GetSize() < RequiredHeader)
+        {
+            return false;
+        }
+
+        m_Count = Count;
+        m_Offsets = reinterpret_cast<const uint32_t*>(Base + sizeof(uint32_t));
+        m_PackedBase = reinterpret_cast<const char*>(Base + RequiredHeader);
+        m_PackedSize = Data.GetSize() - RequiredHeader;
+        m_OwningBuffer = Data;
+        return true;
+    }
+
+    // Look up a string by index. Returns an empty view for an out-of-range
+    // index, an offset outside the packed block, or a string whose null
+    // terminator is missing from the packed block (corrupt table).
+    std::string_view Get(uint32_t Index) const
+    {
+        if (Index >= m_Count)
+        {
+            return {};
+        }
+        uint32_t Off = m_Offsets[Index];
+        if (Off >= m_PackedSize)
+        {
+            return {};
+        }
+
+        // Search for the terminator only within the packed block so that a
+        // malformed table cannot make us read past the end of the buffer.
+        const char* Start = m_PackedBase + Off;
+        const size_t MaxLength = m_PackedSize - Off;
+        const void* Terminator = memchr(Start, '\0', MaxLength);
+        if (Terminator == nullptr)
+        {
+            return {};
+        }
+        return std::string_view(Start, size_t(static_cast<const char*>(Terminator) - Start));
+    }
+
+    // Number of strings in the table (0 until Init succeeds).
+    uint32_t Count() const { return m_Count; }
+
+private:
+    uint32_t m_Count = 0;
+    const uint32_t* m_Offsets = nullptr;
+    const char* m_PackedBase = nullptr;
+    size_t m_PackedSize = 0;
+    SharedBuffer m_OwningBuffer;  // keeps the decompressed data alive
+};
+
+// ===========================================================================
+// CachedSymbolResolver — SymbolResolver backed by cache data
+// ===========================================================================
+
+class CachedSymbolResolver final : public SymbolResolver
+{
+public:
+    // Module loading is a no-op for this resolver; lookups are answered
+    // solely from m_Symbols.
+    void LoadModule(const ModuleInfo&) override {}
+
+    // Returns the stored symbol string for Address, or an empty string when
+    // the address is not present in the table.
+    std::string Resolve(uint64_t Address) const override
+    {
+        const auto Found = m_Symbols.find(Address);
+        return (Found != m_Symbols.end()) ? Found->second : std::string();
+    }
+
+    // Address → symbol string. Public so the section reader can fill it.
+    eastl::hash_map<uint64_t, std::string> m_Symbols;
+};
+
+// ===========================================================================
+// Section writers (model → binary blob)
+// ===========================================================================
+
+namespace {
+
+ // Append the raw bytes of Value to the writer.
+ template<typename T>
+ void WritePod(BinaryWriter& W, const T& Value)
+ {
+     W.Write(&Value, sizeof(Value));
+ }
+
+ // Append a 32-bit element count to the writer. The template parameter T is
+ // not used by the body; it has to be supplied explicitly by the caller.
+ template<typename T>
+ void WriteCount(BinaryWriter& W, uint32_t Count)
+ {
+     W.Write(&Count, sizeof(uint32_t));
+ }
+
+ // Clone the writer's current contents into a standalone SharedBuffer.
+ SharedBuffer ToSharedBuffer(const BinaryWriter& W)
+ {
+     IoBuffer Copy(IoBuffer::Clone, W.Data(), W.Size());
+     return SharedBuffer(std::move(Copy));
+ }
+
+ // -- Metadata section --
+
+ // Serialize the metadata part of the model: a MetadataPod followed by the
+ // count-prefixed thread, channel, module (+ ImageId blob), event-type-count
+ // and scope-stat arrays. Every string is replaced by its index in Strings.
+ SharedBuffer WriteMetadataSection(const TraceModel& Model, StringTableBuilder& Strings)
+ {
+     BinaryWriter Out;
+
+     // Fixed-size header: file-level numbers first, then session info.
+     MetadataPod Meta = {};
+     Meta.FileSize = Model.FileSize;
+     Meta.TotalEvents = Model.TotalEvents;
+     Meta.ParseTimeMs = Model.ParseTimeMs;
+     Meta.TraceStartUs = Model.TraceStartUs;
+     Meta.TraceEndUs = Model.TraceEndUs;
+
+     const auto& Session = Model.Session;
+     Meta.SessionPlatform = Strings.Intern(Session.Platform);
+     Meta.SessionAppName = Strings.Intern(Session.AppName);
+     Meta.SessionProjectName = Strings.Intern(Session.ProjectName);
+     Meta.SessionCommandLine = Strings.Intern(Session.CommandLine);
+     Meta.SessionBranch = Strings.Intern(Session.Branch);
+     Meta.SessionBuildVersion = Strings.Intern(Session.BuildVersion);
+     Meta.SessionChangelist = Session.Changelist;
+     Meta.SessionConfigType = Session.ConfigurationType;
+     Meta.SessionHasSession = Session.HasSession ? 1 : 0;
+     WritePod(Out, Meta);
+
+     // Threads
+     const uint32_t NumThreads = uint32_t(Model.Threads.size());
+     WritePod(Out, NumThreads);
+     for (const ThreadInfoEntry& Thread : Model.Threads)
+     {
+         ThreadInfoPod Pod = {};
+         Pod.ThreadId = Thread.ThreadId;
+         Pod.Name = Strings.Intern(Thread.Name);
+         Pod.GroupName = Strings.Intern(Thread.GroupName);
+         Pod.SystemId = Thread.SystemId;
+         Pod.SortHint = Thread.SortHint;
+         WritePod(Out, Pod);
+     }
+
+     // Channels
+     const uint32_t NumChannels = uint32_t(Model.Channels.size());
+     WritePod(Out, NumChannels);
+     for (const ChannelInfo& Channel : Model.Channels)
+     {
+         ChannelInfoPod Pod = {};
+         Pod.Name = Strings.Intern(Channel.Name);
+         Pod.Enabled = Channel.Enabled ? 1 : 0;
+         Pod.ReadOnly = Channel.ReadOnly ? 1 : 0;
+         WritePod(Out, Pod);
+     }
+
+     // Modules: one fixed-size record per module. Each record stores where its
+     // ImageId bytes live inside the blob that follows the records, so the
+     // offset is simply the running total of the preceding ImageId sizes.
+     const uint32_t NumModules = uint32_t(Model.Modules.size());
+     WritePod(Out, NumModules);
+
+     uint32_t RunningImageIdOffset = 0;
+     for (uint32_t Index = 0; Index < NumModules; ++Index)
+     {
+         const ModuleInfo& Module = Model.Modules[Index];
+         const uint32_t ImageIdBytes = uint32_t(Module.ImageId.size());
+
+         ModuleInfoPod Pod = {};
+         Pod.Name = Strings.Intern(Module.Name);
+         Pod.FullPath = Strings.Intern(Module.FullPath);
+         Pod.Base = Module.Base;
+         Pod.Size = Module.Size;
+         Pod.ImageIdSize = ImageIdBytes;
+         Pod.ImageIdOffset = RunningImageIdOffset;
+         WritePod(Out, Pod);
+
+         RunningImageIdOffset += ImageIdBytes;
+     }
+
+     // ImageId blob: the raw ImageId bytes of every module, back-to-back.
+     for (const ModuleInfo& Module : Model.Modules)
+     {
+         if (Module.ImageId.empty())
+         {
+             continue;
+         }
+         Out.Write(Module.ImageId.data(), Module.ImageId.size());
+     }
+
+     // EventTypeCounts
+     const uint32_t NumEventTypes = uint32_t(Model.EventTypeCounts.size());
+     WritePod(Out, NumEventTypes);
+     for (const TraceModel::EventTypeCount& Entry : Model.EventTypeCounts)
+     {
+         EventTypeCountPod Pod = {};
+         Pod.Name = Strings.Intern(Entry.Name);
+         Pod.Count = Entry.Count;
+         WritePod(Out, Pod);
+     }
+
+     // ScopeStats
+     const uint32_t NumScopeStats = uint32_t(Model.ScopeStats.size());
+     WritePod(Out, NumScopeStats);
+     for (const CpuScopeStat& Stat : Model.ScopeStats)
+     {
+         CpuScopeStatPod Pod = {};
+         Pod.Name = Strings.Intern(Stat.Name);
+         Pod.MinUs = Stat.MinUs;
+         Pod.MaxUs = Stat.MaxUs;
+         Pod.Count = Stat.Count;
+         Pod.MeanUs = Stat.MeanUs;
+         Pod.StdDevUs = Stat.StdDevUs;
+         WritePod(Out, Pod);
+     }
+
+     return ToSharedBuffer(Out);
+ }
+
+ // -- Memory section --
+
+ // Serialize the memory part of the model: an AllocSummaryPod followed by the
+ // count-prefixed heap, heap-stat, callstack-alloc-stat and churn-stat arrays.
+ // Heap names are stored as indices into Strings.
+ SharedBuffer WriteMemorySection(const TraceModel& Model, StringTableBuilder& Strings)
+ {
+     BinaryWriter Out;
+
+     // AllocSummary
+     const auto& Summary = Model.AllocSummary;
+     AllocSummaryPod SummaryPod = {};
+     SummaryPod.HasMemoryData = Summary.HasMemoryData ? 1 : 0;
+     SummaryPod.PeakTimeUs = Summary.PeakTimeUs;
+     SummaryPod.LiveAllocations = Summary.LiveAllocations;
+     SummaryPod.TotalAllocs = Summary.TotalAllocs;
+     SummaryPod.TotalFrees = Summary.TotalFrees;
+     SummaryPod.TotalReallocAllocs = Summary.TotalReallocAllocs;
+     SummaryPod.TotalReallocFrees = Summary.TotalReallocFrees;
+     SummaryPod.PeakBytes = Summary.PeakBytes;
+     SummaryPod.EndBytes = Summary.EndBytes;
+     WritePod(Out, SummaryPod);
+
+     // Heaps
+     const uint32_t NumHeaps = uint32_t(Model.Heaps.size());
+     WritePod(Out, NumHeaps);
+     for (const HeapInfo& Heap : Model.Heaps)
+     {
+         HeapInfoPod Pod = {};
+         Pod.Id = Heap.Id;
+         Pod.ParentId = Heap.ParentId;
+         Pod.Flags = Heap.Flags;
+         Pod.Name = Strings.Intern(Heap.Name);
+         WritePod(Out, Pod);
+     }
+
+     // HeapStats
+     const uint32_t NumHeapStats = uint32_t(Model.HeapStats.size());
+     WritePod(Out, NumHeapStats);
+     for (const HeapStat& Stat : Model.HeapStats)
+     {
+         HeapStatPod Pod = {};
+         Pod.HeapId = Stat.HeapId;
+         Pod.CurrentBytes = Stat.CurrentBytes;
+         Pod.PeakBytes = Stat.PeakBytes;
+         Pod.AllocCount = Stat.AllocCount;
+         Pod.FreeCount = Stat.FreeCount;
+         WritePod(Out, Pod);
+     }
+
+     // CallstackAllocStats — at most the first four thread ids of each entry
+     // are stored in the record.
+     const uint32_t NumAllocStats = uint32_t(Model.CallstackStats.size());
+     WritePod(Out, NumAllocStats);
+     for (const CallstackAllocStat& Stat : Model.CallstackStats)
+     {
+         CallstackAllocStatPod Pod = {};
+         Pod.CallstackId = Stat.CallstackId;
+         Pod.LiveCount = Stat.LiveCount;
+         Pod.LiveBytes = Stat.LiveBytes;
+         Pod.ThreadIdCount = uint32_t(std::min(Stat.ThreadIds.size(), size_t(4)));
+         for (uint32_t Slot = 0; Slot != Pod.ThreadIdCount; ++Slot)
+         {
+             Pod.ThreadIds[Slot] = Stat.ThreadIds[Slot];
+         }
+         WritePod(Out, Pod);
+     }
+
+     // ChurnStats
+     const uint32_t NumChurnStats = uint32_t(Model.ChurnStats.size());
+     WritePod(Out, NumChurnStats);
+     for (const CallstackChurnStat& Stat : Model.ChurnStats)
+     {
+         CallstackChurnStatPod Pod = {};
+         Pod.CallstackId = Stat.CallstackId;
+         Pod.ChurnAllocs = Stat.ChurnAllocs;
+         Pod.ChurnBytes = Stat.ChurnBytes;
+         Pod.TotalAllocs = Stat.TotalAllocs;
+         Pod.TotalBytes = Stat.TotalBytes;
+         Pod.MeanDistance = Stat.MeanDistance;
+         WritePod(Out, Pod);
+     }
+
+     return ToSharedBuffer(Out);
+ }
+
+ // -- Callstacks section --
+
+ // Serialize callstacks as [count][header per callstack][all frames].
+ // Each header records the callstack's frame count and the index of its
+ // first frame inside the flat frame array that follows the headers.
+ SharedBuffer WriteCallstacksSection(const TraceModel& Model)
+ {
+     BinaryWriter Out;
+
+     const uint32_t NumCallstacks = uint32_t(Model.Callstacks.size());
+     WritePod(Out, NumCallstacks);
+
+     // Headers: the frame offset is the running sum of earlier frame counts.
+     uint32_t NextFrameIndex = 0;
+     for (const CallstackEntry& Stack : Model.Callstacks)
+     {
+         const uint32_t NumFrames = uint32_t(Stack.Frames.size());
+
+         CallstackHeaderPod Header = {};
+         Header.Id = Stack.Id;
+         Header.FrameCount = NumFrames;
+         Header.FrameOffset = NextFrameIndex;
+         WritePod(Out, Header);
+
+         NextFrameIndex += NumFrames;
+     }
+
+     // Frames of every callstack, in the same order as the headers.
+     for (const CallstackEntry& Stack : Model.Callstacks)
+     {
+         for (const ResolvedFrame& Frame : Stack.Frames)
+         {
+             ResolvedFramePod Pod = {};
+             Pod.Address = Frame.Address;
+             Pod.ModuleIndex = Frame.ModuleIndex;
+             Pod.Offset = Frame.Offset;
+             WritePod(Out, Pod);
+         }
+     }
+
+     return ToSharedBuffer(Out);
+ }
+
+ // -- Symbols section --
+
+ // Serialize the resolved symbols as [count][SymbolEntryPod entries], with
+ // the entries ordered by ascending address. Symbol text is stored as an
+ // index into Strings.
+ SharedBuffer WriteSymbolsSection(const eastl::hash_map<uint64_t, std::string>& ResolvedSymbols, StringTableBuilder& Strings)
+ {
+     BinaryWriter Out;
+
+     // Flatten the map into records, interning each symbol string.
+     eastl::vector<SymbolEntryPod> Sorted;
+     Sorted.reserve(ResolvedSymbols.size());
+     for (const auto& Pair : ResolvedSymbols)
+     {
+         SymbolEntryPod Pod = {};
+         Pod.Address = Pair.first;
+         Pod.StringIdx = Strings.Intern(Pair.second);
+         Sorted.push_back(Pod);
+     }
+
+     const auto ByAddress = [](const SymbolEntryPod& Lhs, const SymbolEntryPod& Rhs) { return Lhs.Address < Rhs.Address; };
+     eastl::sort(Sorted.begin(), Sorted.end(), ByAddress);
+
+     const uint32_t NumEntries = uint32_t(Sorted.size());
+     WritePod(Out, NumEntries);
+     if (!Sorted.empty())
+     {
+         Out.Write(Sorted.data(), Sorted.size() * sizeof(SymbolEntryPod));
+     }
+
+     return ToSharedBuffer(Out);
+ }
+
+ // -- Compression helper --
+
+ // Compress one raw section payload with the compressor and level used for
+ // every section of the cache file.
+ CompressedBuffer CompressSection(const SharedBuffer& Raw)
+ {
+     constexpr auto Compressor = OodleCompressor::Mermaid;
+     constexpr auto Level = OodleCompressionLevel::VeryFast;
+     return CompressedBuffer::Compress(Raw, Compressor, Level);
+ }
+
+ // ===========================================================================
+ // Section readers (binary blob → model)
+ // ===========================================================================
+
+ // Read sizeof(T) raw bytes from the reader into Out. Returns false, without
+ // touching the reader or Out, when fewer than sizeof(T) bytes remain.
+ template<typename T>
+ bool ReadPod(BinaryReader& R, T& Out)
+ {
+     const bool HasEnough = !(R.Remaining() < sizeof(T));
+     if (HasEnough)
+     {
+         R.Read(&Out, sizeof(T));
+     }
+     return HasEnough;
+ }
+
+ // Convenience wrapper: read a single uint32_t via ReadPod.
+ bool ReadUint32(BinaryReader& R, uint32_t& Out)
+ {
+     return ReadPod<uint32_t>(R, Out);
+ }
+
+ // Deserialize the metadata section (see WriteMetadataSection for the layout)
+ // into Model, resolving string indices through Strings.
+ // Returns false when the data is truncated or internally inconsistent; in
+ // that case Model may have been partially filled in.
+ bool ReadMetadataSection(const SharedBuffer& Data, const StringTableReader& Strings, TraceModel& Model)
+ {
+     BinaryReader R(Data.GetData(), Data.GetSize());
+
+     // True when Count records of ElemSize bytes are still available in R.
+     // Checked before sizing any container so that a corrupt count cannot
+     // trigger a huge allocation ahead of the truncation being noticed.
+     auto Fits = [&R](uint64_t Count, size_t ElemSize) { return Count <= uint64_t(R.Remaining()) / ElemSize; };
+
+     MetadataPod M;
+     if (!ReadPod(R, M))
+     {
+         return false;
+     }
+     Model.FileSize = M.FileSize;
+     Model.TotalEvents = M.TotalEvents;
+     Model.ParseTimeMs = M.ParseTimeMs;
+     Model.TraceStartUs = M.TraceStartUs;
+     Model.TraceEndUs = M.TraceEndUs;
+
+     Model.Session.Platform = std::string(Strings.Get(M.SessionPlatform));
+     Model.Session.AppName = std::string(Strings.Get(M.SessionAppName));
+     Model.Session.ProjectName = std::string(Strings.Get(M.SessionProjectName));
+     Model.Session.CommandLine = std::string(Strings.Get(M.SessionCommandLine));
+     Model.Session.Branch = std::string(Strings.Get(M.SessionBranch));
+     Model.Session.BuildVersion = std::string(Strings.Get(M.SessionBuildVersion));
+     Model.Session.Changelist = M.SessionChangelist;
+     Model.Session.ConfigurationType = M.SessionConfigType;
+     Model.Session.HasSession = (M.SessionHasSession != 0);
+
+     // Threads
+     uint32_t ThreadCount = 0;
+     if (!ReadUint32(R, ThreadCount) || !Fits(ThreadCount, sizeof(ThreadInfoPod)))
+     {
+         return false;
+     }
+     Model.Threads.resize(ThreadCount);
+     for (uint32_t I = 0; I < ThreadCount; ++I)
+     {
+         ThreadInfoPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.Threads[I].ThreadId = P.ThreadId;
+         Model.Threads[I].Name = std::string(Strings.Get(P.Name));
+         Model.Threads[I].GroupName = std::string(Strings.Get(P.GroupName));
+         Model.Threads[I].SystemId = P.SystemId;
+         Model.Threads[I].SortHint = P.SortHint;
+     }
+
+     // Channels
+     uint32_t ChannelCount = 0;
+     if (!ReadUint32(R, ChannelCount) || !Fits(ChannelCount, sizeof(ChannelInfoPod)))
+     {
+         return false;
+     }
+     Model.Channels.resize(ChannelCount);
+     for (uint32_t I = 0; I < ChannelCount; ++I)
+     {
+         ChannelInfoPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.Channels[I].Name = std::string(Strings.Get(P.Name));
+         Model.Channels[I].Enabled = (P.Enabled != 0);
+         Model.Channels[I].ReadOnly = (P.ReadOnly != 0);
+     }
+
+     // Modules
+     uint32_t ModuleCount = 0;
+     if (!ReadUint32(R, ModuleCount) || !Fits(ModuleCount, sizeof(ModuleInfoPod)))
+     {
+         return false;
+     }
+
+     // Read ModuleInfoPod entries first, then the ImageId blob
+     eastl::vector<ModuleInfoPod> ModulePods(ModuleCount);
+     for (uint32_t I = 0; I < ModuleCount; ++I)
+     {
+         if (!ReadPod(R, ModulePods[I]))
+         {
+             return false;
+         }
+     }
+
+     // The blob must cover the furthest byte any module refers to. The sum is
+     // formed in 64 bits: in 32 bits a corrupt offset/size pair could wrap
+     // around, pass the size check below and then be read out of bounds.
+     uint64_t TotalImageIdSize = 0;
+     for (const ModuleInfoPod& MP : ModulePods)
+     {
+         const uint64_t End = uint64_t(MP.ImageIdOffset) + uint64_t(MP.ImageIdSize);
+         if (End > TotalImageIdSize)
+         {
+             TotalImageIdSize = End;
+         }
+     }
+
+     const uint8_t* ImageIdBlobBase = nullptr;
+     if (TotalImageIdSize > 0)
+     {
+         if (uint64_t(R.Remaining()) < TotalImageIdSize)
+         {
+             return false;
+         }
+         ImageIdBlobBase = reinterpret_cast<const uint8_t*>(R.GetView(size_t(TotalImageIdSize)).GetData());
+         R.Skip(size_t(TotalImageIdSize));
+     }
+
+     Model.Modules.resize(ModuleCount);
+     for (uint32_t I = 0; I < ModuleCount; ++I)
+     {
+         const ModuleInfoPod& MP = ModulePods[I];
+         ModuleInfo& Mod = Model.Modules[I];
+         Mod.Name = std::string(Strings.Get(MP.Name));
+         Mod.FullPath = std::string(Strings.Get(MP.FullPath));
+         Mod.Base = MP.Base;
+         Mod.Size = MP.Size;
+         if (MP.ImageIdSize > 0 && ImageIdBlobBase != nullptr)
+         {
+             // In range: offset + size <= TotalImageIdSize by construction.
+             const uint8_t* First = ImageIdBlobBase + MP.ImageIdOffset;
+             Mod.ImageId.assign(First, First + MP.ImageIdSize);
+         }
+     }
+
+     // EventTypeCounts
+     uint32_t EventTypeCount = 0;
+     if (!ReadUint32(R, EventTypeCount) || !Fits(EventTypeCount, sizeof(EventTypeCountPod)))
+     {
+         return false;
+     }
+     Model.EventTypeCounts.resize(EventTypeCount);
+     for (uint32_t I = 0; I < EventTypeCount; ++I)
+     {
+         EventTypeCountPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.EventTypeCounts[I].Name = std::string(Strings.Get(P.Name));
+         Model.EventTypeCounts[I].Count = P.Count;
+     }
+
+     // ScopeStats
+     uint32_t ScopeStatCount = 0;
+     if (!ReadUint32(R, ScopeStatCount) || !Fits(ScopeStatCount, sizeof(CpuScopeStatPod)))
+     {
+         return false;
+     }
+     Model.ScopeStats.resize(ScopeStatCount);
+     for (uint32_t I = 0; I < ScopeStatCount; ++I)
+     {
+         CpuScopeStatPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.ScopeStats[I].Name = std::string(Strings.Get(P.Name));
+         Model.ScopeStats[I].MinUs = P.MinUs;
+         Model.ScopeStats[I].MaxUs = P.MaxUs;
+         Model.ScopeStats[I].Count = P.Count;
+         Model.ScopeStats[I].MeanUs = P.MeanUs;
+         Model.ScopeStats[I].StdDevUs = P.StdDevUs;
+     }
+
+     return true;
+ }
+
+ // Deserialize the memory section (see WriteMemorySection for the layout)
+ // into Model, resolving heap names through Strings.
+ // Returns false when the data is truncated; in that case Model may have been
+ // partially filled in.
+ bool ReadMemorySection(const SharedBuffer& Data, const StringTableReader& Strings, TraceModel& Model)
+ {
+     BinaryReader R(Data.GetData(), Data.GetSize());
+
+     // True when Count records of ElemSize bytes are still available in R.
+     // Checked before sizing any container so that a corrupt count cannot
+     // trigger a huge allocation ahead of the truncation being noticed.
+     auto Fits = [&R](uint64_t Count, size_t ElemSize) { return Count <= uint64_t(R.Remaining()) / ElemSize; };
+
+     // AllocSummary
+     AllocSummaryPod A;
+     if (!ReadPod(R, A))
+     {
+         return false;
+     }
+     Model.AllocSummary.HasMemoryData = (A.HasMemoryData != 0);
+     Model.AllocSummary.PeakTimeUs = A.PeakTimeUs;
+     Model.AllocSummary.LiveAllocations = A.LiveAllocations;
+     Model.AllocSummary.TotalAllocs = A.TotalAllocs;
+     Model.AllocSummary.TotalFrees = A.TotalFrees;
+     Model.AllocSummary.TotalReallocAllocs = A.TotalReallocAllocs;
+     Model.AllocSummary.TotalReallocFrees = A.TotalReallocFrees;
+     Model.AllocSummary.PeakBytes = A.PeakBytes;
+     Model.AllocSummary.EndBytes = A.EndBytes;
+
+     // Heaps
+     uint32_t HeapCount = 0;
+     if (!ReadUint32(R, HeapCount) || !Fits(HeapCount, sizeof(HeapInfoPod)))
+     {
+         return false;
+     }
+     Model.Heaps.resize(HeapCount);
+     for (uint32_t I = 0; I < HeapCount; ++I)
+     {
+         HeapInfoPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.Heaps[I].Id = P.Id;
+         Model.Heaps[I].ParentId = P.ParentId;
+         Model.Heaps[I].Flags = P.Flags;
+         Model.Heaps[I].Name = std::string(Strings.Get(P.Name));
+     }
+
+     // HeapStats
+     uint32_t HeapStatCount = 0;
+     if (!ReadUint32(R, HeapStatCount) || !Fits(HeapStatCount, sizeof(HeapStatPod)))
+     {
+         return false;
+     }
+     Model.HeapStats.resize(HeapStatCount);
+     for (uint32_t I = 0; I < HeapStatCount; ++I)
+     {
+         HeapStatPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.HeapStats[I].HeapId = P.HeapId;
+         Model.HeapStats[I].CurrentBytes = P.CurrentBytes;
+         Model.HeapStats[I].PeakBytes = P.PeakBytes;
+         Model.HeapStats[I].AllocCount = P.AllocCount;
+         Model.HeapStats[I].FreeCount = P.FreeCount;
+     }
+
+     // CallstackAllocStats
+     uint32_t AllocStatCount = 0;
+     if (!ReadUint32(R, AllocStatCount) || !Fits(AllocStatCount, sizeof(CallstackAllocStatPod)))
+     {
+         return false;
+     }
+     Model.CallstackStats.resize(AllocStatCount);
+     for (uint32_t I = 0; I < AllocStatCount; ++I)
+     {
+         CallstackAllocStatPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.CallstackStats[I].CallstackId = P.CallstackId;
+         Model.CallstackStats[I].LiveCount = P.LiveCount;
+         Model.CallstackStats[I].LiveBytes = P.LiveBytes;
+         // The stored count is clamped to 4, the number of thread-id slots
+         // the writer fills, so a corrupt count cannot index past them.
+         for (uint32_t J = 0; J < P.ThreadIdCount && J < 4; ++J)
+         {
+             Model.CallstackStats[I].ThreadIds.push_back(P.ThreadIds[J]);
+         }
+     }
+
+     // ChurnStats
+     uint32_t ChurnCount = 0;
+     if (!ReadUint32(R, ChurnCount) || !Fits(ChurnCount, sizeof(CallstackChurnStatPod)))
+     {
+         return false;
+     }
+     Model.ChurnStats.resize(ChurnCount);
+     for (uint32_t I = 0; I < ChurnCount; ++I)
+     {
+         CallstackChurnStatPod P;
+         if (!ReadPod(R, P))
+         {
+             return false;
+         }
+         Model.ChurnStats[I].CallstackId = P.CallstackId;
+         Model.ChurnStats[I].ChurnAllocs = P.ChurnAllocs;
+         Model.ChurnStats[I].ChurnBytes = P.ChurnBytes;
+         Model.ChurnStats[I].TotalAllocs = P.TotalAllocs;
+         Model.ChurnStats[I].TotalBytes = P.TotalBytes;
+         Model.ChurnStats[I].MeanDistance = P.MeanDistance;
+     }
+
+     return true;
+ }
+
+ // Deserialize the callstacks section, laid out as
+ // [count][CallstackHeaderPod per callstack][flat ResolvedFramePod array],
+ // into Model.Callstacks.
+ // Returns false when the data is truncated or a header refers to frames
+ // that are not present in the section.
+ bool ReadCallstacksSection(const SharedBuffer& Data, TraceModel& Model)
+ {
+     BinaryReader R(Data.GetData(), Data.GetSize());
+
+     uint32_t Count = 0;
+     if (!ReadUint32(R, Count))
+     {
+         return false;
+     }
+
+     // Reject a count the section cannot possibly hold before allocating
+     // storage for that many headers.
+     if (Count > uint64_t(R.Remaining()) / sizeof(CallstackHeaderPod))
+     {
+         return false;
+     }
+
+     // Read headers
+     eastl::vector<CallstackHeaderPod> Headers(Count);
+     for (uint32_t I = 0; I < Count; ++I)
+     {
+         if (!ReadPod(R, Headers[I]))
+         {
+             return false;
+         }
+     }
+
+     // Compute total frame count: the furthest frame index any header refers
+     // to. The sum is formed in 64 bits: in 32 bits a corrupt offset/count
+     // pair could wrap around, pass the size check below and then index past
+     // the end of AllFrames.
+     uint64_t TotalFrames = 0;
+     for (const CallstackHeaderPod& H : Headers)
+     {
+         const uint64_t End = uint64_t(H.FrameOffset) + uint64_t(H.FrameCount);
+         if (End > TotalFrames)
+         {
+             TotalFrames = End;
+         }
+     }
+
+     if (TotalFrames > uint64_t(R.Remaining()) / sizeof(ResolvedFramePod))
+     {
+         return false;
+     }
+
+     // Read all frames
+     eastl::vector<ResolvedFramePod> AllFrames{size_t(TotalFrames)};
+     for (size_t I = 0; I < AllFrames.size(); ++I)
+     {
+         if (!ReadPod(R, AllFrames[I]))
+         {
+             return false;
+         }
+     }
+
+     // Build CallstackEntry vector
+     Model.Callstacks.resize(Count);
+     for (uint32_t I = 0; I < Count; ++I)
+     {
+         const CallstackHeaderPod& H = Headers[I];
+         CallstackEntry& CS = Model.Callstacks[I];
+         CS.Id = H.Id;
+         CS.Frames.resize(H.FrameCount);
+         for (uint32_t J = 0; J < H.FrameCount; ++J)
+         {
+             // In range: FrameOffset + FrameCount <= TotalFrames by construction.
+             const ResolvedFramePod& FP = AllFrames[size_t(H.FrameOffset) + J];
+             CS.Frames[J].Address = FP.Address;
+             CS.Frames[J].ModuleIndex = FP.ModuleIndex;
+             CS.Frames[J].Offset = FP.Offset;
+         }
+     }
+
+     return true;
+ }
+
+ // Deserialize the symbols section ([count][SymbolEntryPod entries]) into
+ // Resolver.m_Symbols. Entries whose string resolves to an empty view are
+ // skipped. Returns false if the data ends before all entries were read.
+ bool ReadSymbolsSection(const SharedBuffer& Data, const StringTableReader& Strings, CachedSymbolResolver& Resolver)
+ {
+     BinaryReader Reader(Data.GetData(), Data.GetSize());
+
+     uint32_t EntryCount = 0;
+     if (!ReadUint32(Reader, EntryCount))
+     {
+         return false;
+     }
+
+     for (uint32_t Left = EntryCount; Left > 0; --Left)
+     {
+         SymbolEntryPod Entry;
+         if (!ReadPod(Reader, Entry))
+         {
+             return false;
+         }
+
+         const std::string_view Text = Strings.Get(Entry.StringIdx);
+         if (Text.empty())
+         {
+             continue;
+         }
+         Resolver.m_Symbols.emplace(Entry.Address, std::string(Text));
+     }
+
+     return true;
+ }
+
+ // ===========================================================================
+ // File-level helpers
+ // ===========================================================================
+
+ // Last-write time of Path, expressed as nanoseconds since the epoch of the
+ // filesystem clock. Returns 0 when the time cannot be queried.
+ int64_t GetFileModTimeNs(const std::filesystem::path& Path)
+ {
+     using Nanoseconds = std::chrono::nanoseconds;
+
+     std::error_code Error;
+     const std::filesystem::file_time_type Stamp = std::filesystem::last_write_time(Path, Error);
+     if (!Error)
+     {
+         return std::chrono::duration_cast<Nanoseconds>(Stamp.time_since_epoch()).count();
+     }
+     return 0;
+ }
+
+ // Decompress the section described by Dir out of the in-memory cache file
+ // starting at FileBase. The compressed bytes are wrapped, not copied.
+ // Returns a default-constructed SharedBuffer when the bytes are not
+ // accepted as a compressed buffer. The caller is expected to have verified
+ // that the section lies inside the file.
+ SharedBuffer DecompressSection(const uint8_t* FileBase, const SectionDirectoryEntry& Dir)
+ {
+     const uint8_t* SectionStart = FileBase + Dir.FileOffset;
+     IoBuffer Wrapped(IoBuffer::Wrap, SectionStart, Dir.CompressedSize);
+
+     // Out-parameters required by FromCompressed; their values are not used here.
+     IoHash UnusedHash;
+     uint64_t UnusedRawSize = 0;
+     CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(std::move(Wrapped)), UnusedHash, UnusedRawSize);
+     if (!Compressed.IsNull())
+     {
+         return Compressed.Decompress();
+     }
+     return {};
+ }
+
+} // namespace
+
+// ===========================================================================
+// Public API
+// ===========================================================================
+
+// Write the analysis cache for SourcePath to CachePath.
+//
+// File layout: CacheFileHeader, one SectionDirectoryEntry per section, then
+// the compressed section blobs back-to-back in section-id order. The header
+// records the source file's size and modification time so that a later load
+// can detect a changed source.
+//
+// Best effort: failures are logged as warnings and never propagate to the
+// caller as std::exception. If the source file cannot be stat'ed no cache is
+// written, since its header could not be validated against the source later.
+void
+WriteAnalyzeCache(const std::filesystem::path& CachePath,
+                  const std::filesystem::path& SourcePath,
+                  const TraceModel& Model,
+                  const eastl::hash_map<uint64_t, std::string>& ResolvedSymbols)
+{
+    try
+    {
+        // Stamp the source first: file_size() reports failure through Ec and
+        // returns a sentinel value, which must not end up in the header.
+        std::error_code Ec;
+        const auto SourceFileSize = std::filesystem::file_size(SourcePath, Ec);
+        if (Ec)
+        {
+            ZEN_WARN("Failed to write analysis cache: {}", Ec.message());
+            return;
+        }
+
+        StringTableBuilder Strings;
+
+        // Build section payloads (order matters: Symbols and Metadata/Memory
+        // intern strings, so StringTable must be serialized LAST after all
+        // interning is done).
+        SharedBuffer MetadataRaw = WriteMetadataSection(Model, Strings);
+        SharedBuffer MemoryRaw = WriteMemorySection(Model, Strings);
+        SharedBuffer CallstacksRaw = WriteCallstacksSection(Model);
+        SharedBuffer SymbolsRaw = WriteSymbolsSection(ResolvedSymbols, Strings);
+        SharedBuffer StringTableRaw = Strings.Serialize();
+
+        // Compress each section
+        CompressedBuffer Sections[uint32_t(CacheSectionId::Count)];
+        Sections[uint32_t(CacheSectionId::StringTable)] = CompressSection(StringTableRaw);
+        Sections[uint32_t(CacheSectionId::Metadata)] = CompressSection(MetadataRaw);
+        Sections[uint32_t(CacheSectionId::Memory)] = CompressSection(MemoryRaw);
+        Sections[uint32_t(CacheSectionId::Callstacks)] = CompressSection(CallstacksRaw);
+        Sections[uint32_t(CacheSectionId::Symbols)] = CompressSection(SymbolsRaw);
+
+        // Build file header
+        CacheFileHeader Header = {};
+        Header.Magic = kCacheMagic;
+        Header.Version = kCacheVersion;
+        Header.SourceFileSize = SourceFileSize;
+        Header.SourceModTimeNs = GetFileModTimeNs(SourcePath);
+
+        uint32_t SectionCount = uint32_t(CacheSectionId::Count);
+
+        // Compute section directory: blobs start right after the header and
+        // the directory, each one immediately following the previous.
+        uint64_t DataOffset = sizeof(CacheFileHeader) + SectionCount * sizeof(SectionDirectoryEntry);
+
+        SectionDirectoryEntry Directory[uint32_t(CacheSectionId::Count)];
+        for (uint32_t I = 0; I < SectionCount; ++I)
+        {
+            Directory[I].SectionId = I;
+            Directory[I].Reserved = 0;
+            Directory[I].FileOffset = DataOffset;
+            Directory[I].CompressedSize = Sections[I].GetCompressedSize();
+            DataOffset += Directory[I].CompressedSize;
+        }
+
+        // Assemble and write the file
+        BinaryWriter FileWriter;
+        FileWriter.Write(&Header, sizeof(Header));
+        FileWriter.Write(Directory, sizeof(Directory));
+
+        // Append compressed blobs
+        for (uint32_t I = 0; I < SectionCount; ++I)
+        {
+            SharedBuffer Flat = std::move(Sections[I]).GetCompressed().Flatten();
+            FileWriter.Write(Flat.GetData(), Flat.GetSize());
+        }
+
+        zen::TemporaryFile::SafeWriteFile(CachePath, FileWriter.GetView());
+
+        ZEN_INFO("Wrote analysis cache {} ({})", CachePath.filename().string(), zen::NiceBytes(FileWriter.Size()));
+    }
+    catch (const std::exception& Ex)
+    {
+        ZEN_WARN("Failed to write analysis cache: {}", Ex.what());
+    }
+}
+
+// Try to load a previously written analysis cache for SourcePath.
+//
+// Returns std::nullopt when the cache file is missing or unreadable, has the
+// wrong magic or version, was written for a different size / modification
+// time of the source file, or when the string table, metadata, memory or
+// callstacks section cannot be read. The symbols section is optional: if it
+// cannot be read the result is returned without a symbol resolver.
+// Exceptions derived from std::exception are caught and reported as a miss.
+std::optional<CachedAnalysis>
+TryLoadAnalyzeCache(const std::filesystem::path& CachePath, const std::filesystem::path& SourcePath)
+{
+    std::error_code Ec;
+    if (!std::filesystem::exists(CachePath, Ec))
+    {
+        return std::nullopt;
+    }
+
+    try
+    {
+        FileContents Contents = zen::ReadFile(CachePath);
+        if (!Contents)
+        {
+            return std::nullopt;
+        }
+
+        IoBuffer FileData = Contents.Flatten();
+        if (FileData.Size() < sizeof(CacheFileHeader))
+        {
+            return std::nullopt;
+        }
+
+        const uint8_t* Base = reinterpret_cast<const uint8_t*>(FileData.Data());
+
+        // Validate header
+        CacheFileHeader Header;
+        memcpy(&Header, Base, sizeof(Header));
+
+        if (Header.Magic != kCacheMagic)
+        {
+            ZEN_DEBUG("Analysis cache: bad magic");
+            return std::nullopt;
+        }
+
+        if (Header.Version != kCacheVersion)
+        {
+            ZEN_DEBUG("Analysis cache: version mismatch ({} vs {})", Header.Version, kCacheVersion);
+            return std::nullopt;
+        }
+
+        // Validate source file hasn't changed. A source that cannot be
+        // stat'ed is treated as a miss: file_size() returns a sentinel on
+        // error, which must not be compared against the stored size.
+        uint64_t CurrentSize = std::filesystem::file_size(SourcePath, Ec);
+        if (Ec)
+        {
+            ZEN_DEBUG("Analysis cache: unable to query source file size");
+            return std::nullopt;
+        }
+        int64_t CurrentModTime = GetFileModTimeNs(SourcePath);
+
+        if (Header.SourceFileSize != CurrentSize || Header.SourceModTimeNs != CurrentModTime)
+        {
+            ZEN_DEBUG("Analysis cache: source file changed, invalidating");
+            return std::nullopt;
+        }
+
+        // Parse section directory
+        uint32_t SectionCount = uint32_t(CacheSectionId::Count);
+        size_t DirSize = SectionCount * sizeof(SectionDirectoryEntry);
+        if (FileData.Size() < sizeof(CacheFileHeader) + DirSize)
+        {
+            return std::nullopt;
+        }
+
+        SectionDirectoryEntry Directory[uint32_t(CacheSectionId::Count)];
+        memcpy(Directory, Base + sizeof(CacheFileHeader), DirSize);
+
+        // Validate all sections fit in the file. Written as two comparisons
+        // rather than "offset + size > file size" so that a corrupt entry
+        // cannot wrap the sum around and slip past the check.
+        const uint64_t TotalFileSize = uint64_t(FileData.Size());
+        for (uint32_t I = 0; I < SectionCount; ++I)
+        {
+            const uint64_t SectionOffset = uint64_t(Directory[I].FileOffset);
+            const uint64_t SectionSize = uint64_t(Directory[I].CompressedSize);
+            if (SectionOffset > TotalFileSize || SectionSize > TotalFileSize - SectionOffset)
+            {
+                ZEN_DEBUG("Analysis cache: section {} truncated", I);
+                return std::nullopt;
+            }
+        }
+
+        // Decompress string table first; the other sections refer to it.
+        SharedBuffer StringTableData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::StringTable)]);
+        if (StringTableData.IsNull())
+        {
+            ZEN_DEBUG("Analysis cache: failed to decompress string table");
+            return std::nullopt;
+        }
+
+        StringTableReader Strings;
+        if (!Strings.Init(StringTableData))
+        {
+            ZEN_DEBUG("Analysis cache: invalid string table");
+            return std::nullopt;
+        }
+
+        CachedAnalysis Result;
+        Result.Model.FilePath = SourcePath;
+
+        // Decompress and read each section
+        SharedBuffer MetaData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Metadata)]);
+        if (MetaData.IsNull() || !ReadMetadataSection(MetaData, Strings, Result.Model))
+        {
+            ZEN_DEBUG("Analysis cache: failed to read metadata section");
+            return std::nullopt;
+        }
+
+        SharedBuffer MemData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Memory)]);
+        if (MemData.IsNull() || !ReadMemorySection(MemData, Strings, Result.Model))
+        {
+            ZEN_DEBUG("Analysis cache: failed to read memory section");
+            return std::nullopt;
+        }
+
+        SharedBuffer CsData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Callstacks)]);
+        if (CsData.IsNull() || !ReadCallstacksSection(CsData, Result.Model))
+        {
+            ZEN_DEBUG("Analysis cache: failed to read callstacks section");
+            return std::nullopt;
+        }
+
+        // Symbols are optional: a missing or unreadable section leaves
+        // Result.Symbols unset instead of failing the whole load.
+        SharedBuffer SymData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Symbols)]);
+        if (!SymData.IsNull())
+        {
+            auto Resolver = std::make_unique<CachedSymbolResolver>();
+            if (ReadSymbolsSection(SymData, Strings, *Resolver))
+            {
+                Result.Symbols = std::move(Resolver);
+            }
+        }
+
+        ZEN_INFO("Loaded analysis from cache ({})", zen::NiceBytes(FileData.Size()));
+        return Result;
+    }
+    catch (const std::exception& Ex)
+    {
+        ZEN_DEBUG("Analysis cache load failed: {}", Ex.what());
+        return std::nullopt;
+    }
+}
+
+} // namespace zen::trace_detail