diff options
Diffstat (limited to 'src/zen/trace')
| -rw-r--r-- | src/zen/trace/callstack_formatter.cpp | 251 | ||||
| -rw-r--r-- | src/zen/trace/callstack_formatter.h | 55 | ||||
| -rw-r--r-- | src/zen/trace/symbol_resolver.cpp | 1631 | ||||
| -rw-r--r-- | src/zen/trace/symbol_resolver.h | 45 | ||||
| -rw-r--r-- | src/zen/trace/timeline_query.cpp | 123 | ||||
| -rw-r--r-- | src/zen/trace/timeline_query.h | 69 | ||||
| -rw-r--r-- | src/zen/trace/trace_analyze.cpp | 812 | ||||
| -rw-r--r-- | src/zen/trace/trace_analyze.h | 29 | ||||
| -rw-r--r-- | src/zen/trace/trace_cache.cpp | 1104 | ||||
| -rw-r--r-- | src/zen/trace/trace_cache.h | 253 | ||||
| -rw-r--r-- | src/zen/trace/trace_cmd.cpp | 402 | ||||
| -rw-r--r-- | src/zen/trace/trace_cmd.h | 123 | ||||
| -rw-r--r-- | src/zen/trace/trace_memory.cpp | 901 | ||||
| -rw-r--r-- | src/zen/trace/trace_memory.h | 301 | ||||
| -rw-r--r-- | src/zen/trace/trace_model.cpp | 3898 | ||||
| -rw-r--r-- | src/zen/trace/trace_model.h | 314 | ||||
| -rw-r--r-- | src/zen/trace/trace_viewer_service.cpp | 1225 | ||||
| -rw-r--r-- | src/zen/trace/trace_viewer_service.h | 71 |
18 files changed, 11607 insertions, 0 deletions
diff --git a/src/zen/trace/callstack_formatter.cpp b/src/zen/trace/callstack_formatter.cpp new file mode 100644 index 000000000..0c601d5c0 --- /dev/null +++ b/src/zen/trace/callstack_formatter.cpp @@ -0,0 +1,251 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "callstack_formatter.h" + +#include <zencore/fmtutils.h> +#include <zencore/string.h> +#include <zenutil/wildcard.h> + +#include <algorithm> + +namespace zen::trace_detail { + +namespace { + + static bool IsKnownRuntimeModuleName(std::string_view ModuleName) + { + std::string LowerName = zen::ToLower(ModuleName); + return LowerName == "ntdll.dll" || LowerName == "kernel32.dll" || LowerName == "kernelbase.dll" || LowerName == "ucrtbase.dll" || + LowerName == "ucrtbased.dll" || LowerName.starts_with("vcruntime") || LowerName.starts_with("msvcp") || + LowerName.starts_with("api-ms-win-") || LowerName == "libc.so" || LowerName.starts_with("libc.so.") || + LowerName == "libstdc++.so" || LowerName.starts_with("libstdc++.so.") || LowerName == "libgcc_s.so" || + LowerName.starts_with("libgcc_s.so.") || LowerName == "libpthread.so" || LowerName.starts_with("libpthread.so.") || + LowerName == "libm.so" || LowerName.starts_with("libm.so.") || LowerName == "ld-linux.so" || + LowerName.starts_with("ld-linux") || LowerName == "libsystem_kernel.dylib" || LowerName == "libsystem_malloc.dylib" || + LowerName == "libsystem_pthread.dylib" || LowerName == "libdyld.dylib"; + } + + static bool PathLooksThirdParty(std::string_view ModulePath) + { + std::string LowerPath = zen::ToLower(ModulePath); + return LowerPath.find("/thirdparty/") != std::string::npos || LowerPath.find("\\thirdparty\\") != std::string::npos || + LowerPath.find("/third-party/") != std::string::npos || LowerPath.find("\\third-party\\") != std::string::npos || + LowerPath.find("/external/") != std::string::npos || LowerPath.find("\\external\\") != std::string::npos || + LowerPath.find("/extern/") != std::string::npos || 
LowerPath.find("\\extern\\") != std::string::npos || + LowerPath.find("/engine/binaries/thirdparty/") != std::string::npos || + LowerPath.find("\\engine\\binaries\\thirdparty\\") != std::string::npos || + LowerPath.find("c:\\windows\\system32\\") != std::string::npos || + LowerPath.find("c:\\windows\\syswow64\\") != std::string::npos || LowerPath.starts_with("/usr/lib/") || + LowerPath.starts_with("/lib/") || LowerPath.starts_with("/system/"); + } + + static bool MatchesAnyPattern(std::string_view Text, const std::vector<std::string>& Patterns) + { + for (const std::string& Pattern : Patterns) + { + if (zen::MatchWildcard(Pattern, Text, /*CaseSensitive=*/false)) + { + return true; + } + } + return false; + } + + static bool ShouldSkipFrameByPattern(const CallstackFilterOptions& Options, + const TraceModel& Model, + const ResolvedFrame& Frame, + std::string_view Description) + { + if (MatchesAnyPattern(Description, Options.SkipPatterns)) + { + return true; + } + if (Frame.ModuleIndex != ~0u && Frame.ModuleIndex < Model.Modules.size()) + { + const ModuleInfo& Module = Model.Modules[Frame.ModuleIndex]; + if (MatchesAnyPattern(Module.Name, Options.SkipPatterns) || MatchesAnyPattern(Module.FullPath, Options.SkipPatterns)) + { + return true; + } + } + + static const std::vector<std::string> kDefaultSkipPatterns = { + "zen::MemoryTrace_*", + "*mi_*", + "*_mi_*", + "*rpmalloc*", + "*mimalloc*", + "*je_malloc*", + "*je_free*", + "*malloc*", + "*free*", + "*realloc*", + }; + return MatchesAnyPattern(Description, kDefaultSkipPatterns); + } + + static bool IsThirdPartyFrame(const TraceModel& Model, const ResolvedFrame& Frame, std::string_view Description) + { + if (Description.starts_with("std::")) + { + return true; + } + if (Frame.ModuleIndex == ~0u || Frame.ModuleIndex >= Model.Modules.size()) + { + return false; + } + + const ModuleInfo& Module = Model.Modules[Frame.ModuleIndex]; + return IsKnownRuntimeModuleName(Module.Name) || PathLooksThirdParty(Module.FullPath); + } + 
+} // namespace + +CallstackFormatter::CallstackFormatter(const TraceModel& InModel, const SymbolResolver* InSymbols) : m_Model(InModel), m_Symbols(InSymbols) +{ +} + +const CallstackEntry* +CallstackFormatter::FindCallstackEntry(uint32_t CallstackId) const +{ + auto It = + eastl::lower_bound(m_Model.Callstacks.begin(), m_Model.Callstacks.end(), CallstackId, [](const CallstackEntry& E, uint32_t Id) { + return E.Id < Id; + }); + if (It == m_Model.Callstacks.end() || It->Id != CallstackId) + { + return nullptr; + } + return &*It; +} + +const std::string& +CallstackFormatter::Describe(const ResolvedFrame& Frame) +{ + auto It = m_Cache.find(Frame.Address); + if (It != m_Cache.end()) + { + return It->second; + } + + std::string Result = m_Symbols ? m_Symbols->Resolve(Frame.Address) : std::string{}; + if (Result.empty()) + { + if (Frame.ModuleIndex != ~0u && Frame.ModuleIndex < m_Model.Modules.size()) + { + Result = fmt::format("{} + 0x{:X}", m_Model.Modules[Frame.ModuleIndex].Name, Frame.Offset); + } + else + { + Result = fmt::format("0x{:X}", Frame.Address); + } + } + + auto [InsertedIt, Inserted] = m_Cache.emplace(Frame.Address, std::move(Result)); + ZEN_UNUSED(Inserted); + return InsertedIt->second; +} + +FilteredCallstackView +CallstackFormatter::BuildView(const CallstackEntry& Entry, const CallstackFilterOptions& Options) +{ + FilteredCallstackView Result; + Result.Frames.reserve(Entry.Frames.size()); + if (Entry.Frames.empty()) + { + return Result; + } + + eastl::vector<bool> ExplicitSkip; + eastl::vector<bool> ThirdParty; + ExplicitSkip.reserve(Entry.Frames.size()); + ThirdParty.reserve(Entry.Frames.size()); + + for (const ResolvedFrame& Frame : Entry.Frames) + { + const std::string& Description = Describe(Frame); + ExplicitSkip.push_back(ShouldSkipFrameByPattern(Options, m_Model, Frame, Description)); + ThirdParty.push_back(IsThirdPartyFrame(m_Model, Frame, Description)); + } + + eastl::vector<size_t> VisibleFrameIndices; + 
VisibleFrameIndices.reserve(Entry.Frames.size()); + + if (!Options.EnableHeuristic) + { + for (size_t Index = 0; Index < Entry.Frames.size(); ++Index) + { + if (!ExplicitSkip[Index]) + { + VisibleFrameIndices.push_back(Index); + } + } + if (VisibleFrameIndices.empty()) + { + VisibleFrameIndices.push_back(0); + } + } + else + { + size_t FirstProgramIndex = Entry.Frames.size(); + size_t BoundaryThirdPartyIndex = Entry.Frames.size(); + for (size_t Index = 0; Index < Entry.Frames.size(); ++Index) + { + if (ExplicitSkip[Index]) + { + continue; + } + if (ThirdParty[Index]) + { + BoundaryThirdPartyIndex = Index; + continue; + } + FirstProgramIndex = Index; + break; + } + + if (FirstProgramIndex == Entry.Frames.size()) + { + for (size_t Index = 0; Index < Entry.Frames.size(); ++Index) + { + if (!ExplicitSkip[Index]) + { + VisibleFrameIndices.push_back(Index); + } + } + if (VisibleFrameIndices.empty()) + { + VisibleFrameIndices.push_back(0); + } + } + else + { + if (BoundaryThirdPartyIndex < Entry.Frames.size()) + { + VisibleFrameIndices.push_back(BoundaryThirdPartyIndex); + Result.IncludedThirdPartyBoundary = true; + } + for (size_t Index = FirstProgramIndex; Index < Entry.Frames.size(); ++Index) + { + if (!ExplicitSkip[Index]) + { + VisibleFrameIndices.push_back(Index); + } + } + if (VisibleFrameIndices.empty()) + { + VisibleFrameIndices.push_back(FirstProgramIndex); + } + } + } + + Result.HiddenPrefixCount = uint32_t(VisibleFrameIndices.front()); + for (size_t FrameIndex : VisibleFrameIndices) + { + Result.Frames.push_back( + {.OriginalIndex = FrameIndex, .Frame = &Entry.Frames[FrameIndex], .Display = Describe(Entry.Frames[FrameIndex])}); + } + return Result; +} + +} // namespace zen::trace_detail diff --git a/src/zen/trace/callstack_formatter.h b/src/zen/trace/callstack_formatter.h new file mode 100644 index 000000000..067985f25 --- /dev/null +++ b/src/zen/trace/callstack_formatter.h @@ -0,0 +1,55 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include "symbol_resolver.h" +#include "trace_model.h" + +#include <string> +#include <vector> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/hash_map.h> +#include <EASTL/vector.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen::trace_detail { + +struct CallstackFilterOptions +{ + bool EnableHeuristic = true; + std::vector<std::string> SkipPatterns; +}; + +struct FilteredCallstackFrame +{ + size_t OriginalIndex = 0; + const ResolvedFrame* Frame = nullptr; + std::string Display; +}; + +struct FilteredCallstackView +{ + eastl::vector<FilteredCallstackFrame> Frames; + uint32_t HiddenPrefixCount = 0; + bool IncludedThirdPartyBoundary = false; +}; + +class CallstackFormatter +{ +public: + CallstackFormatter(const TraceModel& InModel, const SymbolResolver* InSymbols); + + const eastl::hash_map<uint64_t, std::string>& GetResolvedCache() const { return m_Cache; } + + const CallstackEntry* FindCallstackEntry(uint32_t CallstackId) const; + const std::string& Describe(const ResolvedFrame& Frame); + FilteredCallstackView BuildView(const CallstackEntry& Entry, const CallstackFilterOptions& Options); + +private: + const TraceModel& m_Model; + const SymbolResolver* m_Symbols = nullptr; + eastl::hash_map<uint64_t, std::string> m_Cache; +}; + +} // namespace zen::trace_detail diff --git a/src/zen/trace/symbol_resolver.cpp b/src/zen/trace/symbol_resolver.cpp new file mode 100644 index 000000000..53374cd64 --- /dev/null +++ b/src/zen/trace/symbol_resolver.cpp @@ -0,0 +1,1631 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "symbol_resolver.h" + +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/process.h> +#include <zencore/string.h> +#include <zenhttp/httpclient.h> + +#include <algorithm> +#include <filesystem> +#include <mutex> +#include <unordered_map> +#include <vector> + +#if !ZEN_PLATFORM_WINDOWS +# include <cerrno> +# include <unistd.h> +#endif + +#if ZEN_PLATFORM_WINDOWS + +ZEN_THIRD_PARTY_INCLUDES_START +# include <Foundation/PDB_PointerUtil.h> +# include <PDB.h> +# include <PDB_CoalescedMSFStream.h> +# include <PDB_DBIStream.h> +# include <PDB_ImageSectionStream.h> +# include <PDB_InfoStream.h> +# include <PDB_ModuleInfoStream.h> +# include <PDB_ModuleLineStream.h> +# include <PDB_ModuleSymbolStream.h> +# include <PDB_PublicSymbolStream.h> +# include <PDB_RawFile.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <DbgHelp.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#endif // ZEN_PLATFORM_WINDOWS + +namespace zen::trace_detail { + +////////////////////////////////////////////////////////////////////////////// +// Null resolver (used when symbolication is off or unsupported) + +class NullSymbolResolver final : public SymbolResolver +{ +public: + void LoadModule(const ModuleInfo&) override {} + std::string Resolve(uint64_t) const override { return {}; } +}; + +#if ZEN_PLATFORM_WINDOWS + +////////////////////////////////////////////////////////////////////////////// +// Helpers shared by Windows backends + +static std::string +FormatSymbol(std::string_view Name, uint64_t Displacement) +{ + if (Displacement == 0) + { + return std::string(Name); + } + return fmt::format("{} + 0x{:X}", Name, Displacement); +} + +////////////////////////////////////////////////////////////////////////////// +// Memory-mapped file helper + +namespace { + + struct MappedFile + { + const void* Data = nullptr; + size_t Size = 0; + HANDLE FileHandle = INVALID_HANDLE_VALUE; + 
HANDLE MappingHandle = nullptr; + + MappedFile() = default; + ~MappedFile() { Close(); } + + bool Open(const std::filesystem::path& Path) + { + FileHandle = CreateFileW(Path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (FileHandle == INVALID_HANDLE_VALUE) + { + return false; + } + + LARGE_INTEGER FileSize; + if (!GetFileSizeEx(FileHandle, &FileSize) || FileSize.QuadPart == 0) + { + Close(); + return false; + } + + MappingHandle = CreateFileMappingW(FileHandle, nullptr, PAGE_READONLY, 0, 0, nullptr); + if (MappingHandle == nullptr) + { + Close(); + return false; + } + + Data = MapViewOfFile(MappingHandle, FILE_MAP_READ, 0, 0, 0); + if (Data == nullptr) + { + Close(); + return false; + } + + Size = size_t(FileSize.QuadPart); + return true; + } + + void Close() + { + if (Data != nullptr) + { + UnmapViewOfFile(Data); + Data = nullptr; + } + if (MappingHandle != nullptr) + { + CloseHandle(MappingHandle); + MappingHandle = nullptr; + } + if (FileHandle != INVALID_HANDLE_VALUE) + { + CloseHandle(FileHandle); + FileHandle = INVALID_HANDLE_VALUE; + } + Size = 0; + } + + MappedFile(const MappedFile&) = delete; + MappedFile& operator=(const MappedFile&) = delete; + }; + + // Format an ImageId (16-byte GUID + 4-byte Age) as the hex key used in + // symbol server URLs: <GUID_no_dashes><Age_hex>, e.g. "A1B2C3...1". + std::string FormatImageIdKey(const eastl::vector<uint8_t>& ImageId) + { + if (ImageId.size() < 20) + { + return {}; + } + + // GUID bytes are stored as {Data1 LE, Data2 LE, Data3 LE, Data4[8]}. + // The symbol server key encodes Data1/2/3 as big-endian hex. 
+ const uint8_t* G = ImageId.data(); + + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + memcpy(&Data1, G + 0, 4); + memcpy(&Data2, G + 4, 2); + memcpy(&Data3, G + 6, 2); + + uint32_t Age; + memcpy(&Age, ImageId.data() + 16, 4); + + return fmt::format("{:08X}{:04X}{:04X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:x}", + Data1, + Data2, + Data3, + G[8], + G[9], + G[10], + G[11], + G[12], + G[13], + G[14], + G[15], + Age); + } + + // PdbName originates from module metadata in an untrusted trace file and is used + // to build both a filesystem path and an HTTP request path. Restrict it to a safe + // subset so a malicious trace cannot traverse out of the cache dir, inject URL + // syntax, or trip cross-platform path parsing quirks (e.g. `\` is a separator on + // Windows but not POSIX, so filename() doesn't always strip it). + bool IsSafePdbName(std::string_view Name) + { + constexpr AsciiSet SafeChars( + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789" + "._-+"); + + if (Name.empty() || Name.size() > 255 || Name == "." || Name == "..") + { + return false; + } + return AsciiSet::HasOnly(Name, SafeChars); + } + + const std::filesystem::path& GetSymbolCacheDir() + { + // Use %TEMP%/zen-symbols as the default cache location. + static const std::filesystem::path s_CacheDir = [] { + std::filesystem::path TempDir = std::filesystem::temp_directory_path(); + return TempDir / "zen-symbols"; + }(); + return s_CacheDir; + } + + // Try to download a PDB from a symbol server URL. + // Returns the local cache path on success, empty path on failure. + std::filesystem::path DownloadPdb(std::string_view ServerUrl, + std::string_view PdbName, + const std::string& ImageIdKey, + const std::filesystem::path& CacheDir) + { + // Cache path mirrors the server structure + std::filesystem::path CachePath = CacheDir / PdbName / ImageIdKey / PdbName; + + // Already cached? 
+ std::error_code Ec; + if (std::filesystem::exists(CachePath, Ec)) + { + return CachePath; + } + + ZEN_INFO("Downloading {} from symbol server...", PdbName); + + try + { + std::string RequestPath = fmt::format("/{}/{}/{}", PdbName, ImageIdKey, PdbName); + + zen::HttpClientSettings Settings; + Settings.Timeout = std::chrono::milliseconds(30000); + Settings.ConnectTimeout = std::chrono::milliseconds(5000); + Settings.FollowRedirects = true; + Settings.ExpectedErrorCodes = {zen::HttpResponseCode::NotFound}; + + zen::HttpClient Http(ServerUrl, Settings); + + zen::HttpClient::Response Response = Http.Get(RequestPath); + + if (!Response) + { + ZEN_DEBUG("Symbol server: {} not found (HTTP {})", PdbName, int(Response.StatusCode)); + return {}; + } + + // Write to cache using zencore file I/O + std::filesystem::create_directories(CachePath.parent_path(), Ec); + zen::WriteFile(CachePath, Response.ResponsePayload); + + ZEN_INFO("Cached {} ({})", PdbName, zen::NiceBytes(Response.ResponsePayload.GetSize())); + return CachePath; + } + catch (const std::exception& Ex) + { + ZEN_WARN("Symbol server download failed for {}: {}", PdbName, Ex.what()); + return {}; + } + } + + // Parse _NT_SYMBOL_PATH to extract symbol server URLs. + // Format: "srv*<cache>*<url>" or "symsrv*symsrv.dll*<cache>*<url>" or just a URL. + // Returns a list of server URLs to try. + const std::vector<std::string>& ParseSymbolPath() + { + static const std::vector<std::string> s_Servers = [] { + std::vector<std::string> Servers; + + const char* EnvPath = std::getenv("_NT_SYMBOL_PATH"); + if (EnvPath == nullptr || EnvPath[0] == '\0') + { + // Default to Microsoft public symbol server + Servers.push_back("https://msdl.microsoft.com/download/symbols"); + return Servers; + } + + std::string_view Path(EnvPath); + + // Split on ';' for multiple entries + while (!Path.empty()) + { + size_t Semi = Path.find(';'); + std::string_view Entry = (Semi != std::string_view::npos) ? 
Path.substr(0, Semi) : Path; + Path = (Semi != std::string_view::npos) ? Path.substr(Semi + 1) : std::string_view{}; + + // Look for srv* or symsrv* prefix — the last '*'-delimited token is the server URL. + if (Entry.substr(0, 4) == "srv*" || Entry.substr(0, 7) == "symsrv*") + { + size_t LastStar = Entry.rfind('*'); + if (LastStar != std::string_view::npos && LastStar + 1 < Entry.size()) + { + std::string_view Url = Entry.substr(LastStar + 1); + if (Url.substr(0, 4) == "http") + { + Servers.emplace_back(Url); + } + } + } + } + + if (Servers.empty()) + { + Servers.push_back("https://msdl.microsoft.com/download/symbols"); + } + + return Servers; + }(); + return s_Servers; + } + + // Copy a local PDB into the symbol cache so that future analysis of traces + // from this build succeeds even after the binary is recompiled. + void CacheLocalPdb(const std::filesystem::path& PdbPath, + std::string_view PdbName, + const std::string& ImageIdKey, + const std::filesystem::path& CacheDir) + { + std::filesystem::path CachePath = CacheDir / PdbName / ImageIdKey / PdbName; + std::error_code Ec; + if (std::filesystem::exists(CachePath, Ec)) + { + return; + } + + std::filesystem::create_directories(CachePath.parent_path(), Ec); + if (Ec) + { + return; + } + + std::filesystem::copy_file(PdbPath, CachePath, std::filesystem::copy_options::skip_existing, Ec); + if (!Ec) + { + uint64_t Size = std::filesystem::file_size(PdbPath, Ec); + ZEN_INFO("Cached local PDB {} ({})", PdbName, zen::NiceBytes(Size)); + } + } + + // Look for a PDB in the local symbol cache or download from symbol servers. + // Returns the cache path on success, empty path on failure. + std::filesystem::path FindPdbInCacheOrServer(std::string_view PdbName, + const std::string& ImageIdKey, + const std::filesystem::path& CacheDir) + { + if (ImageIdKey.empty()) + { + return {}; + } + + // Check local cache first (includes previously cached local PDBs and + // earlier symbol server downloads). 
+ std::filesystem::path CachePath = CacheDir / PdbName / ImageIdKey / PdbName; + std::error_code Ec; + if (std::filesystem::exists(CachePath, Ec)) + { + return CachePath; + } + + // Try symbol servers + const std::vector<std::string>& Servers = ParseSymbolPath(); + for (const std::string& Server : Servers) + { + std::filesystem::path Downloaded = DownloadPdb(Server, PdbName, ImageIdKey, CacheDir); + if (!Downloaded.empty()) + { + return Downloaded; + } + } + + return {}; + } + +} // namespace + +////////////////////////////////////////////////////////////////////////////// +// RawPdb backend — reads PDB files directly + +class PdbSymbolResolver final : public SymbolResolver +{ +public: + void LoadModule(const ModuleInfo& Module) override; + std::string Resolve(uint64_t Address) const override; + +private: + struct FunctionEntry + { + uint64_t Address; + uint32_t Size; + std::string Name; + }; + + struct LineEntry + { + uint64_t Address; + uint32_t CodeSize; + uint32_t Line; + std::string File; // shortened: basename only + }; + + std::vector<FunctionEntry> m_Functions; + std::vector<LineEntry> m_Lines; +}; + +void +PdbSymbolResolver::LoadModule(const ModuleInfo& Module) +{ + if (Module.FullPath.empty() || Module.Base == 0) + { + return; + } + + std::string ImageIdKey = FormatImageIdKey(Module.ImageId); + std::string PdbName = std::filesystem::path(Module.FullPath).filename().replace_extension(".pdb").string(); + const std::filesystem::path& CacheDir = GetSymbolCacheDir(); + + if (!IsSafePdbName(PdbName)) + { + ZEN_WARN("Rejecting unsafe PDB name from trace: '{}'", PdbName); + return; + } + + // Try local PDB first (next to the binary) + std::filesystem::path PdbPath(Module.FullPath); + PdbPath.replace_extension(".pdb"); + + std::error_code Ec; + bool FromLocal = false; + + if (std::filesystem::exists(PdbPath, Ec)) + { + FromLocal = true; + } + else + { + // Try symbol cache / symbol server download + PdbPath = FindPdbInCacheOrServer(PdbName, ImageIdKey, CacheDir); 
+ if (PdbPath.empty()) + { + ZEN_DEBUG("PDB not found locally or on symbol server: {}", PdbName); + return; + } + } + + MappedFile File; + if (!File.Open(PdbPath)) + { + ZEN_DEBUG("Failed to open PDB: {}", PdbPath.string()); + return; + } + + if (PDB::ValidateFile(File.Data, File.Size) != PDB::ErrorCode::Success) + { + ZEN_DEBUG("Invalid PDB file: {}", PdbPath.string()); + return; + } + + PDB::RawFile RawFile = PDB::CreateRawFile(File.Data); + PDB::InfoStream PdbInfoStream(RawFile); + + // Verify the PDB matches the traced module by comparing GUID + Age. + // The trace stores ImageId as 16 bytes GUID followed by 4 bytes Age. + if (Module.ImageId.size() >= 20) + { + const PDB::Header* PdbHeader = PdbInfoStream.GetHeader(); + + // Only compare the GUID, not the age. The symbol server may return a + // PDB with a higher age (from incremental linking) which is compatible. + if (memcmp(&PdbHeader->guid, Module.ImageId.data(), 16) != 0) + { + if (FromLocal) + { + // The local PDB no longer matches — the binary was recompiled + // since the trace was taken. Try the symbol cache / servers for + // the original PDB. 
+ File.Close(); + PdbPath = FindPdbInCacheOrServer(PdbName, ImageIdKey, CacheDir); + if (PdbPath.empty()) + { + ZEN_WARN("PDB mismatch for {} — binary was recompiled and no cached symbols available", Module.Name); + return; + } + + FromLocal = false; + + if (!File.Open(PdbPath)) + { + ZEN_DEBUG("Failed to open cached PDB: {}", PdbPath.string()); + return; + } + + if (PDB::ValidateFile(File.Data, File.Size) != PDB::ErrorCode::Success) + { + ZEN_DEBUG("Invalid cached PDB: {}", PdbPath.string()); + return; + } + + RawFile = PDB::CreateRawFile(File.Data); + PdbInfoStream = PDB::InfoStream(RawFile); + + const PDB::Header* CachedHeader = PdbInfoStream.GetHeader(); + if (memcmp(&CachedHeader->guid, Module.ImageId.data(), 16) != 0) + { + ZEN_WARN("PDB GUID mismatch for {} — skipping", Module.Name); + return; + } + } + else + { + ZEN_WARN("PDB GUID mismatch for {} — skipping", Module.Name); + return; + } + } + } + + // Cache the local PDB so that future analysis of traces from this build + // succeeds even after the binary is recompiled. 
+ if (FromLocal && !ImageIdKey.empty()) + { + CacheLocalPdb(PdbPath, PdbName, ImageIdKey, CacheDir); + } + + if (PDB::HasValidDBIStream(RawFile) != PDB::ErrorCode::Success) + { + return; + } + + const PDB::DBIStream DbiStream = PDB::CreateDBIStream(RawFile); + if (DbiStream.HasValidImageSectionStream(RawFile) != PDB::ErrorCode::Success) + { + return; + } + + const PDB::ImageSectionStream ImageSections = DbiStream.CreateImageSectionStream(RawFile); + uint64_t ModuleBase = Module.Base; + uint32_t SkippedModules = 0; + size_t FunctionCountBeforeModuleSymbols = m_Functions.size(); + + // Collect functions from module symbol streams (S_GPROC32 / S_LPROC32) + { + const PDB::ModuleInfoStream ModInfoStream = DbiStream.CreateModuleInfoStream(RawFile); + const PDB::ArrayView<PDB::ModuleInfoStream::Module> Modules = ModInfoStream.GetModules(); + for (const PDB::ModuleInfoStream::Module& Mod : Modules) + { + if (!Mod.HasSymbolStream()) + { + ++SkippedModules; + continue; + } + + const PDB::ModuleSymbolStream SymStream = Mod.CreateSymbolStream(RawFile); + + SymStream.ForEachSymbol([&](const PDB::CodeView::DBI::Record* Record) { + using Kind = PDB::CodeView::DBI::SymbolRecordKind; + const Kind K = Record->header.kind; + const auto& Data = Record->data; + + if (K == Kind::S_GPROC32 || K == Kind::S_LPROC32 || K == Kind::S_GPROC32_ID || K == Kind::S_LPROC32_ID) + { + uint32_t Rva = ImageSections.ConvertSectionOffsetToRVA(Data.S_GPROC32.section, Data.S_GPROC32.offset); + if (Rva != 0) + { + m_Functions.push_back({ModuleBase + Rva, Data.S_GPROC32.codeSize, Data.S_GPROC32.name}); + } + } + }); + } + } + + // Public symbols as fallback only when module symbol streams did not yield any + // functions. Building the coalesced symbol-record stream is expensive and can + // allocate tens of megabytes for large PDBs. 
+ if (FunctionCountBeforeModuleSymbols == m_Functions.size() && DbiStream.HasValidPublicSymbolStream(RawFile) == PDB::ErrorCode::Success) + { + const PDB::PublicSymbolStream PubStream = DbiStream.CreatePublicSymbolStream(RawFile); + const PDB::CoalescedMSFStream SymRecords = DbiStream.CreateSymbolRecordStream(RawFile); + + for (const PDB::HashRecord& Hash : PubStream.GetRecords()) + { + const PDB::CodeView::DBI::Record* Record = SymRecords.GetDataAtOffset<PDB::CodeView::DBI::Record>(Hash.offset); + if (Record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_PUB32) + { + uint32_t Rva = ImageSections.ConvertSectionOffsetToRVA(Record->data.S_PUB32.section, Record->data.S_PUB32.offset); + if (Rva != 0) + { + m_Functions.push_back({ModuleBase + Rva, 0, Record->data.S_PUB32.name}); + } + } + } + } + + // Collect line information from module line streams + if (PdbInfoStream.HasNamesStream()) + { + const PDB::NamesStream NamesStream = PdbInfoStream.CreateNamesStream(RawFile); + const PDB::ModuleInfoStream ModInfoStream2 = DbiStream.CreateModuleInfoStream(RawFile); + + for (const PDB::ModuleInfoStream::Module& Mod : ModInfoStream2.GetModules()) + { + if (!Mod.HasLineStream()) + { + continue; + } + + const PDB::ModuleLineStream LineStream = Mod.CreateLineStream(RawFile); + + // Two passes: first find the checksums section, then process lines. 
+ const PDB::CodeView::DBI::FileChecksumHeader* ModuleChecksumBase = nullptr; + + LineStream.ForEachSection([&](const PDB::CodeView::DBI::LineSection* Section) { + if (Section->header.kind == PDB::CodeView::DBI::DebugSubsectionKind::S_FILECHECKSUMS) + { + ModuleChecksumBase = &Section->checksumHeader; + } + }); + + if (ModuleChecksumBase == nullptr) + { + continue; + } + + LineStream.ForEachSection([&](const PDB::CodeView::DBI::LineSection* Section) { + if (Section->header.kind != PDB::CodeView::DBI::DebugSubsectionKind::S_LINES) + { + return; + } + + uint16_t SecIdx = Section->linesHeader.sectionIndex; + uint32_t SecOff = Section->linesHeader.sectionOffset; + + LineStream.ForEachLinesBlock(Section, + [&](const PDB::CodeView::DBI::LinesFileBlockHeader* Block, + const PDB::CodeView::DBI::Line* Lines, + const PDB::CodeView::DBI::Column*) { + if (Block->numLines == 0) + { + return; + } + + // Resolve filename for this block + const auto* Checksum = PDB::Pointer::Offset<const PDB::CodeView::DBI::FileChecksumHeader*>( + ModuleChecksumBase, + Block->fileChecksumOffset); + const char* FullFile = NamesStream.GetFilename(Checksum->filenameOffset); + + // Extract basename + std::string_view FileView(FullFile); + size_t Cut = FileView.find_last_of("\\/"); + std::string Basename(Cut != std::string_view::npos ? 
FileView.substr(Cut + 1) : FileView); + + for (uint32_t I = 0; I < Block->numLines; ++I) + { + uint32_t Rva = + ImageSections.ConvertSectionOffsetToRVA(SecIdx, SecOff + Lines[I].offset); + if (Rva == 0) + { + continue; + } + + uint32_t CodeSize = 0; + if (I + 1 < Block->numLines) + { + CodeSize = Lines[I + 1].offset - Lines[I].offset; + } + else + { + CodeSize = Section->linesHeader.codeSize - Lines[I].offset; + } + + m_Lines.push_back({ModuleBase + Rva, CodeSize, Lines[I].linenumStart, Basename}); + } + }); + }); + } + } + + std::sort(m_Functions.begin(), m_Functions.end(), [](const FunctionEntry& A, const FunctionEntry& B) { return A.Address < B.Address; }); + + std::sort(m_Lines.begin(), m_Lines.end(), [](const LineEntry& A, const LineEntry& B) { return A.Address < B.Address; }); + + if (SkippedModules > 0) + { + ZEN_INFO("Loaded {} symbols, {} line records from {} ({} modules without embedded debug info)", + m_Functions.size(), + m_Lines.size(), + Module.Name, + SkippedModules); + } + else + { + ZEN_INFO("Loaded {} symbols, {} line records from {}", m_Functions.size(), m_Lines.size(), Module.Name); + } +} + +std::string +PdbSymbolResolver::Resolve(uint64_t Address) const +{ + if (m_Functions.empty()) + { + return {}; + } + + // Resolve function name + auto FnIt = std::upper_bound(m_Functions.begin(), m_Functions.end(), Address, [](uint64_t Addr, const FunctionEntry& E) { + return Addr < E.Address; + }); + + if (FnIt == m_Functions.begin()) + { + return {}; + } + + --FnIt; + + if (FnIt->Size > 0 && Address >= FnIt->Address + FnIt->Size) + { + return {}; + } + + std::string Result = FormatSymbol(FnIt->Name, Address - FnIt->Address); + + // Resolve file:line + if (!m_Lines.empty()) + { + auto LineIt = + std::upper_bound(m_Lines.begin(), m_Lines.end(), Address, [](uint64_t Addr, const LineEntry& E) { return Addr < E.Address; }); + + if (LineIt != m_Lines.begin()) + { + --LineIt; + if (LineIt->CodeSize == 0 || Address < LineIt->Address + LineIt->CodeSize) + { + 
Result += fmt::format(" [{}:{}]", LineIt->File, LineIt->Line); + } + } + } + + return Result; +} + +////////////////////////////////////////////////////////////////////////////// +// DbgHelp backend — uses Windows symbol API, supports _NT_SYMBOL_PATH + +class DbgHelpSymbolResolver final : public SymbolResolver +{ +public: + DbgHelpSymbolResolver(); + ~DbgHelpSymbolResolver() override; + + void LoadModule(const ModuleInfo& Module) override; + std::string Resolve(uint64_t Address) const override; + +private: + // Map trace addresses to DbgHelp addresses when the loaded base differs. + struct ModuleMapping + { + uint64_t TraceBase; + uint64_t TraceEnd; + int64_t Delta; // DbgHelpBase - TraceBase + }; + + HANDLE m_Process = nullptr; + std::vector<ModuleMapping> m_Mappings; + // DbgHelp is not thread-safe; its API functions require serialized access. This + // mutex covers every DbgHelp call (SymInitialize/SymLoadModuleExW/SymFromAddr/ + // SymGetLineFromAddr64) and therefore serializes all parallel symbol lookups in + // trace_analyze. For workloads where lookup throughput matters, prefer + // PdbSymbolResolver, which parses PDBs directly and is lock-free per-module. + mutable std::mutex m_Mutex; +}; + +DbgHelpSymbolResolver::DbgHelpSymbolResolver() +{ + std::lock_guard Lock(m_Mutex); + + // Use a unique pseudo-handle so we don't conflict with the runtime + // symbol handler used by callstack.cpp / crashhandler.cpp. + m_Process = reinterpret_cast<HANDLE>(static_cast<uintptr_t>(0xDEAD0042)); + + // NULL search path lets DbgHelp use _NT_SYMBOL_PATH and its defaults. 
+ if (!SymInitialize(m_Process, nullptr, FALSE)) + { + ZEN_WARN("DbgHelp: SymInitialize failed (error {})", GetLastError()); + m_Process = nullptr; + } +} + +DbgHelpSymbolResolver::~DbgHelpSymbolResolver() +{ + std::lock_guard Lock(m_Mutex); + + if (m_Process != nullptr) + { + SymCleanup(m_Process); + } +} + +void +DbgHelpSymbolResolver::LoadModule(const ModuleInfo& Module) +{ + std::lock_guard Lock(m_Mutex); + + if (m_Process == nullptr || Module.FullPath.empty() || Module.Base == 0) + { + return; + } + + std::filesystem::path ModulePath(Module.FullPath); + std::wstring WidePath = ModulePath.wstring(); + + DWORD64 LoadedBase = SymLoadModuleExW(m_Process, nullptr, WidePath.c_str(), nullptr, Module.Base, Module.Size, nullptr, 0); + + if (LoadedBase == 0) + { + DWORD Err = GetLastError(); + if (Err != ERROR_SUCCESS) + { + ZEN_DEBUG("DbgHelp: failed to load {}: error {}", Module.Name, Err); + } + return; + } + + int64_t Delta = int64_t(LoadedBase) - int64_t(Module.Base); + if (Delta != 0) + { + ZEN_DEBUG("DbgHelp: {} loaded at 0x{:X} (trace base 0x{:X}, delta {:+})", Module.Name, LoadedBase, Module.Base, Delta); + } + + uint64_t TraceEnd = Module.Base + (Module.Size > 0 ? 
Module.Size : 0x1000000); + m_Mappings.push_back({Module.Base, TraceEnd, Delta}); + + ZEN_INFO("DbgHelp: loaded symbols for {}", Module.Name); +} + +std::string +DbgHelpSymbolResolver::Resolve(uint64_t Address) const +{ + std::lock_guard Lock(m_Mutex); + + if (m_Process == nullptr) + { + return {}; + } + + // Translate the trace address to the DbgHelp address space + uint64_t DbgAddr = Address; + for (const ModuleMapping& M : m_Mappings) + { + if (Address >= M.TraceBase && Address < M.TraceEnd) + { + DbgAddr = uint64_t(int64_t(Address) + M.Delta); + break; + } + } + + alignas(SYMBOL_INFO) char Buffer[sizeof(SYMBOL_INFO) + MAX_SYM_NAME]; + SYMBOL_INFO* SymInfo = reinterpret_cast<SYMBOL_INFO*>(Buffer); + SymInfo->SizeOfStruct = sizeof(SYMBOL_INFO); + SymInfo->MaxNameLen = MAX_SYM_NAME; + + DWORD64 Displacement = 0; + if (!SymFromAddr(m_Process, DbgAddr, &Displacement, SymInfo)) + { + return {}; + } + + std::string Result = FormatSymbol(std::string_view(SymInfo->Name, SymInfo->NameLen), Displacement); + + IMAGEHLP_LINE64 LineInfo = {}; + LineInfo.SizeOfStruct = sizeof(IMAGEHLP_LINE64); + DWORD LineDisplacement = 0; + if (SymGetLineFromAddr64(m_Process, DbgAddr, &LineDisplacement, &LineInfo)) + { + std::string_view FileView(LineInfo.FileName); + size_t Cut = FileView.find_last_of("\\/"); + std::string_view Basename = (Cut != std::string_view::npos) ? FileView.substr(Cut + 1) : FileView; + Result += fmt::format(" [{}:{}]", Basename, LineInfo.LineNumber); + } + + return Result; +} + +#endif // ZEN_PLATFORM_WINDOWS + +////////////////////////////////////////////////////////////////////////////// +// Shared helpers for subprocess-based backends + +namespace { + + // CreateProc parses the command line as space-separated tokens, so argv[0] + // must be quoted if the resolved executable path contains spaces (e.g. an + // Xcode toolchain location on macOS). 
+ std::string QuoteIfNeeded(std::string_view Path) + { + if (Path.find(' ') == std::string_view::npos) + { + return std::string(Path); + } + return fmt::format("\"{}\"", Path); + } + +} // namespace + +////////////////////////////////////////////////////////////////////////////// +// llvm-symbolizer backend — cross-platform, shells out to `llvm-symbolizer` +// and speaks its interactive protocol over pipes. +// +// Protocol (one request / response): +// We write: "<path-to-binary> 0x<relative-address>\n" +// It replies: "FunctionName\n" +// "file:line:col\n" +// "\n" <-- blank line terminates the record +// +// Launch flags: +// --demangle demangle C++ names (default, but explicit) +// --output-style=LLVM stable two-line format described above +// --functions=linkage keep template arguments visible +// --relative-address treat the address as an offset from module base +// --inlining=false emit one frame per address (no inline expansion) + +class LlvmSymbolizerResolver final : public SymbolResolver +{ +public: + LlvmSymbolizerResolver() = default; + ~LlvmSymbolizerResolver() override; + + void LoadModule(const ModuleInfo& Module) override; + std::string Resolve(uint64_t Address) const override; + +private: + struct Module + { + std::string FullPath; + uint64_t Base = 0; + uint64_t End = 0; + }; + + const Module* FindModule(uint64_t Address) const; + bool EnsureProcess() const; + bool ReadLine(std::string& Out) const; + std::string DoQuery(const Module& M, uint64_t RelAddress) const; + + std::vector<Module> m_Modules; + + // Subprocess + IO state. All accesses serialized under m_Mutex. + mutable std::mutex m_Mutex; + mutable bool m_Attempted = false; + mutable bool m_Alive = false; + mutable zen::ProcessHandle m_Process; + mutable zen::StdinPipeHandles m_StdinPipe; + mutable zen::StdoutPipeHandles m_StdoutPipe; + mutable std::string m_ReadBuffer; + + // Cache resolved addresses (same mutex). 
+ mutable std::unordered_map<uint64_t, std::string> m_Cache; +}; + +LlvmSymbolizerResolver::~LlvmSymbolizerResolver() +{ + std::lock_guard Lock(m_Mutex); + if (m_Alive) + { + // Closing stdin lets llvm-symbolizer exit cleanly on EOF. + m_StdinPipe.CloseWriteEnd(); + m_Process.Wait(2000); + if (m_Process.IsRunning()) + { + m_Process.Terminate(0); + } + } +} + +void +LlvmSymbolizerResolver::LoadModule(const ModuleInfo& Mod) +{ + if (Mod.FullPath.empty() || Mod.Base == 0) + { + return; + } + + // llvm-symbolizer auto-discovers adjacent debug info (Foo.dSYM on Mac, + // .gnu_debuglink / build-id sources on Linux, Foo.pdb on Windows). If the + // binary itself isn't present locally, there's nothing we can do. + std::error_code Ec; + if (!std::filesystem::exists(Mod.FullPath, Ec)) + { + ZEN_DEBUG("llvm-symbolizer: binary not found for {} at {}", Mod.Name, Mod.FullPath); + return; + } + + uint64_t End = Mod.Base + (Mod.Size > 0 ? Mod.Size : 0x1000000); + + std::lock_guard Lock(m_Mutex); + m_Modules.push_back({Mod.FullPath, Mod.Base, End}); + ZEN_INFO("llvm-symbolizer: registered {} [0x{:X}..0x{:X})", Mod.Name, Mod.Base, End); +} + +std::string +LlvmSymbolizerResolver::Resolve(uint64_t Address) const +{ + std::lock_guard Lock(m_Mutex); + + auto CacheIt = m_Cache.find(Address); + if (CacheIt != m_Cache.end()) + { + return CacheIt->second; + } + + const Module* M = FindModule(Address); + if (M == nullptr) + { + m_Cache.emplace(Address, std::string{}); + return {}; + } + + std::string Result = DoQuery(*M, Address - M->Base); + m_Cache.emplace(Address, Result); + return Result; +} + +const LlvmSymbolizerResolver::Module* +LlvmSymbolizerResolver::FindModule(uint64_t Address) const +{ + for (const Module& M : m_Modules) + { + if (Address >= M.Base && Address < M.End) + { + return &M; + } + } + return nullptr; +} + +bool +LlvmSymbolizerResolver::EnsureProcess() const +{ + if (m_Attempted) + { + return m_Alive; + } + m_Attempted = true; + + std::filesystem::path Executable = 
SearchPathForExecutable("llvm-symbolizer"); + + if (!CreateStdinPipe(m_StdinPipe) || !CreateStdoutPipe(m_StdoutPipe)) + { + ZEN_WARN("llvm-symbolizer: failed to create pipes"); + return false; + } + + // Build the command line. CommandLine begins with the executable name (arg[0]). + std::string CommandLine = fmt::format("{} --demangle --output-style=LLVM --functions=linkage --relative-address --inlining=false", + QuoteIfNeeded(Executable.string())); + + CreateProcOptions Options; + Options.StdinPipe = &m_StdinPipe; + Options.StdoutPipe = &m_StdoutPipe; + + CreateProcResult Handle = CreateProc(Executable, CommandLine, Options); + +#if ZEN_PLATFORM_WINDOWS + if (Handle == nullptr) +#else + if (Handle <= 0) +#endif + { + ZEN_WARN("llvm-symbolizer: failed to launch '{}' - install LLVM or add to PATH", Executable.string()); + m_StdinPipe.Close(); + m_StdoutPipe.Close(); + return false; + } + +#if ZEN_PLATFORM_WINDOWS + m_Process.Initialize(Handle); +#else + std::error_code Ec; + m_Process.Initialize(int(Handle), Ec); + if (Ec) + { + ZEN_WARN("llvm-symbolizer: ProcessHandle init failed: {}", Ec.message()); + m_StdinPipe.Close(); + m_StdoutPipe.Close(); + return false; + } +#endif + + // Close the child-side handles in the parent. + m_StdinPipe.CloseReadEnd(); + m_StdoutPipe.CloseWriteEnd(); + + m_Alive = true; + return true; +} + +bool +LlvmSymbolizerResolver::ReadLine(std::string& Out) const +{ + // Search for a newline already in the buffer; if not, read more. + for (;;) + { + size_t NewlinePos = m_ReadBuffer.find('\n'); + if (NewlinePos != std::string::npos) + { + Out.assign(m_ReadBuffer, 0, NewlinePos); + m_ReadBuffer.erase(0, NewlinePos + 1); + // Trim a trailing \r (in case of CRLF line endings). 
+ if (!Out.empty() && Out.back() == '\r') + { + Out.pop_back(); + } + return true; + } + + char Buffer[1024]; +#if ZEN_PLATFORM_WINDOWS + DWORD BytesRead = 0; + if (!::ReadFile(m_StdoutPipe.ReadHandle, Buffer, sizeof(Buffer), &BytesRead, nullptr) || BytesRead == 0) + { + return false; + } + m_ReadBuffer.append(Buffer, BytesRead); +#else + ssize_t BytesRead = ::read(m_StdoutPipe.ReadFd, Buffer, sizeof(Buffer)); + if (BytesRead <= 0) + { + if (BytesRead < 0 && errno == EINTR) + { + continue; + } + return false; + } + m_ReadBuffer.append(Buffer, static_cast<size_t>(BytesRead)); +#endif + } +} + +std::string +LlvmSymbolizerResolver::DoQuery(const Module& M, uint64_t RelAddress) const +{ + if (!EnsureProcess()) + { + return {}; + } + + // Write "<path> 0x<addr>\n". Paths with spaces must be quoted for llvm-symbolizer + // interactive input; it accepts double quotes. + std::string Line; + if (M.FullPath.find(' ') != std::string::npos) + { + Line = fmt::format("\"{}\" 0x{:X}\n", M.FullPath, RelAddress); + } + else + { + Line = fmt::format("{} 0x{:X}\n", M.FullPath, RelAddress); + } + +#if ZEN_PLATFORM_WINDOWS + DWORD BytesWritten = 0; + if (!::WriteFile(m_StdinPipe.WriteHandle, Line.data(), static_cast<DWORD>(Line.size()), &BytesWritten, nullptr) || + BytesWritten != Line.size()) + { + ZEN_WARN("llvm-symbolizer: write failed, disabling backend"); + m_Alive = false; + return {}; + } +#else + const char* Ptr = Line.data(); + size_t Remaining = Line.size(); + while (Remaining > 0) + { + ssize_t N = ::write(m_StdinPipe.WriteFd, Ptr, Remaining); + if (N <= 0) + { + if (N < 0 && errno == EINTR) + { + continue; + } + ZEN_WARN("llvm-symbolizer: write failed, disabling backend"); + m_Alive = false; + return {}; + } + Ptr += N; + Remaining -= static_cast<size_t>(N); + } +#endif + + // Read lines until a blank line terminates the record. 
+ std::string Function; + std::string Location; + std::string Buf; + int LineIdx = 0; + while (ReadLine(Buf)) + { + if (Buf.empty()) + { + break; + } + if (LineIdx == 0) + { + Function = Buf; + } + else if (LineIdx == 1) + { + Location = Buf; + } + // Additional lines would be inline frames (--inlining=false suppresses them); ignore. + ++LineIdx; + } + + if (Function.empty() || Function == "??") + { + return {}; + } + + std::string Result = std::move(Function); + if (!Location.empty() && Location != "??:0:0") + { + // Location is "path:line:col" — trim to "basename:line" to match Windows output. + std::string_view LocView(Location); + size_t LastColon = LocView.find_last_of(':'); + if (LastColon != std::string_view::npos) + { + LocView = LocView.substr(0, LastColon); + } + size_t Slash = LocView.find_last_of("/\\"); + std::string_view FileLine = (Slash == std::string_view::npos) ? LocView : LocView.substr(Slash + 1); + Result += fmt::format(" [{}]", FileLine); + } + return Result; +} + +////////////////////////////////////////////////////////////////////////////// +// atos backend — macOS only. Apple's symbolizer; ships with Xcode + the CLT. +// +// Unlike llvm-symbolizer, atos accepts only one binary per process. We keep +// one subprocess per loaded module and demultiplex queries by module path. +// +// Protocol (one request / response): +// We write: "0x<absolute-address>\n" +// It replies: "Function (in Binary) (file.cpp:NN)\n" +// or "Function (in Binary) + 0x<disp>\n" (no debug info) +// or "0x<address>\n" (nothing known) +// +// Launched with: atos -o <binary> -l 0x<module-base> +// atos subtracts -l from each input address to get the file offset. 
+ +#if ZEN_PLATFORM_MAC + +class AtosSymbolizerResolver final : public SymbolResolver +{ +public: + AtosSymbolizerResolver() = default; + ~AtosSymbolizerResolver() override; + + void LoadModule(const ModuleInfo& Module) override; + std::string Resolve(uint64_t Address) const override; + +private: + struct Module + { + std::string FullPath; + uint64_t Base = 0; + uint64_t End = 0; + }; + + // One atos subprocess per loaded module (atos is single-binary). + struct AtosProcess + { + zen::ProcessHandle Process; + zen::StdinPipeHandles StdinPipe; + zen::StdoutPipeHandles StdoutPipe; + std::string ReadBuffer; + bool Alive = false; + }; + + const Module* FindModule(uint64_t Address) const; + AtosProcess* EnsureProcessFor(const Module& M) const; + bool ReadLine(AtosProcess& P, std::string& Out) const; + std::string DoQuery(const Module& M, uint64_t Address) const; + + std::vector<Module> m_Modules; + + mutable std::mutex m_Mutex; + mutable std::unordered_map<std::string, std::unique_ptr<AtosProcess>> m_Processes; + mutable std::unordered_map<uint64_t, std::string> m_Cache; +}; + +AtosSymbolizerResolver::~AtosSymbolizerResolver() +{ + std::lock_guard Lock(m_Mutex); + for (auto& [Path, P] : m_Processes) + { + if (P && P->Alive) + { + P->StdinPipe.CloseWriteEnd(); + P->Process.Wait(2000); + if (P->Process.IsRunning()) + { + P->Process.Terminate(0); + } + } + } +} + +void +AtosSymbolizerResolver::LoadModule(const ModuleInfo& Mod) +{ + if (Mod.FullPath.empty() || Mod.Base == 0) + { + return; + } + + std::error_code Ec; + if (!std::filesystem::exists(Mod.FullPath, Ec)) + { + ZEN_DEBUG("atos: binary not found for {} at {}", Mod.Name, Mod.FullPath); + return; + } + + uint64_t End = Mod.Base + (Mod.Size > 0 ? 
Mod.Size : 0x1000000); + + std::lock_guard Lock(m_Mutex); + m_Modules.push_back({Mod.FullPath, Mod.Base, End}); + ZEN_INFO("atos: registered {} [0x{:X}..0x{:X})", Mod.Name, Mod.Base, End); +} + +std::string +AtosSymbolizerResolver::Resolve(uint64_t Address) const +{ + std::lock_guard Lock(m_Mutex); + + auto CacheIt = m_Cache.find(Address); + if (CacheIt != m_Cache.end()) + { + return CacheIt->second; + } + + const Module* M = FindModule(Address); + if (M == nullptr) + { + m_Cache.emplace(Address, std::string{}); + return {}; + } + + std::string Result = DoQuery(*M, Address); + m_Cache.emplace(Address, Result); + return Result; +} + +const AtosSymbolizerResolver::Module* +AtosSymbolizerResolver::FindModule(uint64_t Address) const +{ + for (const Module& M : m_Modules) + { + if (Address >= M.Base && Address < M.End) + { + return &M; + } + } + return nullptr; +} + +AtosSymbolizerResolver::AtosProcess* +AtosSymbolizerResolver::EnsureProcessFor(const Module& M) const +{ + auto It = m_Processes.find(M.FullPath); + if (It != m_Processes.end()) + { + return It->second.get(); + } + + auto P = std::make_unique<AtosProcess>(); + + if (!CreateStdinPipe(P->StdinPipe) || !CreateStdoutPipe(P->StdoutPipe)) + { + ZEN_WARN("atos: failed to create pipes for {}", M.FullPath); + auto [Ins, _] = m_Processes.emplace(M.FullPath, std::move(P)); + return Ins->second.get(); // Alive = false + } + + std::filesystem::path Executable = SearchPathForExecutable("atos"); + std::string CommandLine = fmt::format("{} -o \"{}\" -l 0x{:X}", QuoteIfNeeded(Executable.string()), M.FullPath, M.Base); + + CreateProcOptions Options; + Options.StdinPipe = &P->StdinPipe; + Options.StdoutPipe = &P->StdoutPipe; + + CreateProcResult Handle = CreateProc(Executable, CommandLine, Options); + if (Handle <= 0) + { + ZEN_WARN("atos: failed to launch for {} - `atos` should be on PATH on macOS", M.FullPath); + P->StdinPipe.Close(); + P->StdoutPipe.Close(); + auto [Ins, _] = m_Processes.emplace(M.FullPath, std::move(P)); 
+ return Ins->second.get(); // Alive = false + } + + std::error_code Ec; + P->Process.Initialize(int(Handle), Ec); + if (Ec) + { + ZEN_WARN("atos: ProcessHandle init failed for {}: {}", M.FullPath, Ec.message()); + P->StdinPipe.Close(); + P->StdoutPipe.Close(); + auto [Ins, _] = m_Processes.emplace(M.FullPath, std::move(P)); + return Ins->second.get(); // Alive = false + } + + P->StdinPipe.CloseReadEnd(); + P->StdoutPipe.CloseWriteEnd(); + P->Alive = true; + + auto [Ins, _] = m_Processes.emplace(M.FullPath, std::move(P)); + return Ins->second.get(); +} + +bool +AtosSymbolizerResolver::ReadLine(AtosProcess& P, std::string& Out) const +{ + for (;;) + { + size_t NewlinePos = P.ReadBuffer.find('\n'); + if (NewlinePos != std::string::npos) + { + Out.assign(P.ReadBuffer, 0, NewlinePos); + P.ReadBuffer.erase(0, NewlinePos + 1); + return true; + } + + char Buffer[1024]; + ssize_t BytesRead = ::read(P.StdoutPipe.ReadFd, Buffer, sizeof(Buffer)); + if (BytesRead <= 0) + { + if (BytesRead < 0 && errno == EINTR) + { + continue; + } + return false; + } + P.ReadBuffer.append(Buffer, static_cast<size_t>(BytesRead)); + } +} + +std::string +AtosSymbolizerResolver::DoQuery(const Module& M, uint64_t Address) const +{ + AtosProcess* P = EnsureProcessFor(M); + if (P == nullptr || !P->Alive) + { + return {}; + } + + std::string Line = fmt::format("0x{:X}\n", Address); + + const char* Ptr = Line.data(); + size_t Remaining = Line.size(); + while (Remaining > 0) + { + ssize_t N = ::write(P->StdinPipe.WriteFd, Ptr, Remaining); + if (N <= 0) + { + if (N < 0 && errno == EINTR) + { + continue; + } + ZEN_WARN("atos: write failed for {}, disabling", M.FullPath); + P->Alive = false; + return {}; + } + Ptr += N; + Remaining -= static_cast<size_t>(N); + } + + std::string Reply; + if (!ReadLine(*P, Reply) || Reply.empty()) + { + return {}; + } + + // Parse "Function (in Binary) (file.cpp:NN)" or "... + 0xNN" or just "0xADDR". + // Extract everything before " (in " as the function name. 
+ size_t InPos = Reply.find(" (in "); + if (InPos == std::string::npos) + { + // No match — either raw "0xADDR" (no info) or an error message. Skip. + return {}; + } + + std::string_view Function(Reply.data(), InPos); + + // Look for a trailing "(file:line)" after the "(in ...)" block. + std::string_view LocationView; + size_t AfterIn = Reply.find(')', InPos); + if (AfterIn != std::string::npos) + { + size_t OpenParen = Reply.find('(', AfterIn); + if (OpenParen != std::string::npos) + { + size_t CloseParen = Reply.find(')', OpenParen); + if (CloseParen != std::string::npos && CloseParen > OpenParen + 1) + { + LocationView = std::string_view(Reply).substr(OpenParen + 1, CloseParen - OpenParen - 1); + } + } + } + + std::string Result(Function); + if (!LocationView.empty()) + { + // atos gives us "file.cpp:NN" directly — no need to strip a directory. + Result += fmt::format(" [{}]", LocationView); + } + return Result; +} + +#endif // ZEN_PLATFORM_MAC + +////////////////////////////////////////////////////////////////////////////// +// Factory + +namespace { + +#if ZEN_PLATFORM_MAC + // Probe PATH for a tool and return true if something usable was found. + // SearchPathForExecutable returns the input unchanged if the tool can't be + // found, so we compare against the filesystem to detect a hit. + bool ToolIsOnPath(std::string_view Name) + { + std::filesystem::path Resolved = SearchPathForExecutable(Name); + std::error_code Ec; + return std::filesystem::exists(Resolved, Ec) && std::filesystem::is_regular_file(Resolved, Ec); + } +#endif + + SymbolBackend ResolveAutoBackend() + { +#if ZEN_PLATFORM_WINDOWS + return SymbolBackend::Pdb; +#elif ZEN_PLATFORM_MAC + if (ToolIsOnPath("llvm-symbolizer")) + { + return SymbolBackend::LlvmSymbolizer; + } + return SymbolBackend::Atos; +#else + // Linux: llvm-symbolizer is the only backend we ship. 
+ return SymbolBackend::LlvmSymbolizer; +#endif + } + +} // namespace + +std::unique_ptr<SymbolResolver> +CreateSymbolResolver(SymbolBackend Backend) +{ + if (Backend == SymbolBackend::Auto) + { + Backend = ResolveAutoBackend(); + } + + if (Backend == SymbolBackend::Off) + { + return std::make_unique<NullSymbolResolver>(); + } + + if (Backend == SymbolBackend::LlvmSymbolizer) + { + return std::make_unique<LlvmSymbolizerResolver>(); + } + +#if ZEN_PLATFORM_MAC + if (Backend == SymbolBackend::Atos) + { + return std::make_unique<AtosSymbolizerResolver>(); + } +#else + if (Backend == SymbolBackend::Atos) + { + ZEN_WARN("atos backend is macOS-only; falling back to llvm-symbolizer"); + return std::make_unique<LlvmSymbolizerResolver>(); + } +#endif + +#if ZEN_PLATFORM_WINDOWS + if (Backend == SymbolBackend::DbgHelp) + { + return std::make_unique<DbgHelpSymbolResolver>(); + } + return std::make_unique<PdbSymbolResolver>(); +#else + // Pdb / DbgHelp aren't available on non-Windows; any other request falls back to llvm-symbolizer. + return std::make_unique<LlvmSymbolizerResolver>(); +#endif +} + +SymbolBackend +ParseSymbolBackend(std::string_view Name) +{ + if (Name == "auto") + { + return SymbolBackend::Auto; + } + if (Name == "pdb") + { + return SymbolBackend::Pdb; + } + if (Name == "dbghelp") + { + return SymbolBackend::DbgHelp; + } + if (Name == "llvm" || Name == "llvm-symbolizer") + { + return SymbolBackend::LlvmSymbolizer; + } + if (Name == "atos") + { + return SymbolBackend::Atos; + } + if (Name == "off") + { + return SymbolBackend::Off; + } + return SymbolBackend::Off; +} + +} // namespace zen::trace_detail diff --git a/src/zen/trace/symbol_resolver.h b/src/zen/trace/symbol_resolver.h new file mode 100644 index 000000000..4acdaf95e --- /dev/null +++ b/src/zen/trace/symbol_resolver.h @@ -0,0 +1,45 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include "trace_model.h" + +#include <cstdint> +#include <memory> +#include <string> + +namespace zen::trace_detail { + +enum class SymbolBackend : uint8_t +{ + Off, + Auto, // Probe PATH and pick the best available backend for the platform + Pdb, // Windows only: RawPdb — fast, reads PDB files directly + DbgHelp, // Windows only: DbgHelp API — supports symbol servers and _NT_SYMBOL_PATH + LlvmSymbolizer, // Any platform: shells out to `llvm-symbolizer`, resolves dSYM (Mac) / DWARF (Linux) / PDB (Windows) + Atos // macOS only: shells out to `atos`, resolves Mach-O binaries and adjacent .dSYM bundles +}; + +// Resolves virtual addresses captured in a trace to function names. +// Use CreateSymbolResolver() to obtain a concrete implementation. +class SymbolResolver +{ +public: + virtual ~SymbolResolver() = default; + + // Load symbols for a module. + virtual void LoadModule(const ModuleInfo& Module) = 0; + + // Resolve an absolute virtual address to "FunctionName + 0xNN" (or just + // "FunctionName" when the displacement is zero). Returns an empty string + // when the address cannot be resolved. + virtual std::string Resolve(uint64_t Address) const = 0; +}; + +std::unique_ptr<SymbolResolver> CreateSymbolResolver(SymbolBackend Backend); + +// Parse a string ("auto", "pdb", "dbghelp", "llvm", "llvm-symbolizer", +// "atos", "off") into a SymbolBackend enum. Returns Off on unrecognised input. +SymbolBackend ParseSymbolBackend(std::string_view Name); + +} // namespace zen::trace_detail diff --git a/src/zen/trace/timeline_query.cpp b/src/zen/trace/timeline_query.cpp new file mode 100644 index 000000000..d90c79a29 --- /dev/null +++ b/src/zen/trace/timeline_query.cpp @@ -0,0 +1,123 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "timeline_query.h" + +#include <algorithm> + +namespace zen::trace_detail { + +namespace { + + // Pick the LOD level a given resolution should read from. 
Mirrors the + // historical selection in trace_viewer_service.cpp: resolution 0 reads the + // raw LOD 0; otherwise the smallest LOD whose ResolutionUs >= the request + // wins, falling back to the coarsest level if none qualify. + // + // Returned values: 0 == raw scopes (LOD 0), 1..kTimelineLodCount == DetailLevels[lod-1]. + size_t SelectLodIndex(uint32_t ResolutionUs) + { + if (ResolutionUs == 0) + { + return 0; + } + for (size_t I = 0; I < kTimelineLodCount; ++I) + { + if (kTimelineLodResolutions[I] >= ResolutionUs) + { + return I + 1; + } + } + return kTimelineLodCount; + } + + const eastl::vector<TimelineScope>& LodScopes(const ThreadTimeline& Timeline, size_t LodIndex) + { + if (LodIndex == 0) + { + return Timeline.Scopes; + } + return Timeline.DetailLevels[LodIndex - 1].Scopes; + } + + void ExtractScopesInto(const ThreadTimeline& Timeline, const TimelineQueryRequest& Req, std::vector<TimelineScopeView>& Out) + { + const eastl::vector<TimelineScope>& Scopes = LodScopes(Timeline, SelectLodIndex(Req.ResolutionUs)); + + auto MidIt = + std::lower_bound(Scopes.begin(), Scopes.end(), Req.StartUs, [](const TimelineScope& S, uint32_t V) { return S.BeginUs < V; }); + + for (auto It = Scopes.begin(); It != MidIt; ++It) + { + if ((It->BeginUs + It->DurationUs) < Req.StartUs || It->DurationUs < Req.MinDurUs) + { + continue; + } + Out.push_back({It->BeginUs, It->DurationUs, It->NameId, It->Depth, It->MergeCount}); + } + for (auto It = MidIt; It != Scopes.end(); ++It) + { + if (It->BeginUs > Req.EndUs) + { + break; + } + if (It->DurationUs < Req.MinDurUs) + { + continue; + } + Out.push_back({It->BeginUs, It->DurationUs, It->NameId, It->Depth, It->MergeCount}); + } + } + + const ThreadTimeline* FindThread(const TraceModel& Model, uint32_t ThreadId) + { + auto It = std::find_if(Model.Timelines.begin(), Model.Timelines.end(), [ThreadId](const ThreadTimeline& T) { + return T.ThreadId == ThreadId; + }); + return (It != Model.Timelines.end()) ? 
&*It : nullptr; + } + + class InMemoryTimelineQuery final : public TimelineQuery + { + public: + explicit InMemoryTimelineQuery(const TraceModel& Model) : m_Model(Model) {} + + void QueryThread(uint32_t ThreadId, const TimelineQueryRequest& Req, std::vector<TimelineScopeView>& Out) const override + { + const ThreadTimeline* Timeline = FindThread(m_Model, ThreadId); + if (Timeline) + { + ExtractScopesInto(*Timeline, Req, Out); + } + } + + void QueryBatch(std::span<const uint32_t> ThreadIds, const TimelineQueryRequest& Req, BatchResult& Out) const override + { + Out.Scopes.clear(); + Out.Ranges.clear(); + Out.Ranges.reserve(ThreadIds.size()); + + for (uint32_t ThreadId : ThreadIds) + { + const uint32_t Begin = uint32_t(Out.Scopes.size()); + const ThreadTimeline* Timeline = FindThread(m_Model, ThreadId); + if (Timeline) + { + ExtractScopesInto(*Timeline, Req, Out.Scopes); + } + Out.Ranges.push_back({Begin, uint32_t(Out.Scopes.size())}); + } + } + + private: + const TraceModel& m_Model; + }; + +} // namespace + +std::unique_ptr<TimelineQuery> +MakeInMemoryTimelineQuery(const TraceModel& Model) +{ + return std::make_unique<InMemoryTimelineQuery>(Model); +} + +} // namespace zen::trace_detail diff --git a/src/zen/trace/timeline_query.h b/src/zen/trace/timeline_query.h new file mode 100644 index 000000000..f773d8e58 --- /dev/null +++ b/src/zen/trace/timeline_query.h @@ -0,0 +1,69 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "trace_model.h" + +#include <cstdint> +#include <memory> +#include <span> +#include <vector> + +namespace zen::trace_detail { + +// Plain-data view of a single timeline scope returned by a TimelineQuery. +// Mirrors the on-disk TimelineScope but is intentionally decoupled from the +// in-memory model so that alternative backends can share the same result type. 
// NOTE: this is an aggregate and the in-memory backend fills it by positional
// brace-initialization ({BeginUs, DurationUs, NameId, Depth, MergeCount}), so
// the member order below must not change.
struct TimelineScopeView
{
	uint32_t BeginUs;	  // scope start; the `Us` suffix presumably means microseconds — TODO confirm time base against TimelineScope
	uint32_t DurationUs;  // scope length, same unit as BeginUs
	uint32_t NameId;	  // identifier of the scope's name, copied from TimelineScope::NameId
	uint16_t Depth;		  // copied from TimelineScope::Depth (presumably nesting depth — confirm)
	uint16_t MergeCount;  // 0 == raw LOD 0, N>0 == N merged scopes
};

// Common parameters for a viewport-style timeline query.
struct TimelineQueryRequest
{
	uint32_t StartUs;		// viewport start; scopes that end before this are dropped
	uint32_t EndUs;			// viewport end (inclusive); scopes that begin after this are dropped
	uint32_t MinDurUs;		// scopes shorter than this are dropped
	uint32_t ResolutionUs;	// 0 == LOD 0 (raw); >0 picks the smallest LOD with ResolutionUs >= this
};

// Backend-agnostic interface for serving timeline scope data to the trace
// viewer HTTP handlers. Currently only the in-memory implementation exists,
// but the abstraction is preserved as a clean swap point if a different
// backend (e.g. on-disk indexed store) ever becomes useful.
class TimelineQuery
{
public:
	virtual ~TimelineQuery() = default;

	// Append all scopes for a single thread matching the request to Out.
	// Out is not cleared; callers can chain queries into the same buffer.
	virtual void QueryThread(uint32_t ThreadId, const TimelineQueryRequest& Req, std::vector<TimelineScopeView>& Out) const = 0;

	// Result of a batch query: a single flat scope vector plus per-thread
	// ranges into it. Ranges[i] corresponds to ThreadIds[i] from the request.
	struct BatchResult
	{
		// Half-open index range [Begin, End) into Scopes; Begin == End means
		// the thread contributed no scopes.
		struct Range
		{
			uint32_t Begin;
			uint32_t End;
		};
		std::vector<TimelineScopeView> Scopes;
		std::vector<Range>			   Ranges;
	};

	// Query several threads in one call. Out is cleared before being filled.
	virtual void QueryBatch(std::span<const uint32_t> ThreadIds, const TimelineQueryRequest& Req, BatchResult& Out) const = 0;
};

// In-memory implementation. Holds a reference to Model — the model must
// outlive the returned object.
+std::unique_ptr<TimelineQuery> MakeInMemoryTimelineQuery(const TraceModel& Model); + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_analyze.cpp b/src/zen/trace/trace_analyze.cpp new file mode 100644 index 000000000..ff168cd9c --- /dev/null +++ b/src/zen/trace/trace_analyze.cpp @@ -0,0 +1,812 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "trace_analyze.h" + +#include "callstack_formatter.h" +#include "trace_cache.h" +#include "zen.h" + +#include <zencore/basicfile.h> +#include <zencore/fmtutils.h> +#include <zencore/iobuffer.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/workthreadpool.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/hash_map.h> +#include <EASTL/hash_set.h> +#include <EASTL/vector.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <algorithm> + +namespace { + +using namespace zen::trace_detail; + +static void +AppendHtmlEscaped(zen::StringBuilderBase& Out, std::string_view Text) +{ + for (char Ch : Text) + { + switch (Ch) + { + case '&': + Out << "&"; + break; + case '<': + Out << "<"; + break; + case '>': + Out << ">"; + break; + case '"': + Out << """; + break; + case '\'': + Out << "'"; + break; + default: + Out.Append(Ch); + break; + } + } +} + +static CallstackFilterOptions +BuildCallstackFilterOptions(const AnalyzeOptions& Options) +{ + CallstackFilterOptions Result; + Result.EnableHeuristic = Options.EnableCallstackHeuristic; + Result.SkipPatterns = Options.CallstackSkipPatterns; + return Result; +} + +static std::string +BuildThreadSummary(const TraceModel& Model, const eastl::fixed_vector<uint32_t, 4, true>& ThreadIds) +{ + std::string Result; + for (uint32_t Tid : ThreadIds) + { + if (!Result.empty()) + { + Result += ", "; + } + auto TIt = std::find_if(Model.Threads.begin(), Model.Threads.end(), [Tid](const ThreadInfoEntry& T) { return T.ThreadId == Tid; }); + if (TIt != Model.Threads.end() && 
!TIt->Name.empty()) + { + Result += TIt->Name; + } + else + { + Result += fmt::format("tid:{}", Tid); + } + } + return Result; +} + +static void +AppendHtmlCallstack(zen::StringBuilderBase& Out, const AnalyzeOptions& Options, CallstackFormatter& Formatter, uint32_t CallstackId) +{ + const CallstackEntry* Entry = Formatter.FindCallstackEntry(CallstackId); + if (Entry == nullptr || Entry->Frames.empty()) + { + Out << "<div class=\"muted\">No callstack frames recorded.</div>"; + return; + } + + FilteredCallstackView Filtered = Formatter.BuildView(*Entry, BuildCallstackFilterOptions(Options)); + if (Filtered.HiddenPrefixCount > 0) + { + Out << "<div class=\"muted\">Skipped " << uint64_t(Filtered.HiddenPrefixCount) << " leading frame(s)"; + if (Filtered.IncludedThirdPartyBoundary) + { + Out << "; kept boundary third-party callsite"; + } + Out << ".</div>"; + } + + Out << "<ol class=\"frames\">"; + for (const FilteredCallstackFrame& Frame : Filtered.Frames) + { + Out << "<li>"; + AppendHtmlEscaped(Out, Frame.Display); + Out << "</li>"; + } + Out << "</ol>"; +} + +static std::string_view +FindHeapName(const TraceModel& Model, uint32_t HeapId) +{ + for (const HeapInfo& Heap : Model.Heaps) + { + if (Heap.Id == HeapId && !Heap.Name.empty()) + { + return Heap.Name; + } + } + return "unknown"; +} + +static bool +PassesChurnThreshold(const AnalyzeOptions& Options, const CallstackChurnStat& Stat) +{ + return Stat.MeanDistance <= double(Options.ChurnDistanceThreshold); +} + +static uint64_t +CountShownChurnSites(const TraceModel& Model, const AnalyzeOptions& Options, uint64_t Limit = 100) +{ + uint64_t Result = 0; + for (const CallstackChurnStat& Stat : Model.ChurnStats) + { + if (PassesChurnThreshold(Options, Stat) && Result < Limit) + { + ++Result; + } + } + return Result; +} + +class ConsoleAnalyzeWriter +{ +public: + ConsoleAnalyzeWriter(const TraceModel& InModel, + const AnalyzeOptions& InOptions, + const std::filesystem::path& InFilePath, + CallstackFormatter& 
InFrameFormatter) + : m_Model(InModel) + , m_Options(InOptions) + , m_FilePath(InFilePath) + , m_FrameFormatter(InFrameFormatter) + { + } + + void Write() const + { + AppendSession(); + AppendGeneralSummary(); + AppendEventTypes(); + AppendThreads(); + AppendChannels(); + AppendCpuScopeStats(); + AppendMemorySummary(); + AppendLiveAllocationCallstacks(); + AppendChurnCallstacks(); + } + +private: + void AppendSession() const + { + const SessionInfo& Session = m_Model.Session; + if (!Session.HasSession) + { + return; + } + + ZEN_CONSOLE("Session:"); + if (!Session.Platform.empty()) + { + ZEN_CONSOLE(" Platform: {}", Session.Platform); + } + if (!Session.AppName.empty()) + { + ZEN_CONSOLE(" App: {}", Session.AppName); + } + if (!Session.ProjectName.empty()) + { + ZEN_CONSOLE(" Project: {}", Session.ProjectName); + } + if (!Session.Branch.empty()) + { + ZEN_CONSOLE(" Branch: {}", Session.Branch); + } + if (!Session.BuildVersion.empty()) + { + ZEN_CONSOLE(" Build: {}", Session.BuildVersion); + } + if (Session.ConfigurationType != 0) + { + constexpr const char* kConfigNames[] = {"Unknown", "Debug", "DebugGame", "Development", "Shipping", "Test"}; + uint8_t Idx = Session.ConfigurationType; + const char* Name = (Idx < std::size(kConfigNames)) ? kConfigNames[Idx] : "Unknown"; + ZEN_CONSOLE(" Config: {}", Name); + } + if (Session.Changelist != 0) + { + ZEN_CONSOLE(" CL: {}", Session.Changelist); + } + if (!Session.CommandLine.empty()) + { + ZEN_CONSOLE(" Cmd: {}", Session.CommandLine); + } + ZEN_CONSOLE(""); + } + + void AppendGeneralSummary() const + { + uint64_t DurationUs = (m_Model.TraceEndUs > m_Model.TraceStartUs) ? 
(m_Model.TraceEndUs - m_Model.TraceStartUs) : 0; + + ZEN_CONSOLE("Trace: {}", m_FilePath); + ZEN_CONSOLE("Size: {}", zen::NiceBytes(m_Model.FileSize)); + ZEN_CONSOLE("Events: {}", zen::ThousandsNum(m_Model.TotalEvents)); + ZEN_CONSOLE("Duration: {}", zen::NiceTimeSpanMs((DurationUs + 500) / 1000)); + ZEN_CONSOLE("Threads: {}", m_Model.Threads.size()); + ZEN_CONSOLE("Modules: {}", m_Model.Modules.size()); + ZEN_CONSOLE("Parsed: {}", zen::NiceTimeSpanMs(m_Model.ParseTimeMs)); + if (m_Model.ParseTimeMs > 0) + { + ZEN_CONSOLE("Rate: {} events/s", zen::ThousandsNum(m_Model.TotalEvents * 1000 / m_Model.ParseTimeMs)); + } + ZEN_CONSOLE(""); + } + + void AppendEventTypes() const + { + if (m_Model.EventTypeCounts.empty()) + { + return; + } + + size_t MaxNameLen = 10; + for (const auto& Entry : m_Model.EventTypeCounts) + { + MaxNameLen = std::max(MaxNameLen, Entry.Name.size()); + } + + ZEN_CONSOLE("{:<{}} {:>14}", "Event Type", MaxNameLen, "Count"); + ZEN_CONSOLE("{:-<{}}", "", MaxNameLen + 16); + for (const auto& Entry : m_Model.EventTypeCounts) + { + ZEN_CONSOLE("{:<{}} {:>14}", Entry.Name, MaxNameLen, zen::ThousandsNum(Entry.Count)); + } + ZEN_CONSOLE(""); + } + + void AppendThreads() const + { + if (m_Model.Threads.empty()) + { + return; + } + + ZEN_CONSOLE("Threads:"); + for (const ThreadInfoEntry& Thread : m_Model.Threads) + { + auto TimelineIt = std::find_if(m_Model.Timelines.begin(), + m_Model.Timelines.end(), + [Tid = Thread.ThreadId](const ThreadTimeline& T) { return T.ThreadId == Tid; }); + uint64_t ScopeCount = (TimelineIt != m_Model.Timelines.end()) ? 
TimelineIt->Scopes.size() : 0; + + if (!Thread.Name.empty()) + { + ZEN_CONSOLE(" {:>5} {:<32} {} scopes", Thread.ThreadId, Thread.Name, zen::ThousandsNum(ScopeCount)); + } + } + ZEN_CONSOLE(""); + } + + void AppendChannels() const + { + if (m_Model.Channels.empty()) + { + return; + } + + ZEN_CONSOLE("Channels:"); + for (const ChannelInfo& Channel : m_Model.Channels) + { + ZEN_CONSOLE(" {:<32} {}", Channel.Name, Channel.Enabled ? "enabled" : "disabled"); + } + ZEN_CONSOLE(""); + } + + void AppendCpuScopeStats() const + { + if (m_Model.ScopeStats.empty()) + { + return; + } + + ZEN_CONSOLE("CPU Profiling Scopes:"); + ZEN_CONSOLE(""); + ZEN_CONSOLE("{:<48} {:>8} {:>9} {:>9} {:>9} {:>9}", "Scope", "Count", "Min(ms)", "Mean(ms)", "Max(ms)", "SD(ms)"); + ZEN_CONSOLE("{:-<{}}", "", 48 + 8 + 9 + 9 + 9 + 9 + 5); + + constexpr double UsToMs = 1.0 / 1000.0; + for (const CpuScopeStat& Stat : m_Model.ScopeStats) + { + if (Stat.MaxUs < 500) + { + continue; + } + + ZEN_CONSOLE("{:<48.48} {:>8} {:>9.3f} {:>9.3f} {:>9.3f} {:>9.3f}", + Stat.Name, + zen::ThousandsNum(Stat.Count), + double(Stat.MinUs) * UsToMs, + Stat.MeanUs * UsToMs, + double(Stat.MaxUs) * UsToMs, + Stat.StdDevUs * UsToMs); + } + ZEN_CONSOLE(""); + } + + void AppendMemorySummary() const + { + const AllocationSummary& AllocSummary = m_Model.AllocSummary; + if (!AllocSummary.HasMemoryData) + { + return; + } + + ZEN_CONSOLE("Memory Allocations:"); + ZEN_CONSOLE(""); + ZEN_CONSOLE(" Allocs: {}", zen::ThousandsNum(AllocSummary.TotalAllocs)); + ZEN_CONSOLE(" Frees: {}", zen::ThousandsNum(AllocSummary.TotalFrees)); + ZEN_CONSOLE(" Reallocs: {} alloc / {} free", + zen::ThousandsNum(AllocSummary.TotalReallocAllocs), + zen::ThousandsNum(AllocSummary.TotalReallocFrees)); + ZEN_CONSOLE(" Peak: {}", zen::NiceBytes(uint64_t(AllocSummary.PeakBytes))); + ZEN_CONSOLE(" End: {}", zen::NiceBytes(uint64_t(AllocSummary.EndBytes))); + ZEN_CONSOLE(" Live allocs: {}", zen::ThousandsNum(AllocSummary.LiveAllocations)); + + if 
(!m_Model.HeapStats.empty()) + { + ZEN_CONSOLE(""); + ZEN_CONSOLE(" {:<20} {:>14} {:>14} {:>10} {:>10}", "Heap", "Current", "Peak", "Allocs", "Frees"); + ZEN_CONSOLE(" {:-<{}}", "", 20 + 14 + 14 + 10 + 10 + 4); + + for (const HeapStat& Stat : m_Model.HeapStats) + { + std::string_view HeapName = FindHeapName(m_Model, Stat.HeapId); + + ZEN_CONSOLE(" {:<20.20} {:>14} {:>14} {:>10} {:>10}", + HeapName, + zen::NiceBytes(uint64_t(Stat.CurrentBytes)), + zen::NiceBytes(uint64_t(Stat.PeakBytes)), + zen::ThousandsNum(Stat.AllocCount), + zen::ThousandsNum(Stat.FreeCount)); + } + } + ZEN_CONSOLE(""); + } + + void PrintCallstack(uint32_t CallstackId) const + { + const CallstackEntry* Entry = m_FrameFormatter.FindCallstackEntry(CallstackId); + if (Entry == nullptr) + { + return; + } + + FilteredCallstackView Filtered = m_FrameFormatter.BuildView(*Entry, BuildCallstackFilterOptions(m_Options)); + if (Filtered.HiddenPrefixCount > 0) + { + if (Filtered.IncludedThirdPartyBoundary) + { + ZEN_CONSOLE(" [skipped {} leading frame(s); kept boundary third-party callsite]", Filtered.HiddenPrefixCount); + } + else + { + ZEN_CONSOLE(" [skipped {} leading frame(s)]", Filtered.HiddenPrefixCount); + } + } + for (const FilteredCallstackFrame& Frame : Filtered.Frames) + { + ZEN_CONSOLE(" {}", Frame.Display); + } + } + + void AppendLiveAllocationCallstacks() const + { + if (m_Options.LiveAllocsLimit <= 0 || m_Model.CallstackStats.empty()) + { + return; + } + + size_t Count = std::min(size_t(m_Options.LiveAllocsLimit), m_Model.CallstackStats.size()); + ZEN_CONSOLE("Live Allocation Callstacks (top {} by bytes):", Count); + ZEN_CONSOLE(""); + + for (size_t I = 0; I < Count; ++I) + { + const CallstackAllocStat& Stat = m_Model.CallstackStats[I]; + std::string ThreadInfo = BuildThreadSummary(m_Model, Stat.ThreadIds); + ZEN_CONSOLE(" #{} {} in {} allocation(s) [callstack {}, {}]", + I + 1, + zen::NiceBytes(uint64_t(Stat.LiveBytes)), + zen::ThousandsNum(Stat.LiveCount), + Stat.CallstackId, + ThreadInfo); 
+ PrintCallstack(Stat.CallstackId); + ZEN_CONSOLE(""); + } + } + + void AppendChurnCallstacks() const + { + if (m_Options.ChurnLimit <= 0 || m_Model.ChurnStats.empty()) + { + return; + } + + size_t Emitted = 0; + size_t Limit = size_t(m_Options.ChurnLimit); + ZEN_CONSOLE("Allocation Churn (top {}, event distance <= {}):", Limit, m_Options.ChurnDistanceThreshold); + ZEN_CONSOLE(""); + + for (const CallstackChurnStat& Stat : m_Model.ChurnStats) + { + if (Emitted >= Limit) + { + break; + } + if (!PassesChurnThreshold(m_Options, Stat)) + { + continue; + } + + ZEN_CONSOLE(" #{} {} short-lived allocs ({} total), {} churned, avg distance {:.0f} events [callstack {}]", + Emitted + 1, + zen::ThousandsNum(Stat.ChurnAllocs), + zen::ThousandsNum(Stat.TotalAllocs), + zen::NiceBytes(Stat.ChurnBytes), + Stat.MeanDistance, + Stat.CallstackId); + PrintCallstack(Stat.CallstackId); + ZEN_CONSOLE(""); + ++Emitted; + } + } + + const TraceModel& m_Model; + const AnalyzeOptions& m_Options; + const std::filesystem::path& m_FilePath; + CallstackFormatter& m_FrameFormatter; +}; + +class HtmlReportWriter +{ +public: + HtmlReportWriter(const TraceModel& InModel, + const AnalyzeOptions& InOptions, + const std::filesystem::path& InFilePath, + CallstackFormatter& InFrameFormatter) + : m_Model(InModel) + , m_Options(InOptions) + , m_FilePath(InFilePath) + , m_FrameFormatter(InFrameFormatter) + { + } + + void Write(const std::filesystem::path& OutputPath) + { + AppendDocument(); + zen::WriteFile(OutputPath, zen::IoBuffer(zen::IoBuffer::Wrap, m_Html.Data(), m_Html.Size())); + } + +private: + void AppendDocument() + { + m_Html << "<!doctype html><html><head><meta charset=\"utf-8\"><title>zen trace analyze report</title>"; + AppendStyles(); + m_Html << "</head><body>"; + AppendHeader(); + AppendSummaryCards(); + AppendLeaksSection(); + AppendChurnSection(); + m_Html << "</body></html>"; + } + + void AppendStyles() + { + m_Html << "<style>body{font:14px/1.45 system-ui,-apple-system,Segoe " + 
"UI,Roboto,sans-serif;margin:24px;color:#1f2937;background:#f8fafc;}"; + m_Html << "h1,h2{margin:0 0 12px;}h1{font-size:28px;}h2{font-size:20px;margin-top:28px;}"; + m_Html << ".meta,.card,details{background:#fff;border:1px solid #dbe2ea;border-radius:10px;box-shadow:0 1px 2px rgba(0,0,0,.04);}"; + m_Html << ".meta,.card{padding:16px;margin-bottom:16px;}.report-table{width:100%;border-collapse:collapse;background:#fff;border:" + "1px solid #dbe2ea;border-radius:10px;overflow:hidden;table-layout:fixed;}"; + m_Html << "th,td{padding:10px 12px;border-bottom:1px solid " + "#e5e7eb;vertical-align:top;text-align:left;}th{background:#f1f5f9;font-weight:600;}tr:last-child td{border-bottom:0;}"; + m_Html + << "th.num,td.num{text-align:right;white-space:nowrap;font-variant-numeric:tabular-nums;}.muted{color:#64748b;}.pill{display:" + "inline-block;padding:2px 8px;border-radius:999px;background:#e2e8f0;color:#334155;font-size:12px;margin-right:6px;}"; + m_Html << ".col-rank{width:56px;}.col-live-bytes,.col-alloc-count,.col-short-lived,.col-churn-bytes,.col-total-allocs,.col-avg-" + "distance{width:132px;}.col-threads{width:260px;}.col-callstack{width:auto;}"; + m_Html << ".grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));gap:12px;margin-bottom:18px;}details{" + "display:block;width:100%;box-sizing:border-box;padding:12px " + "14px;margin:0;}summary{cursor:pointer;font-weight:600;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;}"; + m_Html << ".callstack-cell{width:100%;}.frames{margin:10px 0 0 " + "20px;padding:0;font-family:ui-monospace,SFMono-Regular,Consolas,monospace;font-size:12px;}.frames li{margin:4px " + "0;overflow-wrap:anywhere;word-break:break-word;}code{font-family:ui-monospace,SFMono-Regular,Consolas,monospace;}a{" + "color:#2563eb;text-decoration:none;}a:hover{text-decoration:underline;}</style>"; + } + + void AppendHeader() + { + m_Html << "<h1>zen trace analyze memory report</h1>"; + m_Html << "<div class=\"meta\"><div><span 
class=\"pill\">offline HTML</span><span class=\"pill\">top 100 churn " + "sites</span></div><p><strong>Trace:</strong> <code>"; + AppendHtmlEscaped(m_Html, m_FilePath.string()); + m_Html << "</code></p>"; + if (m_Model.Session.HasSession && !m_Model.Session.AppName.empty()) + { + m_Html << "<p><strong>App:</strong> "; + AppendHtmlEscaped(m_Html, m_Model.Session.AppName); + m_Html << "</p>"; + } + m_Html << "<p class=\"muted\">Generated by zen trace analyze. Churn threshold: "; + AppendHtmlEscaped(m_Html, fmt::format("{} events", m_Options.ChurnDistanceThreshold)); + m_Html << "</p></div>"; + } + + void AppendSummaryCards() + { + uint64_t DurationUs = (m_Model.TraceEndUs > m_Model.TraceStartUs) ? (m_Model.TraceEndUs - m_Model.TraceStartUs) : 0; + m_Html << "<div class=\"grid\">"; + m_Html << "<div class=\"card\"><div class=\"muted\">Trace size</div><div><strong>" << zen::NiceBytes(m_Model.FileSize) + << "</strong></div></div>"; + m_Html << "<div class=\"card\"><div class=\"muted\">Duration</div><div><strong>" << zen::NiceTimeSpanMs((DurationUs + 500) / 1000) + << "</strong></div></div>"; + m_Html << "<div class=\"card\"><div class=\"muted\">Peak memory</div><div><strong>" + << zen::NiceBytes(uint64_t(m_Model.AllocSummary.PeakBytes)) << "</strong></div></div>"; + m_Html << "<div class=\"card\"><div class=\"muted\">End memory</div><div><strong>" + << zen::NiceBytes(uint64_t(m_Model.AllocSummary.EndBytes)) << "</strong></div></div>"; + m_Html << "<div class=\"card\"><div class=\"muted\">Live allocations</div><div><strong>" + << zen::ThousandsNum(m_Model.AllocSummary.LiveAllocations) << "</strong></div></div>"; + m_Html << "<div class=\"card\"><div class=\"muted\">Leak callstacks</div><div><strong>" + << zen::ThousandsNum(m_Model.CallstackStats.size()) << "</strong></div></div>"; + m_Html << "<div class=\"card\"><div class=\"muted\">Churn sites shown</div><div><strong>" + << zen::ThousandsNum(::CountShownChurnSites(m_Model, m_Options)) << "</strong></div></div>"; + 
m_Html << "</div>"; + } + + void AppendLeaksSection() + { + m_Html << "<h2 id=\"leaks\">Memory leaks (all live-allocation callstacks)</h2>"; + if (m_Model.CallstackStats.empty()) + { + m_Html << "<div class=\"card muted\">No live allocation callstacks were present at the end of the trace.</div>"; + return; + } + + m_Html << "<table class=\"report-table\"><colgroup><col class=\"col-rank\"><col class=\"col-live-bytes\"><col " + "class=\"col-alloc-count\"><col class=\"col-threads\"><col class=\"col-callstack\"></colgroup><thead><tr><th " + "class=\"num\">#</th><th class=\"num\">Live bytes</th><th class=\"num\">Alloc " + "count</th><th>Threads</th><th>Callstack</th></tr></thead><tbody>"; + for (size_t I = 0; I < m_Model.CallstackStats.size(); ++I) + { + const CallstackAllocStat& Stat = m_Model.CallstackStats[I]; + std::string ThreadInfo = BuildThreadSummary(m_Model, Stat.ThreadIds); + m_Html << "<tr><td class=\"num\">" << uint64_t(I + 1) << "</td><td class=\"num\">" << zen::NiceBytes(uint64_t(Stat.LiveBytes)) + << "</td><td class=\"num\">" << zen::ThousandsNum(Stat.LiveCount) << "</td><td>"; + AppendHtmlEscaped(m_Html, ThreadInfo); + m_Html << "</td><td class=\"callstack-cell\"><details><summary>Callstack " << Stat.CallstackId << "</summary>"; + AppendHtmlCallstack(m_Html, m_Options, m_FrameFormatter, Stat.CallstackId); + m_Html << "</details></td></tr>"; + } + m_Html << "</tbody></table>"; + } + + void AppendChurnSection() + { + m_Html << "<h2 id=\"churn\">Allocation churn sites (top 100)</h2>"; + if (m_Model.ChurnStats.empty()) + { + m_Html << "<div class=\"card muted\">No churn statistics were available in this trace.</div>"; + return; + } + + m_Html << "<table class=\"report-table\"><colgroup><col class=\"col-rank\"><col class=\"col-short-lived\"><col " + "class=\"col-churn-bytes\"><col class=\"col-total-allocs\"><col class=\"col-avg-distance\"><col " + "class=\"col-callstack\"></colgroup><thead><tr><th class=\"num\">#</th><th class=\"num\">Short-lived 
allocs</th><th " + "class=\"num\">Churn bytes</th><th class=\"num\">Total allocs</th><th class=\"num\">Avg " + "distance</th><th>Callstack</th></tr></thead><tbody>"; + size_t Emitted = 0; + for (const CallstackChurnStat& Stat : m_Model.ChurnStats) + { + if (Emitted >= 100) + { + break; + } + if (!PassesChurnThreshold(m_Options, Stat)) + { + continue; + } + m_Html << "<tr><td class=\"num\">" << uint64_t(Emitted + 1) << "</td><td class=\"num\">" << zen::ThousandsNum(Stat.ChurnAllocs) + << "</td><td class=\"num\">" << zen::NiceBytes(Stat.ChurnBytes) << "</td><td class=\"num\">" + << zen::ThousandsNum(Stat.TotalAllocs) << "</td><td class=\"num\">" << fmt::format("{:.0f} events", Stat.MeanDistance) + << "</td><td class=\"callstack-cell\"><details><summary>Callstack " << Stat.CallstackId << "</summary>"; + AppendHtmlCallstack(m_Html, m_Options, m_FrameFormatter, Stat.CallstackId); + m_Html << "</details></td></tr>"; + ++Emitted; + } + m_Html << "</tbody></table>"; + } + + const TraceModel& m_Model; + const AnalyzeOptions& m_Options; + const std::filesystem::path& m_FilePath; + CallstackFormatter& m_FrameFormatter; + zen::ExtendableStringBuilder<32768> m_Html; +}; + +static void +WriteAnalyzeHtmlReport(const TraceModel& Model, + const AnalyzeOptions& Options, + const std::filesystem::path& FilePath, + CallstackFormatter& FrameFormatter) +{ + std::filesystem::path OutputPath = std::filesystem::absolute(Options.HtmlReportPath); + if (OutputPath.empty()) + { + return; + } + + std::error_code Ec; + std::filesystem::path ParentPath = OutputPath.parent_path(); + if (!ParentPath.empty()) + { + std::filesystem::create_directories(ParentPath, Ec); + } + + HtmlReportWriter Writer(Model, Options, FilePath, FrameFormatter); + Writer.Write(OutputPath); + ZEN_CONSOLE("HTML report: {}", OutputPath.string()); +} + +} // namespace + +namespace zen::trace_detail { + +void +RunAnalyze(const std::filesystem::path& FilePath, const AnalyzeOptions& Options) +{ + std::filesystem::path CachePath 
= FilePath; + CachePath.replace_extension(".ucache_z"); + + TraceModel Model; + std::unique_ptr<SymbolResolver> Symbols; + bool LoadedFromCache = false; + + // Try loading from cache + if (!Options.NoCache) + { + std::optional<CachedAnalysis> Cached = TryLoadAnalyzeCache(CachePath, FilePath); + if (Cached) + { + Model = std::move(Cached->Model); + Symbols = std::move(Cached->Symbols); + LoadedFromCache = true; + } + } + + if (!LoadedFromCache) + { + WorkerThreadPool ThreadPool(gsl::narrow<int>(GetHardwareConcurrency())); + Model = BuildTraceModel(FilePath, ThreadPool); + + if (Options.Symbols != SymbolBackend::Off) + { + Symbols = CreateSymbolResolver(Options.Symbols); + for (const ModuleInfo& Mod : Model.Modules) + { + Symbols->LoadModule(Mod); + } + } + } + + CallstackFormatter FrameFormatter(Model, Symbols.get()); + ConsoleAnalyzeWriter ConsoleWriter(Model, Options, FilePath, FrameFormatter); + ConsoleWriter.Write(); + + if (!Options.HtmlReportPath.empty()) + { + WriteAnalyzeHtmlReport(Model, Options, FilePath, FrameFormatter); + } + + // Write cache on fresh parse + if (!LoadedFromCache && !Options.NoCache) + { + // Build the complete symbol map for the cache. Start with whatever + // the formatter already resolved during display, then resolve every + // remaining callstack address in parallel. + eastl::hash_map<uint64_t, std::string> AllSymbols = FrameFormatter.GetResolvedCache(); + + // Collect unique addresses that still need resolving. + eastl::hash_set<uint64_t> Needed; + for (const CallstackEntry& CS : Model.Callstacks) + { + for (const ResolvedFrame& Frame : CS.Frames) + { + if (AllSymbols.find(Frame.Address) == AllSymbols.end()) + { + Needed.insert(Frame.Address); + } + } + } + + if (!Needed.empty() && Symbols) + { + // Flatten to a vector so we can partition into chunks. 
+ eastl::vector<uint64_t> Addresses(Needed.begin(), Needed.end()); + Needed.clear(); + + uint32_t ThreadCount = gsl::narrow<uint32_t>(GetHardwareConcurrency()); + WorkerThreadPool ResolvePool(gsl::narrow<int>(ThreadCount)); + + // Each worker resolves a chunk and writes into its own local map. + eastl::vector<eastl::hash_map<uint64_t, std::string>> PerThread(ThreadCount); + uint32_t ChunkSize = uint32_t((Addresses.size() + ThreadCount - 1) / ThreadCount); + + Latch Done(ThreadCount); + for (uint32_t T = 0; T < ThreadCount; ++T) + { + uint32_t Begin = T * ChunkSize; + uint32_t End = std::min(Begin + ChunkSize, uint32_t(Addresses.size())); + if (Begin >= End) + { + Done.CountDown(); + continue; + } + + ResolvePool.ScheduleWork( + [&Addresses, &PerThread, &Model, &Symbols, &Done, T, Begin, End]() { + auto _ = MakeGuard([&Done]() { Done.CountDown(); }); + for (uint32_t I = Begin; I < End; ++I) + { + uint64_t Addr = Addresses[I]; + std::string Symbol = Symbols->Resolve(Addr); + if (!Symbol.empty()) + { + PerThread[T].emplace(Addr, std::move(Symbol)); + } + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + Done.Wait(); + + // Merge per-thread results. + for (auto& Map : PerThread) + { + for (auto& [Addr, Sym] : Map) + { + AllSymbols.emplace(Addr, std::move(Sym)); + } + } + } + + // Fill in module-name fallbacks for any addresses not resolved by the + // symbol resolver (same logic as CallstackFormatter::Describe). 
+ for (const CallstackEntry& CS : Model.Callstacks) + { + for (const ResolvedFrame& Frame : CS.Frames) + { + if (AllSymbols.find(Frame.Address) != AllSymbols.end()) + { + continue; + } + std::string Fallback; + if (Frame.ModuleIndex != ~0u && Frame.ModuleIndex < Model.Modules.size()) + { + Fallback = fmt::format("{} + 0x{:X}", Model.Modules[Frame.ModuleIndex].Name, Frame.Offset); + } + else + { + Fallback = fmt::format("0x{:X}", Frame.Address); + } + AllSymbols.emplace(Frame.Address, std::move(Fallback)); + } + } + + WriteAnalyzeCache(CachePath, FilePath, Model, AllSymbols); + } +} + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_analyze.h b/src/zen/trace/trace_analyze.h new file mode 100644 index 000000000..7b6f4fccd --- /dev/null +++ b/src/zen/trace/trace_analyze.h @@ -0,0 +1,29 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "symbol_resolver.h" +#include "trace_model.h" + +#include <cstdint> +#include <filesystem> +#include <string> +#include <vector> + +namespace zen::trace_detail { + +struct AnalyzeOptions +{ + int LiveAllocsLimit = 50; // 0 = off + int ChurnLimit = 0; // 0 = off; top N churny callstacks + uint64_t ChurnDistanceThreshold = 1000; // event distance: allocs freed within N events are "churny" + SymbolBackend Symbols = SymbolBackend(1); // Pdb (default) + std::filesystem::path HtmlReportPath; // empty = off; standalone offline memory HTML report + bool NoCache = false; // skip reading/writing the .ucache_z cache + bool EnableCallstackHeuristic = true; // skip leading low-level / third-party frames while keeping the boundary callsite + std::vector<std::string> CallstackSkipPatterns; // wildcard patterns matched against symbol, module name, and module path +}; + +void RunAnalyze(const std::filesystem::path& FilePath, const AnalyzeOptions& Options = {}); + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_cache.cpp b/src/zen/trace/trace_cache.cpp new file mode 100644 index 
000000000..165c1eecf --- /dev/null +++ b/src/zen/trace/trace_cache.cpp @@ -0,0 +1,1104 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "trace_cache.h" + +#include <zencore/basicfile.h> +#include <zencore/compress.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/iohash.h> +#include <zencore/logging.h> +#include <zencore/stream.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/sort.h> +#include <EASTL/vector.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <filesystem> + +namespace zen::trace_detail { + +// =========================================================================== +// StringTableBuilder — write-path helper that deduplicates and packs strings +// +// Strings are appended back-to-back (null-terminated) in a single contiguous +// block. Deduplication is keyed by (offset, length) pairs into that block so +// no separate string copies are made. To look up an incoming string_view we +// speculatively append it, build a key, and look it up. On duplicate the +// append is rolled back by truncating the buffer. +// =========================================================================== + +class StringTableBuilder +{ +public: + StringTableBuilder() : m_IndexMap(0, StringHash{&m_Packed}, StringEq{&m_Packed}) { m_Packed.reserve(4096); } + + // Intern a string and return its index. Deduplicates across calls. + uint32_t Intern(std::string_view Str) + { + // Speculatively append the string so that the hash/eq functors can + // read it from the packed buffer (avoids dangling string_view keys). 
+ uint32_t SpecOffset = uint32_t(m_Packed.size()); + uint32_t SpecLength = uint32_t(Str.size()); + + m_Packed.resize(m_Packed.size() + Str.size() + 1); + if (!Str.empty()) + { + memcpy(m_Packed.data() + SpecOffset, Str.data(), Str.size()); + } + m_Packed[SpecOffset + Str.size()] = '\0'; + + StringKey Key{SpecOffset, SpecLength}; + auto It = m_IndexMap.find(Key); + if (It != m_IndexMap.end()) + { + // Duplicate — roll back the speculative append. + m_Packed.resize(SpecOffset); + return It->second; + } + + // New string — keep the append and record its index. + uint32_t Index = uint32_t(m_Offsets.size()); + m_Offsets.push_back(SpecOffset); + m_IndexMap.emplace(Key, Index); + return Index; + } + + // Serialize: [uint32_t count][uint32_t offsets[count]][packed strings] + SharedBuffer Serialize() const + { + BinaryWriter W; + uint32_t Count = uint32_t(m_Offsets.size()); + W.Write(&Count, sizeof(Count)); + if (Count > 0) + { + W.Write(m_Offsets.data(), m_Offsets.size() * sizeof(uint32_t)); + } + if (!m_Packed.empty()) + { + W.Write(m_Packed.data(), m_Packed.size()); + } + return SharedBuffer(IoBuffer(IoBuffer::Clone, W.Data(), W.Size())); + } + +private: + struct StringKey + { + uint32_t Offset; + uint32_t Length; + }; + + struct StringHash + { + const eastl::vector<uint8_t>* Packed; + size_t operator()(const StringKey& K) const + { + std::string_view Sv(reinterpret_cast<const char*>(Packed->data()) + K.Offset, K.Length); + return std::hash<std::string_view>{}(Sv); + } + }; + + struct StringEq + { + const eastl::vector<uint8_t>* Packed; + bool operator()(const StringKey& A, const StringKey& B) const + { + if (A.Length != B.Length) + { + return false; + } + return memcmp(Packed->data() + A.Offset, Packed->data() + B.Offset, A.Length) == 0; + } + }; + + eastl::vector<uint8_t> m_Packed; // null-terminated strings back-to-back + eastl::vector<uint32_t> m_Offsets; // byte offset into m_Packed for each string + + // Dedup map: StringKey (offset+length into m_Packed) → string 
index. + // Hash/eq functors hold a pointer to m_Packed (stable address) and read + // via data() at call time, so reallocation of m_Packed is safe. + eastl::hash_map<StringKey, uint32_t, StringHash, StringEq> m_IndexMap; +}; + +// =========================================================================== +// StringTableReader — read-path helper for O(1) string lookup by index +// =========================================================================== + +class StringTableReader +{ +public: + bool Init(const SharedBuffer& Data) + { + if (Data.GetSize() < sizeof(uint32_t)) + { + return false; + } + + const uint8_t* Base = reinterpret_cast<const uint8_t*>(Data.GetData()); + memcpy(&m_Count, Base, sizeof(uint32_t)); + + size_t RequiredHeader = sizeof(uint32_t) + size_t(m_Count) * sizeof(uint32_t); + if (Data.GetSize() < RequiredHeader) + { + return false; + } + + m_Offsets = reinterpret_cast<const uint32_t*>(Base + sizeof(uint32_t)); + m_PackedBase = reinterpret_cast<const char*>(Base + RequiredHeader); + m_PackedSize = Data.GetSize() - RequiredHeader; + m_OwningBuffer = Data; + return true; + } + + std::string_view Get(uint32_t Index) const + { + if (Index >= m_Count) + { + return {}; + } + uint32_t Off = m_Offsets[Index]; + if (Off >= m_PackedSize) + { + return {}; + } + return std::string_view(m_PackedBase + Off); + } + + uint32_t Count() const { return m_Count; } + +private: + uint32_t m_Count = 0; + const uint32_t* m_Offsets = nullptr; + const char* m_PackedBase = nullptr; + size_t m_PackedSize = 0; + SharedBuffer m_OwningBuffer; // keeps the decompressed data alive +}; + +// =========================================================================== +// CachedSymbolResolver — SymbolResolver backed by cache data +// =========================================================================== + +class CachedSymbolResolver final : public SymbolResolver +{ +public: + void LoadModule(const ModuleInfo&) override {} + std::string Resolve(uint64_t Address) const 
override + { + auto It = m_Symbols.find(Address); + if (It != m_Symbols.end()) + { + return It->second; + } + return {}; + } + + eastl::hash_map<uint64_t, std::string> m_Symbols; +}; + +// =========================================================================== +// Section writers (model → binary blob) +// =========================================================================== + +namespace { + + template<typename T> + void WritePod(BinaryWriter& W, const T& Value) + { + W.Write(&Value, sizeof(T)); + } + + template<typename T> + void WriteCount(BinaryWriter& W, uint32_t Count) + { + W.Write(&Count, sizeof(Count)); + } + + SharedBuffer ToSharedBuffer(const BinaryWriter& W) { return SharedBuffer(IoBuffer(IoBuffer::Clone, W.Data(), W.Size())); } + + // -- Metadata section -- + + SharedBuffer WriteMetadataSection(const TraceModel& Model, StringTableBuilder& Strings) + { + BinaryWriter W; + + MetadataPod M = {}; + M.FileSize = Model.FileSize; + M.TotalEvents = Model.TotalEvents; + M.ParseTimeMs = Model.ParseTimeMs; + M.TraceStartUs = Model.TraceStartUs; + M.TraceEndUs = Model.TraceEndUs; + + M.SessionPlatform = Strings.Intern(Model.Session.Platform); + M.SessionAppName = Strings.Intern(Model.Session.AppName); + M.SessionProjectName = Strings.Intern(Model.Session.ProjectName); + M.SessionCommandLine = Strings.Intern(Model.Session.CommandLine); + M.SessionBranch = Strings.Intern(Model.Session.Branch); + M.SessionBuildVersion = Strings.Intern(Model.Session.BuildVersion); + M.SessionChangelist = Model.Session.Changelist; + M.SessionConfigType = Model.Session.ConfigurationType; + M.SessionHasSession = Model.Session.HasSession ? 
1 : 0; + WritePod(W, M); + + // Threads + uint32_t ThreadCount = uint32_t(Model.Threads.size()); + WritePod(W, ThreadCount); + for (const ThreadInfoEntry& T : Model.Threads) + { + ThreadInfoPod P = {}; + P.ThreadId = T.ThreadId; + P.Name = Strings.Intern(T.Name); + P.GroupName = Strings.Intern(T.GroupName); + P.SystemId = T.SystemId; + P.SortHint = T.SortHint; + WritePod(W, P); + } + + // Channels + uint32_t ChannelCount = uint32_t(Model.Channels.size()); + WritePod(W, ChannelCount); + for (const ChannelInfo& C : Model.Channels) + { + ChannelInfoPod P = {}; + P.Name = Strings.Intern(C.Name); + P.Enabled = C.Enabled ? 1 : 0; + P.ReadOnly = C.ReadOnly ? 1 : 0; + WritePod(W, P); + } + + // Modules + uint32_t ModuleCount = uint32_t(Model.Modules.size()); + WritePod(W, ModuleCount); + + // First pass: compute ImageId blob layout + eastl::vector<uint32_t> ImageIdOffsets(ModuleCount); + uint32_t ImageIdBlobSize = 0; + for (uint32_t I = 0; I < ModuleCount; ++I) + { + ImageIdOffsets[I] = ImageIdBlobSize; + ImageIdBlobSize += uint32_t(Model.Modules[I].ImageId.size()); + } + + for (uint32_t I = 0; I < ModuleCount; ++I) + { + const ModuleInfo& Mod = Model.Modules[I]; + ModuleInfoPod P = {}; + P.Name = Strings.Intern(Mod.Name); + P.FullPath = Strings.Intern(Mod.FullPath); + P.Base = Mod.Base; + P.Size = Mod.Size; + P.ImageIdSize = uint32_t(Mod.ImageId.size()); + P.ImageIdOffset = ImageIdOffsets[I]; + WritePod(W, P); + } + + // ImageId blob + for (const ModuleInfo& Mod : Model.Modules) + { + if (!Mod.ImageId.empty()) + { + W.Write(Mod.ImageId.data(), Mod.ImageId.size()); + } + } + + // EventTypeCounts + uint32_t EventTypeCount = uint32_t(Model.EventTypeCounts.size()); + WritePod(W, EventTypeCount); + for (const TraceModel::EventTypeCount& E : Model.EventTypeCounts) + { + EventTypeCountPod P = {}; + P.Name = Strings.Intern(E.Name); + P.Count = E.Count; + WritePod(W, P); + } + + // ScopeStats + uint32_t ScopeStatCount = uint32_t(Model.ScopeStats.size()); + WritePod(W, 
ScopeStatCount); + for (const CpuScopeStat& S : Model.ScopeStats) + { + CpuScopeStatPod P = {}; + P.Name = Strings.Intern(S.Name); + P.MinUs = S.MinUs; + P.MaxUs = S.MaxUs; + P.Count = S.Count; + P.MeanUs = S.MeanUs; + P.StdDevUs = S.StdDevUs; + WritePod(W, P); + } + + return ToSharedBuffer(W); + } + + // -- Memory section -- + + SharedBuffer WriteMemorySection(const TraceModel& Model, StringTableBuilder& Strings) + { + BinaryWriter W; + + // AllocSummary + AllocSummaryPod A = {}; + A.HasMemoryData = Model.AllocSummary.HasMemoryData ? 1 : 0; + A.PeakTimeUs = Model.AllocSummary.PeakTimeUs; + A.LiveAllocations = Model.AllocSummary.LiveAllocations; + A.TotalAllocs = Model.AllocSummary.TotalAllocs; + A.TotalFrees = Model.AllocSummary.TotalFrees; + A.TotalReallocAllocs = Model.AllocSummary.TotalReallocAllocs; + A.TotalReallocFrees = Model.AllocSummary.TotalReallocFrees; + A.PeakBytes = Model.AllocSummary.PeakBytes; + A.EndBytes = Model.AllocSummary.EndBytes; + WritePod(W, A); + + // Heaps + uint32_t HeapCount = uint32_t(Model.Heaps.size()); + WritePod(W, HeapCount); + for (const HeapInfo& H : Model.Heaps) + { + HeapInfoPod P = {}; + P.Id = H.Id; + P.ParentId = H.ParentId; + P.Flags = H.Flags; + P.Name = Strings.Intern(H.Name); + WritePod(W, P); + } + + // HeapStats + uint32_t HeapStatCount = uint32_t(Model.HeapStats.size()); + WritePod(W, HeapStatCount); + for (const HeapStat& S : Model.HeapStats) + { + HeapStatPod P = {}; + P.HeapId = S.HeapId; + P.CurrentBytes = S.CurrentBytes; + P.PeakBytes = S.PeakBytes; + P.AllocCount = S.AllocCount; + P.FreeCount = S.FreeCount; + WritePod(W, P); + } + + // CallstackAllocStats + uint32_t AllocStatCount = uint32_t(Model.CallstackStats.size()); + WritePod(W, AllocStatCount); + for (const CallstackAllocStat& S : Model.CallstackStats) + { + CallstackAllocStatPod P = {}; + P.CallstackId = S.CallstackId; + P.LiveCount = S.LiveCount; + P.LiveBytes = S.LiveBytes; + P.ThreadIdCount = uint32_t(std::min(S.ThreadIds.size(), size_t(4))); + for 
(uint32_t I = 0; I < P.ThreadIdCount; ++I) + { + P.ThreadIds[I] = S.ThreadIds[I]; + } + WritePod(W, P); + } + + // ChurnStats + uint32_t ChurnCount = uint32_t(Model.ChurnStats.size()); + WritePod(W, ChurnCount); + for (const CallstackChurnStat& S : Model.ChurnStats) + { + CallstackChurnStatPod P = {}; + P.CallstackId = S.CallstackId; + P.ChurnAllocs = S.ChurnAllocs; + P.ChurnBytes = S.ChurnBytes; + P.TotalAllocs = S.TotalAllocs; + P.TotalBytes = S.TotalBytes; + P.MeanDistance = S.MeanDistance; + WritePod(W, P); + } + + return ToSharedBuffer(W); + } + + // -- Callstacks section -- + + SharedBuffer WriteCallstacksSection(const TraceModel& Model) + { + BinaryWriter W; + + uint32_t Count = uint32_t(Model.Callstacks.size()); + WritePod(W, Count); + + // Compute frame offsets + uint32_t FrameOffset = 0; + for (const CallstackEntry& CS : Model.Callstacks) + { + CallstackHeaderPod H = {}; + H.Id = CS.Id; + H.FrameCount = uint32_t(CS.Frames.size()); + H.FrameOffset = FrameOffset; + WritePod(W, H); + FrameOffset += H.FrameCount; + } + + // Write all frames + for (const CallstackEntry& CS : Model.Callstacks) + { + for (const ResolvedFrame& F : CS.Frames) + { + ResolvedFramePod P = {}; + P.Address = F.Address; + P.ModuleIndex = F.ModuleIndex; + P.Offset = F.Offset; + WritePod(W, P); + } + } + + return ToSharedBuffer(W); + } + + // -- Symbols section -- + + SharedBuffer WriteSymbolsSection(const eastl::hash_map<uint64_t, std::string>& ResolvedSymbols, StringTableBuilder& Strings) + { + BinaryWriter W; + + // Collect and sort entries by address for binary search on read + eastl::vector<SymbolEntryPod> Entries; + Entries.reserve(ResolvedSymbols.size()); + for (const auto& [Address, SymbolStr] : ResolvedSymbols) + { + SymbolEntryPod E = {}; + E.Address = Address; + E.StringIdx = Strings.Intern(SymbolStr); + Entries.push_back(E); + } + eastl::sort(Entries.begin(), Entries.end(), [](const SymbolEntryPod& A, const SymbolEntryPod& B) { return A.Address < B.Address; }); + + uint32_t 
Count = uint32_t(Entries.size()); + WritePod(W, Count); + if (!Entries.empty()) + { + W.Write(Entries.data(), Entries.size() * sizeof(SymbolEntryPod)); + } + + return ToSharedBuffer(W); + } + + // -- Compression helper -- + + CompressedBuffer CompressSection(const SharedBuffer& Raw) + { + return CompressedBuffer::Compress(Raw, OodleCompressor::Mermaid, OodleCompressionLevel::VeryFast); + } + + // =========================================================================== + // Section readers (binary blob → model) + // =========================================================================== + + template<typename T> + bool ReadPod(BinaryReader& R, T& Out) + { + if (R.Remaining() < sizeof(T)) + { + return false; + } + R.Read(&Out, sizeof(T)); + return true; + } + + bool ReadUint32(BinaryReader& R, uint32_t& Out) { return ReadPod(R, Out); } + + bool ReadMetadataSection(const SharedBuffer& Data, const StringTableReader& Strings, TraceModel& Model) + { + BinaryReader R(Data.GetData(), Data.GetSize()); + + MetadataPod M; + if (!ReadPod(R, M)) + { + return false; + } + Model.FileSize = M.FileSize; + Model.TotalEvents = M.TotalEvents; + Model.ParseTimeMs = M.ParseTimeMs; + Model.TraceStartUs = M.TraceStartUs; + Model.TraceEndUs = M.TraceEndUs; + + Model.Session.Platform = std::string(Strings.Get(M.SessionPlatform)); + Model.Session.AppName = std::string(Strings.Get(M.SessionAppName)); + Model.Session.ProjectName = std::string(Strings.Get(M.SessionProjectName)); + Model.Session.CommandLine = std::string(Strings.Get(M.SessionCommandLine)); + Model.Session.Branch = std::string(Strings.Get(M.SessionBranch)); + Model.Session.BuildVersion = std::string(Strings.Get(M.SessionBuildVersion)); + Model.Session.Changelist = M.SessionChangelist; + Model.Session.ConfigurationType = M.SessionConfigType; + Model.Session.HasSession = (M.SessionHasSession != 0); + + // Threads + uint32_t ThreadCount = 0; + if (!ReadUint32(R, ThreadCount)) + { + return false; + } + 
Model.Threads.resize(ThreadCount); + for (uint32_t I = 0; I < ThreadCount; ++I) + { + ThreadInfoPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.Threads[I].ThreadId = P.ThreadId; + Model.Threads[I].Name = std::string(Strings.Get(P.Name)); + Model.Threads[I].GroupName = std::string(Strings.Get(P.GroupName)); + Model.Threads[I].SystemId = P.SystemId; + Model.Threads[I].SortHint = P.SortHint; + } + + // Channels + uint32_t ChannelCount = 0; + if (!ReadUint32(R, ChannelCount)) + { + return false; + } + Model.Channels.resize(ChannelCount); + for (uint32_t I = 0; I < ChannelCount; ++I) + { + ChannelInfoPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.Channels[I].Name = std::string(Strings.Get(P.Name)); + Model.Channels[I].Enabled = (P.Enabled != 0); + Model.Channels[I].ReadOnly = (P.ReadOnly != 0); + } + + // Modules + uint32_t ModuleCount = 0; + if (!ReadUint32(R, ModuleCount)) + { + return false; + } + + // Read ModuleInfoPod entries first, then the ImageId blob + eastl::vector<ModuleInfoPod> ModulePods(ModuleCount); + for (uint32_t I = 0; I < ModuleCount; ++I) + { + if (!ReadPod(R, ModulePods[I])) + { + return false; + } + } + + // Compute total ImageId blob size + uint32_t TotalImageIdSize = 0; + for (const ModuleInfoPod& MP : ModulePods) + { + uint32_t End = MP.ImageIdOffset + MP.ImageIdSize; + if (End > TotalImageIdSize) + { + TotalImageIdSize = End; + } + } + + const uint8_t* ImageIdBlobBase = nullptr; + if (TotalImageIdSize > 0) + { + if (R.Remaining() < TotalImageIdSize) + { + return false; + } + ImageIdBlobBase = reinterpret_cast<const uint8_t*>(R.GetView(TotalImageIdSize).GetData()); + R.Skip(TotalImageIdSize); + } + + Model.Modules.resize(ModuleCount); + for (uint32_t I = 0; I < ModuleCount; ++I) + { + const ModuleInfoPod& MP = ModulePods[I]; + ModuleInfo& Mod = Model.Modules[I]; + Mod.Name = std::string(Strings.Get(MP.Name)); + Mod.FullPath = std::string(Strings.Get(MP.FullPath)); + Mod.Base = MP.Base; + Mod.Size = MP.Size; + if 
(MP.ImageIdSize > 0 && ImageIdBlobBase != nullptr) + { + Mod.ImageId.assign(ImageIdBlobBase + MP.ImageIdOffset, ImageIdBlobBase + MP.ImageIdOffset + MP.ImageIdSize); + } + } + + // EventTypeCounts + uint32_t EventTypeCount = 0; + if (!ReadUint32(R, EventTypeCount)) + { + return false; + } + Model.EventTypeCounts.resize(EventTypeCount); + for (uint32_t I = 0; I < EventTypeCount; ++I) + { + EventTypeCountPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.EventTypeCounts[I].Name = std::string(Strings.Get(P.Name)); + Model.EventTypeCounts[I].Count = P.Count; + } + + // ScopeStats + uint32_t ScopeStatCount = 0; + if (!ReadUint32(R, ScopeStatCount)) + { + return false; + } + Model.ScopeStats.resize(ScopeStatCount); + for (uint32_t I = 0; I < ScopeStatCount; ++I) + { + CpuScopeStatPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.ScopeStats[I].Name = std::string(Strings.Get(P.Name)); + Model.ScopeStats[I].MinUs = P.MinUs; + Model.ScopeStats[I].MaxUs = P.MaxUs; + Model.ScopeStats[I].Count = P.Count; + Model.ScopeStats[I].MeanUs = P.MeanUs; + Model.ScopeStats[I].StdDevUs = P.StdDevUs; + } + + return true; + } + + bool ReadMemorySection(const SharedBuffer& Data, const StringTableReader& Strings, TraceModel& Model) + { + BinaryReader R(Data.GetData(), Data.GetSize()); + + // AllocSummary + AllocSummaryPod A; + if (!ReadPod(R, A)) + { + return false; + } + Model.AllocSummary.HasMemoryData = (A.HasMemoryData != 0); + Model.AllocSummary.PeakTimeUs = A.PeakTimeUs; + Model.AllocSummary.LiveAllocations = A.LiveAllocations; + Model.AllocSummary.TotalAllocs = A.TotalAllocs; + Model.AllocSummary.TotalFrees = A.TotalFrees; + Model.AllocSummary.TotalReallocAllocs = A.TotalReallocAllocs; + Model.AllocSummary.TotalReallocFrees = A.TotalReallocFrees; + Model.AllocSummary.PeakBytes = A.PeakBytes; + Model.AllocSummary.EndBytes = A.EndBytes; + + // Heaps + uint32_t HeapCount = 0; + if (!ReadUint32(R, HeapCount)) + { + return false; + } + Model.Heaps.resize(HeapCount); + 
for (uint32_t I = 0; I < HeapCount; ++I) + { + HeapInfoPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.Heaps[I].Id = P.Id; + Model.Heaps[I].ParentId = P.ParentId; + Model.Heaps[I].Flags = P.Flags; + Model.Heaps[I].Name = std::string(Strings.Get(P.Name)); + } + + // HeapStats + uint32_t HeapStatCount = 0; + if (!ReadUint32(R, HeapStatCount)) + { + return false; + } + Model.HeapStats.resize(HeapStatCount); + for (uint32_t I = 0; I < HeapStatCount; ++I) + { + HeapStatPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.HeapStats[I].HeapId = P.HeapId; + Model.HeapStats[I].CurrentBytes = P.CurrentBytes; + Model.HeapStats[I].PeakBytes = P.PeakBytes; + Model.HeapStats[I].AllocCount = P.AllocCount; + Model.HeapStats[I].FreeCount = P.FreeCount; + } + + // CallstackAllocStats + uint32_t AllocStatCount = 0; + if (!ReadUint32(R, AllocStatCount)) + { + return false; + } + Model.CallstackStats.resize(AllocStatCount); + for (uint32_t I = 0; I < AllocStatCount; ++I) + { + CallstackAllocStatPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.CallstackStats[I].CallstackId = P.CallstackId; + Model.CallstackStats[I].LiveCount = P.LiveCount; + Model.CallstackStats[I].LiveBytes = P.LiveBytes; + for (uint32_t J = 0; J < P.ThreadIdCount && J < 4; ++J) + { + Model.CallstackStats[I].ThreadIds.push_back(P.ThreadIds[J]); + } + } + + // ChurnStats + uint32_t ChurnCount = 0; + if (!ReadUint32(R, ChurnCount)) + { + return false; + } + Model.ChurnStats.resize(ChurnCount); + for (uint32_t I = 0; I < ChurnCount; ++I) + { + CallstackChurnStatPod P; + if (!ReadPod(R, P)) + { + return false; + } + Model.ChurnStats[I].CallstackId = P.CallstackId; + Model.ChurnStats[I].ChurnAllocs = P.ChurnAllocs; + Model.ChurnStats[I].ChurnBytes = P.ChurnBytes; + Model.ChurnStats[I].TotalAllocs = P.TotalAllocs; + Model.ChurnStats[I].TotalBytes = P.TotalBytes; + Model.ChurnStats[I].MeanDistance = P.MeanDistance; + } + + return true; + } + + bool ReadCallstacksSection(const SharedBuffer& 
Data, TraceModel& Model) + { + BinaryReader R(Data.GetData(), Data.GetSize()); + + uint32_t Count = 0; + if (!ReadUint32(R, Count)) + { + return false; + } + + // Read headers + eastl::vector<CallstackHeaderPod> Headers(Count); + for (uint32_t I = 0; I < Count; ++I) + { + if (!ReadPod(R, Headers[I])) + { + return false; + } + } + + // Compute total frame count + uint32_t TotalFrames = 0; + for (const CallstackHeaderPod& H : Headers) + { + TotalFrames = std::max(TotalFrames, H.FrameOffset + H.FrameCount); + } + + if (R.Remaining() < TotalFrames * sizeof(ResolvedFramePod)) + { + return false; + } + + // Read all frames + eastl::vector<ResolvedFramePod> AllFrames(TotalFrames); + for (uint32_t I = 0; I < TotalFrames; ++I) + { + if (!ReadPod(R, AllFrames[I])) + { + return false; + } + } + + // Build CallstackEntry vector + Model.Callstacks.resize(Count); + for (uint32_t I = 0; I < Count; ++I) + { + const CallstackHeaderPod& H = Headers[I]; + CallstackEntry& CS = Model.Callstacks[I]; + CS.Id = H.Id; + CS.Frames.resize(H.FrameCount); + for (uint32_t J = 0; J < H.FrameCount; ++J) + { + const ResolvedFramePod& FP = AllFrames[H.FrameOffset + J]; + CS.Frames[J].Address = FP.Address; + CS.Frames[J].ModuleIndex = FP.ModuleIndex; + CS.Frames[J].Offset = FP.Offset; + } + } + + return true; + } + + bool ReadSymbolsSection(const SharedBuffer& Data, const StringTableReader& Strings, CachedSymbolResolver& Resolver) + { + BinaryReader R(Data.GetData(), Data.GetSize()); + + uint32_t Count = 0; + if (!ReadUint32(R, Count)) + { + return false; + } + + for (uint32_t I = 0; I < Count; ++I) + { + SymbolEntryPod E; + if (!ReadPod(R, E)) + { + return false; + } + std::string_view Str = Strings.Get(E.StringIdx); + if (!Str.empty()) + { + Resolver.m_Symbols.emplace(E.Address, std::string(Str)); + } + } + + return true; + } + + // =========================================================================== + // File-level helpers + // 
=========================================================================== + + int64_t GetFileModTimeNs(const std::filesystem::path& Path) + { + std::error_code Ec; + auto ModTime = std::filesystem::last_write_time(Path, Ec); + if (Ec) + { + return 0; + } + auto Duration = ModTime.time_since_epoch(); + return std::chrono::duration_cast<std::chrono::nanoseconds>(Duration).count(); + } + + SharedBuffer DecompressSection(const uint8_t* FileBase, const SectionDirectoryEntry& Dir) + { + IoBuffer CompressedIo(IoBuffer::Wrap, FileBase + Dir.FileOffset, Dir.CompressedSize); + + IoHash RawHash; + uint64_t RawSize = 0; + CompressedBuffer CB = CompressedBuffer::FromCompressed(SharedBuffer(std::move(CompressedIo)), RawHash, RawSize); + if (CB.IsNull()) + { + return {}; + } + return CB.Decompress(); + } + +} // namespace + +// =========================================================================== +// Public API +// =========================================================================== + +void +WriteAnalyzeCache(const std::filesystem::path& CachePath, + const std::filesystem::path& SourcePath, + const TraceModel& Model, + const eastl::hash_map<uint64_t, std::string>& ResolvedSymbols) +{ + try + { + StringTableBuilder Strings; + + // Build section payloads (order matters: Symbols and Metadata/Memory + // intern strings, so StringTable must be serialized LAST after all + // interning is done). 
+ SharedBuffer MetadataRaw = WriteMetadataSection(Model, Strings); + SharedBuffer MemoryRaw = WriteMemorySection(Model, Strings); + SharedBuffer CallstacksRaw = WriteCallstacksSection(Model); + SharedBuffer SymbolsRaw = WriteSymbolsSection(ResolvedSymbols, Strings); + SharedBuffer StringTableRaw = Strings.Serialize(); + + // Compress each section + CompressedBuffer Sections[uint32_t(CacheSectionId::Count)]; + Sections[uint32_t(CacheSectionId::StringTable)] = CompressSection(StringTableRaw); + Sections[uint32_t(CacheSectionId::Metadata)] = CompressSection(MetadataRaw); + Sections[uint32_t(CacheSectionId::Memory)] = CompressSection(MemoryRaw); + Sections[uint32_t(CacheSectionId::Callstacks)] = CompressSection(CallstacksRaw); + Sections[uint32_t(CacheSectionId::Symbols)] = CompressSection(SymbolsRaw); + + // Build file header + CacheFileHeader Header = {}; + Header.Magic = kCacheMagic; + Header.Version = kCacheVersion; + + std::error_code Ec; + Header.SourceFileSize = std::filesystem::file_size(SourcePath, Ec); + Header.SourceModTimeNs = GetFileModTimeNs(SourcePath); + + uint32_t SectionCount = uint32_t(CacheSectionId::Count); + + // Compute section directory + uint64_t DataOffset = sizeof(CacheFileHeader) + SectionCount * sizeof(SectionDirectoryEntry); + + SectionDirectoryEntry Directory[uint32_t(CacheSectionId::Count)]; + for (uint32_t I = 0; I < SectionCount; ++I) + { + Directory[I].SectionId = I; + Directory[I].Reserved = 0; + Directory[I].FileOffset = DataOffset; + Directory[I].CompressedSize = Sections[I].GetCompressedSize(); + DataOffset += Directory[I].CompressedSize; + } + + // Assemble and write the file + BinaryWriter FileWriter; + FileWriter.Write(&Header, sizeof(Header)); + FileWriter.Write(Directory, sizeof(Directory)); + + // Append compressed blobs + for (uint32_t I = 0; I < SectionCount; ++I) + { + SharedBuffer Flat = std::move(Sections[I]).GetCompressed().Flatten(); + FileWriter.Write(Flat.GetData(), Flat.GetSize()); + } + + 
zen::TemporaryFile::SafeWriteFile(CachePath, FileWriter.GetView()); + + ZEN_INFO("Wrote analysis cache {} ({})", CachePath.filename().string(), zen::NiceBytes(FileWriter.Size())); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed to write analysis cache: {}", Ex.what()); + } +} + +std::optional<CachedAnalysis> +TryLoadAnalyzeCache(const std::filesystem::path& CachePath, const std::filesystem::path& SourcePath) +{ + std::error_code Ec; + if (!std::filesystem::exists(CachePath, Ec)) + { + return std::nullopt; + } + + try + { + FileContents Contents = zen::ReadFile(CachePath); + if (!Contents) + { + return std::nullopt; + } + + IoBuffer FileData = Contents.Flatten(); + if (FileData.Size() < sizeof(CacheFileHeader)) + { + return std::nullopt; + } + + const uint8_t* Base = reinterpret_cast<const uint8_t*>(FileData.Data()); + + // Validate header + CacheFileHeader Header; + memcpy(&Header, Base, sizeof(Header)); + + if (Header.Magic != kCacheMagic) + { + ZEN_DEBUG("Analysis cache: bad magic"); + return std::nullopt; + } + + if (Header.Version != kCacheVersion) + { + ZEN_DEBUG("Analysis cache: version mismatch ({} vs {})", Header.Version, kCacheVersion); + return std::nullopt; + } + + // Validate source file hasn't changed + uint64_t CurrentSize = std::filesystem::file_size(SourcePath, Ec); + int64_t CurrentModTime = GetFileModTimeNs(SourcePath); + + if (Header.SourceFileSize != CurrentSize || Header.SourceModTimeNs != CurrentModTime) + { + ZEN_DEBUG("Analysis cache: source file changed, invalidating"); + return std::nullopt; + } + + // Parse section directory + uint32_t SectionCount = uint32_t(CacheSectionId::Count); + size_t DirSize = SectionCount * sizeof(SectionDirectoryEntry); + if (FileData.Size() < sizeof(CacheFileHeader) + DirSize) + { + return std::nullopt; + } + + SectionDirectoryEntry Directory[uint32_t(CacheSectionId::Count)]; + memcpy(Directory, Base + sizeof(CacheFileHeader), DirSize); + + // Validate all sections fit in the file + for (uint32_t 
I = 0; I < SectionCount; ++I) + { + if (Directory[I].FileOffset + Directory[I].CompressedSize > FileData.Size()) + { + ZEN_DEBUG("Analysis cache: section {} truncated", I); + return std::nullopt; + } + } + + // Decompress string table first + SharedBuffer StringTableData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::StringTable)]); + if (StringTableData.IsNull()) + { + ZEN_DEBUG("Analysis cache: failed to decompress string table"); + return std::nullopt; + } + + StringTableReader Strings; + if (!Strings.Init(StringTableData)) + { + ZEN_DEBUG("Analysis cache: invalid string table"); + return std::nullopt; + } + + CachedAnalysis Result; + Result.Model.FilePath = SourcePath; + + // Decompress and read each section + SharedBuffer MetaData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Metadata)]); + if (MetaData.IsNull() || !ReadMetadataSection(MetaData, Strings, Result.Model)) + { + ZEN_DEBUG("Analysis cache: failed to read metadata section"); + return std::nullopt; + } + + SharedBuffer MemData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Memory)]); + if (MemData.IsNull() || !ReadMemorySection(MemData, Strings, Result.Model)) + { + ZEN_DEBUG("Analysis cache: failed to read memory section"); + return std::nullopt; + } + + SharedBuffer CsData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Callstacks)]); + if (CsData.IsNull() || !ReadCallstacksSection(CsData, Result.Model)) + { + ZEN_DEBUG("Analysis cache: failed to read callstacks section"); + return std::nullopt; + } + + SharedBuffer SymData = DecompressSection(Base, Directory[uint32_t(CacheSectionId::Symbols)]); + if (!SymData.IsNull()) + { + auto Resolver = std::make_unique<CachedSymbolResolver>(); + if (ReadSymbolsSection(SymData, Strings, *Resolver)) + { + Result.Symbols = std::move(Resolver); + } + } + + ZEN_INFO("Loaded analysis from cache ({})", zen::NiceBytes(FileData.Size())); + return Result; + } + catch (const std::exception& Ex) + { + 
ZEN_DEBUG("Analysis cache load failed: {}", Ex.what()); + return std::nullopt; + } +} + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_cache.h b/src/zen/trace/trace_cache.h new file mode 100644 index 000000000..88778a020 --- /dev/null +++ b/src/zen/trace/trace_cache.h @@ -0,0 +1,253 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "symbol_resolver.h" +#include "trace_model.h" + +#include <zencore/sharedbuffer.h> + +#include <cstdint> +#include <filesystem> +#include <memory> +#include <optional> +#include <string_view> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/hash_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen::trace_detail { + +// --------------------------------------------------------------------------- +// File format constants +// --------------------------------------------------------------------------- + +static constexpr uint32_t kCacheMagic = 0x005A4355; // "UCZ\0" +static constexpr uint32_t kCacheVersion = 1; + +enum class CacheSectionId : uint32_t +{ + StringTable = 0, + Metadata = 1, + Memory = 2, + Callstacks = 3, + Symbols = 4, + Count +}; + +// --------------------------------------------------------------------------- +// On-disk header structures (naturally aligned, no packing) +// +// Fields are ordered so natural alignment matches across compilers without +// needing pragma pack. static_asserts at the bottom of this block pin the +// layout so a reordering or added field cannot silently break cached files. 
// ---------------------------------------------------------------------------

// First bytes of the cache file. SourceFileSize / SourceModTimeNs identify the
// source file the cache was built from.
struct CacheFileHeader
{
	uint32_t Magic;
	uint32_t Version;
	uint64_t SourceFileSize;
	int64_t	 SourceModTimeNs;  // last_write_time as nanoseconds since epoch
	uint64_t Reserved;
};

// One entry per section, stored as an array directly after CacheFileHeader.
struct SectionDirectoryEntry
{
	uint32_t SectionId;
	uint32_t Reserved;
	uint64_t FileOffset;	  // byte offset from start of file
	uint64_t CompressedSize;  // size of the CompressedBuffer blob on disk
};

// ---------------------------------------------------------------------------
// POD types for memory-mappable section content
//
// These are written to disk byte-for-byte, so every byte of every struct
// must be a named member: implicit compiler padding is not guaranteed to be
// zeroed by `Pod P = {}` and would leak indeterminate bytes into the file.
// ---------------------------------------------------------------------------

struct MetadataPod
{
	uint64_t FileSize;
	uint64_t TotalEvents;
	uint64_t ParseTimeMs;
	uint32_t TraceStartUs;
	uint32_t TraceEndUs;
	// SessionInfo string indices
	uint32_t SessionPlatform;
	uint32_t SessionAppName;
	uint32_t SessionProjectName;
	uint32_t SessionCommandLine;
	uint32_t SessionBranch;
	uint32_t SessionBuildVersion;
	uint32_t SessionChangelist;
	uint8_t	 SessionConfigType;
	uint8_t	 SessionHasSession;
	uint8_t	 Padding[2];
};

struct ThreadInfoPod
{
	uint32_t ThreadId;
	uint32_t Name;		 // string index
	uint32_t GroupName;	 // string index
	uint32_t SystemId;
	int32_t	 SortHint;
	uint32_t Pad;
};

struct ChannelInfoPod
{
	uint32_t Name;	// string index
	uint8_t	 Enabled;
	uint8_t	 ReadOnly;
	uint8_t	 Pad[2];
};

struct ModuleInfoPod
{
	uint32_t Name;		// string index
	uint32_t FullPath;	// string index
	uint64_t Base;
	uint32_t Size;
	uint32_t ImageIdSize;	 // byte count in the ImageId blob area
	uint32_t ImageIdOffset;	 // byte offset into the ImageId blob area
	uint32_t Pad;
};

struct EventTypeCountPod
{
	uint32_t Name;	// string index
	uint32_t Pad;
	uint64_t Count;
};

struct CpuScopeStatPod
{
	uint32_t Name;	// string index
	uint32_t MinUs;
	uint32_t MaxUs;
	uint32_t Pad;
	uint64_t Count;
	double	 MeanUs;
	double	 StdDevUs;
};

struct AllocSummaryPod
{
	uint8_t	 HasMemoryData;
	uint8_t	 Pad0[3];
	uint32_t PeakTimeUs;
	uint32_t LiveAllocations;
	uint32_t Pad1;
	uint64_t TotalAllocs;
	uint64_t TotalFrees;
	uint64_t TotalReallocAllocs;
	uint64_t TotalReallocFrees;
	int64_t	 PeakBytes;
	int64_t	 EndBytes;
};

struct HeapInfoPod
{
	uint32_t Id;
	uint32_t ParentId;
	uint16_t Flags;
	uint16_t Pad0;
	uint32_t Name;	// string index
};

struct HeapStatPod
{
	uint32_t HeapId;
	uint32_t Pad;
	int64_t	 CurrentBytes;
	int64_t	 PeakBytes;
	uint64_t AllocCount;
	uint64_t FreeCount;
};

struct CallstackAllocStatPod
{
	uint32_t CallstackId;
	uint32_t LiveCount;
	int64_t	 LiveBytes;
	uint32_t ThreadIdCount;	 // number of valid entries in ThreadIds (at most 4)
	uint32_t ThreadIds[4];
	uint32_t Pad;
	uint32_t Pad2;
	// The members above add up to 44 bytes, but the int64_t member forces the
	// struct size to 48. Name those last 4 bytes explicitly so they are
	// zero-initialized like every other field instead of being implicit tail
	// padding. Size and all existing offsets are unchanged, so this is not a
	// format change.
	uint32_t Pad3;
};

struct CallstackChurnStatPod
{
	uint32_t CallstackId;
	uint32_t Pad;
	uint64_t ChurnAllocs;
	uint64_t ChurnBytes;
	uint64_t TotalAllocs;
	uint64_t TotalBytes;
	double	 MeanDistance;
};

struct CallstackHeaderPod
{
	uint32_t Id;
	uint32_t FrameCount;
	uint32_t FrameOffset;  // index into the frames array
	uint32_t Pad;
};

struct ResolvedFramePod
{
	uint64_t Address;
	uint32_t ModuleIndex;
	uint32_t Pad;
	uint64_t Offset;
};

struct SymbolEntryPod
{
	uint64_t Address;
	uint32_t StringIdx;	 // index into the string table
	uint32_t Pad;
};

// Pin the on-disk layout. Any change here is a cache format change and must
// bump kCacheVersion.
+static_assert(sizeof(CacheFileHeader) == 32); +static_assert(sizeof(SectionDirectoryEntry) == 24); +static_assert(sizeof(MetadataPod) == 64); +static_assert(sizeof(ThreadInfoPod) == 24); +static_assert(sizeof(ChannelInfoPod) == 8); +static_assert(sizeof(ModuleInfoPod) == 32); +static_assert(sizeof(EventTypeCountPod) == 16); +static_assert(sizeof(CpuScopeStatPod) == 40); +static_assert(sizeof(AllocSummaryPod) == 64); +static_assert(sizeof(HeapInfoPod) == 16); +static_assert(sizeof(HeapStatPod) == 40); +static_assert(sizeof(CallstackAllocStatPod) == 48); +static_assert(sizeof(CallstackChurnStatPod) == 48); +static_assert(sizeof(CallstackHeaderPod) == 16); +static_assert(sizeof(ResolvedFramePod) == 24); +static_assert(sizeof(SymbolEntryPod) == 16); + +// --------------------------------------------------------------------------- +// Cache read / write API +// --------------------------------------------------------------------------- + +struct CachedAnalysis +{ + TraceModel Model; + std::unique_ptr<SymbolResolver> Symbols; +}; + +// Try to load a cached analysis from the .ucache_z file next to a .utrace. +// Returns nullopt on any failure (missing, stale, corrupt, version mismatch). +std::optional<CachedAnalysis> TryLoadAnalyzeCache(const std::filesystem::path& CachePath, const std::filesystem::path& SourcePath); + +// Write the analysis cache for future reuse. +void WriteAnalyzeCache(const std::filesystem::path& CachePath, + const std::filesystem::path& SourcePath, + const TraceModel& Model, + const eastl::hash_map<uint64_t, std::string>& ResolvedSymbols); + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_cmd.cpp b/src/zen/trace/trace_cmd.cpp new file mode 100644 index 000000000..35316721e --- /dev/null +++ b/src/zen/trace/trace_cmd.cpp @@ -0,0 +1,402 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "trace_cmd.h" + +#include "browser_launcher.h" +#include "consoleprogress.h" +#include "symbol_resolver.h" +#include "trace_analyze.h" +#include "trace_model.h" +#include "trace_viewer_service.h" +#include "zenserviceclient.h" + +#include <zencore/except_fmt.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/workthreadpool.h> +#include <zenhttp/httpclient.h> +#include <zenhttp/httpcommon.h> +#include <zenhttp/httpserver.h> + +#include <filesystem> +#include <numeric> + +using namespace std::literals; + +namespace zen { + +namespace { + +#if ZEN_PLATFORM_WINDOWS + constexpr const char* kSymbolBackendHelp = "Symbol backend: auto (default), pdb, dbghelp, llvm, off"; +#elif ZEN_PLATFORM_MAC + constexpr const char* kSymbolBackendHelp = "Symbol backend: auto (default - prefers llvm, falls back to atos), llvm, atos, off"; +#else + constexpr const char* kSymbolBackendHelp = "Symbol backend: auto (default - uses llvm), llvm, off"; +#endif + +} // namespace + +////////////////////////////////////////////////////////////////////////// +// TraceAnalyzeSubCmd + +TraceAnalyzeSubCmd::TraceAnalyzeSubCmd() : ZenSubCmdBase("analyze", "Analyze a .utrace file") +{ + SubOptions().add_option("", "", "file", "Path to .utrace file", cxxopts::value(m_TraceFilePath), "<filepath>"); + SubOptions().add_option("", + "", + "live-allocs", + "Dump top N live-allocation callstacks (0 = off, default 50)", + cxxopts::value(m_LiveAllocs)->default_value("50"), + "<count>"); + SubOptions().add_option("", + "", + "churn", + "Dump top N allocation-churn callstacks (0 = off, default 0)", + cxxopts::value(m_Churn)->default_value("0"), + "<count>"); + SubOptions().add_option("", + "", + "churn-distance", + "Max event distance between alloc and free to count as churn (default 1000)", + cxxopts::value(m_ChurnMin)->default_value("1000"), + "<events>"); + 
SubOptions().add_option("", "", "symbols", kSymbolBackendHelp, cxxopts::value(m_Symbols)->default_value("auto"), "<backend>"); + SubOptions().add_option("", + "", + "html-report", + "Write a standalone HTML memory report (all live leaks + top 100 churn sites)", + cxxopts::value(m_HtmlReportPath), + "<filepath>"); + SubOptions().add_option("", + "", + "callstack-skip", + "Semicolon-separated wildcard patterns for frames to hide from analyzed callstacks", + cxxopts::value(m_CallstackSkip), + "<pattern;...>"); + SubOptions().add_option("", + "", + "no-callstack-heuristic", + "Disable leading third-party frame trimming in analyzed callstacks", + cxxopts::value(m_NoCallstackHeuristic)->default_value("false"), + "<no-callstack-heuristic>"); + SubOptions().add_option("", + "", + "no-cache", + "Skip reading/writing the .ucache_z analysis cache", + cxxopts::value(m_NoCache)->default_value("false"), + "<no-cache>"); + SubOptions().parse_positional({"file"}); + SubOptions().positional_help("<file.utrace>"); +} + +void +TraceAnalyzeSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + std::filesystem::path FilePath = trace_detail::ResolveTraceFile(m_TraceFilePath, SubOptions()); + + trace_detail::AnalyzeOptions Options; + Options.LiveAllocsLimit = m_LiveAllocs; + Options.ChurnLimit = m_Churn; + Options.ChurnDistanceThreshold = uint64_t(m_ChurnMin); + Options.Symbols = trace_detail::ParseSymbolBackend(m_Symbols); + Options.NoCache = m_NoCache; + Options.EnableCallstackHeuristic = !m_NoCallstackHeuristic; + Options.HtmlReportPath = m_HtmlReportPath; + ForEachStrTok(m_CallstackSkip, ';', [&Options](std::string_view Pattern) { + if (!Pattern.empty()) + { + Options.CallstackSkipPatterns.emplace_back(Pattern); + } + return true; + }); + trace_detail::RunAnalyze(FilePath, Options); +} + +////////////////////////////////////////////////////////////////////////// +// TraceInspectSubCmd + +TraceInspectSubCmd::TraceInspectSubCmd() : ZenSubCmdBase("inspect", 
"Inspect event schemas in a .utrace file") +{ + SubOptions().add_option("", "", "file", "Path to .utrace file", cxxopts::value(m_TraceFilePath), "<filepath>"); + SubOptions().parse_positional({"file"}); + SubOptions().positional_help("<file.utrace>"); +} + +void +TraceInspectSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + std::filesystem::path FilePath = trace_detail::ResolveTraceFile(m_TraceFilePath, SubOptions()); + trace_detail::RunInspect(FilePath); +} + +////////////////////////////////////////////////////////////////////////// +// TraceServeSubCmd + +TraceServeSubCmd::TraceServeSubCmd() : ZenSubCmdBase("serve", "Serve an interactive viewer for a .utrace file") +{ + AddAlias("view"); + SubOptions().add_option("", "", "file", "Path to .utrace file", cxxopts::value(m_TraceFilePath), "<filepath>"); + SubOptions().add_option("", "p", "port", "Port to listen on", cxxopts::value(m_Port)->default_value("1480"), "<port>"); + SubOptions().add_option("", "", "bind", "Address to bind to", cxxopts::value(m_Bind)->default_value("127.0.0.1"), "<host>"); + SubOptions().add_option("", "", "symbols", kSymbolBackendHelp, cxxopts::value(m_Symbols)->default_value("auto"), "<backend>"); + SubOptions().add_option("", + "", + "no-browser", + "Do not launch a web browser after starting the server", + cxxopts::value(m_NoBrowser)->default_value("false"), + "<no-browser>"); + SubOptions().parse_positional({"file"}); + SubOptions().positional_help("<file.utrace>"); +} + +void +TraceServeSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + std::filesystem::path FilePath = trace_detail::ResolveTraceFile(m_TraceFilePath, SubOptions()); + + WorkerThreadPool ThreadPool(gsl::narrow<int>(GetHardwareConcurrency())); + + uint64_t FileSize = uint64_t(std::filesystem::file_size(FilePath)); + ZEN_CONSOLE("Parsing {} ({})", FilePath.filename().string(), zen::NiceBytes(FileSize)); + + std::unique_ptr<ProgressBase> 
ProgressOwner(CreateConsoleProgress(ConsoleProgressMode::Pretty)); + std::unique_ptr<ProgressBase::ProgressBar> Progress = ProgressOwner->CreateProgressBar("Parse"); + trace_detail::TraceModel Model = + trace_detail::BuildTraceModel(FilePath, ThreadPool, [&](uint64_t BytesProcessed, uint64_t TotalBytes, uint64_t EventsSoFar) { + Progress->UpdateState( + { + .Task = "Parsing trace", + .Details = fmt::format("{} events", zen::ThousandsNum(EventsSoFar)), + .TotalCount = TotalBytes, + .RemainingCount = TotalBytes - std::min(BytesProcessed, TotalBytes), + }, + false); + }); + Progress->Finish(); + + ZEN_CONSOLE(" Events: {}", zen::ThousandsNum(Model.TotalEvents)); + ZEN_CONSOLE(" Threads: {}", Model.Threads.size()); + ZEN_CONSOLE( + " Scopes: {}", + zen::ThousandsNum(std::accumulate(Model.Timelines.begin(), + Model.Timelines.end(), + size_t(0), + [](size_t Acc, const trace_detail::ThreadTimeline& T) { return Acc + T.Scopes.size(); }))); + ZEN_CONSOLE(" Time: {}", zen::NiceTimeSpanMs(Model.ParseTimeMs)); + + std::unique_ptr<trace_detail::SymbolResolver> Symbols = trace_detail::CreateSymbolResolver(trace_detail::ParseSymbolBackend(m_Symbols)); + for (const trace_detail::ModuleInfo& Mod : Model.Modules) + { + Symbols->LoadModule(Mod); + } + ZEN_CONSOLE(" Symbols: {} modules loaded", Model.Modules.size()); + ZEN_CONSOLE(""); + + HttpServerConfig Config; + Config.ServerClass = "asio"; + Config.IsDedicatedServer = false; + Config.AllowPortProbing = true; + Config.ForceLoopback = (m_Bind == "127.0.0.1" || m_Bind == "localhost" || m_Bind == "::1"); + + Ref<HttpServer> Server = CreateHttpServer(Config); + + std::filesystem::path TempDir = std::filesystem::temp_directory_path() / "zen-trace-viewer"; + std::error_code Ec; + std::filesystem::create_directories(TempDir, Ec); + + int EffectivePort = Server->Initialize(m_Port, TempDir); + if (EffectivePort <= 0) + { + throw zen::runtime_error("Failed to initialize HTTP server"); + } + + TraceViewerService ViewerService(Model, 
std::move(Symbols)); + Server->RegisterService(ViewerService); + + std::string Url = fmt::format("http://{}:{}{}", m_Bind, EffectivePort, ViewerService.BaseUri()); + ZEN_CONSOLE("Serving trace viewer at {}", Url); + ZEN_CONSOLE("Press Ctrl+C to stop"); + + if (!m_NoBrowser) + { + try + { + LaunchBrowser(Url); + } + catch (const std::exception& E) + { + ZEN_WARN("Failed to launch browser: {}", E.what()); + } + } + + Server->Run(/*IsInteractiveSession=*/true); + Server->Close(); +} + +////////////////////////////////////////////////////////////////////////// +// TraceTrimSubCmd + +TraceTrimSubCmd::TraceTrimSubCmd() : ZenSubCmdBase("trim", "Trim a .utrace file to a time range while preserving important events") +{ + SubOptions().add_option("", "", "file", "Path to input .utrace file", cxxopts::value(m_TraceFilePath), "<filepath>"); + SubOptions().add_option("", "o", "output", "Path to output .utrace file", cxxopts::value(m_OutputPath), "<filepath>"); + SubOptions().add_option("", + "", + "start", + "Start of the time window in seconds from the beginning of the trace", + cxxopts::value(m_StartSec)->default_value("0"), + "<seconds>"); + SubOptions().add_option("", + "", + "end", + "End of the time window in seconds from the beginning of the trace", + cxxopts::value(m_EndSec)->default_value("0"), + "<seconds>"); + SubOptions().parse_positional({"file"}); + SubOptions().positional_help("<file.utrace>"); +} + +void +TraceTrimSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + std::filesystem::path InputPath = trace_detail::ResolveTraceFile(m_TraceFilePath, SubOptions()); + + if (m_OutputPath.empty()) + { + throw zen::OptionParseException("--output is required", SubOptions().help()); + } + if (m_EndSec <= m_StartSec) + { + throw zen::OptionParseException("--end must be greater than --start", SubOptions().help()); + } + + trace_detail::TraceTrimArgs Args; + Args.InputPath = InputPath; + Args.OutputPath = 
std::filesystem::absolute(m_OutputPath); + Args.StartSec = m_StartSec; + Args.EndSec = m_EndSec; + + trace_detail::RunTraceTrim(Args); +} + +////////////////////////////////////////////////////////////////////////// +// TraceStartSubCmd + +TraceStartSubCmd::TraceStartSubCmd() : ZenSubCmdBase("start", "Start zen server realtime tracing") +{ + SubOptions().add_option("", "u", "hosturl", ZenCmdBase::kHostUrlHelp, cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); + SubOptions().add_option("", "", "host", "Stream trace data to a remote host", cxxopts::value(m_TraceHost), "<hostip>"); + SubOptions().add_option("", "", "file", "Write trace data to a file", cxxopts::value(m_TraceFile), "<filepath>"); +} + +void +TraceStartSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + if (m_TraceHost.empty() && m_TraceFile.empty()) + { + throw OptionParseException("Either --host or --file is required", SubOptions().help()); + } + if (!m_TraceHost.empty() && !m_TraceFile.empty()) + { + throw OptionParseException("--host and --file are mutually exclusive", SubOptions().help()); + } + + std::string StartArg = m_TraceHost.empty() ? 
fmt::format("file={}", m_TraceFile) : fmt::format("host={}", m_TraceHost); + + ZenServiceClient Service({.HostSpec = m_HostName, .CommandName = "start"}); + HttpClient& Http = Service.Http(); + if (HttpClient::Response Response = Http.Post(fmt::format("/admin/trace/start?{}"sv, StartArg))) + { + ZEN_CONSOLE("OK: {}", Response.ToText()); + } + else + { + Response.ThrowError("Trace start failed"); + } +} + +////////////////////////////////////////////////////////////////////////// +// TraceStopSubCmd + +TraceStopSubCmd::TraceStopSubCmd() : ZenSubCmdBase("stop", "Stop zen server realtime tracing") +{ + SubOptions().add_option("", "u", "hosturl", ZenCmdBase::kHostUrlHelp, cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); +} + +void +TraceStopSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + ZenServiceClient Service({.HostSpec = m_HostName, .CommandName = "stop"}); + HttpClient& Http = Service.Http(); + if (HttpClient::Response Response = Http.Post("/admin/trace/stop"sv)) + { + ZEN_CONSOLE("OK: {}", Response.ToText()); + } + else + { + Response.ThrowError("Trace stop failed"); + } +} + +////////////////////////////////////////////////////////////////////////// +// TraceStatusSubCmd + +TraceStatusSubCmd::TraceStatusSubCmd() : ZenSubCmdBase("status", "Report zen server realtime tracing status") +{ + SubOptions().add_option("", "u", "hosturl", ZenCmdBase::kHostUrlHelp, cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); +} + +void +TraceStatusSubCmd::Run(const ZenCliOptions& GlobalOptions) +{ + ZEN_UNUSED(GlobalOptions); + + ZenServiceClient Service({.HostSpec = m_HostName, .CommandName = "status"}); + HttpClient& Http = Service.Http(); + if (HttpClient::Response Response = Http.Get("/admin/trace"sv)) + { + ZEN_CONSOLE("OK: {}", Response.ToText()); + } + else + { + Response.ThrowError("Trace status failed"); + } +} + +////////////////////////////////////////////////////////////////////////// +// TraceCommand + 
+TraceCommand::TraceCommand() +{ + m_Options.add_options()("h,help", "Print help"); + m_Options.add_option("__hidden__", "", "subcommand", "", cxxopts::value<std::string>(m_SubCommand)->default_value(""), ""); + m_Options.parse_positional({"subcommand"}); + + AddSubCommand(m_AnalyzeSubCmd); + AddSubCommand(m_InspectSubCmd); + AddSubCommand(m_ServeSubCmd); + AddSubCommand(m_TrimSubCmd); + AddSubCommand(m_StartSubCmd); + AddSubCommand(m_StopSubCmd); + AddSubCommand(m_StatusSubCmd); +} + +TraceCommand::~TraceCommand() = default; + +} // namespace zen diff --git a/src/zen/trace/trace_cmd.h b/src/zen/trace/trace_cmd.h new file mode 100644 index 000000000..bb2759241 --- /dev/null +++ b/src/zen/trace/trace_cmd.h @@ -0,0 +1,123 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "zen.h" + +#include <filesystem> +#include <string> + +namespace zen { + +class TraceAnalyzeSubCmd : public ZenSubCmdBase +{ +public: + TraceAnalyzeSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::filesystem::path m_TraceFilePath; + int m_LiveAllocs = 0; + int m_Churn = 0; + int m_ChurnMin = 1000; + std::string m_Symbols; + std::string m_CallstackSkip; + bool m_NoCache = false; + bool m_NoCallstackHeuristic = false; + std::filesystem::path m_HtmlReportPath; +}; + +class TraceInspectSubCmd : public ZenSubCmdBase +{ +public: + TraceInspectSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::filesystem::path m_TraceFilePath; +}; + +class TraceServeSubCmd : public ZenSubCmdBase +{ +public: + TraceServeSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::filesystem::path m_TraceFilePath; + std::string m_Symbols; + int m_Port = 0; + std::string m_Bind = "127.0.0.1"; + bool m_NoBrowser = false; +}; + +class TraceTrimSubCmd : public ZenSubCmdBase +{ +public: + TraceTrimSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::filesystem::path 
m_TraceFilePath; + std::filesystem::path m_OutputPath; + double m_StartSec = 0.0; + double m_EndSec = 0.0; +}; + +class TraceStartSubCmd : public ZenSubCmdBase +{ +public: + TraceStartSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::string m_HostName; + std::string m_TraceHost; + std::string m_TraceFile; +}; + +class TraceStopSubCmd : public ZenSubCmdBase +{ +public: + TraceStopSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::string m_HostName; +}; + +class TraceStatusSubCmd : public ZenSubCmdBase +{ +public: + TraceStatusSubCmd(); + void Run(const ZenCliOptions& GlobalOptions) override; + +private: + std::string m_HostName; +}; + +class TraceCommand : public ZenCmdWithSubCommands +{ +public: + static constexpr char Name[] = "trace"; + static constexpr char Description[] = "Control zen realtime tracing and work with .utrace files"; + + TraceCommand(); + ~TraceCommand(); + + cxxopts::Options& Options() override { return m_Options; } + +private: + cxxopts::Options m_Options{Name, Description}; + std::string m_SubCommand; + + TraceAnalyzeSubCmd m_AnalyzeSubCmd; + TraceInspectSubCmd m_InspectSubCmd; + TraceServeSubCmd m_ServeSubCmd; + TraceTrimSubCmd m_TrimSubCmd; + TraceStartSubCmd m_StartSubCmd; + TraceStopSubCmd m_StopSubCmd; + TraceStatusSubCmd m_StatusSubCmd; +}; + +} // namespace zen diff --git a/src/zen/trace/trace_memory.cpp b/src/zen/trace/trace_memory.cpp new file mode 100644 index 000000000..704b8bcde --- /dev/null +++ b/src/zen/trace/trace_memory.cpp @@ -0,0 +1,901 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "trace_memory.h" + +#include "trace_model.h" + +#include <zencore/fmtutils.h> +#include <zencore/logging.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/sort.h> +#include <analysis/analyzer.h> +ZEN_THIRD_PARTY_INCLUDES_END + +using namespace zen::trace_detail; + +////////////////////////////////////////////////////////////////////////////// +// Event outlines for Memory.* trace events +// +// Field names and types match the UE wire format exactly. +// See Engine/Source/Runtime/Core/Private/ProfilingDebugging/MemoryTrace.cpp. + +// clang-format off +begin_outline(Memory, Init) + field(uint8, Version) + field(uint32, MarkerPeriod) + field(uint8, MinAlignment) + field(uint8, SizeShift) + field(uint64, PageSize) +end_outline() + +begin_outline(Memory, Marker) + field(uint64, Cycle) +end_outline() + +begin_outline(Memory, Alloc) + field(uint64, Address) + field(uint32, CallstackId) + field(uint32, Size) + field(uint8, AlignmentPow2_SizeLower) + field(uint8, RootHeap) +end_outline() + +begin_outline(Memory, AllocSystem) + field(uint64, Address) + field(uint32, CallstackId) + field(uint32, Size) + field(uint8, AlignmentPow2_SizeLower) +end_outline() + +begin_outline(Memory, AllocVideo) + field(uint64, Address) + field(uint32, CallstackId) + field(uint32, Size) + field(uint8, AlignmentPow2_SizeLower) +end_outline() + +begin_outline(Memory, Free) + field(uint64, Address) + field(uint32, CallstackId) + field(uint8, RootHeap) +end_outline() + +begin_outline(Memory, FreeSystem) + field(uint64, Address) + field(uint32, CallstackId) +end_outline() + +begin_outline(Memory, FreeVideo) + field(uint64, Address) + field(uint32, CallstackId) +end_outline() + +begin_outline(Memory, ReallocAlloc) + field(uint64, Address) + field(uint32, CallstackId) + field(uint32, Size) + field(uint8, AlignmentPow2_SizeLower) + field(uint8, RootHeap) +end_outline() + +begin_outline(Memory, ReallocAllocSystem) + field(uint64, Address) + field(uint32, CallstackId) + field(uint32, Size) 
+ field(uint8, AlignmentPow2_SizeLower) +end_outline() + +begin_outline(Memory, ReallocFree) + field(uint64, Address) + field(uint32, CallstackId) + field(uint8, RootHeap) +end_outline() + +begin_outline(Memory, ReallocFreeSystem) + field(uint64, Address) + field(uint32, CallstackId) +end_outline() + +begin_outline(Memory, HeapSpec) + field(uint32, Id) + field(uint32, ParentId) + field(uint16, Flags) + field(FieldStr, Name) +end_outline() + +begin_outline(Memory, HeapMarkAlloc) + field(uint64, Address) + field(uint32, CallstackId) + field(uint16, Flags) + field(uint32, Heap) +end_outline() + +begin_outline(Memory, HeapUnmarkAlloc) + field(uint64, Address) + field(uint32, CallstackId) + field(uint32, Heap) +end_outline() + +begin_outline(Memory, TagSpec) + field(int32, Tag) + field(int32, Parent) + field(FieldStr, Display) +end_outline() + +begin_outline(Memory, CallstackSpec) + field(uint32, CallstackId) + field(uint64[], Frames) +end_outline() + +begin_outline(Memory, CallstackSpecDeltaVarInt) + field(uint32, CallstackId) + field(uint8[], CompressedFrames) +end_outline() + +begin_outline(Memory, CallstackSpecDelta7bit) + field(uint32, CallstackId) + field(uint8[], CompressedFrames) +end_outline() + +begin_outline(Memory, CallstackSpecXORAndRLE) + field(uint32, CallstackId) + field(uint8[], CompressedFrames) +end_outline() + // clang-format on + + ////////////////////////////////////////////////////////////////////////////// + // Callstack decompression helpers + + namespace +{ + inline int64_t ZigZagDecode(uint64_t Encoded) { return int64_t(Encoded >> 1) ^ -int64_t(Encoded & 1); } + + // UE VarInt: leading 1-bits in the first byte indicate total byte count. + // 0xxxxxxx = 1 byte (7 value bits) + // 10xxxxxx = 2 bytes (14 value bits) + // 110xxxxx = 3 bytes (21 value bits) ...up to 9 bytes. + // Remaining bytes are big-endian value continuation. 
+ eastl::vector<uint64_t> DecodeDeltaVarInt(const uint8_t* Data, uint32_t Size) + { + eastl::vector<uint64_t> Frames; + uint64_t Prev = 0; + const uint8_t* Cur = Data; + const uint8_t* End = Data + Size; + + while (Cur < End) + { + uint8_t First = *Cur; + uint32_t ByteCount = 1; + uint8_t Mask = 0x80; + while ((First & Mask) && ByteCount < 9) + { + ByteCount++; + Mask >>= 1; + } + + if (Cur + ByteCount > End) + { + break; + } + + uint64_t Raw = 0; + if (ByteCount == 9) + { + // First byte is 0xFF; next 8 bytes are the raw value. + for (uint32_t I = 1; I <= 8; I++) + { + Raw = (Raw << 8) | Cur[I]; + } + } + else + { + // First byte contributes value bits after stripping the length prefix. + uint8_t ValueMask = uint8_t((1u << (8 - ByteCount)) - 1); + Raw = First & ValueMask; + for (uint32_t I = 1; I < ByteCount; I++) + { + Raw = (Raw << 8) | Cur[I]; + } + } + Cur += ByteCount; + + int64_t Delta = ZigZagDecode(Raw); + Prev = uint64_t(int64_t(Prev) + Delta); + Frames.push_back(Prev); + } + + return Frames; + } + + // 7-bit continuation encoding: bit 7 = more bytes, bits 0-6 = value (little-endian). + eastl::vector<uint64_t> DecodeDelta7bit(const uint8_t* Data, uint32_t Size) + { + eastl::vector<uint64_t> Frames; + uint64_t Prev = 0; + const uint8_t* Cur = Data; + const uint8_t* End = Data + Size; + + while (Cur < End) + { + uint64_t Raw = 0; + uint32_t Shift = 0; + for (;;) + { + if (Cur >= End) + { + break; + } + uint8_t Byte = *Cur++; + Raw |= uint64_t(Byte & 0x7F) << Shift; + Shift += 7; + if ((Byte & 0x80) == 0) + { + break; + } + } + + int64_t Delta = ZigZagDecode(Raw); + Prev = uint64_t(int64_t(Prev) + Delta); + Frames.push_back(Prev); + } + + return Frames; + } + + // XOR + RLE: first byte = leading zero bit count in (frame XOR prev). + // Remaining ceil((64 - zeros) / 8) bytes are the non-zero suffix, little-endian. 
+ eastl::vector<uint64_t> DecodeXORAndRLE(const uint8_t* Data, uint32_t Size) + { + eastl::vector<uint64_t> Frames; + uint64_t Prev = 0; + const uint8_t* Cur = Data; + const uint8_t* End = Data + Size; + + while (Cur < End) + { + uint8_t LeadingZeros = *Cur++; + if (LeadingZeros >= 64) + { + Frames.push_back(Prev); + continue; + } + + uint32_t ValueBits = 64 - LeadingZeros; + uint32_t ValueBytes = (ValueBits + 7) / 8; + + if (Cur + ValueBytes > End) + { + break; + } + + uint64_t XorVal = 0; + for (uint32_t I = 0; I < ValueBytes; I++) + { + XorVal |= uint64_t(Cur[I]) << (I * 8); + } + Cur += ValueBytes; + + Prev ^= XorVal; + Frames.push_back(Prev); + } + + return Frames; + } + +} // anonymous namespace + +////////////////////////////////////////////////////////////////////////////// +// AllocationAnalyzer implementation + +AllocationAnalyzer::AllocationAnalyzer(const TraceTiming* Timing) : m_Timing(Timing) +{ +} + +void +AllocationAnalyzer::subscribe(Vector<Subscription>& Subs) +{ + Subs.emplace_back(this, &AllocationAnalyzer::OnInit); + Subs.emplace_back(this, &AllocationAnalyzer::OnMarker); + Subs.emplace_back(this, &AllocationAnalyzer::OnAlloc); + Subs.emplace_back(this, &AllocationAnalyzer::OnAllocSystem); + Subs.emplace_back(this, &AllocationAnalyzer::OnAllocVideo); + Subs.emplace_back(this, &AllocationAnalyzer::OnFree); + Subs.emplace_back(this, &AllocationAnalyzer::OnFreeSystem); + Subs.emplace_back(this, &AllocationAnalyzer::OnFreeVideo); + Subs.emplace_back(this, &AllocationAnalyzer::OnReallocAlloc); + Subs.emplace_back(this, &AllocationAnalyzer::OnReallocAllocSystem); + Subs.emplace_back(this, &AllocationAnalyzer::OnReallocFree); + Subs.emplace_back(this, &AllocationAnalyzer::OnReallocFreeSystem); + Subs.emplace_back(this, &AllocationAnalyzer::OnHeapSpec); + Subs.emplace_back(this, &AllocationAnalyzer::OnHeapMarkAlloc); + Subs.emplace_back(this, &AllocationAnalyzer::OnHeapUnmarkAlloc); + Subs.emplace_back(this, &AllocationAnalyzer::OnTagSpec); +} + 
+////////////////////////////////////////////////////////////////////////////// +// Internal helpers + +uint64_t +AllocationAnalyzer::DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const +{ + uint32_t Shift = m_SizeShift; + uint32_t LowMask = (1u << Shift) - 1; + return (uint64_t(RawSize) << Shift) | (AlignSizeLower & LowMask); +} + +void +AllocationAnalyzer::HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc) +{ + // If address is already tracked (shouldn't normally happen), treat as + // implicit free of the old allocation so the counters stay consistent. + auto It = m_LiveAllocs.find(Address); + if (It != m_LiveAllocs.end()) + { + // Heap-marked allocs were already subtracted from totals in OnHeapMarkAlloc. + if (!It->second.IsHeap) + { + int64_t OldSize = int64_t(It->second.Size); + uint8_t OldHeap = It->second.RootHeap; + m_CurrentBytes -= OldSize; + if (OldHeap == 0) + { + m_SystemBytes -= OldSize; + } + else if (OldHeap == 1) + { + m_VideoBytes -= OldSize; + } + auto HIt = m_RootHeapStats.find(OldHeap); + if (HIt != m_RootHeapStats.end()) + { + HIt->second.CurrentBytes -= OldSize; + HIt->second.FreeCount++; + } + } + It->second = LiveAlloc{Size, CallstackId, ThreadId, m_AllocEventSeq, RootHeap, false}; + } + else + { + m_LiveAllocs.insert({Address, LiveAlloc{Size, CallstackId, ThreadId, m_AllocEventSeq, RootHeap, false}}); + } + + int64_t SignedSize = int64_t(Size); + m_CurrentBytes += SignedSize; + if (RootHeap == 0) + { + m_SystemBytes += SignedSize; + } + else if (RootHeap == 1) + { + m_VideoBytes += SignedSize; + } + + // Update per-root-heap stats + HeapStat& HStat = m_RootHeapStats[RootHeap]; + HStat.HeapId = RootHeap; + HStat.CurrentBytes += SignedSize; + HStat.AllocCount++; + if (HStat.CurrentBytes > HStat.PeakBytes) + { + HStat.PeakBytes = HStat.CurrentBytes; + } + + // Track global peak + if (m_CurrentBytes > m_PeakBytes) + { + m_PeakBytes = m_CurrentBytes; + 
m_PeakTimeUs = m_LastMarkerTimeUs; + } + + if (IsRealloc) + { + m_TotalReallocAllocs++; + } + else + { + m_TotalAllocs++; + } + + // Churn tracking + m_AllocEventSeq++; + if (CallstackId != 0) + { + ChurnAccum& Churn = m_ChurnByCallstack[CallstackId]; + Churn.TotalAllocs++; + Churn.TotalBytes += Size; + } + + // Size histogram: bucket 0 captures zero-size allocs, bucket i (i>=1) + // captures sizes in [2^(i-1)+1, 2^i]. Use ceil(log2) so power-of-two + // sizes land on their own bucket (e.g. 16 -> bucket 4 = (8, 16]). + size_t BucketIndex = 0; + if (Size > 0) + { + uint64_t Shifted = Size - 1; + while (Shifted > 0 && BucketIndex < kSizeHistogramBuckets - 1) + { + Shifted >>= 1; + ++BucketIndex; + } + } + m_SizeHistogram[BucketIndex].Count++; + m_SizeHistogram[BucketIndex].Bytes += Size; +} + +void +AllocationAnalyzer::HandleFree(uint64_t Address, uint8_t /*RootHeap*/, uint32_t /*CallstackId*/, bool IsRealloc) +{ + auto It = m_LiveAllocs.find(Address); + if (It == m_LiveAllocs.end()) + { + // Allocation happened before the trace started -- nothing to subtract. + if (IsRealloc) + { + m_TotalReallocFrees++; + } + else + { + m_TotalFrees++; + } + return; + } + + int64_t Size = int64_t(It->second.Size); + uint8_t AllocHeap = It->second.RootHeap; + bool WasHeap = It->second.IsHeap; + uint32_t AllocCsId = It->second.CallstackId; + uint64_t AllocEventSeq = It->second.EventSeq; + + // Heap-marked allocs were already subtracted from totals in OnHeapMarkAlloc. + if (!WasHeap) + { + m_CurrentBytes -= Size; + if (AllocHeap == 0) + { + m_SystemBytes -= Size; + } + else if (AllocHeap == 1) + { + m_VideoBytes -= Size; + } + + auto HIt = m_RootHeapStats.find(AllocHeap); + if (HIt != m_RootHeapStats.end()) + { + HIt->second.CurrentBytes -= Size; + HIt->second.FreeCount++; + } + } + + m_LiveAllocs.erase(It); + + // Churn tracking: record event distance for this alloc→free pair. + // Short distances indicate short-lived (churny) allocations. 
+ if (AllocCsId != 0) + { + uint64_t Distance = m_AllocEventSeq - AllocEventSeq; + auto ChurnIt = m_ChurnByCallstack.find(AllocCsId); + if (ChurnIt != m_ChurnByCallstack.end()) + { + ChurnIt->second.ChurnDistanceSum += Distance; + ChurnIt->second.ChurnAllocs++; + ChurnIt->second.ChurnBytes += uint64_t(Size); + } + } + + if (IsRealloc) + { + m_TotalReallocFrees++; + } + else + { + m_TotalFrees++; + } +} + +void +AllocationAnalyzer::MaybeEmitSample(uint32_t TimeUs) +{ + if (TimeUs < m_LastSampleTimeUs + kTimelineSampleIntervalUs) + { + return; + } + m_LastSampleTimeUs = TimeUs; + m_Timeline.push_back(MemoryTimelineSample{ + .TimeUs = TimeUs, + .TotalAllocatedBytes = m_CurrentBytes, + .SystemBytes = m_SystemBytes, + .VideoBytes = m_VideoBytes, + }); +} + +////////////////////////////////////////////////////////////////////////////// +// Event handlers + +void +AllocationAnalyzer::OnInit(const ::Memory_Init& Ev) +{ + m_SizeShift = Ev.SizeShift(); + m_Initialized = true; + ZEN_DEBUG("Memory trace init: version={}, sizeShift={}, minAlignment={}, markerPeriod={}, pageSize={}", + Ev.Version(), + m_SizeShift, + Ev.MinAlignment(), + Ev.MarkerPeriod(), + Ev.PageSize()); +} + +void +AllocationAnalyzer::OnMarker(const ::Memory_Marker& Ev) +{ + if (!m_Timing || m_Timing->Freq == 0) + { + return; + } + uint32_t TimeUs = m_Timing->CycleToTimeUs(Ev.Cycle()); + m_LastMarkerTimeUs = TimeUs; + m_HasReceivedMarker = true; + MaybeEmitSample(TimeUs); +} + +void +AllocationAnalyzer::OnAlloc(const ::Memory_Alloc& Ev) +{ + uint64_t Size = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower()); + HandleAlloc(Ev.Address(), Size, Ev.RootHeap(), Ev.CallstackId(), Ev.get_thread_id(), /*IsRealloc=*/false); +} + +void +AllocationAnalyzer::OnAllocSystem(const ::Memory_AllocSystem& Ev) +{ + uint64_t Size = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower()); + HandleAlloc(Ev.Address(), Size, /*RootHeap=*/0, Ev.CallstackId(), Ev.get_thread_id(), /*IsRealloc=*/false); +} + +void 
+AllocationAnalyzer::OnAllocVideo(const ::Memory_AllocVideo& Ev) +{ + uint64_t Size = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower()); + HandleAlloc(Ev.Address(), Size, /*RootHeap=*/1, Ev.CallstackId(), Ev.get_thread_id(), /*IsRealloc=*/false); +} + +void +AllocationAnalyzer::OnFree(const ::Memory_Free& Ev) +{ + HandleFree(Ev.Address(), Ev.RootHeap(), Ev.CallstackId(), /*IsRealloc=*/false); +} + +void +AllocationAnalyzer::OnFreeSystem(const ::Memory_FreeSystem& Ev) +{ + HandleFree(Ev.Address(), /*RootHeap=*/0, Ev.CallstackId(), /*IsRealloc=*/false); +} + +void +AllocationAnalyzer::OnFreeVideo(const ::Memory_FreeVideo& Ev) +{ + HandleFree(Ev.Address(), /*RootHeap=*/1, Ev.CallstackId(), /*IsRealloc=*/false); +} + +void +AllocationAnalyzer::OnReallocAlloc(const ::Memory_ReallocAlloc& Ev) +{ + uint64_t Size = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower()); + HandleAlloc(Ev.Address(), Size, Ev.RootHeap(), Ev.CallstackId(), Ev.get_thread_id(), /*IsRealloc=*/true); +} + +void +AllocationAnalyzer::OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev) +{ + uint64_t Size = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower()); + HandleAlloc(Ev.Address(), Size, /*RootHeap=*/0, Ev.CallstackId(), Ev.get_thread_id(), /*IsRealloc=*/true); +} + +void +AllocationAnalyzer::OnReallocFree(const ::Memory_ReallocFree& Ev) +{ + HandleFree(Ev.Address(), Ev.RootHeap(), Ev.CallstackId(), /*IsRealloc=*/true); +} + +void +AllocationAnalyzer::OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev) +{ + HandleFree(Ev.Address(), /*RootHeap=*/0, Ev.CallstackId(), /*IsRealloc=*/true); +} + +void +AllocationAnalyzer::OnHeapSpec(const ::Memory_HeapSpec& Ev) +{ + uint32_t Id = Ev.Id(); + HeapInfo& Info = m_Heaps[Id]; + Info.Id = Id; + Info.ParentId = Ev.ParentId(); + Info.Flags = Ev.Flags(); + Info.Name = SafeFieldStr(Ev.Name()); +} + +void +AllocationAnalyzer::OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev) +{ + uint64_t Address = Ev.Address(); + auto It = 
m_LiveAllocs.find(Address); + if (It == m_LiveAllocs.end()) + { + return; + } + + LiveAlloc& Alloc = It->second; + if (Alloc.IsHeap) + { + return; // already marked + } + + Alloc.IsHeap = true; + + // Remove this allocation from the running totals — heap-marked + // allocations represent address-space reservations (e.g. module images) + // and should not count towards the regular memory budget. + int64_t SignedSize = int64_t(Alloc.Size); + m_CurrentBytes -= SignedSize; + if (Alloc.RootHeap == 0) + { + m_SystemBytes -= SignedSize; + } + else if (Alloc.RootHeap == 1) + { + m_VideoBytes -= SignedSize; + } + auto HIt = m_RootHeapStats.find(Alloc.RootHeap); + if (HIt != m_RootHeapStats.end()) + { + HIt->second.CurrentBytes -= SignedSize; + } +} + +void +AllocationAnalyzer::OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev) +{ + uint64_t Address = Ev.Address(); + auto It = m_LiveAllocs.find(Address); + if (It == m_LiveAllocs.end()) + { + return; + } + + LiveAlloc& Alloc = It->second; + if (!Alloc.IsHeap) + { + return; // not marked + } + + Alloc.IsHeap = false; + + // Add back to running totals. 
+ int64_t SignedSize = int64_t(Alloc.Size); + m_CurrentBytes += SignedSize; + if (Alloc.RootHeap == 0) + { + m_SystemBytes += SignedSize; + } + else if (Alloc.RootHeap == 1) + { + m_VideoBytes += SignedSize; + } + auto HIt = m_RootHeapStats.find(Alloc.RootHeap); + if (HIt != m_RootHeapStats.end()) + { + HIt->second.CurrentBytes += SignedSize; + } +} + +void +AllocationAnalyzer::OnTagSpec(const ::Memory_TagSpec& Ev) +{ + int32_t Tag = Ev.Tag(); + TagInfo& Info = m_Tags[Tag]; + Info.Tag = Tag; + Info.Parent = Ev.Parent(); + Info.Display = SafeFieldStr(Ev.Display()); +} + +////////////////////////////////////////////////////////////////////////////// +// Public accessors + +AllocationSummary +AllocationAnalyzer::Summary() const +{ + AllocationSummary S; + S.HasMemoryData = m_Initialized || m_TotalAllocs > 0; + S.TotalAllocs = m_TotalAllocs; + S.TotalFrees = m_TotalFrees; + S.TotalReallocAllocs = m_TotalReallocAllocs; + S.TotalReallocFrees = m_TotalReallocFrees; + S.PeakBytes = m_PeakBytes; + S.PeakTimeUs = m_PeakTimeUs; + S.EndBytes = m_CurrentBytes; + + uint32_t LiveCount = 0; + for (const auto& [Addr, Alloc] : m_LiveAllocs) + { + if (!Alloc.IsHeap) + { + ++LiveCount; + } + } + S.LiveAllocations = LiveCount; + return S; +} + +void +AllocationAnalyzer::EmitFinalSample(uint32_t TraceEndUs) +{ + if (!m_Initialized) + { + return; + } + // Force-emit a final sample at the trace end so the timeline captures + // the terminal memory state even if no Marker arrived recently. + uint32_t FinalTimeUs = m_HasReceivedMarker ? 
std::max(m_LastMarkerTimeUs, TraceEndUs) : TraceEndUs; + m_Timeline.push_back(MemoryTimelineSample{ + .TimeUs = FinalTimeUs, + .TotalAllocatedBytes = m_CurrentBytes, + .SystemBytes = m_SystemBytes, + .VideoBytes = m_VideoBytes, + }); +} + +eastl::vector<CallstackAllocStat> +AllocationAnalyzer::BuildCallstackStats() const +{ + eastl::hash_map<uint32_t, CallstackAllocStat> Map; + for (const auto& [Addr, Alloc] : m_LiveAllocs) + { + if (Alloc.CallstackId == 0 || Alloc.IsHeap) + { + continue; + } + CallstackAllocStat& S = Map[Alloc.CallstackId]; + S.CallstackId = Alloc.CallstackId; + S.LiveBytes += int64_t(Alloc.Size); + S.LiveCount++; + if (eastl::find(S.ThreadIds.begin(), S.ThreadIds.end(), Alloc.ThreadId) == S.ThreadIds.end()) + { + S.ThreadIds.push_back(Alloc.ThreadId); + } + } + + eastl::vector<CallstackAllocStat> Result; + Result.reserve(Map.size()); + for (auto& [Id, Stat] : Map) + { + Result.push_back(Stat); + } + eastl::sort(Result.begin(), Result.end(), [](const CallstackAllocStat& A, const CallstackAllocStat& B) { + return A.LiveBytes > B.LiveBytes; + }); + return Result; +} + +eastl::vector<CallstackChurnStat> +AllocationAnalyzer::BuildChurnStats(uint64_t ChurnDistanceThreshold) const +{ + // The ChurnAccum already separates total allocs from churny allocs. + // ChurnAllocs/ChurnBytes count every freed allocation (regardless of + // distance). We now need to re-bucket using the threshold. But since + // we only stored the sum of distances (not per-alloc distances), we + // use the average: if MeanDistance <= threshold, all freed allocs from + // that callstack are considered churny. This is an approximation — + // a per-alloc histogram would be more precise but much more expensive. 
+ eastl::vector<CallstackChurnStat> Result; + Result.reserve(m_ChurnByCallstack.size()); + for (const auto& [Id, Churn] : m_ChurnByCallstack) + { + if (Churn.ChurnAllocs == 0) + { + continue; + } + double MeanDist = double(Churn.ChurnDistanceSum) / double(Churn.ChurnAllocs); + if (MeanDist > double(ChurnDistanceThreshold)) + { + continue; + } + CallstackChurnStat S; + S.CallstackId = Id; + S.ChurnAllocs = Churn.ChurnAllocs; + S.ChurnBytes = Churn.ChurnBytes; + S.TotalAllocs = Churn.TotalAllocs; + S.TotalBytes = Churn.TotalBytes; + S.MeanDistance = MeanDist; + Result.push_back(S); + } + eastl::sort(Result.begin(), Result.end(), [](const CallstackChurnStat& A, const CallstackChurnStat& B) { + return A.ChurnAllocs > B.ChurnAllocs; + }); + return Result; +} + +eastl::vector<AllocSizeBucket> +AllocationAnalyzer::BuildSizeHistogram() const +{ + eastl::vector<AllocSizeBucket> Result; + Result.reserve(kSizeHistogramBuckets); + for (size_t I = 0; I < kSizeHistogramBuckets; ++I) + { + const SizeBucketAccum& Accum = m_SizeHistogram[I]; + if (Accum.Count == 0) + { + continue; + } + AllocSizeBucket Bucket; + if (I == 0) + { + Bucket.MinSize = 0; + Bucket.MaxSize = 0; + } + else + { + // Bucket i covers (2^(i-1), 2^i]; bucket 1 is just size 1. + Bucket.MinSize = (I == 1) ? 1 : ((uint64_t(1) << (I - 1)) + 1); + Bucket.MaxSize = (I >= 64) ? 
~uint64_t(0) : (uint64_t(1) << I); + } + Bucket.Count = Accum.Count; + Bucket.Bytes = Accum.Bytes; + Result.push_back(Bucket); + } + return Result; +} + +////////////////////////////////////////////////////////////////////////////// +// CallstackAnalyzer implementation + +void +CallstackAnalyzer::subscribe(Vector<Subscription>& Subs) +{ + Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpec); + Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpecDeltaVarInt); + Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpecDelta7bit); + Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpecXORAndRLE); +} + +void +CallstackAnalyzer::StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count) +{ + if (Id == 0 || Count == 0) + { + return; + } + auto& Entry = m_Callstacks[Id]; + Entry.assign(Frames, Frames + Count); +} + +void +CallstackAnalyzer::OnCallstackSpec(const ::Memory_CallstackSpec& Ev) +{ + Array<uint64[]> Frames = Ev.Frames(); + StoreCallstack(Ev.CallstackId(), Frames.get(), Frames.get_count()); +} + +void +CallstackAnalyzer::OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev) +{ + Array<uint8[]> Compressed = Ev.CompressedFrames(); + eastl::vector<uint64_t> Frames = DecodeDeltaVarInt(Compressed.get(), Compressed.get_size()); + StoreCallstack(Ev.CallstackId(), Frames.data(), Frames.size()); +} + +void +CallstackAnalyzer::OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev) +{ + Array<uint8[]> Compressed = Ev.CompressedFrames(); + eastl::vector<uint64_t> Frames = DecodeDelta7bit(Compressed.get(), Compressed.get_size()); + StoreCallstack(Ev.CallstackId(), Frames.data(), Frames.size()); +} + +void +CallstackAnalyzer::OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev) +{ + Array<uint8[]> Compressed = Ev.CompressedFrames(); + eastl::vector<uint64_t> Frames = DecodeXORAndRLE(Compressed.get(), Compressed.get_size()); + StoreCallstack(Ev.CallstackId(), Frames.data(), Frames.size()); +} diff 
--git a/src/zen/trace/trace_memory.h b/src/zen/trace/trace_memory.h new file mode 100644 index 000000000..da33d8218 --- /dev/null +++ b/src/zen/trace/trace_memory.h @@ -0,0 +1,301 @@
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include <zencore/zencore.h>

ZEN_THIRD_PARTY_INCLUDES_START
#include <EASTL/fixed_vector.h>
#include <EASTL/hash_map.h>
#include <EASTL/vector.h>
#include <analysis/analyzer.h>
ZEN_THIRD_PARTY_INCLUDES_END

#include <cstdint>
#include <string>

// Forward declarations of outline types (defined in trace_memory.cpp).
// These are global-scope structs created by the begin_outline() macro.
struct Memory_Init;
struct Memory_Marker;
struct Memory_Alloc;
struct Memory_AllocSystem;
struct Memory_AllocVideo;
struct Memory_Free;
struct Memory_FreeSystem;
struct Memory_FreeVideo;
struct Memory_ReallocAlloc;
struct Memory_ReallocAllocSystem;
struct Memory_ReallocFree;
struct Memory_ReallocFreeSystem;
struct Memory_HeapSpec;
struct Memory_HeapMarkAlloc;
struct Memory_HeapUnmarkAlloc;
struct Memory_TagSpec;
struct Memory_CallstackSpec;
struct Memory_CallstackSpecDeltaVarInt;
struct Memory_CallstackSpecDelta7bit;
struct Memory_CallstackSpecXORAndRLE;

namespace zen::trace_detail {

struct TraceTiming;

// -- Allocation data structures --------------------------------------------

// Description of one heap as announced by a Memory.HeapSpec event.
struct HeapInfo
{
    uint32_t    Id       = 0;
    uint32_t    ParentId = ~0u;  // NOTE(review): ~0u presumably means "no parent" — confirm
    uint16_t    Flags    = 0;    // EMemoryTraceHeapFlags bits
    std::string Name;
};

// Description of one allocation tag as announced by a Memory.TagSpec event.
struct TagInfo
{
    int32_t     Tag    = 0;
    int32_t     Parent = 0;
    std::string Display;
};

// One point of the memory-over-time timeline. Members carry no default
// initializers; the sample is built with designated initializers.
// NOTE(review): TimeUs is a 32-bit microsecond count, which can represent
// roughly 71.6 minutes — confirm longer traces are handled upstream.
struct MemoryTimelineSample
{
    uint32_t TimeUs;
    int64_t  TotalAllocatedBytes;
    int64_t  SystemBytes;
    int64_t  VideoBytes;
};

// Running statistics for one heap (see AllocationAnalyzer::RootHeapStats()).
struct HeapStat
{
    uint32_t HeapId       = 0;
    int64_t  CurrentBytes = 0;
    int64_t  PeakBytes    = 0;
    uint64_t AllocCount   = 0;
    uint64_t FreeCount    = 0;
};

// Whole-trace allocation totals returned by AllocationAnalyzer::Summary().
struct AllocationSummary
{
    bool     HasMemoryData      = false;
    uint64_t TotalAllocs        = 0;
    uint64_t TotalFrees         = 0;
    uint64_t TotalReallocAllocs = 0;
    uint64_t TotalReallocFrees  = 0;
    int64_t  PeakBytes          = 0;
    uint32_t PeakTimeUs         = 0;
    int64_t  EndBytes           = 0;
    uint32_t LiveAllocations    = 0;
};

// One power-of-two bucket of the allocation size histogram. The bucket covers
// sizes in [MinSize, MaxSize] inclusive. As filled in by
// AllocationAnalyzer::BuildSizeHistogram: the zero-size bucket is [0, 0],
// the next is [1, 2], and every further bucket i is [2^(i-1) + 1, 2^i]
// (the last one saturating at the maximum uint64_t value). Count and Bytes
// aggregate every alloc/realloc-alloc seen during the trace (not just
// currently-live allocations).
struct AllocSizeBucket
{
    uint64_t MinSize = 0;
    uint64_t MaxSize = 0;
    uint64_t Count   = 0;
    uint64_t Bytes   = 0;
};

// -- Callstack data structures ---------------------------------------------

// A single resolved stack frame. ModuleIndex references TraceModel::Modules;
// ~0u means the frame did not map to any loaded module.
struct ResolvedFrame
{
    uint64_t Address     = 0;
    uint32_t ModuleIndex = ~0u;
    uint64_t Offset      = 0;
};

// A decoded callstack: the ordered list of instruction-pointer frames
// captured at the point of an allocation (or free).
struct CallstackEntry
{
    uint32_t                     Id = 0;
    eastl::vector<ResolvedFrame> Frames;  // outermost (caller) first
};

// Per-callstack allocation churn statistics. "Churn" is measured by how
// quickly an allocation is freed — specifically, the number of alloc events
// that occur between the alloc and its matching free (event distance).
// AllocationAnalyzer::BuildChurnStats applies the distance threshold to the
// callstack's *mean* distance, so a reported callstack carries all of its
// freed allocations in ChurnAllocs/ChurnBytes, not only the individually
// short-lived ones.
struct CallstackChurnStat
{
    uint32_t CallstackId  = 0;
    uint64_t ChurnAllocs  = 0;  // freed allocations from this callstack (callstack passed the mean-distance threshold)
    uint64_t ChurnBytes   = 0;  // cumulative bytes of those freed allocations
    uint64_t TotalAllocs  = 0;  // all allocations from this callstack (for context)
    uint64_t TotalBytes   = 0;
    double   MeanDistance = 0.0;  // average event distance over ChurnAllocs
};

// Per-callstack live allocation statistics.
// Filled by AllocationAnalyzer::BuildCallstackStats: one record per callstack
// id that still has live allocations.
struct CallstackAllocStat
{
    uint32_t                               CallstackId = 0;
    int64_t                                LiveBytes   = 0;  // sum of sizes of the live allocations
    uint32_t                               LiveCount   = 0;  // number of live allocations
    eastl::fixed_vector<uint32_t, 4, true> ThreadIds;        // unique thread IDs that contributed allocations
};

// -- AllocationAnalyzer ----------------------------------------------------

// Subscribes to Memory.* trace events and tracks aggregate allocation
// statistics, a memory-over-time timeline, heap specs, and tag specs.
// Intended to be instantiated by BuildTraceModel alongside the other
// analyzers and registered with the Dispatcher.
class AllocationAnalyzer : public Analyzer
{
public:
    // Timing is stored as a non-owning pointer (m_Timing).
    explicit AllocationAnalyzer(const TraceTiming* Timing);

    void subscribe(Vector<Subscription>& Subs) override;

    // -- Accessors (call after IterateTrace completes) --

    bool              Initialized() const { return m_Initialized; }
    AllocationSummary Summary() const;
    // Appends a closing sample to the timeline; TraceEndUs is the trace end time in microseconds.
    void EmitFinalSample(uint32_t TraceEndUs);

    // Mutable access so the caller can take over / post-process the timeline samples.
    eastl::vector<MemoryTimelineSample>&       MutableTimeline() { return m_Timeline; }
    const eastl::hash_map<uint32_t, HeapInfo>& Heaps() const { return m_Heaps; }
    const eastl::hash_map<int32_t, TagInfo>&   Tags() const { return m_Tags; }
    const eastl::hash_map<uint8_t, HeapStat>&  RootHeapStats() const { return m_RootHeapStats; }

    // Build per-callstack statistics from the current live allocation set.
    eastl::vector<CallstackAllocStat> BuildCallstackStats() const;

    // Build per-callstack churn statistics sorted by churn alloc count descending.
    // ChurnDistanceThreshold: allocations freed within this many alloc-events are
    // considered "short-lived" / churny. The implementation compares the
    // threshold against each callstack's mean distance (an approximation).
    eastl::vector<CallstackChurnStat> BuildChurnStats(uint64_t ChurnDistanceThreshold = 1000) const;

    // Build a size-bucketed histogram of all observed allocations. Returns
    // only populated buckets, ordered by MinSize ascending.
    eastl::vector<AllocSizeBucket> BuildSizeHistogram() const;

private:
    // -- Event handlers --

    void OnInit(const ::Memory_Init& Ev);
    void OnMarker(const ::Memory_Marker& Ev);
    void OnAlloc(const ::Memory_Alloc& Ev);
    void OnAllocSystem(const ::Memory_AllocSystem& Ev);
    void OnAllocVideo(const ::Memory_AllocVideo& Ev);
    void OnFree(const ::Memory_Free& Ev);
    void OnFreeSystem(const ::Memory_FreeSystem& Ev);
    void OnFreeVideo(const ::Memory_FreeVideo& Ev);
    void OnReallocAlloc(const ::Memory_ReallocAlloc& Ev);
    void OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev);
    void OnReallocFree(const ::Memory_ReallocFree& Ev);
    void OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev);
    void OnHeapSpec(const ::Memory_HeapSpec& Ev);
    void OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev);
    void OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev);
    void OnTagSpec(const ::Memory_TagSpec& Ev);

    // -- Internal helpers --

    // Bookkeeping for one live allocation, keyed by address in m_LiveAllocs.
    // Only IsHeap carries a default initializer.
    struct LiveAlloc
    {
        uint64_t Size;
        uint32_t CallstackId;
        uint32_t ThreadId;
        uint64_t EventSeq;  // alloc event sequence number for churn distance
        uint8_t  RootHeap;
        bool     IsHeap = false;  // true after HeapMarkAlloc; excluded from totals
    };

    uint64_t DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const;
    void     HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc);
    void     HandleFree(uint64_t Address, uint8_t RootHeap, uint32_t CallstackId, bool IsRealloc);
    void     MaybeEmitSample(uint32_t TimeUs);

    // -- State --

    static constexpr uint32_t kTimelineSampleIntervalUs = 10'000;  // 10ms

    const TraceTiming* m_Timing = nullptr;

    // Init params
    uint8_t m_SizeShift   = 3;  // overridden by Memory.Init if present; 3 matches zencore's default
    bool    m_Initialized = false;

    // Live allocation map (address -> size + root heap)
    eastl::hash_map<uint64_t, LiveAlloc> m_LiveAllocs;

    // Running byte counters
    int64_t  m_CurrentBytes = 0;
    int64_t  m_SystemBytes  = 0;
    int64_t  m_VideoBytes   = 0;
    int64_t  m_PeakBytes    = 0;
    uint32_t m_PeakTimeUs   = 0;

    // Event counters
    uint64_t m_TotalAllocs        = 0;
    uint64_t m_TotalFrees         = 0;
    uint64_t m_TotalReallocAllocs = 0;
    uint64_t m_TotalReallocFrees  = 0;

    // Timeline sampling
    eastl::vector<MemoryTimelineSample> m_Timeline;
    uint32_t                            m_LastSampleTimeUs  = 0;
    uint32_t                            m_LastMarkerTimeUs  = 0;
    bool                                m_HasReceivedMarker = false;

    // Per-callstack churn counters: total allocs + short-lived alloc stats.
    // NOTE(review): BuildChurnStats documents ChurnAllocs/ChurnBytes as counting
    // every freed allocation regardless of distance, with the threshold applied
    // later to the mean; the "within the distance threshold" wording below looks
    // stale — confirm against HandleFree.
    struct ChurnAccum
    {
        uint64_t TotalAllocs      = 0;
        uint64_t TotalBytes       = 0;
        uint64_t ChurnAllocs      = 0;  // freed within the distance threshold
        uint64_t ChurnBytes       = 0;
        uint64_t ChurnDistanceSum = 0;  // sum of event distances for churny allocs
    };
    eastl::hash_map<uint32_t, ChurnAccum> m_ChurnByCallstack;
    uint64_t                              m_AllocEventSeq = 0;  // monotonic alloc event counter

    // Allocation size histogram: bucket i covers sizes [2^(i-1)+1, 2^i], with
    // bucket 0 reserved for zero-size allocations. 65 buckets covers up to 2^64.
    // (BuildSizeHistogram reports bucket 1 as [1, 2].)
    static constexpr size_t kSizeHistogramBuckets = 65;
    struct SizeBucketAccum
    {
        uint64_t Count = 0;
        uint64_t Bytes = 0;
    };
    SizeBucketAccum m_SizeHistogram[kSizeHistogramBuckets] = {};

    // Metadata
    eastl::hash_map<uint32_t, HeapInfo> m_Heaps;
    eastl::hash_map<int32_t, TagInfo>   m_Tags;
    eastl::hash_map<uint8_t, HeapStat>  m_RootHeapStats;
};

// -- CallstackAnalyzer -----------------------------------------------------

// Subscribes to Memory.CallstackSpec* trace events, decodes compressed
// frames, and stores a callstack ID -> frame addresses mapping. Frame
// addresses are raw instruction pointers; resolution to module+offset
// happens in BuildTraceModel post-processing.
class CallstackAnalyzer : public Analyzer
{
public:
    void subscribe(Vector<Subscription>& Subs) override;

    // Callstack id -> raw frame addresses, as stored by StoreCallstack.
    const eastl::hash_map<uint32_t, eastl::vector<uint64_t>>& RawCallstacks() const { return m_Callstacks; }

private:
    // One handler per Memory.CallstackSpec* event type.
    void OnCallstackSpec(const ::Memory_CallstackSpec& Ev);
    void OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev);
    void OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev);
    void OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev);

    // Records Frames[0..Count) under Id; Id 0 and empty lists are ignored.
    void StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count);

    eastl::hash_map<uint32_t, eastl::vector<uint64_t>> m_Callstacks;
};

}  // namespace zen::trace_detail
diff --git a/src/zen/trace/trace_model.cpp b/src/zen/trace/trace_model.cpp new file mode 100644 index 000000000..ac81161a1 --- /dev/null +++ b/src/zen/trace/trace_model.cpp @@ -0,0 +1,3898 @@
// Copyright Epic Games, Inc. All Rights Reserved.

#include "trace_model.h"

#include <zencore/basicfile.h>
#include <zencore/except_fmt.h>
#include <zencore/fmtutils.h>
#include <zencore/intmath.h>
#include <zencore/logging.h>
#include <zencore/logging/tracelog.h>
#include <zencore/scopeguard.h>
#include <zencore/string.h>
#include <zencore/thread.h>
#include <zencore/timer.h>
#include <zenutil/parallelsort.h>

ZEN_THIRD_PARTY_INCLUDES_START
#include <EASTL/hash_map.h>
#include <EASTL/map.h>
#include <EASTL/set.h>
#include <EASTL/sort.h>
#include <EASTL/vector.h>
#include <analysis/analyzer.h>
#include <analysis/dispatcher.h>
#include <fmt/format.h>
#include <trace/trace.h>
ZEN_THIRD_PARTY_INCLUDES_END

#include <algorithm>
#include <cmath>
#include <cstring>

using namespace std::literals;

// Toggle to A/B test cross-platform parallel sort vs sequential eastl::sort.
+constexpr bool kUseParallelSort = true; + +namespace eastl { + +template<> +struct hash<std::string> +{ + size_t operator()(const std::string& S) const { return eastl::hash<const char*>()(S.c_str()); } +}; + +} // namespace eastl + +////////////////////////////////////////////////////////////////////////////// +// Trace analysis types (global namespace alongside tourist types) + +namespace { + +using zen::ReciprocalU64; + +// Welford's online algorithm for computing mean and standard deviation +class Distribution +{ +public: + void add(double X) + { + m_Count++; + if (m_Count == 1) + { + m_OldM = m_NewM = X; + m_OldS = 0.0; + } + else + { + m_NewM = m_OldM + (X - m_OldM) / double(m_Count); + m_NewS = m_OldS + (X - m_OldM) * (X - m_NewM); + m_OldM = m_NewM; + m_OldS = m_NewS; + } + } + + uint32_t Count() const { return m_Count; } + double Mean() const { return (m_Count > 0) ? m_NewM : 0.0; } + double Variance() const { return (m_Count > 1) ? m_NewS / double(m_Count - 1) : 0.0; } + double StdDev() const { return std::sqrt(Variance()); } + +private: + double m_OldM = 0.0; + double m_NewM = 0.0; + double m_OldS = 0.0; + double m_NewS = 0.0; + uint32_t m_Count = 0; +}; + +class NameDepot +{ +public: + uint64 Add(StringView Name) + { + uint64 NameHash = Hash(Name); + Add(NameHash, Name); + return NameHash; + } + + void Add(uint64 NameHash, StringView Name) + { + if (auto It = m_Names.insert({NameHash, String()}); It.second) + { + It.first->second = Name; + } + } + + StringView Get(uint64 NameHash) const + { + auto Iter = m_Names.find(NameHash); + if (Iter == m_Names.end()) + { + return "???"; + } + return Iter->second; + } + +private: + eastl::hash_map<uint64, String> m_Names; +}; + +struct CpuEventStat +{ + Distribution Dist; + uint32_t Min = ~0u; + uint32_t Max = 0; +}; + +class EventStats +{ +public: + void Record(uint64 NameHash, uint32 DurationUs) + { + CpuEventStat& Stat = m_Stats[NameHash]; + Stat.Min = std::min(Stat.Min, DurationUs); + Stat.Max = 
std::max(Stat.Max, DurationUs); + Stat.Dist.add(double(DurationUs)); + } + + auto begin() const { return m_Stats.begin(); } + auto end() const { return m_Stats.end(); } + bool empty() const { return m_Stats.empty(); } + +private: + eastl::hash_map<uint64, CpuEventStat> m_Stats; +}; + +////////////////////////////////////////////////////////////////////////////// +// Event outlines + +// clang-format off +begin_outline($Trace, NewTrace) + field(uint64, CycleFrequency) + field(uint64, StartCycle) +end_outline() + +begin_outline(CpuProfiler, EventSpec) + field(uint32, Id) + field(FieldStr, Name) +end_outline() + +begin_outline(CpuProfiler, EventBatch) + field(uint32, ThreadId) + field(uint8[], Data) +end_outline() + +begin_outline(CpuProfiler, EventBatchV2) + field(uint8[], Data) +end_outline() + +begin_outline(CpuProfiler, EventBatchV3) + field(uint8[], Data) +end_outline() + +begin_outline(CpuProfiler, Metadata) + field(uint32, Id) + field(uint32, SpecId) + field(uint8[], Metadata) +end_outline() + +begin_outline($Trace, ThreadInfo) + field(FieldStr, Name) + field(int32, SortHint) + field(uint32, ThreadId) + field(uint32, SystemId) +end_outline() + +begin_outline($Trace, ThreadGroupBegin) + field(FieldStr, Name) +end_outline() + +begin_outline($Trace, ThreadGroupEnd) +end_outline() + +begin_outline(Diagnostics, Session2) + field(FieldStr, Platform) + field(FieldStr, AppName) + field(FieldStr, ProjectName) + field(FieldStr, CommandLine) + field(FieldStr, Branch) + field(FieldStr, BuildVersion) + field(uint32, Changelist) + field(uint8, ConfigurationType) + field(uint8, TargetType) +end_outline() + +begin_outline(Diagnostics, ModuleInit) + field(FieldStr, SymbolFormat) + field(uint8, ModuleBaseShift) +end_outline() + +begin_outline(Diagnostics, ModuleLoad) + field(FieldStr, Name) + field(uint64, Base) + field(uint32, Size) + field(uint8[], ImageId) +end_outline() + +begin_outline(Diagnostics, ModuleUnload) + field(uint64, Base) +end_outline() + +begin_outline(Trace, 
ChannelAnnounce) + field(uint32, Id) + field(uint8, IsEnabled) + field(uint8, ReadOnly) + field(FieldStr, Name) +end_outline() + +begin_outline(Trace, ChannelToggle) + field(uint32, Id) + field(uint8, IsEnabled) +end_outline() + +begin_outline(Logging, LogCategory) + field(uint64, CategoryPointer) + field(uint8, DefaultVerbosity) + field(FieldStr, Name) +end_outline() + +begin_outline(Logging, LogMessageSpec) + field(uint64, LogPoint) + field(uint64, CategoryPointer) + field(int32, Line) + field(uint8, Verbosity) + field(FieldStr, FileName) + field(FieldStr, FormatString) +end_outline() + +begin_outline(Logging, LogMessage) + field(uint64, LogPoint) + field(uint64, Cycle) + field(uint8[], FormatArgs) +end_outline() + +// Analyzer-level schema for the zencore log events defined in +// src/zencore/logging/tracelog.cpp. The field layout mirrors the Logging.* +// outline above so the analyzer plumbing is symmetric, but the events carry +// fmt-style `{}` format strings instead of printf-style `%` specifiers and +// the FormatArgs blob uses a zen-specific descriptor encoding. 
// ZenLog.* events: same field names and order as the Logging.* outlines.
begin_outline(ZenLog, Category)
    field(uint64, CategoryPointer)
    field(uint8, DefaultVerbosity)
    field(FieldStr, Name)
end_outline()

begin_outline(ZenLog, MessageSpec)
    field(uint64, LogPoint)
    field(uint64, CategoryPointer)
    field(int32, Line)
    field(uint8, Verbosity)
    field(FieldStr, FileName)
    field(FieldStr, FormatString)
end_outline()

begin_outline(ZenLog, Message)
    field(uint64, LogPoint)
    field(uint64, Cycle)
    field(uint8[], FormatArgs)
end_outline()

// Misc.* bookmark and region events
begin_outline(Misc, BookmarkSpec)
    field(uint64, BookmarkPoint)
    field(int32, Line)
    field(FieldStr, FormatString)
    field(FieldStr, FileName)
end_outline()

begin_outline(Misc, Bookmark)
    field(uint64, Cycle)
    field(uint64, BookmarkPoint)
    field(uint8[], FormatArgs)
end_outline()

begin_outline(Misc, RegionBegin)
    field(uint64, Cycle)
    field(uint8[], RegionName)
    field(uint8[], Category)
end_outline()

// NOTE(review): CycleAndId presumably packs the cycle and the region id into
// one 64-bit field — confirm the split where this event is handled.
begin_outline(Misc, RegionBeginWithId)
    field(uint64, CycleAndId)
    field(uint8[], RegionName)
    field(uint8[], Category)
end_outline()

begin_outline(Misc, RegionEnd)
    field(uint64, Cycle)
    field(uint8[], RegionName)
end_outline()

begin_outline(Misc, RegionEndWithId)
    field(uint64, Cycle)
    field(uint64, RegionId)
end_outline()

// CsvProfiler events
begin_outline(CsvProfiler, RegisterCategory)
    field(int32, Index)
    field(uint8[], Name)
end_outline()

begin_outline(CsvProfiler, DefineInlineStat)
    field(uint64, StatId)
    field(int32, CategoryIndex)
    field(uint8[], Name)
end_outline()

begin_outline(CsvProfiler, DefineDeclaredStat)
    field(uint64, StatId)
    field(int32, CategoryIndex)
    field(uint8[], Name)
end_outline()

begin_outline(CsvProfiler, BeginStat)
    field(uint64, StatId)
    field(uint64, Cycle)
end_outline()

begin_outline(CsvProfiler, EndStat)
    field(uint64, StatId)
    field(uint64, Cycle)
end_outline()

begin_outline(CsvProfiler, BeginExclusiveStat)
    field(uint64, StatId)
    field(uint64, Cycle)
end_outline()

// CsvProfiler events (continued)
begin_outline(CsvProfiler, EndExclusiveStat)
    field(uint64, StatId)
    field(uint64, Cycle)
end_outline()

begin_outline(CsvProfiler, CustomStatInt)
    field(uint64, StatId)
    field(uint64, Cycle)
    field(int32, Value)
    field(uint8, OpType)
end_outline()

begin_outline(CsvProfiler, CustomStatFloat)
    field(uint64, StatId)
    field(uint64, Cycle)
    field(float, Value)
    field(uint8, OpType)
end_outline()

begin_outline(CsvProfiler, Event)
    field(uint64, Cycle)
    field(int32, CategoryIndex)
    field(uint8[], Text)
end_outline()

begin_outline(CsvProfiler, BeginCapture)
    field(uint64, Cycle)
    field(uint32, RenderThreadId)
    field(uint32, RHIThreadId)
    field(uint8, EnableCounts)
    field(uint8[], FileName)
end_outline()

begin_outline(CsvProfiler, EndCapture)
    field(uint64, Cycle)
end_outline()

begin_outline(CsvProfiler, Metadata)
    field(uint8[], Key)
    field(uint8[], Value)
end_outline()
    // clang-format on

    //////////////////////////////////////////////////////////////////////////////
    // Forward declarations needed by the helper analyzers below.

    using zen::trace_detail::SafeFieldStr;

//////////////////////////////////////////////////////////////////////////////
// Minimal CBOR formatter
//
// CpuProfiler.Metadata payloads are CBOR-encoded (RFC 7049) blobs produced
// by UE's FCborWriter. We don't need a full decoder -- we just walk the
// bytes and append human-readable values to an output string. Handles the
// subset actually emitted by UE's metadata scopes: unsigned / negative
// integers, byte / text strings, arrays, maps, floats, and the boolean /
// null simple values.
+ +static bool CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth); + +static bool +CborReadArg(const uint8*& p, const uint8* end, uint8 info, uint64& value) +{ + if (info < 24) + { + value = info; + return true; + } + if (info == 24) + { + if (end - p < 1) + return false; + value = *p++; + return true; + } + if (info == 25) + { + if (end - p < 2) + return false; + value = (uint64(p[0]) << 8) | p[1]; + p += 2; + return true; + } + if (info == 26) + { + if (end - p < 4) + return false; + value = (uint64(p[0]) << 24) | (uint64(p[1]) << 16) | (uint64(p[2]) << 8) | p[3]; + p += 4; + return true; + } + if (info == 27) + { + if (end - p < 8) + return false; + value = 0; + for (int i = 0; i < 8; ++i) + { + value = (value << 8) | p[i]; + } + p += 8; + return true; + } + return false; +} + +static bool +CborAppendValue(const uint8*& p, const uint8* end, std::string& out, int depth) +{ + if (depth > 4 || p >= end) + { + return false; + } + + const uint8 ib = *p++; + const uint8 major = ib >> 5; + const uint8 info = ib & 0x1f; + + switch (major) + { + case 0: // unsigned integer + { + uint64 v; + if (!CborReadArg(p, end, info, v)) + return false; + out += fmt::format("{}", v); + return true; + } + case 1: // negative integer: -1 - value + { + uint64 v; + if (!CborReadArg(p, end, info, v)) + return false; + out += fmt::format("{}", -1 - int64_t(v)); + return true; + } + case 2: // byte string + case 3: // text string + { + uint64 len; + if (!CborReadArg(p, end, info, len)) + return false; + if (len > uint64(end - p)) + return false; + out.append(reinterpret_cast<const char*>(p), size_t(len)); + p += len; + return true; + } + case 4: // array + { + uint64 count; + if (!CborReadArg(p, end, info, count)) + return false; + for (uint64 i = 0; i < count; ++i) + { + if (i > 0) + out += ", "; + if (!CborAppendValue(p, end, out, depth + 1)) + return false; + } + return true; + } + case 5: // map + { + uint64 count; + if (!CborReadArg(p, end, info, count)) + 
return false; + for (uint64 i = 0; i < count; ++i) + { + if (i > 0) + out += ", "; + if (!CborAppendValue(p, end, out, depth + 1)) + return false; + out += "="; + if (!CborAppendValue(p, end, out, depth + 1)) + return false; + } + return true; + } + case 7: // simple values / floats + { + if (info == 20) + { + out += "false"; + return true; + } + if (info == 21) + { + out += "true"; + return true; + } + if (info == 22) + { + out += "null"; + return true; + } + if (info == 26) + { + if (end - p < 4) + return false; + uint32 bits = (uint32(p[0]) << 24) | (uint32(p[1]) << 16) | (uint32(p[2]) << 8) | p[3]; + float v; + std::memcpy(&v, &bits, 4); + out += fmt::format("{}", v); + p += 4; + return true; + } + if (info == 27) + { + if (end - p < 8) + return false; + uint64 bits = 0; + for (int i = 0; i < 8; ++i) + { + bits = (bits << 8) | p[i]; + } + p += 8; + double v; + std::memcpy(&v, &bits, 8); + out += fmt::format("{}", v); + return true; + } + return false; + } + default: + return false; + } +} + +static std::string +FormatMetadataValues(const uint8_t* Bytes, size_t Size) +{ + std::string out; + if (Size == 0) + { + return out; + } + const uint8* p = Bytes; + CborAppendValue(p, Bytes + Size, out, 0); + return out; +} + +using zen::trace_detail::TraceTiming; + +////////////////////////////////////////////////////////////////////////////// +// Metadata registry +// +// Subscribes to CpuProfiler.Metadata events and stores each payload's +// CBOR-encoded bytes keyed by MetadataId, along with the SpecId they +// reference. Both CpuAnalyzer and TimelineAnalyzer query the registry when +// they encounter a V3 scope with the metadata bit set so the scope can be +// rendered as `{base name} - {formatted values}`. 
+ +struct MetadataEntry +{ + uint32_t SpecId = 0; + eastl::vector<uint8_t> Bytes; +}; + +class MetadataRegistry : public Analyzer +{ +public: + void subscribe(Vector<Subscription>& Subs) override { Subs.emplace_back(this, &MetadataRegistry::OnMetadata); } + + const MetadataEntry* Lookup(uint32_t MetadataId) const + { + auto It = m_Entries.find(MetadataId); + return (It != m_Entries.end()) ? &It->second : nullptr; + } + +private: + void OnMetadata(const CpuProfiler_Metadata& Ev) + { + uint32_t MetadataId = Ev.Id(); + uint32_t SpecId = Ev.SpecId(); + Array<uint8[]> Data = Ev.Metadata(); + + MetadataEntry& Entry = m_Entries[MetadataId]; + Entry.SpecId = SpecId; + Entry.Bytes.assign(Data.get(), Data.get() + Data.get_size()); + } + + eastl::hash_map<uint32_t, MetadataEntry> m_Entries; +}; + +////////////////////////////////////////////////////////////////////////////// +// Log message formatting (upstream UE Logging.* wire) +// +// UE's trace emits log messages as a sequence of typed arguments that need +// to be substituted into a printf-style format string. The wire format is: +// +// [ArgumentCount: uint8] +// [Descriptors: uint8 * ArgumentCount] // each byte = category | size +// [Payload: bytes] +// +// Category bits live in the upper 2 bits (shifted by FormatArgTypeCode_- +// CategoryBitShift == 6): Integer=1, Float=2, String=3. The low 6 bits are +// the argument size in bytes; for strings the size is the per-character +// width (1 == ANSI, 2 == UTF-16). +// +// We walk the format string, extract each specifier, pull the matching arg +// from the stream and hand both to std::snprintf. Width/precision stars +// (e.g. "%*.*f") are not supported; they're rare in log formats. +// +// Note: this is the upstream UE printf-style wire only. The zen-specific +// ZenLog.* events use a different descriptor encoding (3-bit category / +// 5-bit size, plus a dedicated bool and pointer category) and a different +// format-spec grammar. 
See zen::logging::FormatLogArgs in +// src/zencore/logging/tracelog.cpp for that path. + +struct FormatArgStream +{ + const uint8_t* Descriptors; + const uint8_t* Payload; + uint8_t Remaining; + + bool HasNext() const { return Remaining > 0; } + + uint8_t PeekCategory() const { return (*Descriptors) & 0xC0; } + uint8_t PeekSize() const { return (*Descriptors) & 0x3F; } + + void Advance(size_t PayloadBytes) + { + Payload += PayloadBytes; + ++Descriptors; + --Remaining; + } +}; + +static bool +InitFormatArgStream(FormatArgStream& Ctx, const uint8_t* Data, size_t Size) +{ + if (!Data || Size == 0) + { + Ctx.Remaining = 0; + return false; + } + uint8_t Count = Data[0]; + if (size_t(1) + Count > Size) + { + Ctx.Remaining = 0; + return false; + } + Ctx.Descriptors = Data + 1; + Ctx.Payload = Data + 1 + Count; + Ctx.Remaining = Count; + return true; +} + +static bool +IsPrintfSpecifierChar(char c) +{ + switch (c) + { + case 'd': + case 'i': + case 'u': + case 'o': + case 'x': + case 'X': + case 'c': + case 'p': + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': + case 's': + case 'S': + case 'n': + return true; + default: + return false; + } +} + +static std::string +FormatLogMessage(std::string_view Format, const uint8_t* ArgsData, size_t ArgsSize) +{ + FormatArgStream Stream{}; + InitFormatArgStream(Stream, ArgsData, ArgsSize); + + std::string Out; + Out.reserve(Format.size() + 32); + + size_t i = 0; + while (i < Format.size()) + { + char c = Format[i]; + if (c != '%') + { + Out.push_back(c); + ++i; + continue; + } + + // Handle "%%" -> literal percent. + if (i + 1 < Format.size() && Format[i + 1] == '%') + { + Out.push_back('%'); + i += 2; + continue; + } + + // Walk the specifier until we find a terminating character. + size_t SpecStart = i++; + while (i < Format.size() && !IsPrintfSpecifierChar(Format[i])) + { + ++i; + } + if (i >= Format.size()) + { + // Truncated specifier -- copy the remainder literally. 
+ Out.append(Format.substr(SpecStart)); + break; + } + + char Specifier = Format[i++]; + std::string Spec(Format.substr(SpecStart, i - SpecStart)); + + if (!Stream.HasNext()) + { + // Not enough arguments: emit the raw specifier so the user can + // at least tell something is missing. + Out.append(Spec); + continue; + } + + const uint8_t Category = Stream.PeekCategory(); + const uint8_t Size = Stream.PeekSize(); + + char Buf[512]; + Buf[0] = '\0'; + + if (Category == 0x40) // integer + { + uint64_t Raw = 0; + if (Size <= sizeof(Raw) && Size > 0) + { + std::memcpy(&Raw, Stream.Payload, Size); + } + + // Route through the correct snprintf type based on the + // specifier. Cast to int64_t for signed integer specifiers. + switch (Specifier) + { + case 'd': + case 'i': + { + // Sign-extend based on Size. + int64_t Signed = 0; + switch (Size) + { + case 1: + Signed = int8_t(Raw & 0xff); + break; + case 2: + Signed = int16_t(Raw & 0xffff); + break; + case 4: + Signed = int32_t(Raw & 0xffffffff); + break; + case 8: + Signed = int64_t(Raw); + break; + default: + Signed = int64_t(Raw); + break; + } + // Replace length modifier so snprintf interprets the + // correctly-sized value. Simplest: append "ll". 
+ std::string AdjustedSpec = Spec; + AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll"); + std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast<long long>(Signed)); + break; + } + case 'u': + case 'o': + case 'x': + case 'X': + case 'p': + { + std::string AdjustedSpec = Spec; + AdjustedSpec.insert(AdjustedSpec.size() - 1, "ll"); + std::snprintf(Buf, sizeof(Buf), AdjustedSpec.c_str(), static_cast<unsigned long long>(Raw)); + break; + } + case 'c': + { + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), int(Raw & 0xff)); + break; + } + default: + std::snprintf(Buf, sizeof(Buf), "%llu", static_cast<unsigned long long>(Raw)); + break; + } + Stream.Advance(Size); + } + else if (Category == 0x80) // floating point + { + double Value = 0.0; + if (Size == 4) + { + float F; + std::memcpy(&F, Stream.Payload, 4); + Value = double(F); + } + else if (Size == 8) + { + std::memcpy(&Value, Stream.Payload, 8); + } + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Value); + Stream.Advance(Size); + } + else if (Category == 0xC0) // string + { + std::string Tmp; + if (Size == 1) + { + const char* S = reinterpret_cast<const char*>(Stream.Payload); + size_t Len = std::strlen(S); + Tmp.assign(S, Len); + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str()); + Stream.Advance(Len + 1); + } + else if (Size == 2) + { + const char16_t* W = reinterpret_cast<const char16_t*>(Stream.Payload); + size_t Len = 0; + while (W[Len] != 0) + ++Len; + Tmp.reserve(Len); + for (size_t k = 0; k < Len; ++k) + { + char16_t ch = W[k]; + Tmp.push_back(ch < 0x80 ? 
char(ch) : '?'); + } + std::snprintf(Buf, sizeof(Buf), Spec.c_str(), Tmp.c_str()); + Stream.Advance((Len + 1) * 2); + } + else + { + std::snprintf(Buf, sizeof(Buf), "<unsupported string width %u>", unsigned(Size)); + Stream.Advance(0); + ++Stream.Descriptors; + --Stream.Remaining; + } + } + else + { + std::snprintf(Buf, sizeof(Buf), "<arg>"); + Stream.Advance(Size); + } + + Out.append(Buf); + } + + return Out; +} + +////////////////////////////////////////////////////////////////////////////// +// Log analyzer + +class LogAnalyzer : public Analyzer +{ +public: + explicit LogAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &LogAnalyzer::OnLogCategory); + Subs.emplace_back(this, &LogAnalyzer::OnLogMessageSpec); + Subs.emplace_back(this, &LogAnalyzer::OnLogMessage); + Subs.emplace_back(this, &LogAnalyzer::OnZenLogCategory); + Subs.emplace_back(this, &LogAnalyzer::OnZenLogMessageSpec); + Subs.emplace_back(this, &LogAnalyzer::OnZenLogMessage); + } + + eastl::vector<zen::trace_detail::LogCategoryInfo> BuildCategories(eastl::hash_map<uint64_t, uint32_t>& OutPointerToIndex) const + { + eastl::vector<zen::trace_detail::LogCategoryInfo> Cats; + Cats.reserve(m_Categories.size()); + OutPointerToIndex.clear(); + for (const auto& [Ptr, Info] : m_Categories) + { + OutPointerToIndex[Ptr] = uint32_t(Cats.size()); + Cats.push_back(Info); + } + return Cats; + } + + const eastl::vector<zen::trace_detail::LogEntry>& Entries() const { return m_Entries; } + eastl::vector<zen::trace_detail::LogEntry>& MutableEntries() { return m_Entries; } + + // The shared TraceTiming pointer lets external callers read the trace's + // cycle base / frequency without each analyzer having to own a copy. 
+ const TraceTiming* Timing() const { return m_Timing; } + + struct MessageSpec + { + uint64_t CategoryPointer = 0; + int32_t Line = 0; + uint8_t Verbosity = 0; + std::string File; + std::string FormatString; + }; + + const eastl::hash_map<uint64_t, MessageSpec>& MessageSpecs() const { return m_Specs; } + +private: + // Both Logging.* and ZenLog.* use identical event fields; the difference is + // only how their respective FormatArgs get rendered (printf vs fmt), which + // is decided at message-emit time. + template<typename CategoryEvent> + void IngestCategory(const CategoryEvent& Ev) + { + uint64_t Ptr = Ev.CategoryPointer(); + zen::trace_detail::LogCategoryInfo& Info = m_Categories[Ptr]; + Info.Name = SafeFieldStr(Ev.Name()); + Info.DefaultVerbosity = Ev.DefaultVerbosity(); + } + + template<typename SpecEvent> + void IngestSpec(const SpecEvent& Ev) + { + MessageSpec& Spec = m_Specs[Ev.LogPoint()]; + Spec.CategoryPointer = Ev.CategoryPointer(); + Spec.Line = Ev.Line(); + Spec.Verbosity = Ev.Verbosity(); + Spec.File = SafeFieldStr(Ev.FileName()); + Spec.FormatString = SafeFieldStr(Ev.FormatString()); + } + + void OnLogCategory(const Logging_LogCategory& Ev) { IngestCategory(Ev); } + void OnLogMessageSpec(const Logging_LogMessageSpec& Ev) { IngestSpec(Ev); } + void OnLogMessage(const Logging_LogMessage& Ev) { EmitEntry(Ev.LogPoint(), Ev.Cycle(), Ev.FormatArgs(), /*IsZenLog*/ false); } + + // ZenLog.* shares the same MessageSpec table. LogPoint pointers are emitted + // by distinct processes (zenserver vs. a hypothetical UE trace we imported) + // so the two keyspaces don't collide in practice. 
+ void OnZenLogCategory(const ZenLog_Category& Ev) { IngestCategory(Ev); } + void OnZenLogMessageSpec(const ZenLog_MessageSpec& Ev) { IngestSpec(Ev); } + void OnZenLogMessage(const ZenLog_Message& Ev) { EmitEntry(Ev.LogPoint(), Ev.Cycle(), Ev.FormatArgs(), /*IsZenLog*/ true); } + + void EmitEntry(uint64_t LogPoint, uint64_t Cycle, Array<uint8[]> Args, bool IsZenLog) + { + auto SpecIt = m_Specs.find(LogPoint); + if (SpecIt == m_Specs.end()) + { + return; + } + const MessageSpec& Spec = SpecIt->second; + + uint32_t TimeUs = m_Timing ? m_Timing->CycleToTimeUs(Cycle) : 0; + + std::string Msg = IsZenLog ? zen::logging::FormatLogArgs(std::string_view(Spec.FormatString), Args.get(), Args.get_size()) + : FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size()); + + zen::trace_detail::LogEntry Entry; + Entry.TimeUs = TimeUs; + Entry.Verbosity = Spec.Verbosity; + Entry.Line = Spec.Line; + Entry.File = Spec.File; + Entry.Message = std::move(Msg); + // Use the category pointer temporarily so BuildTraceModel can resolve + // it against the categories table. + Entry.CategoryIndex = SpecToCategoryIndex(Spec.CategoryPointer); + m_Entries.push_back(std::move(Entry)); + } + + uint32_t SpecToCategoryIndex(uint64_t Ptr) + { + // Encoded pointer stuffed into uint32_t so BuildTraceModel can remap. + // Lossy but deterministic: use a stable sequential index per unique + // pointer so we never need the full 64-bit value beyond build time. + auto It = m_CategoryIndex.find(Ptr); + if (It != m_CategoryIndex.end()) + { + return It->second; + } + uint32_t Idx = uint32_t(m_CategoryIndex.size()); + m_CategoryIndex[Ptr] = Idx; + return Idx; + } + +public: + // Mapping from the intermediate index stored in LogEntry::CategoryIndex + // during capture to the real category pointer; BuildTraceModel uses + // this to remap entries against the flattened LogCategories array. 
+ const eastl::hash_map<uint64_t, uint32_t>& CategoryPointerIndex() const { return m_CategoryIndex; } + +private: + const TraceTiming* m_Timing = nullptr; + eastl::hash_map<uint64_t, zen::trace_detail::LogCategoryInfo> m_Categories; + eastl::hash_map<uint64_t, MessageSpec> m_Specs; + eastl::hash_map<uint64_t, uint32_t> m_CategoryIndex; + eastl::vector<zen::trace_detail::LogEntry> m_Entries; +}; + +////////////////////////////////////////////////////////////////////////////// +// Bookmarks and regions +// +// UE's bookmark wire format mirrors LogMessage: a BookmarkSpec introduces +// a (FileName, Line, FormatString) triple keyed by a BookmarkPoint pointer, +// and each Misc.Bookmark event carries that pointer, a cycle, and the same +// FFormatArgsTrace payload the log pipeline already knows how to decode. +// Region events come in two flavours: the legacy name-paired +// RegionBegin/RegionEnd and the newer *WithId variants that pack a unique +// id into the begin event's cycle. + +class BookmarksAnalyzer : public Analyzer +{ +public: + explicit BookmarksAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmarkSpec); + Subs.emplace_back(this, &BookmarksAnalyzer::OnBookmark); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBegin); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionBeginWithId); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEnd); + Subs.emplace_back(this, &BookmarksAnalyzer::OnRegionEndWithId); + } + + eastl::vector<zen::trace_detail::Bookmark>& MutableBookmarks() { return m_Bookmarks; } + eastl::vector<zen::trace_detail::RegionEntry>& MutableRegions() { return m_Regions; } + +private: + struct BookmarkSpec + { + int32_t Line = 0; + std::string File; + std::string FormatString; + }; + + uint32_t CycleToTimeUs(uint64_t Cycle) const { return m_Timing ? 
m_Timing->CycleToTimeUs(Cycle) : 0; } + + void OnBookmarkSpec(const Misc_BookmarkSpec& Ev) + { + BookmarkSpec& Spec = m_Specs[Ev.BookmarkPoint()]; + Spec.Line = Ev.Line(); + Spec.File = SafeFieldStr(Ev.FileName()); + Spec.FormatString = SafeFieldStr(Ev.FormatString()); + } + + void OnBookmark(const Misc_Bookmark& Ev) + { + auto SpecIt = m_Specs.find(Ev.BookmarkPoint()); + if (SpecIt == m_Specs.end()) + { + return; + } + const BookmarkSpec& Spec = SpecIt->second; + + Array<uint8[]> Args = Ev.FormatArgs(); + std::string Text = FormatLogMessage(std::string_view(Spec.FormatString), Args.get(), Args.get_size()); + + zen::trace_detail::Bookmark Out; + Out.TimeUs = CycleToTimeUs(Ev.Cycle()); + Out.Line = Spec.Line; + Out.File = Spec.File; + Out.Text = std::move(Text); + m_Bookmarks.push_back(std::move(Out)); + } + + uint32_t CreatePartialRegion(uint32_t TimeUs, std::string Name, std::string Category) + { + zen::trace_detail::RegionEntry Entry; + Entry.BeginUs = TimeUs; + Entry.EndUs = ~uint32_t(0); // sentinel: still open + Entry.Depth = 0; + Entry.Reserved = 0; + Entry.Name = std::move(Name); + Entry.Category = std::move(Category); + uint32_t Idx = uint32_t(m_Regions.size()); + m_Regions.push_back(std::move(Entry)); + return Idx; + } + + // Decodes the raw array bytes of a RegionName field into a std::string. + // UE emits RegionName as either AnsiString (1-byte) or WideString (2-byte) + // depending on the trace's age -- for the 2-byte case we do the same + // lossy ASCII fold tourist's FieldStr does, which is all we need for + // display. + static std::string DecodeRegionName(const Array<uint8[]>& Data) + { + const uint8_t* p = Data.get(); + size_t size = Data.get_size(); + uint32_t count = Data.get_count(); + if (!p || size == 0 || count == 0) + { + return {}; + } + if (size == count) + { + // 1 byte per element -- AnsiString. + return std::string(reinterpret_cast<const char*>(p), count); + } + if (size == count * 2) + { + // 2 bytes per element -- WideString. 
Lossy ASCII fold. + std::string out; + out.reserve(count); + const char16_t* w = reinterpret_cast<const char16_t*>(p); + for (uint32_t i = 0; i < count; ++i) + { + out.push_back(w[i] < 0x80 ? char(w[i]) : '?'); + } + return out; + } + return {}; + } + + void OnRegionBegin(const Misc_RegionBegin& Ev) + { + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + Array<uint8[]> NameArr = Ev.RegionName(); + std::string Name = DecodeRegionName(NameArr); + std::string Category = DecodeRegionName(Ev.Category()); + uint32_t Idx = CreatePartialRegion(TimeUs, Name, std::move(Category)); + m_OpenByName[Name].push_back(Idx); + } + + void OnRegionBeginWithId(const Misc_RegionBeginWithId& Ev) + { + // Despite its name, CycleAndId is just Cycles64() -- a plain 64-bit + // cycle count that doubles as a unique region identifier. The caller + // keeps the returned value and passes it back as RegionId at end. + uint64_t CycleAndId = Ev.CycleAndId(); + uint32_t TimeUs = CycleToTimeUs(CycleAndId); + Array<uint8[]> NameArr = Ev.RegionName(); + std::string Name = DecodeRegionName(NameArr); + std::string Category = DecodeRegionName(Ev.Category()); + uint32_t Idx = CreatePartialRegion(TimeUs, std::move(Name), std::move(Category)); + m_OpenById[CycleAndId] = Idx; + } + + void OnRegionEnd(const Misc_RegionEnd& Ev) + { + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + Array<uint8[]> NameArr = Ev.RegionName(); + std::string Name = DecodeRegionName(NameArr); + auto It = m_OpenByName.find(Name); + if (It == m_OpenByName.end() || It->second.empty()) + { + return; + } + uint32_t Idx = It->second.back(); + It->second.pop_back(); + m_Regions[Idx].EndUs = TimeUs; + } + + void OnRegionEndWithId(const Misc_RegionEndWithId& Ev) + { + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + uint64_t Id = Ev.RegionId(); + auto It = m_OpenById.find(Id); + if (It == m_OpenById.end()) + { + return; + } + m_Regions[It->second].EndUs = TimeUs; + m_OpenById.erase(It); + } + + const TraceTiming* m_Timing = nullptr; + 
eastl::hash_map<uint64_t, BookmarkSpec> m_Specs; + eastl::vector<zen::trace_detail::Bookmark> m_Bookmarks; + eastl::vector<zen::trace_detail::RegionEntry> m_Regions; + eastl::hash_map<std::string, eastl::vector<uint32_t>> m_OpenByName; + eastl::hash_map<uint64_t, uint32_t> m_OpenById; +}; + +////////////////////////////////////////////////////////////////////////////// +// CsvProfiler analyzer -- parses CSV stat categories, definitions, timing, +// custom values, events, capture markers, and metadata. + +class CsvProfilerAnalyzer : public Analyzer +{ +public: + explicit CsvProfilerAnalyzer(const TraceTiming* Timing = nullptr) : m_Timing(Timing) {} + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnRegisterCategory); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineInlineStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnDefineDeclaredStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginExclusiveStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndExclusiveStat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatInt); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnCustomStatFloat); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEvent); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnBeginCapture); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnEndCapture); + Subs.emplace_back(this, &CsvProfilerAnalyzer::OnMetadata); + } + + eastl::vector<zen::trace_detail::TraceModel::CsvCategory>& MutableCategories() { return m_Categories; } + eastl::vector<zen::trace_detail::TraceModel::CsvStatDef>& MutableStatDefs() { return m_StatDefs; } + eastl::vector<zen::trace_detail::TraceModel::CsvEvent>& MutableEvents() { return m_Events; } + eastl::vector<zen::trace_detail::TraceModel::CsvMeta>& MutableMetadata() { return m_Metadata; } + + // Build the 
per-stat+thread time series from the accumulated samples. + eastl::vector<zen::trace_detail::TraceModel::CsvSeries> BuildTimeSeries() + { + eastl::vector<zen::trace_detail::TraceModel::CsvSeries> Result; + for (auto& [Key, Samples] : m_SeriesMap) + { + eastl::sort(Samples.begin(), Samples.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; }); + zen::trace_detail::TraceModel::CsvSeries S; + S.StatId = Key.StatId; + S.ThreadId = Key.ThreadId; + S.Samples = std::move(Samples); + Result.push_back(std::move(S)); + } + return Result; + } + +private: + uint32_t CycleToTimeUs(uint64_t Cycle) const + { + if (!m_Timing || m_Timing->Freq == 0) + { + return 0; + } + uint64_t Elapsed = (Cycle >= m_Timing->Base) ? (Cycle - m_Timing->Base) : 0; + return uint32_t(Elapsed * 1'000'000 / m_Timing->Freq); + } + + static std::string DecodeAnsiName(const Array<uint8[]>& Data) + { + const uint8_t* P = Data.get(); + size_t Size = Data.get_size(); + if (!P || Size == 0) + { + return {}; + } + return std::string(reinterpret_cast<const char*>(P), Size); + } + + static std::string DecodeWideName(const Array<uint8[]>& Data) + { + const uint8_t* P = Data.get(); + size_t Size = Data.get_size(); + uint32_t Count = Data.get_count(); + if (!P || Size == 0 || Count == 0) + { + return {}; + } + uint32_t ElemSize = Data.get_element_size(); + if (ElemSize == 2) + { + std::string Out; + Out.reserve(Count); + const char16_t* W = reinterpret_cast<const char16_t*>(P); + for (uint32_t I = 0; I < Count; ++I) + { + Out.push_back(W[I] < 0x80 ? 
char(W[I]) : '?'); + } + return Out; + } + return std::string(reinterpret_cast<const char*>(P), Size); + } + + struct SeriesKey + { + uint64_t StatId; + uint32_t ThreadId; + bool operator==(const SeriesKey& O) const { return StatId == O.StatId && ThreadId == O.ThreadId; } + }; + struct SeriesKeyHash + { + size_t operator()(const SeriesKey& K) const + { + return eastl::hash<uint64_t>{}(K.StatId) ^ (eastl::hash<uint32_t>{}(K.ThreadId) * 2654435761u); + } + }; + + void AddSample(uint64_t StatId, uint32_t ThreadId, uint32_t TimeUs, float Value) + { + m_SeriesMap[SeriesKey{StatId, ThreadId}].push_back({TimeUs, Value}); + } + + // -- Event handlers ----------------------------------------------- + + void OnRegisterCategory(const CsvProfiler_RegisterCategory& Ev) + { + zen::trace_detail::TraceModel::CsvCategory Cat; + Cat.Index = Ev.Index(); + Cat.Name = DecodeAnsiName(Ev.Name()); + m_Categories.push_back(std::move(Cat)); + } + + void OnDefineInlineStat(const CsvProfiler_DefineInlineStat& Ev) + { + DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name())); + } + + void OnDefineDeclaredStat(const CsvProfiler_DefineDeclaredStat& Ev) + { + DefineStat(Ev.StatId(), Ev.CategoryIndex(), DecodeAnsiName(Ev.Name())); + } + + void DefineStat(uint64_t StatId, int32_t CategoryIndex, std::string Name) + { + if (m_StatIdToIndex.count(StatId)) + { + return; // already defined + } + m_StatIdToIndex[StatId] = uint32_t(m_StatDefs.size()); + zen::trace_detail::TraceModel::CsvStatDef Def; + Def.StatId = StatId; + Def.CategoryIndex = CategoryIndex; + Def.Name = std::move(Name); + m_StatDefs.push_back(std::move(Def)); + } + + void OnBeginStat(const CsvProfiler_BeginStat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs); + } + + void OnEndStat(const CsvProfiler_EndStat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + auto Key = 
SeriesKey{Ev.StatId(), ThreadId}; + auto It = m_OpenStacks.find(Key); + if (It == m_OpenStacks.end() || It->second.empty()) + { + return; + } + uint32_t BeginUs = It->second.back(); + It->second.pop_back(); + float DurationMs = float(TimeUs - BeginUs) / 1000.0f; + AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs); + } + + void OnBeginExclusiveStat(const CsvProfiler_BeginExclusiveStat& Ev) + { + // For basic support, treat exclusive stats like regular stats. + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + m_OpenStacks[{Ev.StatId(), ThreadId}].push_back(TimeUs); + } + + void OnEndExclusiveStat(const CsvProfiler_EndExclusiveStat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + auto Key = SeriesKey{Ev.StatId(), ThreadId}; + auto It = m_OpenStacks.find(Key); + if (It == m_OpenStacks.end() || It->second.empty()) + { + return; + } + uint32_t BeginUs = It->second.back(); + It->second.pop_back(); + float DurationMs = float(TimeUs - BeginUs) / 1000.0f; + AddSample(Ev.StatId(), ThreadId, BeginUs, DurationMs); + } + + void OnCustomStatInt(const CsvProfiler_CustomStatInt& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + AddSample(Ev.StatId(), ThreadId, TimeUs, float(Ev.Value())); + } + + void OnCustomStatFloat(const CsvProfiler_CustomStatFloat& Ev) + { + uint32_t ThreadId = Ev.get_thread_id(); + uint32_t TimeUs = CycleToTimeUs(Ev.Cycle()); + AddSample(Ev.StatId(), ThreadId, TimeUs, Ev.Value()); + } + + void OnEvent(const CsvProfiler_Event& Ev) + { + zen::trace_detail::TraceModel::CsvEvent E; + E.TimeUs = CycleToTimeUs(Ev.Cycle()); + E.CategoryIndex = Ev.CategoryIndex(); + E.Text = DecodeWideName(Ev.Text()); + m_Events.push_back(std::move(E)); + } + + void OnBeginCapture(const CsvProfiler_BeginCapture& Ev) { m_CaptureStartUs = CycleToTimeUs(Ev.Cycle()); } + + void OnEndCapture(const CsvProfiler_EndCapture& Ev) { m_CaptureEndUs = 
CycleToTimeUs(Ev.Cycle()); } + + void OnMetadata(const CsvProfiler_Metadata& Ev) + { + zen::trace_detail::TraceModel::CsvMeta M; + M.Key = DecodeWideName(Ev.Key()); + M.Value = DecodeWideName(Ev.Value()); + m_Metadata.push_back(std::move(M)); + } + + const TraceTiming* m_Timing = nullptr; + + eastl::vector<zen::trace_detail::TraceModel::CsvCategory> m_Categories; + eastl::vector<zen::trace_detail::TraceModel::CsvStatDef> m_StatDefs; + eastl::hash_map<uint64_t, uint32_t> m_StatIdToIndex; + + // Timing stacks: (StatId, ThreadId) -> stack of begin times + eastl::hash_map<SeriesKey, eastl::vector<uint32_t>, SeriesKeyHash> m_OpenStacks; + + // Accumulated samples: (StatId, ThreadId) -> samples + eastl::hash_map<SeriesKey, eastl::vector<zen::trace_detail::TraceModel::CsvSample>, SeriesKeyHash> m_SeriesMap; + + eastl::vector<zen::trace_detail::TraceModel::CsvEvent> m_Events; + eastl::vector<zen::trace_detail::TraceModel::CsvMeta> m_Metadata; + + uint32_t m_CaptureStartUs = 0; + uint32_t m_CaptureEndUs = 0; +}; + +////////////////////////////////////////////////////////////////////////////// +// Analyzers + +class CpuAnalyzer : public Analyzer +{ +public: + CpuAnalyzer(EventStats& Stats, NameDepot& Names, const MetadataRegistry* Metadata) + : m_Names(Names) + , m_Stats(Stats) + , m_Metadata(Metadata) + { + Names.Add(NO_NAME, "???"); + } + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &CpuAnalyzer::OnNewTrace); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuSpec); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatch); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV2); + Subs.emplace_back(this, &CpuAnalyzer::OnCpuBatchV3); + } + +private: + static constexpr uint32 NO_INDEX = ~0u; + static constexpr uint64 NO_NAME = ~0ull; + static constexpr uint32 METADATA_BIT = 0x8000'0000u; + + uint64 ResolveNameHash(uint32 PackedId) + { + const bool IsMetadata = (PackedId & METADATA_BIT) != 0; + const uint32 Id = PackedId & ~METADATA_BIT; + + if 
(IsMetadata && m_Metadata) + { + auto CachedIt = m_MetadataNames.find(Id); + if (CachedIt != m_MetadataNames.end()) + { + return CachedIt->second; + } + + const MetadataEntry* Entry = m_Metadata->Lookup(Id); + if (Entry) + { + auto BaseIt = m_Specs.find(Entry->SpecId); + StringView BaseName = (BaseIt != m_Specs.end()) ? m_Names.Get(BaseIt->second) : StringView("???"); + std::string Formatted(BaseName); + std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size()); + if (!Values.empty()) + { + Formatted += " - "; + Formatted += Values; + } + uint64 Hash = m_Names.Add(StringView(Formatted)); + m_MetadataNames[Id] = Hash; + return Hash; + } + return NO_NAME; + } + + auto It = m_Specs.find(Id); + return (It != m_Specs.end()) ? It->second : NO_NAME; + } + + struct EventStack + { + uint32 Tail = NO_INDEX; + }; + + struct ScopeEvent + { + uint32 Id; + uint32 TimeUs; + union + { + uint32 Next; + uint32 Index; + }; + }; + + struct EventPool + { + uint32 Alloc() + { + if (m_FreeHead == NO_INDEX) + { + uint32 Idx = uint32(m_Pool.size()); + m_Pool.push_back({.Index = Idx}); + return Idx; + } + uint32 Idx = m_FreeHead; + m_FreeHead = m_Pool[Idx].Index; + return Idx; + } + + void Free(uint32 Idx) + { + m_Pool[Idx].Index = m_FreeHead; + m_FreeHead = Idx; + } + + ScopeEvent& Get(uint32 Idx) { return m_Pool[Idx]; } + + eastl::vector<ScopeEvent> m_Pool; + uint32 m_FreeHead = NO_INDEX; + }; + + void OnNewTrace(const $Trace_NewTrace& NewTrace) + { + m_Freq = NewTrace.CycleFrequency(); + m_Base = NewTrace.StartCycle(); + m_UsDiv = m_Freq / 1'000'000; + if (m_UsDiv == 0) + { + m_UsDiv = 1; + } + m_UsDivRecip = ReciprocalU64(m_UsDiv); + } + + void OnCpuSpec(const CpuProfiler_EventSpec& Spec) + { + uint32 SpecId = Spec.Id(); + FieldStr SpecName = Spec.Name(); + + StringView NameView = SpecName.as_view(); + + if (NameView.starts_with("Frame ")) + { + NameView = "Frame"; + } + if (size_t Pos = NameView.find("\""); Pos != StringView::npos) + { + NameView = 
NameView.substr(0, Pos); + } + if (size_t Pos = NameView.find("\\"); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + + m_Specs[SpecId] = m_Names.Add(NameView); + } + + void OnCpuBatch(const CpuProfiler_EventBatch& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/1, ThreadId, Data); + } + + void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/2, ThreadId, Data); + } + + void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/3, ThreadId, Data); + } + + // Decodes a CpuProfiler scope batch. Mirrors UE's reference + // TraceServices/.../CpuProfilerTraceAnalysis.cpp ProcessBuffer / + // ProcessBufferV2. + // + // Version 1 (`CpuProfiler.EventBatch`): cycle is `value >> 1`; bit 0 is + // IsEnter; IsEnter events carry a SpecId varint. + // + // Version 2 (`CpuProfiler.EventBatchV2`, UE 5.1..5.5) and Version 3 + // (`CpuProfiler.EventBatchV3`, UE 5.6+): cycle is `value >> 2`; bit 0 is + // IsEnter, bit 1 is IsCoroutine. Coroutine begin events carry CoroutineId + // and TimerScopeDepth varints; coroutine end events carry a single + // TimerScopeDepth varint. V3 additionally reserves the low bit of the + // SpecId to mark metadata-bearing timers, so SpecId must be shifted + // right by 1 to recover the actual spec id. 
+ void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data) + { + const uint8* Cursor = Data.get(); + const uint8* End = Cursor + Data.get_size(); + + auto Decode = [&]() { + uint64 Value = 0; + for (uint32 I = 1, J = 0; I; J += 7) + { + I = *Cursor++; + Value |= uint64(I & 0x7f) << J; + I &= 0x80; + } + return Value; + }; + + if (ThreadId >= m_Threads.size()) + { + m_Threads.resize(ThreadId + 1); + } + EventStack& Stack = m_Threads[ThreadId]; + + const uint32 CycleShift = (Version == 1) ? 1u : 2u; + + uint64 Base = m_Base; + + uint64 Cycle = ~Base + 1; + while (Cursor < End) + { + uint64 Value = Decode(); + uint32 IsEnter = (Value & 0b01); + + if (Version > 1 && (Value & 0b10)) + { + // Coroutine event -- not visualised, but the trailing varints + // still need to be consumed so we stay in sync with the + // stream. + if (IsEnter) + { + (void)Decode(); // CoroutineId + (void)Decode(); // TimerScopeDepth + } + else + { + (void)Decode(); // TimerScopeDepth + } + continue; + } + + uint64 EventId = IsEnter ? Decode() : ~0ull; + + Cycle += (Value >> CycleShift); + uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); + + if (IsEnter) + { + uint32 ScopeId = uint32(EventId); + bool IsMetadata = false; + if (Version == 3) + { + IsMetadata = (ScopeId & 1u) != 0; + ScopeId >>= 1; + } + uint32 EvIdx = m_Events.Alloc(); + ScopeEvent& Ev = m_Events.Get(EvIdx); + // Pack the metadata flag in the high bit so the close path + // can distinguish metadata-id scopes from regular ones without + // an extra field. + Ev.Id = IsMetadata ? 
(ScopeId | 0x8000'0000u) : ScopeId; + Ev.TimeUs = TimeUs; + Ev.Next = Stack.Tail; + Stack.Tail = EvIdx; + continue; + } + + if (Stack.Tail == NO_INDEX) + { + continue; + } + + ScopeEvent& Ev = m_Events.Get(Stack.Tail); + uint32 DurationUs = TimeUs - Ev.TimeUs; + uint64 NameHash = ResolveNameHash(Ev.Id); + m_Stats.Record(NameHash, DurationUs); + + uint32 NextIdx = Ev.Next; + m_Events.Free(Stack.Tail); + Stack.Tail = NextIdx; + } + } + + uint64 m_Freq = 0; + uint64 m_Base = 0; + uint64 m_UsDiv = 1; + ReciprocalU64 m_UsDivRecip; + eastl::hash_map<uint32, uint64> m_Specs; + NameDepot& m_Names; + EventPool m_Events; + eastl::vector<EventStack> m_Threads; + EventStats& m_Stats; + const MetadataRegistry* m_Metadata = nullptr; + // Caches the resolved name hash for each MetadataId so we don't + // re-format the same CBOR payload on every scope-close. + eastl::hash_map<uint32, uint64> m_MetadataNames; +}; + +////////////////////////////////////////////////////////////////////////////// +// Per-event CPU scope capture for the interactive trace viewer. +// +// Mirrors CpuAnalyzer's decode loop but instead of aggregating statistics, +// it records one TimelineScope per closed CPU scope so the viewer can draw a +// flame graph. Scope names are interned into a flat vector so each event only +// stores a compact uint32 NameId. 
+ +class TimelineAnalyzer : public Analyzer +{ +public: + explicit TimelineAnalyzer(const MetadataRegistry* Metadata = nullptr, TraceTiming* SharedTiming = nullptr) + : m_SharedTiming(SharedTiming) + , m_Metadata(Metadata) + { + } + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &TimelineAnalyzer::OnNewTrace); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuSpec); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatch); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV2); + Subs.emplace_back(this, &TimelineAnalyzer::OnCpuBatchV3); + } + + struct ThreadData + { + eastl::vector<zen::trace_detail::TimelineScope> Scopes; + // Open-scope stack: parallel arrays keeping begin time and name id. + eastl::vector<uint32_t> OpenBeginUs; + eastl::vector<uint32_t> OpenNameIds; + }; + + const eastl::vector<std::string>& ScopeNames() const { return m_ScopeNames; } + const eastl::map<uint32_t, ThreadData>& Threads() const { return m_Threads; } + uint32_t MinBeginUs() const { return m_MinBeginUs; } + uint32_t MaxEndUs() const { return m_MaxEndUs; } + +private: + static constexpr uint32_t INVALID_NAME = ~0u; + + uint32_t InternName(StringView Name) + { + String Key(Name); + auto [It, Inserted] = m_NameIndex.try_emplace(std::move(Key), 0); + if (Inserted) + { + It->second = uint32_t(m_ScopeNames.size()); + m_ScopeNames.emplace_back(Name); + } + return It->second; + } + + void OnNewTrace(const $Trace_NewTrace& NewTrace) + { + m_Freq = NewTrace.CycleFrequency(); + m_Base = NewTrace.StartCycle(); + m_UsDiv = m_Freq / 1'000'000; + if (m_UsDiv == 0) + { + m_UsDiv = 1; + } + m_UsDivRecip = ReciprocalU64(m_UsDiv); + if (m_SharedTiming) + { + m_SharedTiming->Freq = m_Freq; + m_SharedTiming->Base = m_Base; + m_SharedTiming->UsDiv = m_UsDiv; + } + } + + void OnCpuSpec(const CpuProfiler_EventSpec& Spec) + { + uint32 SpecId = Spec.Id(); + FieldStr SpecName = Spec.Name(); + + StringView NameView = SpecName.as_view(); + + if 
(NameView.starts_with("Frame ")) + { + NameView = "Frame"; + } + if (size_t Pos = NameView.find("\""); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + if (size_t Pos = NameView.find("\\"); Pos != StringView::npos) + { + NameView = NameView.substr(0, Pos); + } + + m_Specs[SpecId] = InternName(NameView); + } + + void OnCpuBatch(const CpuProfiler_EventBatch& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/1, ThreadId, Data); + } + + void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/2, ThreadId, Data); + } + + void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) + { + uint32 ThreadId = Batch.get_thread_id(); + Array<uint8[]> Data = Batch.Data(); + AbsorbBatch(/*Version=*/3, ThreadId, Data); + } + + TraceTiming* m_SharedTiming = nullptr; + + // See CpuAnalyzer::AbsorbBatch for a detailed description of the wire + // format for each version. + void AbsorbBatch(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data) + { + const uint8* Cursor = Data.get(); + const uint8* End = Cursor + Data.get_size(); + + auto Decode = [&]() { + uint64 Value = 0; + for (uint32 I = 1, J = 0; I; J += 7) + { + I = *Cursor++; + Value |= uint64(I & 0x7f) << J; + I &= 0x80; + } + return Value; + }; + + ThreadData& Thread = m_Threads[ThreadId]; + + const uint32 CycleShift = (Version == 1) ? 1u : 2u; + + uint64 Base = m_Base; + + uint64 Cycle = ~Base + 1; + while (Cursor < End) + { + uint64 Value = Decode(); + uint32 IsEnter = (Value & 0b01); + + if (Version > 1 && (Value & 0b10)) + { + // Coroutine event -- consume the trailing varints so we stay + // in sync with the stream, but drop the event on the floor. 
+ if (IsEnter) + { + (void)Decode(); // CoroutineId + (void)Decode(); // TimerScopeDepth + } + else + { + (void)Decode(); // TimerScopeDepth + } + continue; + } + + uint64 EventId = IsEnter ? Decode() : ~0ull; + + Cycle += (Value >> CycleShift); + uint32 TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); + + if (IsEnter) + { + uint32 ScopeId = uint32(EventId); + bool IsMetadata = false; + if (Version == 3) + { + IsMetadata = (ScopeId & 1u) != 0; + ScopeId >>= 1; + } + uint32_t NameId = IsMetadata ? ResolveMetadataNameId(ScopeId) : LookupSpecNameId(ScopeId); + Thread.OpenBeginUs.push_back(TimeUs); + Thread.OpenNameIds.push_back(NameId); + continue; + } + + if (Thread.OpenBeginUs.empty()) + { + continue; + } + + uint32_t BeginUs = Thread.OpenBeginUs.back(); + uint32_t NameId = Thread.OpenNameIds.back(); + Thread.OpenBeginUs.pop_back(); + Thread.OpenNameIds.pop_back(); + + if (NameId == INVALID_NAME) + { + continue; + } + + uint16_t Depth = uint16_t(Thread.OpenBeginUs.size()); + Thread.Scopes.push_back(zen::trace_detail::TimelineScope{ + .BeginUs = BeginUs, + .DurationUs = TimeUs - BeginUs, + .NameId = NameId, + .Depth = Depth, + .MergeCount = 0, + }); + + if (BeginUs < m_MinBeginUs) + { + m_MinBeginUs = BeginUs; + } + if (TimeUs > m_MaxEndUs) + { + m_MaxEndUs = TimeUs; + } + } + } + + uint32_t LookupSpecNameId(uint32 SpecId) const + { + auto It = m_Specs.find(SpecId); + return (It != m_Specs.end()) ? It->second : INVALID_NAME; + } + + uint32_t ResolveMetadataNameId(uint32 MetadataId) + { + if (!m_Metadata) + { + return INVALID_NAME; + } + + auto CachedIt = m_MetadataNameIds.find(MetadataId); + if (CachedIt != m_MetadataNameIds.end()) + { + return CachedIt->second; + } + + const MetadataEntry* Entry = m_Metadata->Lookup(MetadataId); + if (!Entry) + { + return INVALID_NAME; + } + + auto BaseIt = m_Specs.find(Entry->SpecId); + const std::string& BaseName = (BaseIt != m_Specs.end()) ? 
m_ScopeNames[BaseIt->second] : kUnknownName; + std::string Formatted = BaseName; + std::string Values = FormatMetadataValues(Entry->Bytes.data(), Entry->Bytes.size()); + if (!Values.empty()) + { + Formatted += " - "; + Formatted += Values; + } + + uint32_t NameId = InternName(StringView(Formatted.data(), Formatted.size())); + m_MetadataNameIds[MetadataId] = NameId; + return NameId; + } + + static inline const std::string kUnknownName{"???"}; + + uint64 m_Freq = 0; + uint64 m_Base = 0; + uint64 m_UsDiv = 1; + ReciprocalU64 m_UsDivRecip; + uint32_t m_MinBeginUs = ~0u; + uint32_t m_MaxEndUs = 0; + eastl::hash_map<uint32, uint32_t> m_Specs; + eastl::hash_map<String, uint32_t> m_NameIndex; + eastl::vector<std::string> m_ScopeNames; + eastl::map<uint32_t, ThreadData> m_Threads; + const MetadataRegistry* m_Metadata = nullptr; + eastl::hash_map<uint32_t, uint32_t> m_MetadataNameIds; +}; + +////////////////////////////////////////////////////////////////////////////// + +} // anonymous namespace + +std::string +zen::trace_detail::SafeFieldStr(FieldStr&& Field) +{ + try + { + std::string_view View = Field.as_view(); + // Some trace writers include the NUL terminator in the field length + // (see UE trace ToAnsiCheap / ThreadRegister). Strip any trailing NULs + // so downstream consumers don't see garbage. + while (!View.empty() && View.back() == '\0') + { + View.remove_suffix(1); + } + return std::string(View); + } + catch (const std::exception& E) + { + ZEN_DEBUG("Failed to decode trace string field: {}", E.what()); + return {}; + } +} + +namespace { + +// Derive a thread group name from a thread name by stripping a trailing +// integer suffix (optionally preceded by a separator). E.g. "IoPool Worker 3" +// -> "IoPool Worker", "DbWorker_12" -> "DbWorker", "HttpThread42" -> +// "HttpThread". Returns an empty string if no suffix is present or the +// resulting prefix would be empty. 
// Derives a thread group name from a thread name by removing a trailing run
// of decimal digits, then at most one separator character, then any
// remaining trailing spaces/tabs. Examples: "IoPool Worker 3" ->
// "IoPool Worker", "DbWorker_12" -> "DbWorker", "HttpThread42" ->
// "HttpThread". Yields an empty string when the name has no trailing digits
// or when nothing is left after stripping.
static std::string
SynthesizeThreadGroupFromName(std::string_view Name)
{
    const auto IsDigit = [](char Ch) { return Ch >= '0' && Ch <= '9'; };
    const auto IsBlank = [](char Ch) { return Ch == ' ' || Ch == '\t'; };

    std::string_view Prefix = Name;
    while (!Prefix.empty() && IsDigit(Prefix.back()))
    {
        Prefix.remove_suffix(1);
    }

    if (Prefix.size() == Name.size())
    {
        return {};  // no trailing digits
    }

    // Drop a single separator that sat between the prefix and the number.
    static constexpr std::string_view kSeparators = "_-.:#/ \t";
    if (!Prefix.empty() && kSeparators.find(Prefix.back()) != std::string_view::npos)
    {
        Prefix.remove_suffix(1);
    }

    while (!Prefix.empty() && IsBlank(Prefix.back()))
    {
        Prefix.remove_suffix(1);
    }

    // An empty prefix means the name was purely numeric (plus separators).
    return std::string(Prefix);
}
(!m_GroupStack.empty()) + { + m_GroupStack.pop_back(); + } + } + + void OnThreadInfo(const $Trace_ThreadInfo& Ev) + { + uint32_t Tid = Ev.ThreadId(); + zen::trace_detail::ThreadInfoEntry& Info = ThreadNames[Tid]; + Info.ThreadId = Tid; + Info.Name = SafeFieldStr(Ev.Name()); + Info.GroupName = m_GroupStack.empty() ? "" : m_GroupStack.back(); + if (Info.GroupName.empty()) + { + Info.GroupName = SynthesizeThreadGroupFromName(Info.Name); + } + Info.SystemId = Ev.SystemId(); + Info.SortHint = Ev.SortHint(); + } + + void OnChannelAnnounce(const Trace_ChannelAnnounce& Ev) + { + uint32_t Id = Ev.Id(); + zen::trace_detail::ChannelInfo& Info = Channels[Id]; + Info.Name = SafeFieldStr(Ev.Name()); + Info.Enabled = Ev.IsEnabled() != 0; + Info.ReadOnly = Ev.ReadOnly() != 0; + } + + void OnChannelToggle(const Trace_ChannelToggle& Ev) + { + uint32_t Id = Ev.Id(); + auto It = Channels.find(Id); + if (It != Channels.end()) + { + It->second.Enabled = Ev.IsEnabled() != 0; + } + } +}; + +////////////////////////////////////////////////////////////////////////////// +// Module analyzer +// +// Captures Diagnostics.Module{Init,Load,Unload} so TraceModel::Modules has a +// populated list of loaded DLLs. These events are NoSync+Important so they +// don't carry a Cycle field (no load/unload timestamps available) but they +// do survive reconnects and the trim filter. The analyzer is intentionally +// passive -- we stash the raw data here and leave symbolication and memory +// attribution to whatever consumes TraceModel::Modules later. 
+ +class ModuleAnalyzer : public Analyzer +{ +public: + eastl::map<uint64_t, zen::trace_detail::ModuleInfo> ModulesByBase; + std::string SymbolFormat; + uint8_t BaseShift = 0; + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &ModuleAnalyzer::OnModuleInit); + Subs.emplace_back(this, &ModuleAnalyzer::OnModuleLoad); + Subs.emplace_back(this, &ModuleAnalyzer::OnModuleUnload); + } + +private: + void OnModuleInit(const Diagnostics_ModuleInit& Ev) + { + SymbolFormat = SafeFieldStr(Ev.SymbolFormat()); + BaseShift = Ev.ModuleBaseShift(); + } + + void OnModuleLoad(const Diagnostics_ModuleLoad& Ev) + { + // Older traces stored Base as a 32-bit value shifted right by + // ModuleBaseShift to fit. Modern traces set BaseShift to zero and + // Base is a full 64-bit address; applying the shift then is a + // harmless no-op. + uint64_t Base = uint64_t(Ev.Base()) << BaseShift; + + zen::trace_detail::ModuleInfo& Info = ModulesByBase[Base]; + Info.FullPath = SafeFieldStr(Ev.Name()); + Info.Base = Base; + Info.Size = Ev.Size(); + Info.Unloaded = false; + + // Extract the basename without pulling in the whole filesystem + // library for a single operation. UE emits forward- or backslashes + // depending on platform, so handle both. + const std::string& Path = Info.FullPath; + size_t Cut = Path.find_last_of("/\\"); + Info.Name = (Cut == std::string::npos) ? 
Path : Path.substr(Cut + 1); + + ::Array<uint8[]> ImageId = Ev.ImageId(); + const uint8* IdPtr = ImageId.get(); + const uint32 IdSize = ImageId.get_size(); + Info.ImageId.assign(IdPtr, IdPtr + IdSize); + } + + void OnModuleUnload(const Diagnostics_ModuleUnload& Ev) + { + uint64_t Base = uint64_t(Ev.Base()) << BaseShift; + auto It = ModulesByBase.find(Base); + if (It != ModulesByBase.end()) + { + It->second.Unloaded = true; + } + } +}; + +////////////////////////////////////////////////////////////////////////////// +// Trim analyzer +// +// Decodes CpuProfiler batch events to extract per-batch timestamp ranges AND +// to track open/close scope bracketing per thread. The scope tracker lets us +// identify "must-keep" packets: any packet containing the Leave event for a +// scope whose Enter was at or before the user's trim EndUs. Preserving those +// Leaves is what lets the downstream TimelineAnalyzer (and Unreal Insights) +// still render long-running scopes that span the window end -- if we dropped +// their closing event the scope would sit unmatched on the open-scope stack +// and not render at all. +// +// Attribution to raw packet indices is approximate due to Tourist's internal +// per-thread packet buffering; the trim driver processes the trace one packet +// at a time (bundle of size 1) to keep it as tight as possible. Packets that +// never get an attributed time range are conservatively retained by the +// caller. + +class TrimAnalyzer : public Analyzer +{ +public: + // Maps packet index (matching both Tourist's Packet::get_index() and our + // raw walker's vector position) -> (MinUs, MaxUs) of all events attributed + // to that packet. + struct Range + { + uint32_t MinUs = ~0u; + uint32_t MaxUs = 0; + }; + + eastl::hash_map<uint32_t, Range> PacketRanges; + + // Maps thread id -> the maximum packet index that contains a Leave event + // for a scope whose matching Enter was at or before EndUs. 
These packets + // must be retained so the downstream analyzer can close the scope. + eastl::hash_map<uint32_t, uint32_t> MustKeepPacketByThread; + + // Set by the trim driver from TraceTrimArgs::EndSec before the analysis + // pass begins. Used by the scope tracker to decide which leaves are + // "must keep". + uint32_t EndUs = ~0u; + + // Updated by the trim driver before each Proto.read call when the next + // packet is on a normal thread. Maps normal-thread id -> the most + // recently scattered packet's index. + eastl::hash_map<uint32_t, uint32_t> LastPacketIndexByThread; + + void subscribe(Vector<Subscription>& Subs) override + { + Subs.emplace_back(this, &TrimAnalyzer::OnNewTrace); + Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatch); + Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV2); + Subs.emplace_back(this, &TrimAnalyzer::OnCpuBatchV3); + } + + bool HasTimeBase() const { return m_Freq != 0; } + +private: + void OnNewTrace(const $Trace_NewTrace& NewTrace) + { + m_Freq = NewTrace.CycleFrequency(); + m_Base = NewTrace.StartCycle(); + m_UsDiv = (m_Freq > 0) ? (m_Freq / 1'000'000) : 1; + if (m_UsDiv == 0) + { + m_UsDiv = 1; + } + m_UsDivRecip = ReciprocalU64(m_UsDiv); + } + + void OnCpuBatch(const CpuProfiler_EventBatch& Batch) { AbsorbBatchTimes(/*Version=*/1, Batch.get_thread_id(), Batch.Data()); } + + void OnCpuBatchV2(const CpuProfiler_EventBatchV2& Batch) { AbsorbBatchTimes(/*Version=*/2, Batch.get_thread_id(), Batch.Data()); } + + void OnCpuBatchV3(const CpuProfiler_EventBatchV3& Batch) { AbsorbBatchTimes(/*Version=*/3, Batch.get_thread_id(), Batch.Data()); } + + // Decodes cycle deltas in a CpuProfiler batch to find the timestamp range + // AND to maintain a per-thread open-scope stack. Mirrors the wire format + // documented in CpuAnalyzer::AbsorbBatch. Scope ids are decoded just far + // enough to keep the varint cursor in sync; we don't store them. 
+ void AbsorbBatchTimes(uint32 Version, uint32 ThreadId, const Array<uint8[]>& Data) + { + if (m_Freq == 0) + { + return; + } + + auto It = LastPacketIndexByThread.find(ThreadId); + if (It == LastPacketIndexByThread.end()) + { + return; + } + const uint32_t PacketIndex = It->second; + + // The open-scope stack is maintained across every batch on a thread. + // Each entry stores the Enter time in microseconds from trace start. + eastl::vector<uint32_t>& OpenStack = m_OpenScopes[ThreadId]; + + const uint8* Cursor = Data.get(); + const uint8* End = Cursor + Data.get_size(); + + auto Decode = [&]() { + uint64 Value = 0; + for (uint32 I = 1, J = 0; I; J += 7) + { + I = *Cursor++; + Value |= uint64(I & 0x7f) << J; + I &= 0x80; + } + return Value; + }; + + const uint32 CycleShift = (Version == 1) ? 1u : 2u; + const uint64 Base = m_Base; + + uint32_t BatchMinUs = ~0u; + uint32_t BatchMaxUs = 0; + bool HasAny = false; + + uint64 Cycle = ~Base + 1; + while (Cursor < End) + { + uint64 Value = Decode(); + uint32 IsEnter = (Value & 0b01); + + if (Version > 1 && (Value & 0b10)) + { + // Coroutine event -- consume the trailing varints and skip. + // These don't participate in the scope bracket tracking; the + // existing TimelineAnalyzer ignores them for the same reason. 
+ if (IsEnter) + { + (void)Decode(); // CoroutineId + (void)Decode(); // TimerScopeDepth + } + else + { + (void)Decode(); // TimerScopeDepth + } + continue; + } + + if (IsEnter) + { + (void)Decode(); // EventId / SpecId + } + + Cycle += (Value >> CycleShift); + uint32_t TimeUs = m_UsDivRecip.Divide(Cycle + (m_UsDiv >> 1)); + + if (!HasAny || TimeUs < BatchMinUs) + { + BatchMinUs = TimeUs; + } + if (!HasAny || TimeUs > BatchMaxUs) + { + BatchMaxUs = TimeUs; + } + HasAny = true; + + if (IsEnter) + { + OpenStack.push_back(TimeUs); + } + else if (!OpenStack.empty()) + { + uint32_t EnterTimeUs = OpenStack.back(); + OpenStack.pop_back(); + + // If the scope started at or before the window end, we need + // its closing Leave event to survive so the downstream + // analyzer can render it. Mark the current packet (the one + // holding this Leave) as must-keep for the thread. + if (EnterTimeUs <= EndUs) + { + uint32_t& MustKeep = MustKeepPacketByThread[ThreadId]; + if (PacketIndex > MustKeep) + { + MustKeep = PacketIndex; + } + } + } + } + + if (!HasAny) + { + return; + } + + Range& R = PacketRanges[PacketIndex]; + R.MinUs = std::min(R.MinUs, BatchMinUs); + R.MaxUs = std::max(R.MaxUs, BatchMaxUs); + } + + uint64 m_Freq = 0; + uint64 m_Base = 0; + uint64 m_UsDiv = 1; + ReciprocalU64 m_UsDivRecip; + + // Per-thread open scope stack, carrying the Enter times in microseconds + // from trace start. Entries are pushed on Enter and popped on Leave; the + // stack may contain unclosed entries when decoding ends (scopes that + // outlive the captured trace). 
+ eastl::hash_map<uint32_t, eastl::vector<uint32_t>> m_OpenScopes; +}; + +////////////////////////////////////////////////////////////////////////////// +// Common trace iteration + +struct TraceSummary +{ + eastl::map<uint32_t, std::pair<std::string, uint64_t>> TypeInfo; + eastl::set<uint16_t> Threads; + uint64_t TotalEvents = 0; +}; + +template<typename ParcelCallback> +static TraceSummary +IterateTrace(::DataSource& Source, ParcelCallback OnParcel, const zen::trace_detail::ProgressCallback& OnProgress = {}) +{ + TraceSummary Summary; + + try + { + uint64_t TotalFileBytes = uint64_t(std::max(Source.get_size(), int64(0))); + + ::Allocator TraceAllocator; + ::Preamble Pream(Source, TraceAllocator); + ::Transport Xport = Pream.get_transport(); + ::Protocol Proto = Pream.get_protocol(); + + ::Packet Packets[128]; + ::EventParcel Parcel; + + while (::Bundle Bndl = Xport.read_packets(Packets)) + { + Parcel.reset(); + Proto.read(Parcel, Bndl); + + OnParcel(Parcel); + + for (const ::Type* TraceType : Parcel.new_types) + { + auto [LoggerName, EventName] = TraceType->get_name(); + std::string TypeName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName)); + Summary.TypeInfo[TraceType->get_uid()] = {std::move(TypeName), 0}; + } + + for (const ::Event& Ev : Parcel.events) + { + Summary.TotalEvents++; + Summary.Threads.insert(Ev.thread_id); + + auto It = Summary.TypeInfo.find(Ev.uid); + if (It != Summary.TypeInfo.end()) + { + It->second.second++; + } + } + + if (OnProgress) + { + OnProgress(Xport.tell(), TotalFileBytes, Summary.TotalEvents); + } + } + } + catch (const DataStream::Eof&) + { + } + catch (const Exception::StreamError& E) + { + throw std::runtime_error(fmt::format("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value)); + } + + return Summary; +} + +// Print session metadata +static void +PrintSessionInfo(const SessionAnalyzer& SessionAn) +{ + const zen::trace_detail::SessionInfo& Sess = SessionAn.Session; 
+ if (!Sess.HasSession) + { + return; + } + + ZEN_CONSOLE("Platform: {}", Sess.Platform); + ZEN_CONSOLE("App: {}", Sess.AppName); + if (!Sess.ProjectName.empty()) + { + ZEN_CONSOLE("Project: {}", Sess.ProjectName); + } + if (!Sess.Branch.empty()) + { + ZEN_CONSOLE("Branch: {}", Sess.Branch); + } + if (!Sess.BuildVersion.empty()) + { + ZEN_CONSOLE("Build: {}", Sess.BuildVersion); + } + if (Sess.Changelist) + { + ZEN_CONSOLE("Changelist: {}", Sess.Changelist); + } + if (!Sess.CommandLine.empty()) + { + ZEN_CONSOLE("CommandLine: {}", Sess.CommandLine); + } + ZEN_CONSOLE(""); +} + +// Print thread names +static void +PrintThreadInfo(const SessionAnalyzer& SessionAn) +{ + if (SessionAn.ThreadNames.empty()) + { + return; + } + + eastl::vector<std::pair<uint32_t, const zen::trace_detail::ThreadInfoEntry*>> ThreadsSorted; + for (const auto& [Tid, Info] : SessionAn.ThreadNames) + { + ThreadsSorted.emplace_back(Tid, &Info); + } + eastl::sort(ThreadsSorted.begin(), ThreadsSorted.end(), [](const auto& A, const auto& B) { + return A.second->SortHint < B.second->SortHint; + }); + + ZEN_CONSOLE(""); + ZEN_CONSOLE("Threads:"); + ZEN_CONSOLE(""); + ZEN_CONSOLE("{:>6} {:>10} {}", "TID", "SystemID", "Name"); + ZEN_CONSOLE("{:-<{}}", "", 6 + 10 + 40 + 4); + for (const auto& [Tid, Info] : ThreadsSorted) + { + ZEN_CONSOLE("{:>6} {:>10} {}", Tid, Info->SystemId, Info->Name); + } +} + +// Print trace channel info +static void +PrintChannelInfo(const SessionAnalyzer& SessionAn) +{ + if (SessionAn.Channels.empty()) + { + return; + } + + eastl::vector<const zen::trace_detail::ChannelInfo*> ChannelsSorted; + for (const auto& [Id, Info] : SessionAn.Channels) + { + ChannelsSorted.push_back(&Info); + } + eastl::sort(ChannelsSorted.begin(), ChannelsSorted.end(), [](const auto* A, const auto* B) { return A->Name < B->Name; }); + + ZEN_CONSOLE(""); + ZEN_CONSOLE("Trace Channels:"); + ZEN_CONSOLE(""); + for (const zen::trace_detail::ChannelInfo* Ch : ChannelsSorted) + { + std::string_view State = 
Ch->Enabled ? "enabled" : "disabled"; + if (Ch->ReadOnly) + { + ZEN_CONSOLE(" {} ({}, read-only)", Ch->Name, State); + } + else + { + ZEN_CONSOLE(" {} ({})", Ch->Name, State); + } + } +} + +} // namespace + +////////////////////////////////////////////////////////////////////////////// + +namespace zen::trace_detail { + +std::filesystem::path +ResolveTraceFile(const std::filesystem::path& Input, cxxopts::Options& HelpOptions) +{ + if (Input.empty()) + { + throw zen::OptionParseException("File path is required", HelpOptions.help()); + } + + std::filesystem::path FilePath = std::filesystem::absolute(Input); + if (!std::filesystem::exists(FilePath)) + { + throw std::runtime_error(fmt::format("File not found: {}", FilePath)); + } + + return FilePath; +} + +void +RunInspect(const std::filesystem::path& FilePath) +{ + ::DataSource Source(FilePath); + + SessionAnalyzer SessionAn; + ::Dispatcher Dispatch; + Dispatch.add_analyzer(SessionAn); + + // Collect type schemas + struct TypeSchema + { + std::string FullName; + uint32_t Uid = 0; + uint32_t FieldCount = 0; + uint32_t Flags = 0; + uint64_t EventCount = 0; + eastl::vector<std::string> FieldNames; + eastl::vector<uint32_t> FieldSizes; + eastl::vector<uint32_t> FieldTypeInfos; + }; + + eastl::map<uint32_t, TypeSchema> Schemas; + + TraceSummary Summary = IterateTrace(Source, [&](const ::EventParcel& Parcel) { + Dispatch.on_parcel(Parcel); + + for (const ::Type* TraceType : Parcel.new_types) + { + auto [LoggerName, EventName] = TraceType->get_name(); + uint32_t Uid = TraceType->get_uid(); + + TypeSchema& Schema = Schemas[Uid]; + Schema.FullName = fmt::format("{}.{}", std::string_view(LoggerName), std::string_view(EventName)); + Schema.Uid = Uid; + Schema.FieldCount = TraceType->get_field_count(); + Schema.Flags = 0; + if (TraceType->has_flag(TYPE_FLAG_IMPORTANT)) + { + Schema.Flags |= TYPE_FLAG_IMPORTANT; + } + if (TraceType->has_flag(TYPE_FLAG_AUX)) + { + Schema.Flags |= TYPE_FLAG_AUX; + } + + for (uint32_t I = 0; I < 
Schema.FieldCount; I++) + { + auto [FieldName, Field] = TraceType->get_field_info(I); + Schema.FieldNames.emplace_back(FieldName); + Schema.FieldSizes.push_back(Field.get_size()); + Schema.FieldTypeInfos.push_back(Field.get_type_info()); + } + } + + for (const ::Event& Ev : Parcel.events) + { + auto It = Schemas.find(Ev.uid); + if (It != Schemas.end()) + { + It->second.EventCount++; + } + } + }); + + // -- Session info -- + PrintSessionInfo(SessionAn); + + ZEN_CONSOLE("Trace: {}", FilePath); + ZEN_CONSOLE("Size: {}", zen::NiceBytes(uint64_t(std::filesystem::file_size(FilePath)))); + ZEN_CONSOLE("Events: {}", zen::ThousandsNum(Summary.TotalEvents)); + ZEN_CONSOLE("Threads: {}", Summary.Threads.size()); + ZEN_CONSOLE("Types: {}", Schemas.size()); + + // -- Thread names -- + PrintThreadInfo(SessionAn); + + // -- Trace channels -- + PrintChannelInfo(SessionAn); + + // -- Event schemas -- + ZEN_CONSOLE(""); + ZEN_CONSOLE("Event Schemas:"); + ZEN_CONSOLE(""); + + eastl::vector<const TypeSchema*> SortedSchemas; + SortedSchemas.reserve(Schemas.size()); + for (const auto& [Uid, Schema] : Schemas) + { + SortedSchemas.push_back(&Schema); + } + eastl::sort(SortedSchemas.begin(), SortedSchemas.end(), [](const auto* A, const auto* B) { return A->FullName < B->FullName; }); + + auto FieldTypeStr = [](uint32_t TypeInfo, uint32_t Size) -> std::string_view { + uint32_t Cat = TypeInfo & TYPE_INFO_CAT_MASK; + if (Cat == TYPE_INFO_CAT_ARRAY) + { + return "array"; + } + if (Cat == TYPE_INFO_CAT_FLOAT) + { + return (Size == 8) ? "float64" : "float32"; + } + bool IsSigned = (TypeInfo & TYPE_INFO_SPECIAL_MASK) == TYPE_INFO_SPECIAL_SIGNED; + switch (Size) + { + case 1: + return IsSigned ? "int8" : "uint8"; + case 2: + return IsSigned ? "int16" : "uint16"; + case 4: + return IsSigned ? "int32" : "uint32"; + case 8: + return IsSigned ? 
"int64" : "uint64"; + default: + return "unknown"; + } + }; + + for (const TypeSchema* Schema : SortedSchemas) + { + std::string Flags; + if (Schema->Flags & TYPE_FLAG_IMPORTANT) + { + Flags += " [important]"; + } + if (Schema->Flags & TYPE_FLAG_AUX) + { + Flags += " [aux]"; + } + + ZEN_CONSOLE("{} (uid={}, events={}){}", Schema->FullName, Schema->Uid, zen::ThousandsNum(Schema->EventCount), Flags); + + for (uint32_t I = 0; I < Schema->FieldCount; I++) + { + ZEN_CONSOLE(" {} {}", FieldTypeStr(Schema->FieldTypeInfos[I], Schema->FieldSizes[I]), Schema->FieldNames[I]); + } + + if (Schema->FieldCount > 0) + { + ZEN_CONSOLE(""); + } + } +} + +// Build a single LOD level by merging Lod0 scopes below the given resolution. +// Lod0 must already be sorted by BeginUs. Safe to call concurrently for +// different (Level, Resolution) pairs sharing the same Lod0. +static void +BuildSingleLod(const eastl::vector<TimelineScope>& Lod0, TimelineDetailLevel& Level, uint32_t Resolution) +{ + Level.ResolutionUs = Resolution; + + // Per-depth merge accumulators. Since depths are typically small (< 64), + // a flat array indexed by depth is more cache-friendly than a hash map. + struct PendingMerge + { + uint32_t BeginUs = 0; + uint32_t EndUs = 0; + uint32_t NameId = 0; + uint32_t MaxChildDur = 0; + uint16_t Depth = 0; + uint16_t Count = 0; + bool Active = false; + }; + + eastl::vector<PendingMerge> Pending(64); // grows if needed + + auto FlushPending = [&Level](PendingMerge& P) { + if (!P.Active) + { + return; + } + Level.Scopes.push_back(TimelineScope{ + .BeginUs = P.BeginUs, + .DurationUs = P.EndUs - P.BeginUs, + .NameId = P.NameId, + .Depth = P.Depth, + .MergeCount = P.Count, + }); + P.Active = false; + }; + + // Single O(n) sweep over LOD 0 scopes (sorted by BeginUs). For each + // depth, merge adjacent small scopes that fall within one resolution + // bucket of each other. Large scopes (>= Resolution) pass through. 
+ for (const TimelineScope& Scope : Lod0) + { + uint16_t Depth = Scope.Depth; + if (Depth >= Pending.size()) + { + Pending.resize(Depth + 1); + } + + if (Scope.DurationUs >= Resolution) + { + // Large scope -- flush any pending merge for this depth, + // then emit the scope un-merged. + FlushPending(Pending[Depth]); + Level.Scopes.push_back(TimelineScope{ + .BeginUs = Scope.BeginUs, + .DurationUs = Scope.DurationUs, + .NameId = Scope.NameId, + .Depth = Scope.Depth, + .MergeCount = 1, + }); + continue; + } + + PendingMerge& P = Pending[Depth]; + uint32_t EndUs = Scope.BeginUs + Scope.DurationUs; + + if (P.Active && Scope.BeginUs < P.EndUs + Resolution) + { + // Extend the pending merge. + if (EndUs > P.EndUs) + { + P.EndUs = EndUs; + } + ++P.Count; + if (Scope.DurationUs > P.MaxChildDur) + { + P.MaxChildDur = Scope.DurationUs; + P.NameId = Scope.NameId; + } + } + else + { + // Start a new pending merge (flush previous if any). + FlushPending(P); + P.BeginUs = Scope.BeginUs; + P.EndUs = EndUs; + P.NameId = Scope.NameId; + P.MaxChildDur = Scope.DurationUs; + P.Depth = Scope.Depth; + P.Count = 1; + P.Active = true; + } + } + + // Flush remaining per-depth accumulators. + for (PendingMerge& P : Pending) + { + FlushPending(P); + } + + // Sort by (BeginUs, Depth) -- the per-depth flush may have interleaved + // entries from different depths. Tie-breaking on depth keeps the + // ordering consistent with LOD 0 (parents before nested children) so + // the front-end never sees a child rendered before its parent. 
+ eastl::sort(Level.Scopes.begin(), Level.Scopes.end(), [](const TimelineScope& A, const TimelineScope& B) { + if (A.BeginUs != B.BeginUs) + { + return A.BeginUs < B.BeginUs; + } + return A.Depth < B.Depth; + }); +} + +void +BuildTimelineLods(ThreadTimeline& Timeline) +{ + if (Timeline.Scopes.empty()) + { + return; + } + + for (size_t LodIdx = 0; LodIdx < kTimelineLodCount; ++LodIdx) + { + BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[LodIdx], kTimelineLodResolutions[LodIdx]); + } +} + +namespace { + + // Post-iteration phases, extracted from BuildTraceModel for clarity. Each one + // runs after the event-iteration pass has populated the analyzers and mutates + // only the pieces of TraceModel it owns. + + void ComputeScopeStats(const TimelineAnalyzer& TimelineAn, TraceModel& Model) + { + const eastl::vector<std::string>& ScopeNames = TimelineAn.ScopeNames(); + eastl::vector<Distribution> Dists(ScopeNames.size()); + eastl::vector<uint32_t> Mins(ScopeNames.size(), ~0u); + eastl::vector<uint32_t> Maxs(ScopeNames.size(), 0u); + + for (const auto& [Tid, Thread] : TimelineAn.Threads()) + { + for (const TimelineScope& Scope : Thread.Scopes) + { + if (Scope.NameId >= Dists.size()) + { + continue; + } + Dists[Scope.NameId].add(double(Scope.DurationUs)); + Mins[Scope.NameId] = std::min(Mins[Scope.NameId], Scope.DurationUs); + Maxs[Scope.NameId] = std::max(Maxs[Scope.NameId], Scope.DurationUs); + } + } + + Model.ScopeStats.reserve(ScopeNames.size()); + for (size_t I = 0; I < ScopeNames.size(); ++I) + { + if (Dists[I].Count() == 0) + { + continue; + } + CpuScopeStat Entry; + Entry.Name = ScopeNames[I]; + Entry.Count = Dists[I].Count(); + Entry.MinUs = Mins[I]; + Entry.MaxUs = Maxs[I]; + Entry.MeanUs = Dists[I].Mean(); + Entry.StdDevUs = Dists[I].StdDev(); + Model.ScopeStats.push_back(std::move(Entry)); + } + eastl::sort(Model.ScopeStats.begin(), Model.ScopeStats.end(), [](const CpuScopeStat& A, const CpuScopeStat& B) { + return A.Count > B.Count; + }); + } + + // 
Translate each LogEntry's captured CategoryIndex (a sequential id keyed on + // the source category pointer) into the flat LogCategories index the frontend + // consumes. Entries whose category pointer never got a matching LogCategory + // event are bucketed into a synthetic "(unknown)" category. + void ResolveLogCategories(LogAnalyzer& LogAn, TraceModel& Model) + { + const eastl::hash_map<uint64_t, uint32_t>& CategoryPtrToSeqIdx = LogAn.CategoryPointerIndex(); + + eastl::hash_map<uint64_t, uint32_t> RealPtrToFlatIdx; + Model.LogCategories = LogAn.BuildCategories(RealPtrToFlatIdx); + + const uint32_t UnknownIdx = uint32_t(Model.LogCategories.size()); + Model.LogCategories.push_back(LogCategoryInfo{.Name = "(unknown)", .DefaultVerbosity = 0}); + + eastl::vector<uint32_t> SeqToFlat(CategoryPtrToSeqIdx.size(), UnknownIdx); + for (const auto& [Ptr, SeqIdx] : CategoryPtrToSeqIdx) + { + auto It = RealPtrToFlatIdx.find(Ptr); + if (It != RealPtrToFlatIdx.end()) + { + SeqToFlat[SeqIdx] = It->second; + } + } + + Model.LogEntries = LogAn.MutableEntries(); + for (LogEntry& E : Model.LogEntries) + { + E.CategoryIndex = (E.CategoryIndex < SeqToFlat.size()) ? SeqToFlat[E.CategoryIndex] : UnknownIdx; + } + + eastl::sort(Model.LogEntries.begin(), Model.LogEntries.end(), [](const LogEntry& A, const LogEntry& B) { + return A.TimeUs < B.TimeUs; + }); + } + + // Finalize any still-open regions, group by category, and greedily pack each + // category's regions into non-overlapping lanes so the frontend can stack them + // without re-running collision detection. 
+ void BuildRegionCategories(eastl::vector<RegionEntry>&& AllRegions, uint32_t TraceEndUs, TraceModel& Model) + { + for (RegionEntry& R : AllRegions) + { + if (R.EndUs == ~uint32_t(0)) + { + R.EndUs = TraceEndUs; + } + if (R.EndUs < R.BeginUs) + { + R.EndUs = R.BeginUs; + } + } + + eastl::map<std::string, eastl::vector<RegionEntry>> ByCategory; + for (RegionEntry& R : AllRegions) + { + ByCategory[R.Category].push_back(std::move(R)); + } + + for (auto& [CatName, Regions] : ByCategory) + { + eastl::sort(Regions.begin(), Regions.end(), [](const RegionEntry& A, const RegionEntry& B) { + if (A.BeginUs != B.BeginUs) + { + return A.BeginUs < B.BeginUs; + } + return A.EndUs < B.EndUs; + }); + + eastl::vector<uint32_t> LaneEndUs; + uint32_t MaxLane = 0; + for (RegionEntry& R : Regions) + { + uint16_t Depth = 0; + bool Assigned = false; + for (size_t I = 0; I < LaneEndUs.size(); ++I) + { + if (LaneEndUs[I] <= R.BeginUs) + { + Depth = uint16_t(I); + LaneEndUs[I] = R.EndUs; + Assigned = true; + break; + } + } + if (!Assigned) + { + Depth = uint16_t(LaneEndUs.size()); + LaneEndUs.push_back(R.EndUs); + } + R.Depth = Depth; + if (Depth + 1u > MaxLane) + { + MaxLane = Depth + 1u; + } + } + + RegionCategory Cat; + Cat.Name = CatName; + Cat.LaneCount = MaxLane; + Cat.Regions = std::move(Regions); + Model.RegionCategories.push_back(std::move(Cat)); + } + + // Sort: uncategorized (empty name) first, then alphabetical. + eastl::sort(Model.RegionCategories.begin(), Model.RegionCategories.end(), [](const RegionCategory& A, const RegionCategory& B) { + if (A.Name.empty() != B.Name.empty()) + { + return A.Name.empty(); + } + return A.Name < B.Name; + }); + } + + // Map callstack frame addresses to (module, offset) pairs using a sorted + // (Base, End) lookup over the already-populated Model.Modules. 
+ void ResolveCallstacks(const ModuleAnalyzer& ModuleAn, + const CallstackAnalyzer& CallstackAn, + AllocationAnalyzer& AllocAn, + TraceModel& Model) + { + const auto& RawCallstacks = CallstackAn.RawCallstacks(); + + struct ModuleLookup + { + uint64_t Base; + uint64_t End; + uint32_t ModelIndex; + }; + eastl::vector<ModuleLookup> Lookup; + Lookup.reserve(ModuleAn.ModulesByBase.size()); + for (const auto& [Base, Info] : ModuleAn.ModulesByBase) + { + for (uint32_t I = 0; I < Model.Modules.size(); ++I) + { + if (Model.Modules[I].Base == Base) + { + Lookup.push_back({Base, Base + Info.Size, I}); + break; + } + } + } + eastl::sort(Lookup.begin(), Lookup.end(), [](const ModuleLookup& A, const ModuleLookup& B) { return A.Base < B.Base; }); + + auto ResolveFrame = [&Lookup](uint64_t Address) -> ResolvedFrame { + ResolvedFrame F; + F.Address = Address; + auto It = eastl::upper_bound(Lookup.begin(), Lookup.end(), Address, [](uint64_t Addr, const ModuleLookup& M) { + return Addr < M.Base; + }); + if (It != Lookup.begin()) + { + --It; + if (Address < It->End) + { + F.ModuleIndex = It->ModelIndex; + F.Offset = Address - It->Base; + } + } + return F; + }; + + eastl::vector<uint32_t> SortedCallstackIds; + SortedCallstackIds.reserve(RawCallstacks.size()); + for (const auto& [Id, RawFrames] : RawCallstacks) + { + ZEN_UNUSED(RawFrames); + SortedCallstackIds.push_back(Id); + } + eastl::sort(SortedCallstackIds.begin(), SortedCallstackIds.end()); + + Model.Callstacks.reserve(RawCallstacks.size()); + for (uint32_t Id : SortedCallstackIds) + { + auto RawIt = RawCallstacks.find(Id); + ZEN_ASSERT(RawIt != RawCallstacks.end()); + const eastl::vector<uint64_t>& RawFrames = RawIt->second; + + CallstackEntry Entry; + Entry.Id = Id; + Entry.Frames.reserve(RawFrames.size()); + for (uint64_t Addr : RawFrames) + { + Entry.Frames.push_back(ResolveFrame(Addr)); + } + Model.Callstacks.push_back(std::move(Entry)); + } + + Model.CallstackStats = AllocAn.BuildCallstackStats(); + Model.ChurnStats = 
AllocAn.BuildChurnStats(~uint64_t(0)); + Model.AllocSizeHistogram = AllocAn.BuildSizeHistogram(); + } + +} // namespace + +TraceModel +BuildTraceModel(const std::filesystem::path& FilePath, WorkerThreadPool& ThreadPool, const ProgressCallback& OnProgress) +{ + ::DataSource Source(FilePath); + + TraceTiming Timing; + + SessionAnalyzer SessionAn; + ModuleAnalyzer ModuleAn; + MetadataRegistry MetadataReg; + TimelineAnalyzer TimelineAn(&MetadataReg, &Timing); + LogAnalyzer LogAn(&Timing); + BookmarksAnalyzer BookmarkAn(&Timing); + CsvProfilerAnalyzer CsvAn(&Timing); + AllocationAnalyzer AllocAn(&Timing); + CallstackAnalyzer CallstackAn; + + // Tourist's Dispatcher only supports one subscription per event type, so we + // cannot run CpuAnalyzer alongside TimelineAnalyzer -- CpuAnalyzer would + // claim the CpuProfiler.Event* types first and TimelineAnalyzer would + // never receive any events. Instead, TimelineAnalyzer captures every + // scope interval and we derive the aggregate statistics from those + // intervals in a cheap post-pass below. + ::Dispatcher Dispatch; + Dispatch.add_analyzer(SessionAn); + Dispatch.add_analyzer(ModuleAn); + Dispatch.add_analyzer(MetadataReg); + Dispatch.add_analyzer(TimelineAn); + Dispatch.add_analyzer(LogAn); + Dispatch.add_analyzer(BookmarkAn); + Dispatch.add_analyzer(CsvAn); + Dispatch.add_analyzer(AllocAn); + Dispatch.add_analyzer(CallstackAn); + + zen::Stopwatch Timer; + TraceSummary Summary = IterateTrace( + Source, + [&](const ::EventParcel& Parcel) { Dispatch.on_parcel(Parcel); }, + OnProgress); + ZEN_INFO("Trace iteration complete: {} events in {}", + zen::ThousandsNum(Summary.TotalEvents), + zen::NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + + { + uint32_t StartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs(); + uint32_t EndUs = TimelineAn.MaxEndUs(); + uint64_t DurationMs = (EndUs > StartUs) ? 
(uint64_t(EndUs - StartUs) + 500) / 1000 : 0; + ZEN_INFO("Trace duration: {}", zen::NiceTimeSpanMs(DurationMs)); + } + + TraceModel Model; + Model.FilePath = FilePath; + Model.FileSize = uint64_t(std::filesystem::file_size(FilePath)); + Model.TotalEvents = Summary.TotalEvents; + Model.ParseTimeMs = Timer.GetElapsedTimeMs(); + Model.Session = SessionAn.Session; + + // Event type counts (sorted by count descending) + Model.EventTypeCounts.reserve(Summary.TypeInfo.size()); + for (auto& [Uid, Info] : Summary.TypeInfo) + { + Model.EventTypeCounts.push_back({std::move(Info.first), Info.second}); + } + eastl::sort(Model.EventTypeCounts.begin(), Model.EventTypeCounts.end(), [](const auto& A, const auto& B) { return A.Count > B.Count; }); + + // Flatten and sort threads by sort hint + Model.Threads.reserve(SessionAn.ThreadNames.size()); + for (const auto& [Tid, Info] : SessionAn.ThreadNames) + { + Model.Threads.push_back(Info); + } + eastl::sort(Model.Threads.begin(), Model.Threads.end(), [](const ThreadInfoEntry& A, const ThreadInfoEntry& B) { + return A.SortHint < B.SortHint; + }); + + // Flatten and sort channels by name + Model.Channels.reserve(SessionAn.Channels.size()); + for (const auto& [Id, Info] : SessionAn.Channels) + { + Model.Channels.push_back(Info); + } + eastl::sort(Model.Channels.begin(), Model.Channels.end(), [](const ChannelInfo& A, const ChannelInfo& B) { return A.Name < B.Name; }); + + { + ExtendableStringBuilder<512> Enabled; + for (const ChannelInfo& Ch : Model.Channels) + { + if (Ch.Enabled) + { + if (Enabled.Size() > 0) + { + Enabled.Append(", "); + } + Enabled.Append(Ch.Name); + } + } + if (Enabled.Size() > 0) + { + ZEN_INFO("Enabled channels: {}", Enabled); + } + } + + // Flatten and sort modules by name + Model.Modules.reserve(ModuleAn.ModulesByBase.size()); + for (const auto& [Base, Info] : ModuleAn.ModulesByBase) + { + Model.Modules.push_back(Info); + } + eastl::sort(Model.Modules.begin(), Model.Modules.end(), [](const ModuleInfo& A, const 
ModuleInfo& B) { return A.Name < B.Name; }); + + // CPU scope statistics and timeline building read from TimelineAn + // independently and write to separate Model fields, so overlap them. + Model.ScopeNames = TimelineAn.ScopeNames(); + + ZEN_INFO("Computing CPU scope statistics ({} scope names)", TimelineAn.ScopeNames().size()); + + // Kick off scope stats on a worker -- runs concurrently with the + // timeline copy + sort below. + Latch StatsLatch(1); + ThreadPool.ScheduleWork( + [&StatsLatch, &Model, &TimelineAn]() { + auto _ = MakeGuard([&StatsLatch]() { StatsLatch.CountDown(); }); + ComputeScopeStats(TimelineAn, Model); + }, + WorkerThreadPool::EMode::EnableBacklog); + + // Timelines -- build per-thread sort + LODs in parallel. + { + const auto& Threads = TimelineAn.Threads(); + size_t TotalScopes = 0; + for (const auto& [Tid, Thread] : Threads) + { + TotalScopes += Thread.Scopes.size(); + } + ZEN_INFO("Building timelines: {} threads, {} scopes (sort + LODs)", Threads.size(), zen::ThousandsNum(TotalScopes)); + Model.Timelines.resize(Threads.size()); + + // Populate timeline metadata on the main thread (cheap lookups). + size_t Idx = 0; + for (const auto& [Tid, Thread] : Threads) + { + ThreadTimeline& Timeline = Model.Timelines[Idx++]; + Timeline.ThreadId = Tid; + auto It = SessionAn.ThreadNames.find(Tid); + if (It != SessionAn.ThreadNames.end()) + { + Timeline.Name = It->second.Name; + Timeline.SortHint = It->second.SortHint; + } + Timeline.Scopes = Thread.Scopes; + } + + // Phase 1: Sort LOD 0 scopes per thread. + // ParallelSort fans out internally using the pool, so it must be + // called from the main thread to avoid nested fan-out deadlocks. + // Small timelines are dispatched to workers first (they just call + // eastl::sort -- no nesting). Then large ones are sorted one at a + // time from the main thread with full pool utilisation each. 
+ // + // Tie-break on Depth so that scopes which start at the same micro + // timestamp come out parent-first (lower depth wins). This keeps + // the scope ordering well-defined and lets the front-end rely on + // outer scopes appearing before their nested children regardless + // of the order the analyzer happened to emit them. + { + auto Cmp = [](const TimelineScope& A, const TimelineScope& B) { + if (A.BeginUs != B.BeginUs) + { + return A.BeginUs < B.BeginUs; + } + return A.Depth < B.Depth; + }; + + if constexpr (kUseParallelSort) + { + constexpr size_t kParallelThreshold = 65536; + + // Dispatch small timelines to workers. + Latch SmallLatch(1); + for (size_t I = 0; I < Model.Timelines.size(); ++I) + { + if (Model.Timelines[I].Scopes.size() >= kParallelThreshold) + { + continue; + } + SmallLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&SmallLatch, &Cmp, &Timeline = Model.Timelines[I]]() { + auto _ = MakeGuard([&SmallLatch]() { SmallLatch.CountDown(); }); + eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + SmallLatch.CountDown(); + SmallLatch.Wait(); + + // Sort large timelines from the main thread so ParallelSort + // can fan out across the (now idle) pool without deadlocking. + for (ThreadTimeline& Timeline : Model.Timelines) + { + if (Timeline.Scopes.size() >= kParallelThreshold) + { + zen::ParallelSort(ThreadPool, Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); + } + } + } + else + { + Latch SortLatch(1); + for (size_t I = 0; I < Model.Timelines.size(); ++I) + { + SortLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&SortLatch, &Cmp, &Timeline = Model.Timelines[I]]() { + auto _ = MakeGuard([&SortLatch]() { SortLatch.CountDown(); }); + eastl::sort(Timeline.Scopes.begin(), Timeline.Scopes.end(), Cmp); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + SortLatch.CountDown(); + SortLatch.Wait(); + } + } + + // Phase 2: Build LOD levels -- one task per (thread, LOD) pair. 
+ // Flat dispatch avoids nested fan-out which could deadlock the pool. + Latch LodLatch(1); + for (size_t I = 0; I < Model.Timelines.size(); ++I) + { + if (Model.Timelines[I].Scopes.empty()) + { + continue; + } + for (size_t L = 0; L < kTimelineLodCount; ++L) + { + LodLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&LodLatch, &Timeline = Model.Timelines[I], L]() { + auto _ = MakeGuard([&LodLatch]() { LodLatch.CountDown(); }); + BuildSingleLod(Timeline.Scopes, Timeline.DetailLevels[L], kTimelineLodResolutions[L]); + }, + WorkerThreadPool::EMode::EnableBacklog); + } + } + LodLatch.CountDown(); + LodLatch.Wait(); + } + eastl::sort(Model.Timelines.begin(), Model.Timelines.end(), [](const ThreadTimeline& A, const ThreadTimeline& B) { + return A.SortHint < B.SortHint; + }); + + Model.TraceStartUs = (TimelineAn.MinBeginUs() == ~0u) ? 0u : TimelineAn.MinBeginUs(); + Model.TraceEndUs = TimelineAn.MaxEndUs(); + + // Ensure scope stats computation (kicked off earlier) has finished. + StatsLatch.Wait(); + + ZEN_INFO("Processing {} log entries", zen::ThousandsNum(LogAn.Entries().size())); + ResolveLogCategories(LogAn, Model); + + ZEN_INFO("Sorting {} bookmarks, {} regions", BookmarkAn.MutableBookmarks().size(), BookmarkAn.MutableRegions().size()); + + // Bookmarks: move and sort by TimeUs. 
+ Model.Bookmarks = std::move(BookmarkAn.MutableBookmarks()); + eastl::sort(Model.Bookmarks.begin(), Model.Bookmarks.end(), [](const Bookmark& A, const Bookmark& B) { return A.TimeUs < B.TimeUs; }); + + BuildRegionCategories(std::move(BookmarkAn.MutableRegions()), Model.TraceEndUs, Model); + + // CsvProfiler data + { + Model.CsvCategories = std::move(CsvAn.MutableCategories()); + Model.CsvStatDefs = std::move(CsvAn.MutableStatDefs()); + Model.CsvTimeSeries = CsvAn.BuildTimeSeries(); + Model.CsvEvents = std::move(CsvAn.MutableEvents()); + eastl::sort(Model.CsvEvents.begin(), Model.CsvEvents.end(), [](const auto& A, const auto& B) { return A.TimeUs < B.TimeUs; }); + Model.CsvMetadata = std::move(CsvAn.MutableMetadata()); + ZEN_INFO("CSV profiler: {} categories, {} stats, {} series, {} events", + Model.CsvCategories.size(), + Model.CsvStatDefs.size(), + Model.CsvTimeSeries.size(), + Model.CsvEvents.size()); + } + + // Memory allocation data + { + AllocAn.EmitFinalSample(Model.TraceEndUs); + Model.AllocSummary = AllocAn.Summary(); + + // Flatten heaps map into sorted vector + Model.Heaps.reserve(AllocAn.Heaps().size()); + for (const auto& [Id, Info] : AllocAn.Heaps()) + { + Model.Heaps.push_back(Info); + } + eastl::sort(Model.Heaps.begin(), Model.Heaps.end(), [](const HeapInfo& A, const HeapInfo& B) { return A.Id < B.Id; }); + + // Flatten tags map into sorted vector + Model.Tags.reserve(AllocAn.Tags().size()); + for (const auto& [Tag, Info] : AllocAn.Tags()) + { + Model.Tags.push_back(Info); + } + eastl::sort(Model.Tags.begin(), Model.Tags.end(), [](const TagInfo& A, const TagInfo& B) { return A.Tag < B.Tag; }); + + // Move timeline (already time-ordered from Marker events) + Model.MemoryTimeline = std::move(AllocAn.MutableTimeline()); + + // Flatten per-root-heap stats into sorted vector + Model.HeapStats.reserve(AllocAn.RootHeapStats().size()); + for (const auto& [HeapId, Stat] : AllocAn.RootHeapStats()) + { + Model.HeapStats.push_back(Stat); + } + 
eastl::sort(Model.HeapStats.begin(), Model.HeapStats.end(), [](const HeapStat& A, const HeapStat& B) { + return A.HeapId < B.HeapId; + }); + + if (Model.AllocSummary.HasMemoryData) + { + ZEN_INFO("Memory: {} allocs, {} frees, peak {}, {} live, {} timeline samples", + zen::ThousandsNum(Model.AllocSummary.TotalAllocs + Model.AllocSummary.TotalReallocAllocs), + zen::ThousandsNum(Model.AllocSummary.TotalFrees + Model.AllocSummary.TotalReallocFrees), + zen::NiceBytes(uint64_t(Model.AllocSummary.PeakBytes)), + zen::ThousandsNum(Model.AllocSummary.LiveAllocations), + zen::ThousandsNum(Model.MemoryTimeline.size())); + } + } + + ResolveCallstacks(ModuleAn, CallstackAn, AllocAn, Model); + ZEN_INFO("Callstacks: {} unique, {} with live allocations", + zen::ThousandsNum(Model.Callstacks.size()), + zen::ThousandsNum(Model.CallstackStats.size())); + + return Model; +} + +////////////////////////////////////////////////////////////////////////////// +// Trace trim +// +// The trim pipeline operates entirely at the raw packet level: a .utrace on +// disk is identical to the wire format (see src/zenserver/trace/tracerecorder.cpp +// for the capture-side passthrough), so trimming reduces to "copy the preamble, +// then copy only the packets we want to keep". We never re-encode or re-emit +// any events, which sidesteps the fact that Tourist has no writer path. +// +// The algorithm: +// +// 1. Slurp the input file into memory and walk raw packets using the +// [size:uint16][thread_id:uint16][payload] framing. This gives an ordered +// list of packet descriptors keyed by file offset. +// +// 2. Classify packets by their on-disk thread_id: +// TID_TYPE -> always keep (type definitions) +// TID_IMPORTANT -> always keep (events of types marked TYPE_FLAG_IMPORTANT, +// i.e. session info, thread names, channel state, +// log categories, CPU specs, etc.) +// TID_SYNC -> always keep (transport barriers) +// TID_NORMAL+ -> keep only if the packet's events overlap the window +// +// 3. 
For normal-thread packets, run Tourist's reader with a bundle of size 1 +// so each Proto.read() scatters exactly one raw packet before emitting any +// events. Before the read call, we record the file offset of the current +// packet as the "latest packet" for its thread. TrimAnalyzer then decodes +// CpuProfiler batch events and attributes their timestamp ranges back to +// that thread's latest packet. The attribution can drift if Tourist +// buffers multiple packets on one thread, but the failure mode is that +// earlier packets lose attribution and are conservatively retained. +// +// 4. Write the output: the preamble bytes verbatim, followed by the raw +// bytes of each kept packet in original order. There is no trailer; the +// Tourist reader catches DataStream::Eof at the end of the stream. +// +// Coarse per-packet precision is accepted by design: a packet straddling a +// window edge is kept in full. CpuProfiler batches are self-contained per +// packet (each re-derives cycles from the trace-wide StartCycle), so dropping +// packets does not desync delta decoding on surviving ones, and orphaned leave +// events from half-open scopes are silently ignored by decoders. + +namespace { + + struct TrimPacketDesc + { + uint64_t FileOffset = 0; // offset of the [size:uint16] header in the file + uint32_t Size = 0; // total size including the 4-byte header + uint16_t ThreadIdRaw = 0; // thread_id as stored on disk, including PACKET_FLAG_COMPRESSED + }; + + // Parses the .utrace preamble in place to determine the byte offset where + // packets begin. Mirrors Preamble::parse_header in Tourist so we can run the + // raw walker without spinning up a second DataSource. Throws on a malformed + // preamble. 
+ static uint64_t ParsePreambleLength(const uint8_t* Data, uint64_t Size) + { + if (Size < 8) + { + throw zen::runtime_error("Trace file too small to contain a preamble ({} bytes)", Size); + } + + uint32_t Magic = 0; + std::memcpy(&Magic, Data, sizeof(uint32_t)); + if (Magic != 'TRC2') + { + throw zen::runtime_error("Unexpected trace file magic value 0x{:08x}", Magic); + } + + uint16_t MetaSize = 0; + std::memcpy(&MetaSize, Data + 4, sizeof(uint16_t)); + + // magic(4) + meta_size(2) + metadata + transport(1) + protocol(1) + uint64_t PreambleLen = uint64_t(4) + 2 + MetaSize + 1 + 1; + if (PreambleLen > Size) + { + throw zen::runtime_error("Trace preamble extends past end of file ({} > {})", PreambleLen, Size); + } + + return PreambleLen; + } + + // Walks raw packets starting at PreambleLen. Returns one TrimPacketDesc per + // packet in original stream order. The walker stops gracefully on truncated + // data so partial traces still produce a usable packet list. + static eastl::vector<TrimPacketDesc> WalkRawPackets(const uint8_t* Data, uint64_t Size, uint64_t PreambleLen) + { + eastl::vector<TrimPacketDesc> Packets; + uint64_t Offset = PreambleLen; + + while (Offset + 4 <= Size) + { + uint16_t PacketSize = 0; + uint16_t ThreadIdRaw = 0; + std::memcpy(&PacketSize, Data + Offset, sizeof(uint16_t)); + std::memcpy(&ThreadIdRaw, Data + Offset + 2, sizeof(uint16_t)); + + if (PacketSize < 4) + { + // Malformed size; stop walking and accept whatever we have. + break; + } + + if (Offset + PacketSize > Size) + { + // Truncated tail -- drop it. 
+ break; + } + + TrimPacketDesc Desc; + Desc.FileOffset = Offset; + Desc.Size = PacketSize; + Desc.ThreadIdRaw = ThreadIdRaw; + Packets.push_back(Desc); + + Offset += PacketSize; + } + + return Packets; + } + +} // namespace + +void +RunTraceTrim(const TraceTrimArgs& Args) +{ + if (!(Args.EndSec > Args.StartSec)) + { + throw zen::runtime_error("Invalid trim range: start={} end={}", Args.StartSec, Args.EndSec); + } + + // --- Read the input file --- + zen::BasicFile InputFile(Args.InputPath, zen::BasicFile::Mode::kRead); + zen::IoBuffer InputBuffer = InputFile.ReadAll(); + InputFile.Close(); + + const uint8_t* FileBytes = static_cast<const uint8_t*>(InputBuffer.GetData()); + const uint64_t FileSize = InputBuffer.GetSize(); + + const uint64_t PreambleLen = ParsePreambleLength(FileBytes, FileSize); + + // --- Raw packet walk --- + eastl::vector<TrimPacketDesc> Packets = WalkRawPackets(FileBytes, FileSize, PreambleLen); + if (Packets.empty()) + { + throw zen::runtime_error("Trace file contains no packets"); + } + + // Initial keep classification: definitions, important events, sync are + // always retained. Normal-thread packets start as drop candidates and get + // promoted if their decoded time range overlaps the window. + eastl::vector<uint8_t> Keep(Packets.size(), 0); + size_t NumAlwaysKept = 0; + for (size_t I = 0; I < Packets.size(); ++I) + { + uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; + if (Tid == TID_TYPE || Tid == TID_IMPORTANT || Tid == TID_SYNC) + { + Keep[I] = 1; + ++NumAlwaysKept; + } + } + + // --- Time-range classification via Tourist (bundle of 1) --- + TrimAnalyzer TrimAn; + TrimAn.EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? 
~uint32_t(0) : uint32_t(Args.EndSec * 1e6); + ::Dispatcher Dispatch; + Dispatch.add_analyzer(TrimAn); + + { + ::DataSource Source(Args.InputPath); + ::Allocator TraceAllocator; + ::Preamble Pream(Source, TraceAllocator); + ::Transport Xport = Pream.get_transport(); + ::Protocol Proto = Pream.get_protocol(); + + ::Packet OnePacket[1]; + ::EventParcel Parcel; + + try + { + while (::Bundle Bndl = Xport.read_packets(OnePacket)) + { + if (Bndl.empty()) + { + break; + } + + const ::Packet& P = Bndl[0]; + uint32_t Tid = P.get_thread_id(); + + if (Tid >= TID_NORMAL && Tid != TID_SYNC) + { + // Tourist's Packet::get_index() is the same sequential + // packet counter as our raw walker's vector position, + // since both read the stream from the start in order. + TrimAn.LastPacketIndexByThread[Tid] = P.get_index(); + } + + Parcel.reset(); + Proto.read(Parcel, Bndl); + Dispatch.on_parcel(Parcel); + } + } + catch (const DataStream::Eof&) + { + } + catch (const Exception::StreamError& E) + { + throw zen::runtime_error("Trace stream error at position {}: {} (value: {})", E.position, E.message, E.value); + } + } + + // --- Apply the window filter --- + // + // Per-packet filtering in the middle of a thread's stream is unsafe: + // Tourist's event parser holds per-thread continuation state (see + // EventParser::_fragment / _missing in + // thirdparty/tourist/trace/src/protocol.cpp) so an event can straddle a + // packet boundary on a normal thread. Removing a packet from the middle + // leaves subsequent packets on the same thread decoded against the wrong + // position in an in-flight event and Tourist crashes. We therefore only + // drop packets in two safe ways: + // + // 1. Whole-thread drop: a thread whose attributed packets are all + // outside the window has every one of its packets dropped. No + // surviving packet references that thread, so there is no state + // machine to corrupt. + // + // 2. 
Per-thread tail truncation: for a thread that does have in-window + // activity, drop every packet AFTER the latest in-window packet on + // that thread. Tail drops are safe because no later packet on the + // same thread can be looking forward to the dropped bytes; the + // parser just ends its stream for that thread at the truncation + // point, exactly like a trace that naturally stopped recording. + // + // Threads for which we never attributed any CpuProfiler batch events are + // retained in full; we have no evidence about their time range and + // can't safely drop them. + const uint32_t StartUs = uint32_t(std::max(0.0, Args.StartSec) * 1e6); + const uint32_t EndUs = (Args.EndSec * 1e6 > double(~uint32_t(0))) ? ~uint32_t(0) : uint32_t(Args.EndSec * 1e6); + + struct ThreadInfo + { + bool HasAnyBatch = false; + bool HasInWindowBatch = false; + // First packet index on this thread whose attributed CPU batches are + // *entirely* past EndUs. Every packet on this thread with an index + // >= this value is safe to tail-drop. Defaults to size_t(-1) (no cut + // point) when the thread has no such packet. 
+ size_t FirstPastWindowIdx = size_t(-1); + }; + eastl::hash_map<uint32_t, ThreadInfo> ThreadInfos; + + for (size_t I = 0; I < Packets.size(); ++I) + { + uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; + if (Tid < TID_NORMAL || Tid == TID_SYNC) + { + continue; + } + + auto RangeIt = TrimAn.PacketRanges.find(uint32_t(I)); + if (RangeIt == TrimAn.PacketRanges.end()) + { + continue; + } + + ThreadInfo& Info = ThreadInfos[Tid]; + Info.HasAnyBatch = true; + const auto& Range = RangeIt->second; + if (Range.MaxUs >= StartUs && Range.MinUs <= EndUs) + { + Info.HasInWindowBatch = true; + } + if (Range.MinUs > EndUs && I < Info.FirstPastWindowIdx) + { + Info.FirstPastWindowIdx = I; + } + } + + size_t NumThreadsKept = 0; + size_t NumThreadsDropped = 0; + for (const auto& [Tid, Info] : ThreadInfos) + { + if (Info.HasInWindowBatch) + { + ++NumThreadsKept; + } + else + { + ++NumThreadsDropped; + } + } + + size_t NumInWindow = 0; + size_t NumTailDropped = 0; + size_t NumUnattributed = 0; + size_t NumDropped = 0; + + for (size_t I = 0; I < Packets.size(); ++I) + { + if (Keep[I]) + { + continue; + } + + uint32_t Tid = Packets[I].ThreadIdRaw & ~PACKET_FLAG_COMPRESSED; + auto It = ThreadInfos.find(Tid); + if (It == ThreadInfos.end() || !It->second.HasAnyBatch) + { + // We have no evidence for this thread's time range. Retain all + // its packets conservatively to avoid breaking Tourist's per- + // thread parser state. + Keep[I] = 1; + ++NumUnattributed; + continue; + } + + if (!It->second.HasInWindowBatch) + { + // Thread's attributed packets are all outside the window -- drop + // every packet on this thread. + ++NumDropped; + continue; + } + + if (I >= It->second.FirstPastWindowIdx) + { + // Past the first entirely-after-window packet on this thread -- + // candidate for tail truncation. Before dropping, check whether + // this packet carries a Leave event that closes a scope whose + // Enter was at or before the window end. 
If so, we MUST keep it + // so the downstream analyzer can render the long-running scope; + // otherwise the scope would sit unmatched on the open stack. + auto MustKeepIt = TrimAn.MustKeepPacketByThread.find(Tid); + if (MustKeepIt != TrimAn.MustKeepPacketByThread.end() && I <= MustKeepIt->second) + { + Keep[I] = 1; + ++NumInWindow; + continue; + } + + ++NumTailDropped; + continue; + } + + Keep[I] = 1; + ++NumInWindow; + } + + // --- Write output --- + std::error_code Ec; + std::filesystem::create_directories(Args.OutputPath.parent_path(), Ec); + + zen::BasicFile OutputFile(Args.OutputPath, zen::BasicFile::Mode::kTruncate); + + uint64_t OutOffset = 0; + OutputFile.Write(FileBytes, PreambleLen, OutOffset); + OutOffset += PreambleLen; + + uint64_t KeptBytes = 0; + for (size_t I = 0; I < Packets.size(); ++I) + { + if (!Keep[I]) + { + continue; + } + OutputFile.Write(FileBytes + Packets[I].FileOffset, Packets[I].Size, OutOffset); + OutOffset += Packets[I].Size; + KeptBytes += Packets[I].Size; + } + + OutputFile.Flush(); + OutputFile.Close(); + + ZEN_CONSOLE("Trimmed trace written to {}", Args.OutputPath); + ZEN_CONSOLE(" Input: {} ({} packets)", zen::NiceBytes(FileSize), zen::ThousandsNum(Packets.size())); + ZEN_CONSOLE(" Output: {} ({} packets)", + zen::NiceBytes(OutOffset), + zen::ThousandsNum(NumAlwaysKept + NumInWindow + NumUnattributed)); + ZEN_CONSOLE(" Always kept: {} packets (types / important / sync)", zen::ThousandsNum(NumAlwaysKept)); + ZEN_CONSOLE(" Thread kept: {} packets from {} threads with in-window activity", + zen::ThousandsNum(NumInWindow), + zen::ThousandsNum(NumThreadsKept)); + ZEN_CONSOLE(" Thread dropped: {} packets from {} threads with no in-window activity", + zen::ThousandsNum(NumDropped), + zen::ThousandsNum(NumThreadsDropped)); + ZEN_CONSOLE(" Tail dropped: {} packets past the latest in-window packet on their thread", zen::ThousandsNum(NumTailDropped)); + ZEN_CONSOLE(" Unattributed: {} packets (retained conservatively)", 
zen::ThousandsNum(NumUnattributed)); + ZEN_UNUSED(KeptBytes); + + // --- Diagnostic: summarise the attributed time range distribution --- + { + uint32_t GlobalMin = ~0u; + uint32_t GlobalMax = 0; + for (const auto& [Idx, R] : TrimAn.PacketRanges) + { + GlobalMin = std::min(GlobalMin, R.MinUs); + GlobalMax = std::max(GlobalMax, R.MaxUs); + } + ZEN_CONSOLE(" Attributed: {} packets, window {:.3f}s .. {:.3f}s", + zen::ThousandsNum(TrimAn.PacketRanges.size()), + double(GlobalMin) / 1e6, + double(GlobalMax) / 1e6); + } +} + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_model.h b/src/zen/trace/trace_model.h new file mode 100644 index 000000000..bd6dcc674 --- /dev/null +++ b/src/zen/trace/trace_model.h @@ -0,0 +1,314 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "trace_memory.h" +#include "zen.h" + +#include <zencore/workthreadpool.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/vector.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <cstdint> +#include <filesystem> +#include <functional> +#include <string> + +namespace zen::trace_detail { + +// Shared trace timing state. Tourist's Dispatcher only allows one subscriber +// per event type, so only one analyzer can own the `$Trace.NewTrace` +// subscription. Other analyzers that need to convert absolute Cycle64 values +// read from this shared struct, which the owning analyzer fills in during its +// OnNewTrace callback. +struct TraceTiming +{ + uint64_t Freq = 0; + uint64_t Base = 0; + uint64_t UsDiv = 1; + + uint32_t CycleToTimeUs(uint64_t Cycle) const + { + uint64_t CycleFromStart = (Cycle >= Base) ? (Cycle - Base) : 0; + uint64_t D = (UsDiv > 0) ? UsDiv : 1; + return uint32_t((CycleFromStart + (D >> 1)) / D); + } +}; + +// Safely convert a tourist FieldStr to std::string, stripping trailing NULs +// and returning an empty string on failure. 
std::string SafeFieldStr(class FieldStr&& Field);

// Session-level metadata about the traced process. BuildTraceModel copies it
// verbatim from the session analyzer into TraceModel::Session.
struct SessionInfo
{
	std::string Platform;
	std::string AppName;
	std::string ProjectName;
	std::string CommandLine;
	std::string Branch;
	std::string BuildVersion;
	uint32_t	Changelist		  = 0;
	uint8_t		ConfigurationType = 0;
	bool		HasSession		  = false;	// presumably set once a session event has been seen -- TODO confirm
};

// One named thread. TraceModel::Threads is ordered by SortHint, and thread
// timelines take their Name and SortHint from the entry with a matching id.
struct ThreadInfoEntry
{
	uint32_t	ThreadId = 0;
	std::string Name;
	std::string GroupName;	// from $Trace.ThreadGroupBegin/End bracketing, or synthesized by stripping a numeric suffix from Name
	uint32_t	SystemId = 0;
	int32_t		SortHint = 0;
};

// One trace channel. TraceModel::Channels is ordered by Name.
struct ChannelInfo
{
	std::string Name;
	bool		Enabled	 = false;
	bool		ReadOnly = false;
};

// A DLL / shared library that was loaded (or seen already loaded) during the
// capture. Populated from the Diagnostics.Module{Init,Load,Unload} events
// which are all marked NoSync|Important, so they survive reconnects and our
// own trim filter. Load/unload timestamps aren't available because the events
// don't carry a Cycle field.
struct ModuleInfo
{
	std::string			   Name;			  // basename of FullPath
	std::string			   FullPath;		  // full path as reported by the engine
	uint64_t			   Base		= 0;	  // load address; [Base, Base + Size) is the range used to resolve callstack frames
	uint32_t			   Size		= 0;	  // size of the mapped image in bytes
	bool				   Unloaded = false;  // set when we see a matching ModuleUnload
	eastl::vector<uint8_t> ImageId;			  // PDB GUID + Age, opaque -- for later symbol lookup
};

// UE verbosity values mirror ELogVerbosity::Type. We expose the raw integer
// so the frontend can map it to a label / color.
struct LogCategoryInfo
{
	std::string Name;
	uint8_t		DefaultVerbosity = 0;
};

// A single log message. Note that the scalar members carry no default
// initializers, so whoever creates an entry must assign all of them.
struct LogEntry
{
	uint32_t	TimeUs;			// microseconds from the start of the trace
	uint32_t	CategoryIndex;	// index into TraceModel::LogCategories (or ~0u)
	uint8_t		Verbosity;
	int32_t		Line;
	std::string File;
	std::string Message;
};

// Point-in-time marker emitted via TRACE_BOOKMARK / UE_TRACE_BOOKMARK.
// Each entry's Text has already been formatted (FormatString + FormatArgs
// substituted) during parsing.
struct Bookmark
{
	uint32_t	TimeUs;	 // TraceModel::Bookmarks is ordered by this field
	int32_t		Line;
	std::string File;
	std::string Text;
};

// A named time range announced via Misc.RegionBegin / Misc.RegionEnd
// (or the newer *WithId variants). Depth is the lane index assigned by
// the analyzer's greedy overlap-avoidance pass.
struct RegionEntry
{
	uint32_t	BeginUs;
	uint32_t	EndUs;	// == TraceEndUs if still open at trace end
	uint16_t	Depth;	// lane within the owning category; regions that overlap in time get different lanes
	uint16_t	Reserved;
	std::string Name;
	std::string Category;  // grouping key; an empty string means uncategorized
};

// A group of regions sharing the same category label. Each category has its
// own lane namespace so depths are assigned independently.
struct RegionCategory
{
	// display name; empty categories get "Uncategorized"
	// NOTE(review): BuildRegionCategories stores the raw (possibly empty)
	// label here, so the substitution presumably happens in the consumer --
	// confirm.
	std::string				   Name;
	uint32_t				   LaneCount = 0;  // highest assigned Depth + 1, or 0 when there are no regions
	eastl::vector<RegionEntry> Regions;		   // sorted by BeginUs, Depth is per-category
};

// Aggregate statistics for one CPU scope name.
struct CpuScopeStat
{
	std::string Name;
	uint64_t	Count	 = 0;
	uint32_t	MinUs	 = 0;
	uint32_t	MaxUs	 = 0;
	double		MeanUs	 = 0.0;
	double		StdDevUs = 0.0;
};

// Single CPU scope interval captured by TimelineAnalyzer. Packed for size:
// timelines can easily contain millions of entries.
struct TimelineScope
{
	uint32_t BeginUs;	  // microseconds from the start of the trace
	uint32_t DurationUs;  // scope duration in microseconds
	uint32_t NameId;	  // index into TraceModel::ScopeNames
	uint16_t Depth;		  // call-stack depth (0 == outermost)
	uint16_t MergeCount;  // 0 = raw (LOD 0), N>0 = N scopes merged (LOD 1+)
};

// Pre-computed detail level for a thread timeline. Each level merges scopes
// shorter than ResolutionUs into "macro scopes" carrying the dominant name
// (the name of the longest contributing scope). The merge count is stored in
// TimelineScope::MergeCount.
struct TimelineDetailLevel
{
	uint32_t					 ResolutionUs = 0;
	eastl::vector<TimelineScope> Scopes;  // sorted by BeginUs
};

// LOD resolutions in microseconds (geometric spacing inspired by Unreal Insights).
// LOD 0 is the raw ThreadTimeline::Scopes; these are LOD 1-5.
inline constexpr uint32_t kTimelineLodResolutions[] = {100, 1000, 8000, 40000, 200000};
inline constexpr size_t	  kTimelineLodCount			= sizeof(kTimelineLodResolutions) / sizeof(kTimelineLodResolutions[0]);

// All CPU scopes recorded on one thread, plus the pre-computed coarser
// detail levels. Name and SortHint stay at their defaults when the trace
// carries no thread-name entry for ThreadId.
struct ThreadTimeline
{
	uint32_t					 ThreadId = 0;
	std::string					 Name;
	int32_t						 SortHint = 0;
	eastl::vector<TimelineScope> Scopes;  // LOD 0 -- full resolution, sorted by BeginUs

	TimelineDetailLevel DetailLevels[kTimelineLodCount];  // LOD 1-5
};

// Build pre-computed LOD levels for a ThreadTimeline whose Scopes vector is
// already sorted by BeginUs. Called from BuildTraceModel after populating the
// raw scopes.
// NOTE(review): the BuildTraceModel in trace_model.cpp builds LODs with one
// BuildSingleLod task per (thread, LOD) pair and does not call this function
// -- confirm whether this declaration still has callers.
void BuildTimelineLods(ThreadTimeline& Timeline);

// Complete in-memory view of a parsed .utrace file, produced by BuildTraceModel
// and consumed by the `zen trace serve` subcommand.
struct TraceModel
{
	std::filesystem::path FilePath;
	uint64_t			  FileSize	   = 0;
	uint64_t			  TotalEvents  = 0;
	uint64_t			  ParseTimeMs  = 0;
	uint32_t			  TraceStartUs = 0;	 // earliest scope begin, or 0 when the trace has no scopes
	uint32_t			  TraceEndUs   = 0;	 // latest scope end; also used to close regions still open at trace end

	SessionInfo					   Session;
	eastl::vector<ThreadInfoEntry> Threads;	  // sorted by SortHint
	eastl::vector<ChannelInfo>	   Channels;  // sorted by name
	eastl::vector<ModuleInfo>	   Modules;	  // sorted by Name

	eastl::vector<std::string>	  ScopeNames;  // referenced by TimelineScope::NameId
	eastl::vector<CpuScopeStat>	  ScopeStats;  // sorted by Count descending
	eastl::vector<ThreadTimeline> Timelines;   // one entry per thread that produced scopes

	eastl::vector<LogCategoryInfo> LogCategories;  // referenced by LogEntry::CategoryIndex
	eastl::vector<LogEntry>		   LogEntries;	   // sorted by TimeUs

	eastl::vector<Bookmark>		  Bookmarks;		 // sorted by TimeUs
	eastl::vector<RegionCategory> RegionCategories;	 // sorted: uncategorized first, then alpha

	// -- CsvProfiler --
	struct CsvCategory
	{
		int32_t		Index = 0;
		std::string Name;
	};

	struct CsvStatDef
	{
		uint64_t	StatId		  = 0;
		int32_t		CategoryIndex = 0;
		std::string Name;
	};

	struct CsvSample
	{
		uint32_t TimeUs;
		float	 Value;
	};

	// Time series for one stat on one thread.
	struct CsvSeries
	{
		uint64_t				 StatId	  = 0;
		uint32_t				 ThreadId = 0;
		eastl::vector<CsvSample> Samples;  // sorted by TimeUs
	};

	struct CsvEvent
	{
		uint32_t	TimeUs;
		int32_t		CategoryIndex;
		std::string Text;
	};

	struct CsvMeta
	{
		std::string Key;
		std::string Value;
	};

	eastl::vector<CsvCategory> CsvCategories;
	eastl::vector<CsvStatDef>  CsvStatDefs;
	eastl::vector<CsvSeries>   CsvTimeSeries;  // per stat+thread
	eastl::vector<CsvEvent>	   CsvEvents;	   // sorted by TimeUs
	eastl::vector<CsvMeta>	   CsvMetadata;

	// -- Event type counts (sorted by count descending) --
	struct EventTypeCount
	{
		std::string Name;
		uint64_t	Count = 0;
	};
	eastl::vector<EventTypeCount> EventTypeCounts;

	// -- Memory allocations --
	AllocationSummary					AllocSummary;
	eastl::vector<HeapInfo>				Heaps;				 // sorted by Id
	eastl::vector<TagInfo>				Tags;				 // sorted by Tag
	eastl::vector<MemoryTimelineSample> MemoryTimeline;		 // sorted by TimeUs
	eastl::vector<HeapStat>				HeapStats;			 // sorted by HeapId
	eastl::vector<CallstackEntry>		Callstacks;			 // sorted by Id
	eastl::vector<CallstackAllocStat>	CallstackStats;		 // sorted by LiveBytes desc
	eastl::vector<CallstackChurnStat>	ChurnStats;			 // sorted by TotalAllocs desc
	eastl::vector<AllocSizeBucket>		AllocSizeHistogram;	 // sorted by MinSize asc, populated buckets only
};

// Resolve and validate a .utrace file path. Throws OptionParseException when
// the path is empty and runtime_error when the file does not exist.
std::filesystem::path ResolveTraceFile(const std::filesystem::path& Input, cxxopts::Options& HelpOptions);

// Parse a .utrace file and print the event-schema inspect report to the console.
void RunInspect(const std::filesystem::path& FilePath);

// Progress callback invoked once per bundle during trace iteration.
+// Arguments: BytesProcessed (estimated), TotalFileBytes, EventsSoFar. +using ProgressCallback = std::function<void(uint64_t, uint64_t, uint64_t)>; + +// Parse a .utrace file into an in-memory TraceModel suitable for serving via +// the trace viewer. A single pass runs the session, CPU-stats and timeline +// analyzers. The optional progress callback is invoked once per bundle. +TraceModel BuildTraceModel(const std::filesystem::path& FilePath, WorkerThreadPool& ThreadPool, const ProgressCallback& OnProgress = {}); + +struct TraceTrimArgs +{ + std::filesystem::path InputPath; + std::filesystem::path OutputPath; + double StartSec = 0.0; + double EndSec = 0.0; +}; + +// Produce a trimmed .utrace file containing all type-definition and "important" +// packets from the input, plus any regular thread packets whose events overlap +// the [StartSec, EndSec] window. The output remains a valid .utrace that can be +// read by Unreal Insights and zen's own trace tooling. Trimming is coarse at +// the packet level: a packet that straddles the window boundary is kept in full. +void RunTraceTrim(const TraceTrimArgs& Args); + +} // namespace zen::trace_detail diff --git a/src/zen/trace/trace_viewer_service.cpp b/src/zen/trace/trace_viewer_service.cpp new file mode 100644 index 000000000..7d8301ae2 --- /dev/null +++ b/src/zen/trace/trace_viewer_service.cpp @@ -0,0 +1,1225 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "trace_viewer_service.h" + +#include "timeline_query.h" + +#include <zencore/compactbinarybuilder.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/iobuffer.h> +#include <zencore/logging.h> +#include <zencore/string.h> +#include <zenhttp/httpcommon.h> + +#include <algorithm> +#include <charconv> +#include <cstdio> +#include <cstdlib> +#include <string> +#include <type_traits> + +#if !defined(ZEN_EMBED_ZEN_HTML_ZIP) +# define ZEN_EMBED_ZEN_HTML_ZIP 0 +#endif + +#if ZEN_EMBED_ZEN_HTML_ZIP +static unsigned char gZenHtmlZipData[] = { +# include <zen-html.zip.h> +}; +#endif + +namespace zen { + +namespace { + + // Parse a uint32 query parameter; returns the fallback on error / absent. + // The entire string must be a valid base-10 unsigned integer. + uint32_t ParseUintParam(std::string_view Value, uint32_t Fallback) + { + if (Value.empty()) + { + return Fallback; + } + + uint32_t Number = 0; + auto [Ptr, Ec] = std::from_chars(Value.data(), Value.data() + Value.size(), Number, 10); + if (Ec != std::errc() || Ptr != Value.data() + Value.size()) + { + return Fallback; + } + + return Number; + } + + void WriteNotFound(HttpServerRequest& Request, std::string_view Message = "Not found") + { + Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, Message); + } + + struct CallstackSummaryInfo + { + std::string Summary; + std::string TopFrame; + std::string SecondaryFrame; + std::string GroupKey; + uint32_t HiddenPrefixCount = 0; + bool IncludedThirdPartyBoundary = false; + }; + + CallstackSummaryInfo BuildCallstackSummary(const trace_detail::FilteredCallstackView& View) + { + CallstackSummaryInfo Result; + Result.HiddenPrefixCount = View.HiddenPrefixCount; + Result.IncludedThirdPartyBoundary = View.IncludedThirdPartyBoundary; + if (View.Frames.empty()) + { + Result.Summary = "No frames"; + Result.GroupKey = "No frames"; + return Result; + } + + Result.TopFrame = View.Frames[0].Display; + Result.GroupKey = 
View.Frames[0].Display; + if (View.Frames.size() > 1) + { + Result.SecondaryFrame = View.Frames[1].Display; + Result.Summary = fmt::format("{} \xE2\x86\x90 {}", Result.TopFrame, Result.SecondaryFrame); + Result.GroupKey = fmt::format("{} | {}", Result.TopFrame, Result.SecondaryFrame); + } + else + { + Result.Summary = Result.TopFrame; + } + return Result; + } + + // Append a base-10 unsigned integer to a string builder via std::to_chars. + // Reserves the worst-case digit count up front, writes directly into the + // builder's buffer, then trims the unused suffix. About 5–10× faster than + // going through StringBuilder::operator<<(uint32_t), which routes integer + // formatting through snprintf via IntNum. + template<typename T> + inline void AppendUintFast(StringBuilderBase& Sb, T Value) + { + static_assert(std::is_unsigned_v<T> && std::is_integral_v<T>); + // digits10 is the largest K such that 10^K fits — the longest + // printable representation is digits10 + 1 digits. +1 more for safety. + constexpr size_t MaxDigits = std::numeric_limits<T>::digits10 + 2; + + const size_t Off = Sb.AddUninitialized(MaxDigits); + char* const Begin = Sb.Data() + Off; + const auto Result = std::to_chars(Begin, Begin + MaxDigits, Value); + const size_t Written = size_t(Result.ptr - Begin); + Sb.RemoveSuffix(uint32_t(MaxDigits - Written)); + } + + // Render a span of TimelineScopeView records directly into a string + // builder using the wire format consumed by the trace viewer front-end: + // [[beginUs, durationUs, nameId, depth, mergeCount?], ...] + // The trailing mergeCount element is only emitted for LOD-merged scopes. + // Output is compact (no whitespace) — the viewer parses both forms but + // dropping the spaces shaves ~10% off the response size. 
+ void AppendScopesJsonArray(StringBuilderBase& Sb, const trace_detail::TimelineScopeView* Scopes, size_t Count) + { + Sb << '['; + for (size_t I = 0; I < Count; ++I) + { + const trace_detail::TimelineScopeView& S = Scopes[I]; + if (I > 0) + { + Sb << ','; + } + Sb << '['; + AppendUintFast(Sb, S.BeginUs); + Sb << ','; + AppendUintFast(Sb, S.DurationUs); + Sb << ','; + AppendUintFast(Sb, S.NameId); + Sb << ','; + AppendUintFast(Sb, S.Depth); + if (S.MergeCount > 1) + { + Sb << ','; + AppendUintFast(Sb, S.MergeCount); + } + Sb << ']'; + } + Sb << ']'; + } + +} // namespace + +////////////////////////////////////////////////////////////////////////////// + +TraceViewerService::TraceViewerService(const trace_detail::TraceModel& Model, + std::unique_ptr<trace_detail::SymbolResolver> Symbols, + std::filesystem::path DevHtmlDir) +: m_Model(Model) +, m_DevHtmlDir(std::move(DevHtmlDir)) +, m_Symbols(std::move(Symbols)) +, m_CallstackFormatter(m_Model, m_Symbols.get()) +{ +#if ZEN_EMBED_ZEN_HTML_ZIP + IoBuffer ZipBuffer(IoBuffer::Wrap, gZenHtmlZipData, sizeof(gZenHtmlZipData) - 1); + m_ZipFs = std::make_unique<ZipFs>(std::move(ZipBuffer)); +#endif + + m_TimelineQuery = trace_detail::MakeInMemoryTimelineQuery(m_Model); + + if (m_DevHtmlDir.empty()) + { + // Probe for development layout: walk up from the running executable + // until we find a directory named xmake.lua, then look for the html + // tree under src/zen/frontend/html. 
+ std::filesystem::path Path = GetRunningExecutablePath(); + std::error_code Ec; + while (Path.has_parent_path()) + { + std::filesystem::path Parent = Path.parent_path(); + if (Parent == Path) + { + break; + } + if (IsFile(Parent / "xmake.lua", Ec)) + { + std::filesystem::path Candidate = Parent / "src" / "zen" / "frontend" / "html"; + if (IsDir(Candidate, Ec)) + { + m_DevHtmlDir = Candidate; + } + break; + } + Path = Parent; + } + } + + if (m_ZipFs) + { + ZEN_INFO("trace viewer front-end is served from embedded zip"); + } + else if (!m_DevHtmlDir.empty()) + { + ZEN_INFO("trace viewer front-end is served from '{}'", m_DevHtmlDir); + } + else + { + ZEN_WARN("trace viewer front-end is NOT AVAILABLE — only /api/* endpoints will respond"); + } +} + +TraceViewerService::~TraceViewerService() = default; + +const char* +TraceViewerService::BaseUri() const +{ + // Mounted at a sub-path so we don't collide with the http.sys server's + // own root handler on Windows. + return "/trace/"; +} + +void +TraceViewerService::HandleRequest(HttpServerRequest& Request) +{ + using namespace std::literals; + + std::string_view Uri = Request.RelativeUriWithExtension(); + for (; !Uri.empty() && Uri[0] == '/'; Uri = Uri.substr(1)) + { + } + + if (Uri.starts_with("api/"sv)) + { + HandleApiRequest(Request, Uri.substr(4)); + return; + } + + HandleStaticAsset(Request, Uri); +} + +////////////////////////////////////////////////////////////////////////////// +// Static asset handling + +void +TraceViewerService::HandleStaticAsset(HttpServerRequest& Request, std::string_view Uri) +{ + using namespace std::literals; + + ExtendableStringBuilder<256> UriBuilder; + if (Uri.empty()) + { + Uri = "index.html"sv; + } + else if (Uri.back() == '/') + { + UriBuilder << Uri << "index.html"sv; + Uri = UriBuilder; + } + + // Path traversal guard: reject parent refs, Windows-style separators, and absolute + // paths. 
`std::filesystem::path::operator/=` replaces the base when the RHS is + // absolute, so without this check a URI like `C:/Windows/...` would escape m_DevHtmlDir. + if (Uri.find("..") != Uri.npos || Uri.find('\\') != Uri.npos || std::filesystem::path(Uri).is_absolute()) + { + Request.WriteResponse(HttpResponseCode::Forbidden); + return; + } + + HttpContentType ContentType = HttpContentType::kUnknownContentType; + if (const size_t DotIndex = Uri.rfind("."); DotIndex != Uri.npos) + { + const std::string_view DotExt = Uri.substr(DotIndex + 1); + ContentType = ParseContentType(DotExt); + if (ContentType == HttpContentType::kUnknownContentType) + { + if (DotExt == "txt"sv || DotExt == "md"sv) + { + ContentType = HttpContentType::kText; + } + } + } + + if (ContentType == HttpContentType::kUnknownContentType) + { + Request.WriteResponse(HttpResponseCode::Forbidden); + return; + } + + // Dev mode: serve from disk first so HTML/JS edits show up without a rebuild + if (!m_DevHtmlDir.empty()) + { + std::filesystem::path FullPath = m_DevHtmlDir / std::filesystem::path(Uri).make_preferred(); + FileContents File = ReadFile(FullPath); + if (!File.ErrorCode) + { + Request.WriteResponse(HttpResponseCode::OK, ContentType, File.Data[0]); + return; + } + } + + // Fallback: embedded zip + if (m_ZipFs) + { + if (IoBuffer File = m_ZipFs->GetFile(Uri)) + { + Request.WriteResponse(HttpResponseCode::OK, ContentType, File); + return; + } + } + + WriteNotFound(Request); +} + +////////////////////////////////////////////////////////////////////////////// +// REST endpoints + +void +TraceViewerService::HandleApiRequest(HttpServerRequest& Request, std::string_view Path) +{ + using namespace std::literals; + + if (Path == "session"sv) + { + HandleSessionApi(Request); + } + else if (Path == "threads"sv) + { + HandleThreadsApi(Request); + } + else if (Path == "channels"sv) + { + HandleChannelsApi(Request); + } + else if (Path == "scope-stats"sv) + { + HandleScopeStatsApi(Request); + } + else if 
(Path == "scope-names"sv) + { + HandleScopeNamesApi(Request); + } + else if (Path == "timeline"sv) + { + HandleTimelineApi(Request); + } + else if (Path == "timeline-batch"sv) + { + HandleTimelineBatchApi(Request); + } + else if (Path == "log-categories"sv) + { + HandleLogCategoriesApi(Request); + } + else if (Path == "logs"sv) + { + HandleLogsApi(Request); + } + else if (Path == "bookmarks"sv) + { + HandleBookmarksApi(Request); + } + else if (Path == "regions"sv) + { + HandleRegionsApi(Request); + } + else if (Path == "csv-categories"sv) + { + HandleCsvCategoriesApi(Request); + } + else if (Path == "csv-stats"sv) + { + HandleCsvStatsApi(Request); + } + else if (Path == "csv-series"sv) + { + HandleCsvSeriesApi(Request); + } + else if (Path == "csv-events"sv) + { + HandleCsvEventsApi(Request); + } + else if (Path == "csv-metadata"sv) + { + HandleCsvMetadataApi(Request); + } + else if (Path == "alloc-summary"sv) + { + HandleAllocSummaryApi(Request); + } + else if (Path == "heaps"sv) + { + HandleHeapsApi(Request); + } + else if (Path == "alloc-tags"sv) + { + HandleAllocTagsApi(Request); + } + else if (Path == "memory-timeline"sv) + { + HandleMemoryTimelineApi(Request); + } + else if (Path == "heap-stats"sv) + { + HandleHeapStatsApi(Request); + } + else if (Path == "callstacks"sv) + { + HandleCallstacksApi(Request); + } + else if (Path == "callstack-stats"sv) + { + HandleCallstackStatsApi(Request); + } + else if (Path == "churn-stats"sv) + { + HandleChurnStatsApi(Request); + } + else if (Path == "alloc-size-histogram"sv) + { + HandleAllocSizeHistogramApi(Request); + } + else + { + WriteNotFound(Request, "Unknown API endpoint"); + } +} + +void +TraceViewerService::HandleSessionApi(HttpServerRequest& Request) +{ + const trace_detail::SessionInfo& Session = m_Model.Session; + + CbObjectWriter Obj; + Obj << "file_path" << m_Model.FilePath.string(); + Obj << "file_size" << m_Model.FileSize; + Obj << "total_events" << m_Model.TotalEvents; + Obj << "parse_time_ms" << 
m_Model.ParseTimeMs; + Obj << "trace_start_us" << m_Model.TraceStartUs; + Obj << "trace_end_us" << m_Model.TraceEndUs; + Obj << "has_session" << Session.HasSession; + Obj << "platform" << Session.Platform; + Obj << "app_name" << Session.AppName; + Obj << "project_name" << Session.ProjectName; + Obj << "command_line" << Session.CommandLine; + Obj << "branch" << Session.Branch; + Obj << "build_version" << Session.BuildVersion; + Obj << "changelist" << Session.Changelist; + Obj << "has_memory_data" << m_Model.AllocSummary.HasMemoryData; + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleThreadsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::ThreadInfoEntry& Thread : m_Model.Threads) + { + Writer.BeginObject(); + Writer << "thread_id" << Thread.ThreadId; + Writer << "name" << Thread.Name; + Writer << "group" << Thread.GroupName; + Writer << "system_id" << Thread.SystemId; + Writer << "sort_hint" << Thread.SortHint; + // Lane threads use synthetic IDs starting at 2048 (see lane_trace.inl). + // SystemId==0 alone is insufficient — the main/trace threads also lack + // a system ID in some traces. + Writer << "is_lane" << (Thread.SystemId == 0 && Thread.ThreadId >= 2048); + + // Per-thread timeline summary: whether we captured scopes and their span. 
+ auto It = std::find_if(m_Model.Timelines.begin(), + m_Model.Timelines.end(), + [Tid = Thread.ThreadId](const trace_detail::ThreadTimeline& T) { return T.ThreadId == Tid; }); + if (It != m_Model.Timelines.end()) + { + Writer << "scope_count" << uint64_t(It->Scopes.size()); + } + else + { + Writer << "scope_count" << uint64_t(0); + } + + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleChannelsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::ChannelInfo& Channel : m_Model.Channels) + { + Writer.BeginObject(); + Writer << "name" << Channel.Name; + Writer << "enabled" << Channel.Enabled; + Writer << "readonly" << Channel.ReadOnly; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleScopeStatsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::CpuScopeStat& Stat : m_Model.ScopeStats) + { + Writer.BeginObject(); + Writer << "name" << Stat.Name; + Writer << "count" << Stat.Count; + Writer << "min_us" << Stat.MinUs; + Writer << "max_us" << Stat.MaxUs; + Writer.AddFloat("mean_us", Stat.MeanUs); + Writer.AddFloat("stdev_us", Stat.StdDevUs); + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleScopeNamesApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const std::string& Name : m_Model.ScopeNames) + { + Writer.AddString(Name); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleTimelineApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + + std::string_view ThreadStr = 
Params.GetValue("thread"); + if (ThreadStr.empty()) + { + Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "thread parameter required"); + return; + } + + uint32_t ThreadId = ParseUintParam(ThreadStr, ~uint32_t(0)); + + trace_detail::TimelineQueryRequest Req; + Req.StartUs = ParseUintParam(Params.GetValue("start"), 0u); + Req.EndUs = ParseUintParam(Params.GetValue("end"), ~uint32_t(0)); + Req.MinDurUs = ParseUintParam(Params.GetValue("mindur"), 0u); + Req.ResolutionUs = ParseUintParam(Params.GetValue("resolution"), 0u); + + std::vector<trace_detail::TimelineScopeView> Scopes; + m_TimelineQuery->QueryThread(ThreadId, Req, Scopes); + + // Direct string formatting for the timeline wire format — the compact + // [[beginUs,durationUs,nameId,depth,mergeCount?],...] arrays are faster + // to serialize directly than via CbWriter. The 64 KB inline buffer + // covers small viewport queries without heap traffic. + ExtendableStringBuilder<65536> Sb; + Sb << R"({"thread_id":)"; + AppendUintFast(Sb, ThreadId); + Sb << R"(,"scopes":)"; + AppendScopesJsonArray(Sb, Scopes.data(), Scopes.size()); + Sb << '}'; + Request.WriteResponse(HttpResponseCode::OK, HttpContentType::kJSON, Sb.ToView()); +} + +void +TraceViewerService::HandleTimelineBatchApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + + std::string_view ThreadsStr = Params.GetValue("threads"); + if (ThreadsStr.empty()) + { + Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "threads parameter required"); + return; + } + + trace_detail::TimelineQueryRequest Req; + Req.StartUs = ParseUintParam(Params.GetValue("start"), 0u); + Req.EndUs = ParseUintParam(Params.GetValue("end"), ~uint32_t(0)); + Req.MinDurUs = ParseUintParam(Params.GetValue("mindur"), 0u); + Req.ResolutionUs = ParseUintParam(Params.GetValue("resolution"), 0u); + + // Parse comma-separated thread IDs. 
Tokens with invalid IDs (~0u) are + // skipped — same behaviour as the previous handler. + std::vector<uint32_t> ThreadIds; + { + std::string_view Remaining = ThreadsStr; + while (!Remaining.empty()) + { + size_t Comma = Remaining.find(','); + std::string_view Token = (Comma != std::string_view::npos) ? Remaining.substr(0, Comma) : Remaining; + Remaining = (Comma != std::string_view::npos) ? Remaining.substr(Comma + 1) : std::string_view{}; + + uint32_t ThreadId = ParseUintParam(Token, ~uint32_t(0)); + if (ThreadId == ~uint32_t(0)) + { + continue; + } + ThreadIds.push_back(ThreadId); + } + } + + trace_detail::TimelineQuery::BatchResult Batch; + m_TimelineQuery->QueryBatch(ThreadIds, Req, Batch); + + // Multi-chunk response: one IoBuffer per thread plus the surrounding + // "{" / "}" braces. Avoids materialising the entire JSON in a single + // contiguous allocation. The transport gathers the chunks at write time. + static constexpr char kOpenBrace[] = "{"; + static constexpr char kCloseBrace[] = "}"; + + std::vector<IoBuffer> Chunks; + Chunks.reserve(2 + ThreadIds.size()); + Chunks.emplace_back(IoBuffer::Wrap, kOpenBrace, 1); + + for (size_t I = 0; I < ThreadIds.size(); ++I) + { + const trace_detail::TimelineQuery::BatchResult::Range R = Batch.Ranges[I]; + + // Per-thread chunk: optional leading comma, "<threadId>":{"scopes":[...]} + // 32 KB inline covers most threads at typical viewport zoom levels. + ExtendableStringBuilder<32768> Sb; + if (I > 0) + { + Sb << ','; + } + Sb << '"'; + AppendUintFast(Sb, ThreadIds[I]); + Sb << R"(":{"scopes":)"; + AppendScopesJsonArray(Sb, Batch.Scopes.data() + R.Begin, R.End - R.Begin); + Sb << '}'; + + // Clone into an IoBuffer so the chunk owns its bytes — the builder + // dies at the end of this iteration. 
+ const std::string_view View = Sb.ToView(); + Chunks.emplace_back(IoBuffer::Clone, View.data(), View.size()); + } + + Chunks.emplace_back(IoBuffer::Wrap, kCloseBrace, 1); + + Request.WriteResponse(HttpResponseCode::OK, HttpContentType::kJSON, std::span<IoBuffer>{Chunks}); +} + +void +TraceViewerService::HandleLogCategoriesApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::LogCategoryInfo& Cat : m_Model.LogCategories) + { + Writer.BeginObject(); + Writer << "name" << Cat.Name; + Writer << "default_verbosity" << uint32_t(Cat.DefaultVerbosity); + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleLogsApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + + uint32_t StartUs = ParseUintParam(Params.GetValue("start"), 0u); + uint32_t EndUs = ParseUintParam(Params.GetValue("end"), ~uint32_t(0)); + uint32_t MinVerb = ParseUintParam(Params.GetValue("min_verbosity"), 0u); + uint32_t CategoryId = ParseUintParam(Params.GetValue("category"), ~uint32_t(0)); + uint32_t Limit = ParseUintParam(Params.GetValue("limit"), 5000u); + + // Binary-search lower bound by TimeUs. + const eastl::vector<trace_detail::LogEntry>& Entries = m_Model.LogEntries; + auto FirstIt = + std::lower_bound(Entries.begin(), Entries.end(), StartUs, [](const trace_detail::LogEntry& E, uint32_t V) { return E.TimeUs < V; }); + + CbObjectWriter Obj; + Obj << "total" << uint64_t(Entries.size()); + + uint32_t Emitted = 0; + Obj.BeginArray("entries"); + for (auto It = FirstIt; It != Entries.end() && Emitted < Limit; ++It) + { + if (It->TimeUs > EndUs) + { + break; + } + if (MinVerb != 0 && It->Verbosity > MinVerb) + { + // Lower verbosity value = higher severity in UE's ELogVerbosity. + // Skip entries less severe than the requested floor. 
+ continue; + } + if (CategoryId != ~uint32_t(0) && It->CategoryIndex != CategoryId) + { + continue; + } + + Obj.BeginObject(); + Obj << "time_us" << It->TimeUs; + Obj << "category_index" << It->CategoryIndex; + Obj << "verbosity" << uint32_t(It->Verbosity); + Obj << "line" << It->Line; + Obj << "file" << It->File; + Obj << "message" << It->Message; + Obj.EndObject(); + ++Emitted; + } + Obj.EndArray(); + + Obj << "returned" << Emitted; + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleBookmarksApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::Bookmark& B : m_Model.Bookmarks) + { + Writer.BeginObject(); + Writer << "time_us" << B.TimeUs; + Writer << "line" << B.Line; + Writer << "file" << B.File; + Writer << "text" << B.Text; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleRegionsApi(HttpServerRequest& Request) +{ + CbObjectWriter Obj; + Obj.BeginArray("categories"); + for (const trace_detail::RegionCategory& Cat : m_Model.RegionCategories) + { + Obj.BeginObject(); + Obj << "name" << std::string_view(Cat.Name.empty() ? 
"Uncategorized" : Cat.Name); + Obj << "lane_count" << Cat.LaneCount; + Obj.BeginArray("regions"); + for (const trace_detail::RegionEntry& R : Cat.Regions) + { + Obj.BeginObject(); + Obj << "begin_us" << R.BeginUs; + Obj << "end_us" << R.EndUs; + Obj << "depth" << uint32_t(R.Depth); + Obj << "name" << R.Name; + Obj.EndObject(); + } + Obj.EndArray(); + Obj.EndObject(); + } + Obj.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleCsvCategoriesApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const auto& Cat : m_Model.CsvCategories) + { + Writer.BeginObject(); + Writer << "index" << Cat.Index; + Writer << "name" << Cat.Name; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleCsvStatsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const auto& Def : m_Model.CsvStatDefs) + { + Writer.BeginObject(); + Writer << "stat_id" << Def.StatId; + Writer << "category_index" << Def.CategoryIndex; + Writer << "name" << Def.Name; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleCsvSeriesApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + + // Accept either a single series index or iterate all for the requested stat+thread. + std::string_view StatStr = Params.GetValue("stat"); + std::string_view ThreadStr = Params.GetValue("thread"); + + uint64_t StatId = StatStr.empty() ? 0 : ParseUintParam(StatStr, 0); + uint32_t ThreadId = ThreadStr.empty() ? 
~uint32_t(0) : ParseUintParam(ThreadStr, ~uint32_t(0)); + + CbWriter Writer; + Writer.BeginArray(); + for (const auto& S : m_Model.CsvTimeSeries) + { + if (StatId != 0 && S.StatId != StatId) + { + continue; + } + if (ThreadId != ~uint32_t(0) && S.ThreadId != ThreadId) + { + continue; + } + Writer.BeginObject(); + Writer << "stat_id" << S.StatId; + Writer << "thread_id" << S.ThreadId; + Writer.BeginArray("samples"); + for (const auto& Sample : S.Samples) + { + Writer.BeginArray(); + Writer.AddInteger(uint32_t(Sample.TimeUs)); + Writer.AddFloat(double(Sample.Value)); + Writer.EndArray(); + } + Writer.EndArray(); + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleCsvEventsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const auto& E : m_Model.CsvEvents) + { + Writer.BeginObject(); + Writer << "time_us" << E.TimeUs; + Writer << "category_index" << E.CategoryIndex; + Writer << "text" << E.Text; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleCsvMetadataApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const auto& M : m_Model.CsvMetadata) + { + Writer.BeginObject(); + Writer << "key" << M.Key; + Writer << "value" << M.Value; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +////////////////////////////////////////////////////////////////////////////// +// Memory allocation endpoints + +void +TraceViewerService::HandleAllocSummaryApi(HttpServerRequest& Request) +{ + const trace_detail::AllocationSummary& S = m_Model.AllocSummary; + + CbObjectWriter Obj; + Obj << "has_memory_data" << S.HasMemoryData; + Obj << "total_allocs" << S.TotalAllocs; + Obj << "total_frees" << S.TotalFrees; + Obj << "total_realloc_allocs" 
<< S.TotalReallocAllocs; + Obj << "total_realloc_frees" << S.TotalReallocFrees; + Obj << "peak_bytes" << S.PeakBytes; + Obj << "peak_time_us" << S.PeakTimeUs; + Obj << "end_bytes" << S.EndBytes; + Obj << "live_allocations" << S.LiveAllocations; + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleHeapsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::HeapInfo& H : m_Model.Heaps) + { + Writer.BeginObject(); + Writer << "id" << H.Id; + Writer << "parent_id" << H.ParentId; + Writer << "flags" << H.Flags; + Writer << "name" << H.Name; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleAllocTagsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::TagInfo& T : m_Model.Tags) + { + Writer.BeginObject(); + Writer << "tag" << T.Tag; + Writer << "parent" << T.Parent; + Writer << "display" << T.Display; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleMemoryTimelineApi(HttpServerRequest& Request) +{ + const auto& Timeline = m_Model.MemoryTimeline; + + // Parse optional query parameters for range filtering and downsampling. + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + uint32_t StartUs = ParseUintParam(Params.GetValue("start"), 0); + uint32_t EndUs = ParseUintParam(Params.GetValue("end"), ~uint32_t(0)); + uint32_t MaxSamples = ParseUintParam(Params.GetValue("max_samples"), 2000); + if (MaxSamples == 0) + { + MaxSamples = 2000; + } + + // Binary-search for the start offset. 
+ size_t Begin = 0; + { + size_t Lo = 0; + size_t Hi = Timeline.size(); + while (Lo < Hi) + { + size_t Mid = Lo + (Hi - Lo) / 2; + if (Timeline[Mid].TimeUs < StartUs) + { + Lo = Mid + 1; + } + else + { + Hi = Mid; + } + } + Begin = Lo; + } + + // Find end offset. + size_t End = Timeline.size(); + { + size_t Lo = Begin; + size_t Hi = Timeline.size(); + while (Lo < Hi) + { + size_t Mid = Lo + (Hi - Lo) / 2; + if (Timeline[Mid].TimeUs <= EndUs) + { + Lo = Mid + 1; + } + else + { + Hi = Mid; + } + } + End = Lo; + } + + size_t Count = (End > Begin) ? (End - Begin) : 0; + size_t Stride = (Count > MaxSamples) ? (Count / MaxSamples) : 1; + + CbObjectWriter Obj; + + uint64_t SampleCount = 0; + Obj.BeginArray("samples"); + for (size_t I = Begin; I < End; I += Stride) + { + const trace_detail::MemoryTimelineSample& S = Timeline[I]; + Obj.BeginArray(); + Obj.AddInteger(S.TimeUs); + Obj.AddInteger(S.TotalAllocatedBytes); + Obj.AddInteger(S.SystemBytes); + Obj.AddInteger(S.VideoBytes); + Obj.EndArray(); + ++SampleCount; + } + Obj.EndArray(); + + Obj << "sample_count" << SampleCount; + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleHeapStatsApi(HttpServerRequest& Request) +{ + CbWriter Writer; + Writer.BeginArray(); + for (const trace_detail::HeapStat& S : m_Model.HeapStats) + { + Writer.BeginObject(); + Writer << "heap_id" << S.HeapId; + Writer << "current_bytes" << S.CurrentBytes; + Writer << "peak_bytes" << S.PeakBytes; + Writer << "alloc_count" << S.AllocCount; + Writer << "free_count" << S.FreeCount; + Writer.EndObject(); + } + Writer.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Writer.Save().AsArray()); +} + +void +TraceViewerService::HandleCallstacksApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + uint32_t Id = ParseUintParam(Params.GetValue("id"), 0); + + if (Id == 0) + { + WriteNotFound(Request, "Missing or invalid 'id' parameter"); + return; + } + + 
const trace_detail::CallstackEntry* Entry = m_CallstackFormatter.FindCallstackEntry(Id); + if (Entry == nullptr) + { + WriteNotFound(Request, "Callstack not found"); + return; + } + + trace_detail::FilteredCallstackView Filtered = m_CallstackFormatter.BuildView(*Entry, m_CallstackFilterOptions); + + CbObjectWriter Obj; + CallstackSummaryInfo Summary = BuildCallstackSummary(Filtered); + Obj << "id" << Entry->Id; + Obj << "summary" << Summary.Summary; + Obj << "top_frame" << Summary.TopFrame; + Obj << "secondary_frame" << Summary.SecondaryFrame; + Obj << "group_key" << Summary.GroupKey; + Obj << "hidden_prefix_count" << Filtered.HiddenPrefixCount; + Obj << "included_third_party_boundary" << Filtered.IncludedThirdPartyBoundary; + Obj.BeginArray("frames"); + for (const trace_detail::FilteredCallstackFrame& FrameView : Filtered.Frames) + { + const trace_detail::ResolvedFrame& F = *FrameView.Frame; + Obj.BeginObject(); + Obj << "index" << uint64_t(FrameView.OriginalIndex); + Obj.AddString("address", fmt::format("0x{:X}", F.Address)); + Obj << "display" << FrameView.Display; + if (F.ModuleIndex != ~0u && F.ModuleIndex < m_Model.Modules.size()) + { + const trace_detail::ModuleInfo& Module = m_Model.Modules[F.ModuleIndex]; + Obj << "module" << std::string_view(Module.Name); + Obj << "module_path" << std::string_view(Module.FullPath); + Obj.AddString("offset", fmt::format("0x{:X}", F.Offset)); + } + Obj.EndObject(); + } + Obj.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleCallstackStatsApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + uint32_t Limit = ParseUintParam(Params.GetValue("limit"), 100); + if (Limit == 0) + { + Limit = 100; + } + + size_t Count = std::min(size_t(Limit), m_Model.CallstackStats.size()); + + CbObjectWriter Obj; + Obj << "total_unique_callstacks" << uint64_t(m_Model.Callstacks.size()); + Obj.BeginArray("stats"); + for (size_t I = 0; I 
< Count; ++I) + { + const trace_detail::CallstackAllocStat& S = m_Model.CallstackStats[I]; + Obj.BeginObject(); + Obj << "callstack_id" << S.CallstackId; + Obj << "live_bytes" << S.LiveBytes; + Obj << "live_count" << S.LiveCount; + if (const trace_detail::CallstackEntry* Entry = m_CallstackFormatter.FindCallstackEntry(S.CallstackId)) + { + CallstackSummaryInfo Summary = BuildCallstackSummary(m_CallstackFormatter.BuildView(*Entry, m_CallstackFilterOptions)); + Obj << "summary" << Summary.Summary; + Obj << "top_frame" << Summary.TopFrame; + Obj << "secondary_frame" << Summary.SecondaryFrame; + Obj << "group_key" << Summary.GroupKey; + Obj << "hidden_prefix_count" << Summary.HiddenPrefixCount; + Obj << "included_third_party_boundary" << Summary.IncludedThirdPartyBoundary; + } + Obj.EndObject(); + } + Obj.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleChurnStatsApi(HttpServerRequest& Request) +{ + HttpServerRequest::QueryParams Params = Request.GetQueryParams(); + uint32_t Limit = ParseUintParam(Params.GetValue("limit"), 100); + if (Limit == 0) + { + Limit = 100; + } + + size_t Count = std::min(size_t(Limit), m_Model.ChurnStats.size()); + + CbObjectWriter Obj; + Obj << "total_unique_callstacks" << uint64_t(m_Model.Callstacks.size()); + Obj.BeginArray("stats"); + for (size_t I = 0; I < Count; ++I) + { + const trace_detail::CallstackChurnStat& S = m_Model.ChurnStats[I]; + Obj.BeginObject(); + Obj << "callstack_id" << S.CallstackId; + Obj << "churn_allocs" << S.ChurnAllocs; + Obj << "churn_bytes" << S.ChurnBytes; + Obj << "total_allocs" << S.TotalAllocs; + Obj << "total_bytes" << S.TotalBytes; + Obj.AddFloat("mean_distance", S.MeanDistance); + if (const trace_detail::CallstackEntry* Entry = m_CallstackFormatter.FindCallstackEntry(S.CallstackId)) + { + CallstackSummaryInfo Summary = BuildCallstackSummary(m_CallstackFormatter.BuildView(*Entry, m_CallstackFilterOptions)); + Obj << "summary" << Summary.Summary; + 
Obj << "top_frame" << Summary.TopFrame; + Obj << "secondary_frame" << Summary.SecondaryFrame; + Obj << "group_key" << Summary.GroupKey; + Obj << "hidden_prefix_count" << Summary.HiddenPrefixCount; + Obj << "included_third_party_boundary" << Summary.IncludedThirdPartyBoundary; + } + Obj.EndObject(); + } + Obj.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +TraceViewerService::HandleAllocSizeHistogramApi(HttpServerRequest& Request) +{ + const auto& Buckets = m_Model.AllocSizeHistogram; + + uint64_t TotalCount = 0; + uint64_t TotalBytes = 0; + uint64_t MaxCount = 0; + uint64_t MaxBytes = 0; + for (const trace_detail::AllocSizeBucket& B : Buckets) + { + TotalCount += B.Count; + TotalBytes += B.Bytes; + if (B.Count > MaxCount) + { + MaxCount = B.Count; + } + if (B.Bytes > MaxBytes) + { + MaxBytes = B.Bytes; + } + } + + CbObjectWriter Obj; + Obj << "total_count" << TotalCount; + Obj << "total_bytes" << TotalBytes; + Obj << "max_count" << MaxCount; + Obj << "max_bytes" << MaxBytes; + Obj.BeginArray("buckets"); + for (const trace_detail::AllocSizeBucket& B : Buckets) + { + Obj.BeginObject(); + Obj << "min_size" << B.MinSize; + Obj << "max_size" << B.MaxSize; + Obj << "count" << B.Count; + Obj << "bytes" << B.Bytes; + Obj.EndObject(); + } + Obj.EndArray(); + + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +} // namespace zen diff --git a/src/zen/trace/trace_viewer_service.h b/src/zen/trace/trace_viewer_service.h new file mode 100644 index 000000000..f7bc51499 --- /dev/null +++ b/src/zen/trace/trace_viewer_service.h @@ -0,0 +1,71 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "callstack_formatter.h" +#include "timeline_query.h" +#include "trace_model.h" + +#include <zenhttp/httpserver.h> +#include <zenhttp/zipfs.h> + +#include <filesystem> +#include <memory> + +namespace zen { + +// HttpService that serves an interactive flame-graph viewer for a parsed +// TraceModel. 
// Mounts at the server root; URIs beginning with "api/" return
// JSON describing the model, everything else is resolved to a static asset
// (first from the optional dev-mode directory, then from the embedded zip).
//
// The model is held by const reference (m_Model), so the service does not own it.
class TraceViewerService final : public HttpService
{
public:
    // Model      - parsed trace model to serve; stored by reference.
    // Symbols    - optional symbol resolver; ownership is transferred to the service.
    // DevHtmlDir - optional directory for static assets; empty by default.
    TraceViewerService(const trace_detail::TraceModel&               Model,
                       std::unique_ptr<trace_detail::SymbolResolver> Symbols    = {},
                       std::filesystem::path                         DevHtmlDir = {});
    ~TraceViewerService() override;

    // HttpService interface.
    [[nodiscard]] const char* BaseUri() const override;
    void                      HandleRequest(HttpServerRequest& Request) override;

private:
    // Top-level dispatch targets: static files vs. "api/" endpoints.
    void HandleStaticAsset(HttpServerRequest& Request, std::string_view Uri);
    void HandleApiRequest(HttpServerRequest& Request, std::string_view Path);

    // One handler per API endpoint. Each takes the request and writes its own response.
    void HandleSessionApi(HttpServerRequest& Request);
    void HandleThreadsApi(HttpServerRequest& Request);
    void HandleChannelsApi(HttpServerRequest& Request);
    void HandleScopeStatsApi(HttpServerRequest& Request);
    void HandleScopeNamesApi(HttpServerRequest& Request);
    void HandleTimelineApi(HttpServerRequest& Request);
    void HandleTimelineBatchApi(HttpServerRequest& Request);
    void HandleLogCategoriesApi(HttpServerRequest& Request);
    void HandleLogsApi(HttpServerRequest& Request);
    void HandleBookmarksApi(HttpServerRequest& Request);
    void HandleRegionsApi(HttpServerRequest& Request);
    void HandleCsvCategoriesApi(HttpServerRequest& Request);
    void HandleCsvStatsApi(HttpServerRequest& Request);
    void HandleCsvSeriesApi(HttpServerRequest& Request);
    void HandleCsvEventsApi(HttpServerRequest& Request);
    void HandleCsvMetadataApi(HttpServerRequest& Request);
    // Memory / allocation endpoints.
    void HandleAllocSummaryApi(HttpServerRequest& Request);
    void HandleHeapsApi(HttpServerRequest& Request);
    void HandleAllocTagsApi(HttpServerRequest& Request);
    void HandleMemoryTimelineApi(HttpServerRequest& Request);
    void HandleHeapStatsApi(HttpServerRequest& Request);
    void HandleCallstacksApi(HttpServerRequest& Request);
    void HandleCallstackStatsApi(HttpServerRequest& Request);
    void HandleChurnStatsApi(HttpServerRequest& Request);
    void HandleAllocSizeHistogramApi(HttpServerRequest& Request);

    // NOTE(review): data members are initialized in declaration order; the formatter
    // is declared last, presumably because it is built from members above it -
    // confirm against the constructor before reordering anything here.
    const trace_detail::TraceModel&               m_Model;       // not owned
    std::filesystem::path                         m_DevHtmlDir;  // dev-mode asset directory (may be empty)
    std::unique_ptr<ZipFs>                        m_ZipFs;       // presumably the embedded asset zip - TODO confirm
    std::unique_ptr<trace_detail::TimelineQuery>  m_TimelineQuery;
    std::unique_ptr<trace_detail::SymbolResolver> m_Symbols;
    trace_detail::CallstackFilterOptions          m_CallstackFilterOptions;  // passed to m_CallstackFormatter.BuildView by the callstack handlers
    trace_detail::CallstackFormatter              m_CallstackFormatter;      // looks up callstack entries and builds filtered views
};