// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

// NOTE(review): every #include below has lost its header name in this copy of
// the file (the angle-bracketed part is missing). Restore the targets from the
// upstream source before building.
#include
ZEN_THIRD_PARTY_INCLUDES_START
#include
#include
#include
#include
ZEN_THIRD_PARTY_INCLUDES_END
#include
#include

// Forward declarations of outline types (defined in trace_memory.cpp).
// These are global-scope structs created by the begin_outline() macro.
// They are only used by reference in the analyzer handler signatures below,
// so incomplete types are sufficient in this header.
struct Memory_Init;
struct Memory_Marker;
struct Memory_Alloc;
struct Memory_AllocSystem;
struct Memory_AllocVideo;
struct Memory_Free;
struct Memory_FreeSystem;
struct Memory_FreeVideo;
struct Memory_ReallocAlloc;
struct Memory_ReallocAllocSystem;
struct Memory_ReallocFree;
struct Memory_ReallocFreeSystem;
struct Memory_HeapSpec;
struct Memory_HeapMarkAlloc;
struct Memory_HeapUnmarkAlloc;
struct Memory_TagSpec;
struct Memory_CallstackSpec;
struct Memory_CallstackSpecDeltaVarInt;
struct Memory_CallstackSpecDelta7bit;
struct Memory_CallstackSpecXORAndRLE;

namespace zen::trace_detail {

// Defined elsewhere; only used through a pointer in this header.
struct TraceTiming;

// -- Allocation data structures --------------------------------------------

// Metadata for one heap.
struct HeapInfo
{
    uint32_t Id = 0;
    uint32_t ParentId = ~0u;  // presumably "no parent" sentinel — confirm against the HeapSpec handler
    uint16_t Flags = 0;  // EMemoryTraceHeapFlags bits
    std::string Name;
};

// Metadata for one allocation tag.
struct TagInfo
{
    int32_t Tag = 0;
    int32_t Parent = 0;
    std::string Display;
};

// One point of the memory-over-time timeline.
// Unlike the other structs in this header, the members carry no default
// initializers, so a default-initialized sample holds indeterminate values;
// callers must fill in every field (or value-initialize with `{}`).
// NOTE(review): TimeUs is presumably microseconds; a uint32_t microsecond
// counter wraps after roughly 71.6 minutes — confirm that is acceptable.
struct MemoryTimelineSample
{
    uint32_t TimeUs;
    int64_t TotalAllocatedBytes;
    int64_t SystemBytes;
    int64_t VideoBytes;
};

// Running counters for a single heap, identified by HeapId.
struct HeapStat
{
    uint32_t HeapId = 0;
    int64_t CurrentBytes = 0;
    int64_t PeakBytes = 0;
    uint64_t AllocCount = 0;
    uint64_t FreeCount = 0;
};

// Aggregate statistics for a whole trace; this is the return type of
// AllocationAnalyzer::Summary().
struct AllocationSummary
{
    bool HasMemoryData = false;
    uint64_t TotalAllocs = 0;
    uint64_t TotalFrees = 0;
    uint64_t TotalReallocAllocs = 0;
    uint64_t TotalReallocFrees = 0;
    int64_t PeakBytes = 0;
    uint32_t PeakTimeUs = 0;  // NOTE(review): 32-bit, same wrap caveat as MemoryTimelineSample::TimeUs
    int64_t EndBytes = 0;
    uint32_t LiveAllocations = 0;
};

// One power-of-two bucket of the allocation size histogram. The bucket covers
// sizes in [MinSize, MaxSize] inclusive (MaxSize = MinSize*2 - 1, or 0 for the
// zero-size bucket). Count and Bytes aggregate every alloc/realloc-alloc seen
// during the trace (not just currently-live allocations).
struct AllocSizeBucket
{
    uint64_t MinSize = 0;
    uint64_t MaxSize = 0;
    uint64_t Count = 0;
    uint64_t Bytes = 0;
};

// -- Callstack data structures ---------------------------------------------

// A single resolved stack frame. ModuleIndex references TraceModel::Modules;
// ~0u means the frame did not map to any loaded module.
struct ResolvedFrame
{
    uint64_t Address = 0;
    uint32_t ModuleIndex = ~0u;
    uint64_t Offset = 0;
};

// A decoded callstack: the ordered list of instruction-pointer frames
// captured at the point of an allocation (or free).
struct CallstackEntry
{
    uint32_t Id = 0;
    // NOTE(review): the element type of this vector is missing in this copy of
    // the file — restore the template argument from upstream.
    eastl::vector Frames;  // outermost (caller) first
};

// Per-callstack allocation churn statistics. "Churn" is measured by how
// quickly an allocation is freed — specifically, the number of alloc events
// that occur between the alloc and its matching free (event distance).
struct CallstackChurnStat
{
    uint32_t CallstackId = 0;
    uint64_t ChurnAllocs = 0;  // allocations freed within the distance threshold
    uint64_t ChurnBytes = 0;  // cumulative bytes of those short-lived allocations
    uint64_t TotalAllocs = 0;  // all allocations from this callstack (for context)
    uint64_t TotalBytes = 0;
    double MeanDistance = 0.0;  // average event distance for the churny allocs
};

// Per-callstack live allocation statistics.
struct CallstackAllocStat
{
    uint32_t CallstackId = 0;
    int64_t LiveBytes = 0;
    uint32_t LiveCount = 0;
    // NOTE(review): element type and fixed capacity of this fixed_vector are
    // missing in this copy of the file — restore the template arguments.
    eastl::fixed_vector ThreadIds;  // unique thread IDs that contributed allocations
};

// -- AllocationAnalyzer ----------------------------------------------------
// Subscribes to Memory.* trace events and tracks aggregate allocation
// statistics, a memory-over-time timeline, heap specs, and tag specs.
// Intended to be instantiated by BuildTraceModel alongside the other
// analyzers and registered with the Dispatcher.
class AllocationAnalyzer : public Analyzer
{
public:
    // Stores nothing visible here beyond the declaration; Timing is a
    // non-owning pointer (the matching member m_Timing is a raw const pointer).
    explicit AllocationAnalyzer(const TraceTiming* Timing);

    // Analyzer interface override; presumably registers the On* handlers
    // declared below — see the definition in the .cpp.
    void subscribe(Vector& Subs) override;

    // -- Accessors (call after IterateTrace completes) --

    // Returns the m_Initialized flag.
    bool Initialized() const { return m_Initialized; }
    AllocationSummary Summary() const;
    void EmitFinalSample(uint32_t TraceEndUs);

    // Returns a non-const reference to the internal timeline so the caller can
    // modify it (or move it out).
    // NOTE(review): the container template arguments on the accessors below
    // are missing in this copy of the file — restore them from upstream.
    eastl::vector& MutableTimeline() { return m_Timeline; }
    const eastl::hash_map& Heaps() const { return m_Heaps; }
    const eastl::hash_map& Tags() const { return m_Tags; }
    const eastl::hash_map& RootHeapStats() const { return m_RootHeapStats; }

    // Build per-callstack statistics from the current live allocation set.
    eastl::vector BuildCallstackStats() const;

    // Build per-callstack churn statistics sorted by churn alloc count descending.
    // ChurnDistanceThreshold: allocations freed within this many alloc-events are
    // considered "short-lived" / churny.
    eastl::vector BuildChurnStats(uint64_t ChurnDistanceThreshold = 1000) const;

    // Build a size-bucketed histogram of all observed allocations. Returns
    // only populated buckets, ordered by MinSize ascending.
    eastl::vector BuildSizeHistogram() const;

private:
    // -- Event handlers --
    // One handler per Memory.* outline type forward-declared at global scope
    // (hence the leading `::`).
    void OnInit(const ::Memory_Init& Ev);
    void OnMarker(const ::Memory_Marker& Ev);
    void OnAlloc(const ::Memory_Alloc& Ev);
    void OnAllocSystem(const ::Memory_AllocSystem& Ev);
    void OnAllocVideo(const ::Memory_AllocVideo& Ev);
    void OnFree(const ::Memory_Free& Ev);
    void OnFreeSystem(const ::Memory_FreeSystem& Ev);
    void OnFreeVideo(const ::Memory_FreeVideo& Ev);
    void OnReallocAlloc(const ::Memory_ReallocAlloc& Ev);
    void OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev);
    void OnReallocFree(const ::Memory_ReallocFree& Ev);
    void OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev);
    void OnHeapSpec(const ::Memory_HeapSpec& Ev);
    void OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev);
    void OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev);
    void OnTagSpec(const ::Memory_TagSpec& Ev);

    // -- Internal helpers --

    // Bookkeeping record for one live allocation. Only IsHeap has a default
    // initializer; the remaining fields must be set by whoever creates the
    // record.
    struct LiveAlloc
    {
        uint64_t Size;
        uint32_t CallstackId;
        uint32_t ThreadId;
        uint64_t EventSeq;  // alloc event sequence number for churn distance
        uint8_t RootHeap;
        bool IsHeap = false;  // true after HeapMarkAlloc; excluded from totals
    };

    // Presumably expands the packed size fields of an alloc event using
    // m_SizeShift — confirm against the definition.
    uint64_t DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const;

    // Shared alloc/free paths; IsRealloc distinguishes the realloc variants of
    // the events from plain alloc/free.
    void HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc);
    void HandleFree(uint64_t Address, uint8_t RootHeap, uint32_t CallstackId, bool IsRealloc);
    void MaybeEmitSample(uint32_t TimeUs);

    // -- State --
    static constexpr uint32_t kTimelineSampleIntervalUs = 10'000;  // 10ms
    const TraceTiming* m_Timing = nullptr;  // non-owning

    // Init params
    uint8_t m_SizeShift = 3;  // overridden by Memory.Init if present; 3 matches zencore's default
    bool m_Initialized = false;

    // Live allocation map (address -> size + root heap)
    // NOTE(review): key/value template arguments are missing in this copy of
    // the file; the value is presumably LiveAlloc, which carries more than
    // size and root heap (callstack id, thread id, event sequence, heap flag).
    eastl::hash_map m_LiveAllocs;

    // Running byte counters
    int64_t m_CurrentBytes = 0;
    int64_t m_SystemBytes = 0;
    int64_t m_VideoBytes = 0;
    int64_t m_PeakBytes = 0;
    // NOTE(review): 32-bit microsecond timestamps (here and in the timeline
    // fields below) wrap after roughly 71.6 minutes — confirm trace durations
    // stay below that or that wrap is handled.
    uint32_t m_PeakTimeUs = 0;

    // Event counters
    uint64_t m_TotalAllocs = 0;
    uint64_t m_TotalFrees = 0;
    uint64_t m_TotalReallocAllocs = 0;
    uint64_t m_TotalReallocFrees = 0;

    // Timeline sampling
    eastl::vector m_Timeline;
    uint32_t m_LastSampleTimeUs = 0;
    uint32_t m_LastMarkerTimeUs = 0;
    bool m_HasReceivedMarker = false;

    // Per-callstack churn counters: total allocs + short-lived alloc stats
    struct ChurnAccum
    {
        uint64_t TotalAllocs = 0;
        uint64_t TotalBytes = 0;
        uint64_t ChurnAllocs = 0;  // freed within the distance threshold
        uint64_t ChurnBytes = 0;
        uint64_t ChurnDistanceSum = 0;  // sum of event distances for churny allocs
    };
    // NOTE(review): template arguments missing; presumably callstack id -> ChurnAccum.
    eastl::hash_map m_ChurnByCallstack;
    uint64_t m_AllocEventSeq = 0;  // monotonic alloc event counter

    // Allocation size histogram: bucket i covers sizes [2^(i-1)+1, 2^i], with
    // bucket 0 reserved for zero-size allocations. 65 buckets covers up to 2^64.
    // NOTE(review): this range disagrees with the AllocSizeBucket comment in
    // this header, which describes buckets as [MinSize, MinSize*2 - 1] (i.e.
    // [2^(i-1), 2^i - 1]). Check the bucketing code and make the two comments
    // agree.
    static constexpr size_t kSizeHistogramBuckets = 65;
    struct SizeBucketAccum
    {
        uint64_t Count = 0;
        uint64_t Bytes = 0;
    };
    SizeBucketAccum m_SizeHistogram[kSizeHistogramBuckets] = {};

    // Metadata
    // NOTE(review): template arguments missing on all three maps; these back
    // the Heaps(), Tags() and RootHeapStats() accessors respectively.
    eastl::hash_map m_Heaps;
    eastl::hash_map m_Tags;
    eastl::hash_map m_RootHeapStats;
};

// -- CallstackAnalyzer -----------------------------------------------------
// Subscribes to Memory.CallstackSpec* trace events, decodes compressed
// frames, and stores a callstack ID -> frame addresses mapping. Frame
// addresses are raw instruction pointers; resolution to module+offset
// happens in BuildTraceModel post-processing.
class CallstackAnalyzer : public Analyzer
{
public:
    // Analyzer interface override; presumably registers the OnCallstackSpec*
    // handlers declared below — see the definition in the .cpp.
    void subscribe(Vector& Subs) override;

    // Read-only view of the stored callstack map (returns m_Callstacks).
    // NOTE(review): the template argument list is truncated in this copy of the
    // file (only the closing `>` survives) — restore it from upstream.
    const eastl::hash_map>& RawCallstacks() const { return m_Callstacks; }

private:
    // One handler per Memory.CallstackSpec* outline type forward-declared at
    // global scope (hence the leading `::`).
    void OnCallstackSpec(const ::Memory_CallstackSpec& Ev);
    void OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev);
    void OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev);
    void OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev);

    // Takes a callstack id plus Count frame addresses starting at Frames;
    // presumably the common sink the handlers above call after decoding —
    // confirm in the .cpp.
    void StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count);

    // NOTE(review): template argument list truncated here as well; it must
    // match the return type of RawCallstacks().
    eastl::hash_map> m_Callstacks;
};

}  // namespace zen::trace_detail