diff options
Diffstat (limited to 'src/zen/trace/trace_memory.h')
| -rw-r--r-- | src/zen/trace/trace_memory.h | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/src/zen/trace/trace_memory.h b/src/zen/trace/trace_memory.h new file mode 100644 index 000000000..da33d8218 --- /dev/null +++ b/src/zen/trace/trace_memory.h @@ -0,0 +1,301 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/fixed_vector.h> +#include <EASTL/hash_map.h> +#include <EASTL/vector.h> +#include <analysis/analyzer.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <cstdint> +#include <string> + +// Forward declarations of outline types (defined in trace_memory.cpp). +// These are global-scope structs created by the begin_outline() macro. +struct Memory_Init; +struct Memory_Marker; +struct Memory_Alloc; +struct Memory_AllocSystem; +struct Memory_AllocVideo; +struct Memory_Free; +struct Memory_FreeSystem; +struct Memory_FreeVideo; +struct Memory_ReallocAlloc; +struct Memory_ReallocAllocSystem; +struct Memory_ReallocFree; +struct Memory_ReallocFreeSystem; +struct Memory_HeapSpec; +struct Memory_HeapMarkAlloc; +struct Memory_HeapUnmarkAlloc; +struct Memory_TagSpec; +struct Memory_CallstackSpec; +struct Memory_CallstackSpecDeltaVarInt; +struct Memory_CallstackSpecDelta7bit; +struct Memory_CallstackSpecXORAndRLE; + +namespace zen::trace_detail { + +struct TraceTiming; + +// -- Allocation data structures -------------------------------------------- + +struct HeapInfo +{ + uint32_t Id = 0; + uint32_t ParentId = ~0u; + uint16_t Flags = 0; // EMemoryTraceHeapFlags bits + std::string Name; +}; + +struct TagInfo +{ + int32_t Tag = 0; + int32_t Parent = 0; + std::string Display; +}; + +struct MemoryTimelineSample +{ + uint32_t TimeUs; + int64_t TotalAllocatedBytes; + int64_t SystemBytes; + int64_t VideoBytes; +}; + +struct HeapStat +{ + uint32_t HeapId = 0; + int64_t CurrentBytes = 0; + int64_t PeakBytes = 0; + uint64_t AllocCount = 0; + uint64_t FreeCount = 0; +}; + +struct AllocationSummary +{ + bool HasMemoryData = false; + uint64_t TotalAllocs = 0; + uint64_t TotalFrees = 0; + uint64_t TotalReallocAllocs = 0; + uint64_t TotalReallocFrees = 0; + int64_t PeakBytes = 0; + uint32_t PeakTimeUs = 0; + int64_t EndBytes = 0; + uint32_t LiveAllocations = 0; +}; + +// One power-of-two bucket of the allocation size histogram. The bucket covers +// sizes in [MinSize, MaxSize] inclusive (MaxSize = MinSize*2 - 1, or 0 for the +// zero-size bucket). Count and Bytes aggregate every alloc/realloc-alloc seen +// during the trace (not just currently-live allocations). +struct AllocSizeBucket +{ + uint64_t MinSize = 0; + uint64_t MaxSize = 0; + uint64_t Count = 0; + uint64_t Bytes = 0; +}; + +// -- Callstack data structures --------------------------------------------- + +// A single resolved stack frame. ModuleIndex references TraceModel::Modules; +// ~0u means the frame did not map to any loaded module. +struct ResolvedFrame +{ + uint64_t Address = 0; + uint32_t ModuleIndex = ~0u; + uint64_t Offset = 0; +}; + +// A decoded callstack: the ordered list of instruction-pointer frames +// captured at the point of an allocation (or free). +struct CallstackEntry +{ + uint32_t Id = 0; + eastl::vector<ResolvedFrame> Frames; // outermost (caller) first +}; + +// Per-callstack allocation churn statistics. "Churn" is measured by how +// quickly an allocation is freed — specifically, the number of alloc events +// that occur between the alloc and its matching free (event distance). +struct CallstackChurnStat +{ + uint32_t CallstackId = 0; + uint64_t ChurnAllocs = 0; // allocations freed within the distance threshold + uint64_t ChurnBytes = 0; // cumulative bytes of those short-lived allocations + uint64_t TotalAllocs = 0; // all allocations from this callstack (for context) + uint64_t TotalBytes = 0; + double MeanDistance = 0.0; // average event distance for the churny allocs +}; + +// Per-callstack live allocation statistics. +struct CallstackAllocStat +{ + uint32_t CallstackId = 0; + int64_t LiveBytes = 0; + uint32_t LiveCount = 0; + eastl::fixed_vector<uint32_t, 4, true> ThreadIds; // unique thread IDs that contributed allocations +}; + +// -- AllocationAnalyzer ---------------------------------------------------- + +// Subscribes to Memory.* trace events and tracks aggregate allocation +// statistics, a memory-over-time timeline, heap specs, and tag specs. +// Intended to be instantiated by BuildTraceModel alongside the other +// analyzers and registered with the Dispatcher. +class AllocationAnalyzer : public Analyzer +{ +public: + explicit AllocationAnalyzer(const TraceTiming* Timing); + + void subscribe(Vector<Subscription>& Subs) override; + + // -- Accessors (call after IterateTrace completes) -- + + bool Initialized() const { return m_Initialized; } + AllocationSummary Summary() const; + void EmitFinalSample(uint32_t TraceEndUs); + + eastl::vector<MemoryTimelineSample>& MutableTimeline() { return m_Timeline; } + const eastl::hash_map<uint32_t, HeapInfo>& Heaps() const { return m_Heaps; } + const eastl::hash_map<int32_t, TagInfo>& Tags() const { return m_Tags; } + const eastl::hash_map<uint8_t, HeapStat>& RootHeapStats() const { return m_RootHeapStats; } + + // Build per-callstack statistics from the current live allocation set. + eastl::vector<CallstackAllocStat> BuildCallstackStats() const; + + // Build per-callstack churn statistics sorted by churn alloc count descending. + // ChurnDistanceThreshold: allocations freed within this many alloc-events are + // considered "short-lived" / churny. + eastl::vector<CallstackChurnStat> BuildChurnStats(uint64_t ChurnDistanceThreshold = 1000) const; + + // Build a size-bucketed histogram of all observed allocations. Returns + // only populated buckets, ordered by MinSize ascending. + eastl::vector<AllocSizeBucket> BuildSizeHistogram() const; + +private: + // -- Event handlers -- + + void OnInit(const ::Memory_Init& Ev); + void OnMarker(const ::Memory_Marker& Ev); + void OnAlloc(const ::Memory_Alloc& Ev); + void OnAllocSystem(const ::Memory_AllocSystem& Ev); + void OnAllocVideo(const ::Memory_AllocVideo& Ev); + void OnFree(const ::Memory_Free& Ev); + void OnFreeSystem(const ::Memory_FreeSystem& Ev); + void OnFreeVideo(const ::Memory_FreeVideo& Ev); + void OnReallocAlloc(const ::Memory_ReallocAlloc& Ev); + void OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev); + void OnReallocFree(const ::Memory_ReallocFree& Ev); + void OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev); + void OnHeapSpec(const ::Memory_HeapSpec& Ev); + void OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev); + void OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev); + void OnTagSpec(const ::Memory_TagSpec& Ev); + + // -- Internal helpers -- + + struct LiveAlloc + { + uint64_t Size; + uint32_t CallstackId; + uint32_t ThreadId; + uint64_t EventSeq; // alloc event sequence number for churn distance + uint8_t RootHeap; + bool IsHeap = false; // true after HeapMarkAlloc; excluded from totals + }; + + uint64_t DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const; + void HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc); + void HandleFree(uint64_t Address, uint8_t RootHeap, uint32_t CallstackId, bool IsRealloc); + void MaybeEmitSample(uint32_t TimeUs); + + // -- State -- + + static constexpr uint32_t kTimelineSampleIntervalUs = 10'000; // 10ms + + const TraceTiming* m_Timing = nullptr; + + // Init params + uint8_t m_SizeShift = 3; // overridden by Memory.Init if present; 3 matches zencore's default + bool m_Initialized = false; + + // Live allocation map (address -> size + root heap) + eastl::hash_map<uint64_t, LiveAlloc> m_LiveAllocs; + + // Running byte counters + int64_t m_CurrentBytes = 0; + int64_t m_SystemBytes = 0; + int64_t m_VideoBytes = 0; + int64_t m_PeakBytes = 0; + uint32_t m_PeakTimeUs = 0; + + // Event counters + uint64_t m_TotalAllocs = 0; + uint64_t m_TotalFrees = 0; + uint64_t m_TotalReallocAllocs = 0; + uint64_t m_TotalReallocFrees = 0; + + // Timeline sampling + eastl::vector<MemoryTimelineSample> m_Timeline; + uint32_t m_LastSampleTimeUs = 0; + uint32_t m_LastMarkerTimeUs = 0; + bool m_HasReceivedMarker = false; + + // Per-callstack churn counters: total allocs + short-lived alloc stats + struct ChurnAccum + { + uint64_t TotalAllocs = 0; + uint64_t TotalBytes = 0; + uint64_t ChurnAllocs = 0; // freed within the distance threshold + uint64_t ChurnBytes = 0; + uint64_t ChurnDistanceSum = 0; // sum of event distances for churny allocs + }; + eastl::hash_map<uint32_t, ChurnAccum> m_ChurnByCallstack; + uint64_t m_AllocEventSeq = 0; // monotonic alloc event counter + + // Allocation size histogram: bucket i covers sizes [2^(i-1)+1, 2^i], with + // bucket 0 reserved for zero-size allocations. 65 buckets covers up to 2^64. + static constexpr size_t kSizeHistogramBuckets = 65; + struct SizeBucketAccum + { + uint64_t Count = 0; + uint64_t Bytes = 0; + }; + SizeBucketAccum m_SizeHistogram[kSizeHistogramBuckets] = {}; + + // Metadata + eastl::hash_map<uint32_t, HeapInfo> m_Heaps; + eastl::hash_map<int32_t, TagInfo> m_Tags; + eastl::hash_map<uint8_t, HeapStat> m_RootHeapStats; +}; + +// -- CallstackAnalyzer ----------------------------------------------------- + +// Subscribes to Memory.CallstackSpec* trace events, decodes compressed +// frames, and stores a callstack ID -> frame addresses mapping. Frame +// addresses are raw instruction pointers; resolution to module+offset +// happens in BuildTraceModel post-processing. +class CallstackAnalyzer : public Analyzer +{ +public: + void subscribe(Vector<Subscription>& Subs) override; + + const eastl::hash_map<uint32_t, eastl::vector<uint64_t>>& RawCallstacks() const { return m_Callstacks; } + +private: + void OnCallstackSpec(const ::Memory_CallstackSpec& Ev); + void OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev); + void OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev); + void OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev); + + void StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count); + + eastl::hash_map<uint32_t, eastl::vector<uint64_t>> m_Callstacks; +}; + +} // namespace zen::trace_detail |