aboutsummaryrefslogtreecommitdiff
path: root/src/zen/trace/trace_memory.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/zen/trace/trace_memory.h')
-rw-r--r-- src/zen/trace/trace_memory.h 301
1 files changed, 301 insertions, 0 deletions
diff --git a/src/zen/trace/trace_memory.h b/src/zen/trace/trace_memory.h
new file mode 100644
index 000000000..da33d8218
--- /dev/null
+++ b/src/zen/trace/trace_memory.h
@@ -0,0 +1,301 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/zencore.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <EASTL/fixed_vector.h>
+#include <EASTL/hash_map.h>
+#include <EASTL/vector.h>
+#include <analysis/analyzer.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#include <cstdint>
+#include <string>
+
+// Outline event types, forward-declared here and defined in trace_memory.cpp.
+// The begin_outline() macro creates them as global-scope structs, which is why
+// the handler declarations further down name them with a leading `::`.
+
+// Session setup and markers
+struct Memory_Init;
+struct Memory_Marker;
+
+// Allocation events
+struct Memory_Alloc;
+struct Memory_AllocSystem;
+struct Memory_AllocVideo;
+
+// Free events
+struct Memory_Free;
+struct Memory_FreeSystem;
+struct Memory_FreeVideo;
+
+// Realloc events (reported as an alloc half and a free half)
+struct Memory_ReallocAlloc;
+struct Memory_ReallocAllocSystem;
+struct Memory_ReallocFree;
+struct Memory_ReallocFreeSystem;
+
+// Heap and tag metadata
+struct Memory_HeapSpec;
+struct Memory_HeapMarkAlloc;
+struct Memory_HeapUnmarkAlloc;
+struct Memory_TagSpec;
+
+// Callstack specifications, one per frame encoding
+struct Memory_CallstackSpec;
+struct Memory_CallstackSpecDeltaVarInt;
+struct Memory_CallstackSpecDelta7bit;
+struct Memory_CallstackSpecXORAndRLE;
+
+namespace zen::trace_detail {
+
+struct TraceTiming; // only referenced through a pointer in this header (AllocationAnalyzer), so a forward declaration is enough
+
+// -- Allocation data structures --------------------------------------------
+
+// Descriptor of a single memory heap, keyed by Id in AllocationAnalyzer::Heaps().
+// NOTE(review): ParentId defaults to ~0u, presumably meaning "no parent heap" - confirm in trace_memory.cpp.
+struct HeapInfo
+{
+ uint32_t Id{0};
+ uint32_t ParentId{~0u};
+ uint16_t Flags{0}; // EMemoryTraceHeapFlags bits
+ std::string Name;
+};
+
+// Descriptor of a single memory tag, keyed by Tag in AllocationAnalyzer::Tags().
+// Parent holds the id of another tag; Display is the human-readable name.
+struct TagInfo
+{
+ int32_t Tag{0};
+ int32_t Parent{0};
+ std::string Display;
+};
+
+// One point of the memory-over-time timeline collected by AllocationAnalyzer:
+// a timestamp in microseconds plus the byte counters at that moment.
+// Every field is zero-initialized, like the other structs in this header, so a
+// default-constructed sample never carries indeterminate values. Aggregate
+// initialization ({TimeUs, Total, System, Video}) keeps working.
+struct MemoryTimelineSample
+{
+ uint32_t TimeUs = 0;
+ int64_t TotalAllocatedBytes = 0;
+ int64_t SystemBytes = 0;
+ int64_t VideoBytes = 0;
+};
+
+// Running counters for one heap: current and peak byte totals plus the number
+// of alloc and free events seen. AllocationAnalyzer::RootHeapStats() exposes
+// one of these per root heap.
+struct HeapStat
+{
+ uint32_t HeapId{0};
+ int64_t CurrentBytes{0};
+ int64_t PeakBytes{0};
+ uint64_t AllocCount{0};
+ uint64_t FreeCount{0};
+};
+
+// Whole-trace allocation totals, as returned by AllocationAnalyzer::Summary().
+// HasMemoryData defaults to false and every counter to zero.
+struct AllocationSummary
+{
+ bool HasMemoryData{false};
+ uint64_t TotalAllocs{0};
+ uint64_t TotalFrees{0};
+ uint64_t TotalReallocAllocs{0};
+ uint64_t TotalReallocFrees{0};
+ int64_t PeakBytes{0};
+ uint32_t PeakTimeUs{0};
+ int64_t EndBytes{0};
+ uint32_t LiveAllocations{0};
+};
+
+// A single power-of-two bucket of the allocation size histogram.
+//   - Range: sizes in [MinSize, MaxSize], both ends inclusive, where
+//     MaxSize = MinSize*2 - 1. The zero-size bucket has MaxSize = 0.
+//   - Count / Bytes: totals over every alloc and realloc-alloc observed
+//     during the trace, not only the allocations that are still live.
+struct AllocSizeBucket
+{
+ uint64_t MinSize{0};
+ uint64_t MaxSize{0};
+ uint64_t Count{0};
+ uint64_t Bytes{0};
+};
+
+// -- Callstack data structures ---------------------------------------------
+
+// One stack frame after module resolution. ModuleIndex is an index into
+// TraceModel::Modules; the default ~0u marks a frame that did not map to any
+// loaded module.
+struct ResolvedFrame
+{
+ uint64_t Address{0};
+ uint32_t ModuleIndex{~0u};
+ uint64_t Offset{0};
+};
+
+// A decoded callstack: the instruction-pointer frames captured at the point of
+// an allocation (or free), kept in order.
+struct CallstackEntry
+{
+ uint32_t Id{0};
+ eastl::vector<ResolvedFrame> Frames; // outermost (caller) first
+};
+
+// Allocation churn statistics for one callstack. "Churn" captures how quickly
+// an allocation is released again. It is measured as event distance: the
+// number of alloc events that happen between an alloc and its matching free.
+struct CallstackChurnStat
+{
+ uint32_t CallstackId{0};
+ uint64_t ChurnAllocs{0}; // allocations freed within the distance threshold
+ uint64_t ChurnBytes{0}; // cumulative bytes of those short-lived allocations
+ uint64_t TotalAllocs{0}; // all allocations from this callstack (for context)
+ uint64_t TotalBytes{0};
+ double MeanDistance{0.0}; // average event distance for the churny allocs
+};
+
+// Live-allocation statistics for one callstack: bytes and count of the
+// allocations that are still outstanding, plus the threads that made them.
+struct CallstackAllocStat
+{
+ uint32_t CallstackId{0};
+ int64_t LiveBytes{0};
+ uint32_t LiveCount{0};
+ // Unique thread IDs that contributed allocations. Four IDs are stored inline;
+ // the `true` template argument enables overflow beyond that.
+ eastl::fixed_vector<uint32_t, 4, true> ThreadIds;
+};
+
+// -- AllocationAnalyzer ----------------------------------------------------
+
+// Subscribes to Memory.* trace events and tracks aggregate allocation
+// statistics, a memory-over-time timeline, heap specs, and tag specs.
+// Intended to be instantiated by BuildTraceModel alongside the other
+// analyzers and registered with the Dispatcher.
+//
+// The accessors and Build*() helpers report the state accumulated by the
+// event handlers, so they are meant to be called once IterateTrace is done.
+class AllocationAnalyzer : public Analyzer
+{
+public:
+ // Timing is kept as a non-owning pointer (m_Timing).
+ // NOTE(review): presumably it must outlive the analyzer - confirm at the call site.
+ explicit AllocationAnalyzer(const TraceTiming* Timing);
+
+ // Analyzer interface: adds this analyzer's subscriptions to Subs.
+ void subscribe(Vector<Subscription>& Subs) override;
+
+ // -- Accessors (call after IterateTrace completes) --
+
+ // Value of m_Initialized.
+ // NOTE(review): presumably set once Memory.Init has been seen - confirm in trace_memory.cpp.
+ bool Initialized() const { return m_Initialized; }
+ AllocationSummary Summary() const;
+ // NOTE(review): presumably appends a closing timeline sample at TraceEndUs - confirm in trace_memory.cpp.
+ void EmitFinalSample(uint32_t TraceEndUs);
+
+ // Mutable access to the collected timeline samples.
+ eastl::vector<MemoryTimelineSample>& MutableTimeline() { return m_Timeline; }
+ // Read-only views of the heap, tag and per-root-heap tables.
+ const eastl::hash_map<uint32_t, HeapInfo>& Heaps() const { return m_Heaps; }
+ const eastl::hash_map<int32_t, TagInfo>& Tags() const { return m_Tags; }
+ const eastl::hash_map<uint8_t, HeapStat>& RootHeapStats() const { return m_RootHeapStats; }
+
+ // Build per-callstack statistics from the current live allocation set.
+ eastl::vector<CallstackAllocStat> BuildCallstackStats() const;
+
+ // Build per-callstack churn statistics sorted by churn alloc count descending.
+ // ChurnDistanceThreshold: allocations freed within this many alloc-events are
+ // considered "short-lived" / churny.
+ eastl::vector<CallstackChurnStat> BuildChurnStats(uint64_t ChurnDistanceThreshold = 1000) const;
+
+ // Build a size-bucketed histogram of all observed allocations. Returns
+ // only populated buckets, ordered by MinSize ascending.
+ eastl::vector<AllocSizeBucket> BuildSizeHistogram() const;
+
+private:
+ // -- Event handlers --
+ // One handler per Memory.* event type. The `::`-qualified parameter types are
+ // the global-scope outline structs forward-declared at the top of this header.
+
+ void OnInit(const ::Memory_Init& Ev);
+ void OnMarker(const ::Memory_Marker& Ev);
+ void OnAlloc(const ::Memory_Alloc& Ev);
+ void OnAllocSystem(const ::Memory_AllocSystem& Ev);
+ void OnAllocVideo(const ::Memory_AllocVideo& Ev);
+ void OnFree(const ::Memory_Free& Ev);
+ void OnFreeSystem(const ::Memory_FreeSystem& Ev);
+ void OnFreeVideo(const ::Memory_FreeVideo& Ev);
+ void OnReallocAlloc(const ::Memory_ReallocAlloc& Ev);
+ void OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev);
+ void OnReallocFree(const ::Memory_ReallocFree& Ev);
+ void OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev);
+ void OnHeapSpec(const ::Memory_HeapSpec& Ev);
+ void OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev);
+ void OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev);
+ void OnTagSpec(const ::Memory_TagSpec& Ev);
+
+ // -- Internal helpers --
+
+ // Bookkeeping record for one outstanding allocation (value type of
+ // m_LiveAllocs). Every field has a default so that a default-constructed
+ // record never carries indeterminate values.
+ struct LiveAlloc
+ {
+ uint64_t Size = 0;
+ uint32_t CallstackId = 0;
+ uint32_t ThreadId = 0;
+ uint64_t EventSeq = 0; // alloc event sequence number for churn distance
+ uint8_t RootHeap = 0;
+ bool IsHeap = false; // true after HeapMarkAlloc; excluded from totals
+ };
+
+ // NOTE(review): presumably rebuilds the byte size from the packed event fields using m_SizeShift - confirm in trace_memory.cpp.
+ uint64_t DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const;
+ // Common alloc/free bookkeeping. IsRealloc tells the realloc halves apart
+ // from plain alloc/free events.
+ void HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc);
+ void HandleFree(uint64_t Address, uint8_t RootHeap, uint32_t CallstackId, bool IsRealloc);
+ // NOTE(review): presumably appends a timeline sample when at least kTimelineSampleIntervalUs has passed since m_LastSampleTimeUs - confirm.
+ void MaybeEmitSample(uint32_t TimeUs);
+
+ // -- State --
+
+ static constexpr uint32_t kTimelineSampleIntervalUs = 10'000; // 10ms
+
+ const TraceTiming* m_Timing = nullptr;
+
+ // Init params
+ uint8_t m_SizeShift = 3; // overridden by Memory.Init if present; 3 matches zencore's default
+ bool m_Initialized = false;
+
+ // Live allocation map (address -> LiveAlloc record)
+ eastl::hash_map<uint64_t, LiveAlloc> m_LiveAllocs;
+
+ // Running byte counters
+ int64_t m_CurrentBytes = 0;
+ int64_t m_SystemBytes = 0;
+ int64_t m_VideoBytes = 0;
+ int64_t m_PeakBytes = 0;
+ uint32_t m_PeakTimeUs = 0;
+
+ // Event counters
+ uint64_t m_TotalAllocs = 0;
+ uint64_t m_TotalFrees = 0;
+ uint64_t m_TotalReallocAllocs = 0;
+ uint64_t m_TotalReallocFrees = 0;
+
+ // Timeline sampling
+ eastl::vector<MemoryTimelineSample> m_Timeline;
+ uint32_t m_LastSampleTimeUs = 0;
+ uint32_t m_LastMarkerTimeUs = 0;
+ bool m_HasReceivedMarker = false;
+
+ // Per-callstack churn counters: total allocs + short-lived alloc stats
+ struct ChurnAccum
+ {
+ uint64_t TotalAllocs = 0;
+ uint64_t TotalBytes = 0;
+ uint64_t ChurnAllocs = 0; // freed within the distance threshold
+ uint64_t ChurnBytes = 0;
+ uint64_t ChurnDistanceSum = 0; // sum of event distances for churny allocs
+ };
+ eastl::hash_map<uint32_t, ChurnAccum> m_ChurnByCallstack;
+ uint64_t m_AllocEventSeq = 0; // monotonic alloc event counter
+
+ // Allocation size histogram accumulators. Slot 0 is reserved for zero-size
+ // allocations; slots 1..64 are the power-of-two buckets, so 65 slots span
+ // the whole uint64_t size range.
+ // NOTE(review): the slot boundaries must match the AllocSizeBucket contract
+ // ([MinSize, MinSize*2 - 1], i.e. slot i >= 1 covers [2^(i-1), 2^i - 1]);
+ // verify that the bucketing code in trace_memory.cpp agrees.
+ static constexpr size_t kSizeHistogramBuckets = 65;
+ struct SizeBucketAccum
+ {
+ uint64_t Count = 0;
+ uint64_t Bytes = 0;
+ };
+ SizeBucketAccum m_SizeHistogram[kSizeHistogramBuckets] = {};
+
+ // Metadata
+ eastl::hash_map<uint32_t, HeapInfo> m_Heaps;
+ eastl::hash_map<int32_t, TagInfo> m_Tags;
+ eastl::hash_map<uint8_t, HeapStat> m_RootHeapStats;
+};
+
+// -- CallstackAnalyzer -----------------------------------------------------
+
+// Listens for the Memory.CallstackSpec* family of trace events, decodes their
+// compressed frame lists, and keeps a callstack ID -> frame addresses table.
+// The stored addresses are raw instruction pointers; mapping them to
+// module+offset is done later, in BuildTraceModel post-processing.
+class CallstackAnalyzer : public Analyzer
+{
+ // Callstack ID -> raw frame addresses.
+ using RawCallstackMap = eastl::hash_map<uint32_t, eastl::vector<uint64_t>>;
+
+public:
+ void subscribe(Vector<Subscription>& Subs) override;
+
+ // Read-only view of every callstack stored so far.
+ const RawCallstackMap& RawCallstacks() const { return m_Callstacks; }
+
+private:
+ // One handler per callstack spec event type.
+ void OnCallstackSpec(const ::Memory_CallstackSpec& Ev);
+ void OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev);
+ void OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev);
+ void OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev);
+
+ // Records Count frame addresses, read from Frames, under callstack Id.
+ void StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count);
+
+ RawCallstackMap m_Callstacks;
+};
+
+} // namespace zen::trace_detail