aboutsummaryrefslogtreecommitdiff
path: root/src/zen/trace/trace_memory.h
blob: da33d82189da31f81cbf4db5c1de9b74fa17e0b3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include <zencore/zencore.h>

ZEN_THIRD_PARTY_INCLUDES_START
#include <EASTL/fixed_vector.h>
#include <EASTL/hash_map.h>
#include <EASTL/vector.h>
#include <analysis/analyzer.h>
ZEN_THIRD_PARTY_INCLUDES_END

#include <cstdint>
#include <string>

// Outline event types, forward-declared at global scope. Their definitions
// live in trace_memory.cpp, where the begin_outline() macro generates them;
// this header only ever refers to them by reference in handler signatures.

// Init / marker
struct Memory_Init;
struct Memory_Marker;

// Alloc
struct Memory_Alloc;
struct Memory_AllocSystem;
struct Memory_AllocVideo;

// Free
struct Memory_Free;
struct Memory_FreeSystem;
struct Memory_FreeVideo;

// Realloc
struct Memory_ReallocAlloc;
struct Memory_ReallocAllocSystem;
struct Memory_ReallocFree;
struct Memory_ReallocFreeSystem;

// Heap / tag specs
struct Memory_HeapSpec;
struct Memory_HeapMarkAlloc;
struct Memory_HeapUnmarkAlloc;
struct Memory_TagSpec;

// Callstack specs
struct Memory_CallstackSpec;
struct Memory_CallstackSpecDeltaVarInt;
struct Memory_CallstackSpecDelta7bit;
struct Memory_CallstackSpecXORAndRLE;

namespace zen::trace_detail {

// Forward declaration only: within this header the type is used solely via
// `const TraceTiming*` (AllocationAnalyzer's constructor parameter and its
// m_Timing member), so the full definition is not required here.
struct TraceTiming;

// -- Allocation data structures --------------------------------------------

// Metadata describing one heap. AllocationAnalyzer stores these in a map
// keyed by a uint32_t and exposes them through Heaps().
struct HeapInfo
{
	uint32_t Id{0};
	uint32_t ParentId{~0u};	 // all bits set by default
	uint16_t Flags{0};		 // bit set of EMemoryTraceHeapFlags values
	std::string Name;
};

// Metadata describing one tag. AllocationAnalyzer stores these in a map
// keyed by an int32_t and exposes them through Tags().
struct TagInfo
{
	int32_t Tag{0};
	int32_t Parent{0};
	std::string Display;
};

// One point on the memory-over-time timeline collected by AllocationAnalyzer
// (see its m_Timeline member). Every field defaults to zero so that a
// default-constructed sample never carries indeterminate values, matching the
// other data structs in this header. The type remains an aggregate, so
// positional initialization `{TimeUs, Total, System, Video}` keeps working.
struct MemoryTimelineSample
{
	uint32_t TimeUs				 = 0;  // sample time in microseconds
	int64_t	 TotalAllocatedBytes = 0;
	int64_t	 SystemBytes		 = 0;
	int64_t	 VideoBytes			 = 0;
};

// Byte and event counters for a single heap. AllocationAnalyzer keeps one of
// these per root heap (map keyed by a uint8_t, exposed via RootHeapStats()).
struct HeapStat
{
	uint32_t HeapId{0};
	int64_t CurrentBytes{0};
	int64_t PeakBytes{0};
	uint64_t AllocCount{0};
	uint64_t FreeCount{0};
};

// Aggregate totals for a whole trace; this is the value type returned by
// AllocationAnalyzer::Summary(). All counters start at zero and
// HasMemoryData starts out false.
struct AllocationSummary
{
	bool HasMemoryData{false};
	uint64_t TotalAllocs{0};
	uint64_t TotalFrees{0};
	uint64_t TotalReallocAllocs{0};
	uint64_t TotalReallocFrees{0};
	int64_t PeakBytes{0};
	uint32_t PeakTimeUs{0};
	int64_t EndBytes{0};
	uint32_t LiveAllocations{0};
};

// A single power-of-two bucket of the allocation size histogram.
//
// The bucket spans the inclusive size range [MinSize, MaxSize], where
// MaxSize = MinSize*2 - 1; the zero-size bucket has MaxSize = 0. Count and
// Bytes are totals over every alloc / realloc-alloc observed during the
// trace, not only over allocations that are still live.
struct AllocSizeBucket
{
	uint64_t MinSize{0};
	uint64_t MaxSize{0};
	uint64_t Count{0};
	uint64_t Bytes{0};
};

// -- Callstack data structures ---------------------------------------------

// One resolved stack frame. ModuleIndex is an index into
// TraceModel::Modules; the sentinel ~0u marks a frame that did not map to
// any loaded module.
struct ResolvedFrame
{
	uint64_t Address{0};
	uint32_t ModuleIndex{~0u};
	uint64_t Offset{0};
};

// A decoded callstack: the instruction-pointer frames, in order, that were
// captured at the point of an allocation (or free).
struct CallstackEntry
{
	uint32_t Id{0};

	// Ordered with the outermost frame (the caller) first.
	eastl::vector<ResolvedFrame> Frames;
};

// Allocation churn statistics for one callstack. "Churn" expresses how soon
// an allocation gets freed, measured as the event distance: the number of
// alloc events that occur between the alloc and its matching free.
struct CallstackChurnStat
{
	uint32_t CallstackId{0};
	uint64_t ChurnAllocs{0};   // allocations freed within the distance threshold
	uint64_t ChurnBytes{0};	   // cumulative bytes of those short-lived allocations
	uint64_t TotalAllocs{0};   // every allocation from this callstack (for context)
	uint64_t TotalBytes{0};
	double MeanDistance{0.0};  // average event distance of the churny allocations
};

// Live-allocation statistics for one callstack.
struct CallstackAllocStat
{
	uint32_t CallstackId{0};
	int64_t LiveBytes{0};
	uint32_t LiveCount{0};

	// Unique IDs of the threads that contributed allocations. Inline capacity
	// is 4 entries, with overflow enabled (third template argument is true).
	eastl::fixed_vector<uint32_t, 4, true> ThreadIds;
};

// -- AllocationAnalyzer ----------------------------------------------------

// Subscribes to Memory.* trace events and tracks aggregate allocation
// statistics, a memory-over-time timeline, heap specs, and tag specs.
// Intended to be instantiated by BuildTraceModel alongside the other
// analyzers and registered with the Dispatcher.
class AllocationAnalyzer : public Analyzer
{
public:
	// Timing: trace timing information. Presumably retained in m_Timing (a raw
	// pointer this class does not delete), in which case it has to outlive the
	// analyzer -- TODO confirm against the constructor definition.
	explicit AllocationAnalyzer(const TraceTiming* Timing);

	// Analyzer override. Presumably registers the On* handlers declared below
	// for their corresponding Memory.* events -- confirm in the definition.
	void subscribe(Vector<Subscription>& Subs) override;

	// -- Accessors (call after IterateTrace completes) --

	// Returns the m_Initialized flag.
	bool			  Initialized() const { return m_Initialized; }
	// Returns the aggregate totals by value.
	AllocationSummary Summary() const;
	// Non-const: may modify analyzer state (e.g. the timeline). TraceEndUs is
	// a time in microseconds.
	void			  EmitFinalSample(uint32_t TraceEndUs);

	// Mutable access to the timeline so the caller can modify or move it out.
	eastl::vector<MemoryTimelineSample>&	   MutableTimeline() { return m_Timeline; }
	const eastl::hash_map<uint32_t, HeapInfo>& Heaps() const { return m_Heaps; }
	const eastl::hash_map<int32_t, TagInfo>&   Tags() const { return m_Tags; }
	const eastl::hash_map<uint8_t, HeapStat>&  RootHeapStats() const { return m_RootHeapStats; }

	// Build per-callstack statistics from the current live allocation set.
	eastl::vector<CallstackAllocStat> BuildCallstackStats() const;

	// Build per-callstack churn statistics sorted by churn alloc count descending.
	// ChurnDistanceThreshold: allocations freed within this many alloc-events are
	// considered "short-lived" / churny.
	eastl::vector<CallstackChurnStat> BuildChurnStats(uint64_t ChurnDistanceThreshold = 1000) const;

	// Build a size-bucketed histogram of all observed allocations. Returns
	// only populated buckets, ordered by MinSize ascending.
	eastl::vector<AllocSizeBucket> BuildSizeHistogram() const;

private:
	// -- Event handlers --
	// One handler per Memory.* outline event type; the event structs are
	// forward-declared at global scope, hence the leading `::`.

	void OnInit(const ::Memory_Init& Ev);
	void OnMarker(const ::Memory_Marker& Ev);
	void OnAlloc(const ::Memory_Alloc& Ev);
	void OnAllocSystem(const ::Memory_AllocSystem& Ev);
	void OnAllocVideo(const ::Memory_AllocVideo& Ev);
	void OnFree(const ::Memory_Free& Ev);
	void OnFreeSystem(const ::Memory_FreeSystem& Ev);
	void OnFreeVideo(const ::Memory_FreeVideo& Ev);
	void OnReallocAlloc(const ::Memory_ReallocAlloc& Ev);
	void OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev);
	void OnReallocFree(const ::Memory_ReallocFree& Ev);
	void OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev);
	void OnHeapSpec(const ::Memory_HeapSpec& Ev);
	void OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev);
	void OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev);
	void OnTagSpec(const ::Memory_TagSpec& Ev);

	// -- Internal helpers --

	// Bookkeeping record for one live allocation (value type of m_LiveAllocs).
	// Every member has a default so that a default-constructed record -- e.g.
	// one created implicitly by hash_map::operator[] -- never holds
	// indeterminate values. The type is still an aggregate.
	struct LiveAlloc
	{
		uint64_t Size		 = 0;
		uint32_t CallstackId = 0;
		uint32_t ThreadId	 = 0;
		uint64_t EventSeq	 = 0;  // alloc event sequence number for churn distance
		uint8_t	 RootHeap	 = 0;
		bool	 IsHeap		 = false;  // true after HeapMarkAlloc; excluded from totals
	};

	uint64_t DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const;
	void	 HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc);
	void	 HandleFree(uint64_t Address, uint8_t RootHeap, uint32_t CallstackId, bool IsRealloc);
	void	 MaybeEmitSample(uint32_t TimeUs);

	// -- State --

	static constexpr uint32_t kTimelineSampleIntervalUs = 10'000;  // 10ms

	const TraceTiming* m_Timing = nullptr;

	// Init params
	uint8_t m_SizeShift	  = 3;	// overridden by Memory.Init if present; 3 matches zencore's default
	bool	m_Initialized = false;

	// Live allocation map: address -> LiveAlloc record (size, callstack,
	// thread, alloc sequence number, root heap, heap-marker flag)
	eastl::hash_map<uint64_t, LiveAlloc> m_LiveAllocs;

	// Running byte counters
	int64_t	 m_CurrentBytes = 0;
	int64_t	 m_SystemBytes	= 0;
	int64_t	 m_VideoBytes	= 0;
	int64_t	 m_PeakBytes	= 0;
	uint32_t m_PeakTimeUs	= 0;

	// Event counters
	uint64_t m_TotalAllocs		  = 0;
	uint64_t m_TotalFrees		  = 0;
	uint64_t m_TotalReallocAllocs = 0;
	uint64_t m_TotalReallocFrees  = 0;

	// Timeline sampling
	eastl::vector<MemoryTimelineSample> m_Timeline;
	uint32_t							m_LastSampleTimeUs	= 0;
	uint32_t							m_LastMarkerTimeUs	= 0;
	bool								m_HasReceivedMarker = false;

	// Per-callstack churn counters: total allocs + short-lived alloc stats
	struct ChurnAccum
	{
		uint64_t TotalAllocs	  = 0;
		uint64_t TotalBytes		  = 0;
		uint64_t ChurnAllocs	  = 0;	// freed within the distance threshold
		uint64_t ChurnBytes		  = 0;
		uint64_t ChurnDistanceSum = 0;	// sum of event distances for churny allocs
	};
	eastl::hash_map<uint32_t, ChurnAccum> m_ChurnByCallstack;  // keyed by callstack id
	uint64_t							  m_AllocEventSeq = 0;	// monotonic alloc event counter

	// Allocation size histogram: one accumulator per power-of-two size class,
	// with bucket 0 reserved for zero-size allocations. 65 buckets are enough
	// to span the whole uint64_t size range.
	// NOTE(review): this comment used to give the range of bucket i as
	// [2^(i-1)+1, 2^i], which leaves size 1 without a bucket and disagrees with
	// AllocSizeBucket's documented [MinSize, MinSize*2 - 1]. The latter implies
	// bucket i (i >= 1) covers [2^(i-1), 2^i - 1] -- confirm against the
	// bucketing code before relying on either.
	static constexpr size_t kSizeHistogramBuckets = 65;
	struct SizeBucketAccum
	{
		uint64_t Count = 0;
		uint64_t Bytes = 0;
	};
	SizeBucketAccum m_SizeHistogram[kSizeHistogramBuckets] = {};

	// Metadata
	eastl::hash_map<uint32_t, HeapInfo> m_Heaps;
	eastl::hash_map<int32_t, TagInfo>	m_Tags;
	eastl::hash_map<uint8_t, HeapStat>	m_RootHeapStats;
};

// -- CallstackAnalyzer -----------------------------------------------------

// Subscribes to Memory.CallstackSpec* trace events, decodes compressed
// frames, and stores a callstack ID -> frame addresses mapping. Frame
// addresses are raw instruction pointers; resolution to module+offset
// happens in BuildTraceModel post-processing.
class CallstackAnalyzer : public Analyzer
{
public:
	void subscribe(Vector<Subscription>& Subs) override;

	const eastl::hash_map<uint32_t, eastl::vector<uint64_t>>& RawCallstacks() const { return m_Callstacks; }

private:
	void OnCallstackSpec(const ::Memory_CallstackSpec& Ev);
	void OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev);
	void OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev);
	void OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev);

	void StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count);

	eastl::hash_map<uint32_t, eastl::vector<uint64_t>> m_Callstacks;
};

}  // namespace zen::trace_detail