1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
|
// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include <zencore/zencore.h>
ZEN_THIRD_PARTY_INCLUDES_START
#include <EASTL/fixed_vector.h>
#include <EASTL/hash_map.h>
#include <EASTL/vector.h>
#include <analysis/analyzer.h>
ZEN_THIRD_PARTY_INCLUDES_END
#include <cstdint>
#include <string>
// Forward declarations of outline types (defined in trace_memory.cpp).
// These are global-scope structs created by the begin_outline() macro.
// They are declared here so the analyzer classes below can name them in
// their handler signatures (as ::Memory_*) without this header needing the
// full definitions.
//
// Event types taken by the AllocationAnalyzer handlers:
struct Memory_Init;
struct Memory_Marker;
struct Memory_Alloc;
struct Memory_AllocSystem;
struct Memory_AllocVideo;
struct Memory_Free;
struct Memory_FreeSystem;
struct Memory_FreeVideo;
struct Memory_ReallocAlloc;
struct Memory_ReallocAllocSystem;
struct Memory_ReallocFree;
struct Memory_ReallocFreeSystem;
struct Memory_HeapSpec;
struct Memory_HeapMarkAlloc;
struct Memory_HeapUnmarkAlloc;
struct Memory_TagSpec;
// Event types taken by the CallstackAnalyzer handlers (one per callstack
// encoding):
struct Memory_CallstackSpec;
struct Memory_CallstackSpecDeltaVarInt;
struct Memory_CallstackSpecDelta7bit;
struct Memory_CallstackSpecXORAndRLE;
namespace zen::trace_detail {
// Forward declaration only: this header uses TraceTiming solely through a
// const pointer (AllocationAnalyzer's constructor argument and m_Timing), so
// the full definition is not required here.
struct TraceTiming;
// -- Allocation data structures --------------------------------------------
// Metadata describing one heap: its id, the id of its parent heap, its flag
// bits and a name. AllocationAnalyzer exposes these through Heaps(), in a
// map keyed by a 32-bit id.
struct HeapInfo
{
    uint32_t    Id{0};
    uint32_t    ParentId{~0u};  // all-ones by default; presumably means "no parent" -- confirm in trace_memory.cpp
    uint16_t    Flags{0};       // EMemoryTraceHeapFlags bits
    std::string Name;
};
// Metadata describing one allocation tag: the tag value, the value of its
// parent tag and a display string. AllocationAnalyzer exposes these through
// Tags(), in a map keyed by a signed 32-bit value (presumably Tag).
struct TagInfo
{
    int32_t     Tag{0};
    int32_t     Parent{0};
    std::string Display;
};
// One point on the memory-over-time timeline kept by AllocationAnalyzer
// (see m_Timeline / MutableTimeline()). Byte counters are signed, matching
// the analyzer's running int64_t counters.
//
// Every field is zero-initialised so that a default-constructed sample never
// carries indeterminate values; this matches the other structs in this
// header. The type remains an aggregate, so brace initialisation with all
// four values still works.
struct MemoryTimelineSample
{
    uint32_t TimeUs              = 0;  // sample time in microseconds (reference point not visible here -- presumably trace start)
    int64_t  TotalAllocatedBytes = 0;
    int64_t  SystemBytes         = 0;
    int64_t  VideoBytes          = 0;
};
// Running statistics for a single heap: current and peak byte totals plus
// alloc/free event counts. AllocationAnalyzer exposes these through
// RootHeapStats(), in a map keyed by an 8-bit value (HeapId itself is 32-bit).
struct HeapStat
{
    uint32_t HeapId{0};
    int64_t  CurrentBytes{0};
    int64_t  PeakBytes{0};
    uint64_t AllocCount{0};
    uint64_t FreeCount{0};
};
// Aggregate result returned by AllocationAnalyzer::Summary(). The fields
// mirror the analyzer's event counters and byte counters; a
// default-constructed summary reports "no memory data" with all counts zero.
struct AllocationSummary
{
    bool     HasMemoryData{false};
    uint64_t TotalAllocs{0};
    uint64_t TotalFrees{0};
    uint64_t TotalReallocAllocs{0};
    uint64_t TotalReallocFrees{0};
    int64_t  PeakBytes{0};
    uint32_t PeakTimeUs{0};       // microseconds; time associated with PeakBytes
    int64_t  EndBytes{0};
    uint32_t LiveAllocations{0};
};
// A single power-of-two bucket in the allocation size histogram.
//
// The bucket spans the inclusive size range [MinSize, MaxSize], where
// MaxSize = MinSize*2 - 1; the zero-size bucket has MaxSize = 0. Count and
// Bytes accumulate over every alloc / realloc-alloc observed in the trace,
// not only the allocations that are still live.
struct AllocSizeBucket
{
    uint64_t MinSize{0};
    uint64_t MaxSize{0};
    uint64_t Count{0};
    uint64_t Bytes{0};
};
// -- Callstack data structures ---------------------------------------------
// One resolved stack frame. ModuleIndex is an index into
// TraceModel::Modules; the all-ones value (~0u) marks a frame that did not
// map to any loaded module.
struct ResolvedFrame
{
    uint64_t Address{0};
    uint32_t ModuleIndex{~0u};
    uint64_t Offset{0};
};
// A decoded callstack, identified by Id: the ordered instruction-pointer
// frames captured at the point of an allocation (or free).
struct CallstackEntry
{
    uint32_t                     Id{0};
    eastl::vector<ResolvedFrame> Frames;  // outermost (caller) first
};
// Allocation churn statistics for one callstack. "Churn" describes how
// quickly an allocation is freed again, measured as event distance: the
// number of alloc events that occur between an alloc and its matching free.
struct CallstackChurnStat
{
    uint32_t CallstackId{0};
    uint64_t ChurnAllocs{0};    // allocations freed within the distance threshold
    uint64_t ChurnBytes{0};     // cumulative bytes of those short-lived allocations
    uint64_t TotalAllocs{0};    // all allocations from this callstack (for context)
    uint64_t TotalBytes{0};
    double   MeanDistance{0.0}; // average event distance for the churny allocs
};
// Live allocation statistics for one callstack: bytes and count of the
// allocations attributed to it, plus the set of threads that made them.
struct CallstackAllocStat
{
    uint32_t                               CallstackId{0};
    int64_t                                LiveBytes{0};
    uint32_t                               LiveCount{0};
    eastl::fixed_vector<uint32_t, 4, true> ThreadIds;  // unique thread IDs that contributed allocations
};
// -- AllocationAnalyzer ----------------------------------------------------
// Subscribes to Memory.* trace events and tracks aggregate allocation
// statistics, a memory-over-time timeline, heap specs, and tag specs.
// Intended to be instantiated by BuildTraceModel alongside the other
// analyzers and registered with the Dispatcher.
class AllocationAnalyzer : public Analyzer
{
public:
    // Timing is a non-owning pointer (see m_Timing); presumably it must
    // outlive the analyzer -- confirm against the constructor in the .cpp.
    explicit AllocationAnalyzer(const TraceTiming* Timing);

    // Analyzer override; receives the subscription list for this analyzer.
    void subscribe(Vector<Subscription>& Subs) override;

    // -- Accessors (call after IterateTrace completes) --

    // Returns m_Initialized (false by default; presumably set once
    // Memory.Init has been seen -- confirm in the .cpp).
    bool Initialized() const { return m_Initialized; }

    // Aggregate counters packaged as an AllocationSummary.
    AllocationSummary Summary() const;

    // Non-const; presumably appends a closing timeline sample at TraceEndUs
    // (microseconds) -- confirm in the .cpp.
    void EmitFinalSample(uint32_t TraceEndUs);

    // Mutable access to the timeline so the caller can modify or move it out.
    eastl::vector<MemoryTimelineSample>& MutableTimeline() { return m_Timeline; }

    const eastl::hash_map<uint32_t, HeapInfo>& Heaps() const { return m_Heaps; }
    const eastl::hash_map<int32_t, TagInfo>&   Tags() const { return m_Tags; }
    const eastl::hash_map<uint8_t, HeapStat>&  RootHeapStats() const { return m_RootHeapStats; }

    // Build per-callstack statistics from the current live allocation set.
    eastl::vector<CallstackAllocStat> BuildCallstackStats() const;

    // Build per-callstack churn statistics sorted by churn alloc count descending.
    // ChurnDistanceThreshold: allocations freed within this many alloc-events are
    // considered "short-lived" / churny.
    eastl::vector<CallstackChurnStat> BuildChurnStats(uint64_t ChurnDistanceThreshold = 1000) const;

    // Build a size-bucketed histogram of all observed allocations. Returns
    // only populated buckets, ordered by MinSize ascending.
    eastl::vector<AllocSizeBucket> BuildSizeHistogram() const;

private:
    // -- Event handlers --
    // One handler per subscribed Memory.* event type. The event structs live
    // at global scope, hence the leading "::".
    void OnInit(const ::Memory_Init& Ev);
    void OnMarker(const ::Memory_Marker& Ev);
    void OnAlloc(const ::Memory_Alloc& Ev);
    void OnAllocSystem(const ::Memory_AllocSystem& Ev);
    void OnAllocVideo(const ::Memory_AllocVideo& Ev);
    void OnFree(const ::Memory_Free& Ev);
    void OnFreeSystem(const ::Memory_FreeSystem& Ev);
    void OnFreeVideo(const ::Memory_FreeVideo& Ev);
    void OnReallocAlloc(const ::Memory_ReallocAlloc& Ev);
    void OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev);
    void OnReallocFree(const ::Memory_ReallocFree& Ev);
    void OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev);
    void OnHeapSpec(const ::Memory_HeapSpec& Ev);
    void OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev);
    void OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev);
    void OnTagSpec(const ::Memory_TagSpec& Ev);

    // -- Internal helpers --

    // Bookkeeping for one entry of m_LiveAllocs. Every field carries a
    // default so that a default-constructed entry never holds indeterminate
    // values (previously only IsHeap was initialised).
    struct LiveAlloc
    {
        uint64_t Size        = 0;
        uint32_t CallstackId = 0;
        uint32_t ThreadId    = 0;
        uint64_t EventSeq    = 0;      // alloc event sequence number for churn distance
        uint8_t  RootHeap    = 0;
        bool     IsHeap      = false;  // true after HeapMarkAlloc; excluded from totals
    };

    // Reconstructs a 64-bit size from the event's raw fields; presumably
    // combines RawSize, the low bits carried in AlignSizeLower and
    // m_SizeShift -- confirm in the .cpp.
    uint64_t DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const;

    // Shared paths used by the alloc/free style handlers above; IsRealloc
    // distinguishes the realloc variants.
    void HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc);
    void HandleFree(uint64_t Address, uint8_t RootHeap, uint32_t CallstackId, bool IsRealloc);

    // Timeline sampling helper; presumably records a sample when at least
    // kTimelineSampleIntervalUs has elapsed since m_LastSampleTimeUs.
    void MaybeEmitSample(uint32_t TimeUs);

    // -- State --
    static constexpr uint32_t kTimelineSampleIntervalUs = 10'000;  // 10ms

    const TraceTiming* m_Timing = nullptr;  // non-owning

    // Init params
    uint8_t m_SizeShift   = 3;  // overridden by Memory.Init if present; 3 matches zencore's default
    bool    m_Initialized = false;

    // Live allocation map (address -> size + root heap)
    eastl::hash_map<uint64_t, LiveAlloc> m_LiveAllocs;

    // Running byte counters
    int64_t  m_CurrentBytes = 0;
    int64_t  m_SystemBytes  = 0;
    int64_t  m_VideoBytes   = 0;
    int64_t  m_PeakBytes    = 0;
    uint32_t m_PeakTimeUs   = 0;

    // Event counters
    uint64_t m_TotalAllocs        = 0;
    uint64_t m_TotalFrees         = 0;
    uint64_t m_TotalReallocAllocs = 0;
    uint64_t m_TotalReallocFrees  = 0;

    // Timeline sampling
    eastl::vector<MemoryTimelineSample> m_Timeline;
    uint32_t                            m_LastSampleTimeUs  = 0;
    uint32_t                            m_LastMarkerTimeUs  = 0;
    bool                                m_HasReceivedMarker = false;

    // Per-callstack churn counters: total allocs + short-lived alloc stats
    struct ChurnAccum
    {
        uint64_t TotalAllocs      = 0;
        uint64_t TotalBytes       = 0;
        uint64_t ChurnAllocs      = 0;  // freed within the distance threshold
        uint64_t ChurnBytes       = 0;
        uint64_t ChurnDistanceSum = 0;  // sum of event distances for churny allocs
    };
    eastl::hash_map<uint32_t, ChurnAccum> m_ChurnByCallstack;
    uint64_t                              m_AllocEventSeq = 0;  // monotonic alloc event counter

    // Allocation size histogram. Bucket 0 is reserved for zero-size
    // allocations; 65 buckets are enough for every 64-bit size.
    // NOTE(review): this comment previously said bucket i covers
    // [2^(i-1)+1, 2^i], which leaves size 1 without a bucket and disagrees
    // with AllocSizeBucket's documented range [MinSize, MinSize*2 - 1]. The
    // intended mapping is presumably bucket i (i >= 1) = [2^(i-1), 2^i - 1];
    // confirm against the bucket computation in the .cpp.
    static constexpr size_t kSizeHistogramBuckets = 65;
    struct SizeBucketAccum
    {
        uint64_t Count = 0;
        uint64_t Bytes = 0;
    };
    SizeBucketAccum m_SizeHistogram[kSizeHistogramBuckets] = {};

    // Metadata
    eastl::hash_map<uint32_t, HeapInfo> m_Heaps;
    eastl::hash_map<int32_t, TagInfo>   m_Tags;
    eastl::hash_map<uint8_t, HeapStat>  m_RootHeapStats;
};
// -- CallstackAnalyzer -----------------------------------------------------
// Analyzer for the Memory.CallstackSpec* family of trace events. It decodes
// the compressed frame data and keeps a mapping from callstack ID to frame
// addresses. The stored addresses are raw instruction pointers; turning them
// into module+offset is done later, in BuildTraceModel post-processing.
class CallstackAnalyzer : public Analyzer
{
public:
    // Analyzer override; receives the subscription list for this analyzer.
    void subscribe(Vector<Subscription>& Subs) override;

    // Read-only view of the callstack ID -> raw frame address table.
    const eastl::hash_map<uint32_t, eastl::vector<uint64_t>>& RawCallstacks() const
    {
        return m_Callstacks;
    }

private:
    // One handler per callstack encoding emitted by the tracer.
    void OnCallstackSpec(const ::Memory_CallstackSpec& Ev);
    void OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev);
    void OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev);
    void OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev);

    // Common sink for the handlers above: takes Count decoded frame
    // addresses for callstack Id (presumably recorded into m_Callstacks).
    void StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count);

    // Callstack ID -> raw frame addresses.
    eastl::hash_map<uint32_t, eastl::vector<uint64_t>> m_Callstacks;
};
} // namespace zen::trace_detail
|