aboutsummaryrefslogtreecommitdiff
path: root/src/zen/trace/trace_memory.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zen/trace/trace_memory.cpp')
-rw-r--r--src/zen/trace/trace_memory.cpp901
1 files changed, 901 insertions, 0 deletions
diff --git a/src/zen/trace/trace_memory.cpp b/src/zen/trace/trace_memory.cpp
new file mode 100644
index 000000000..704b8bcde
--- /dev/null
+++ b/src/zen/trace/trace_memory.cpp
@@ -0,0 +1,901 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "trace_memory.h"
+
+#include "trace_model.h"
+
+#include <zencore/fmtutils.h>
+#include <zencore/logging.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <EASTL/sort.h>
+#include <analysis/analyzer.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+using namespace zen::trace_detail;
+
+//////////////////////////////////////////////////////////////////////////////
+// Event outlines for Memory.* trace events
+//
+// Field names and types match the UE wire format exactly.
+// See Engine/Source/Runtime/Core/Private/ProfilingDebugging/MemoryTrace.cpp.
+
+// clang-format off
+begin_outline(Memory, Init) // Handled by AllocationAnalyzer::OnInit.
+ field(uint8, Version) // Only logged by OnInit.
+ field(uint32, MarkerPeriod) // Only logged by OnInit.
+ field(uint8, MinAlignment) // Only logged by OnInit.
+ field(uint8, SizeShift) // Stored by OnInit; DecodeAllocSize uses it to rebuild allocation sizes.
+ field(uint64, PageSize) // Only logged by OnInit.
+end_outline()
+
+begin_outline(Memory, Marker) // Handled by AllocationAnalyzer::OnMarker.
+ field(uint64, Cycle) // Converted to microseconds through TraceTiming::CycleToTimeUs.
+end_outline()
+
+begin_outline(Memory, Alloc) // Handled by AllocationAnalyzer::OnAlloc.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+ field(uint32, Size) // Upper size bits; shifted left by SizeShift in DecodeAllocSize.
+ field(uint8, AlignmentPow2_SizeLower) // Its low SizeShift bits are OR-ed into the decoded size.
+ field(uint8, RootHeap) // The analyzer treats 0 as system memory and 1 as video memory.
+end_outline()
+
+begin_outline(Memory, AllocSystem) // Handled like Alloc with root heap 0.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+ field(uint32, Size)
+ field(uint8, AlignmentPow2_SizeLower)
+end_outline()
+
+begin_outline(Memory, AllocVideo) // Handled like Alloc with root heap 1.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+ field(uint32, Size)
+ field(uint8, AlignmentPow2_SizeLower)
+end_outline()
+
+begin_outline(Memory, Free) // Handled by AllocationAnalyzer::OnFree.
+ field(uint64, Address) // Key used to find the live allocation being released.
+ field(uint32, CallstackId) // Forwarded to HandleFree, which currently ignores it.
+ field(uint8, RootHeap) // Forwarded to HandleFree, which currently ignores it.
+end_outline()
+
+begin_outline(Memory, FreeSystem) // Handled like Free with root heap 0.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+end_outline()
+
+begin_outline(Memory, FreeVideo) // Handled like Free with root heap 1.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+end_outline()
+
+begin_outline(Memory, ReallocAlloc) // Same handling as Alloc, counted as a realloc-alloc.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+ field(uint32, Size)
+ field(uint8, AlignmentPow2_SizeLower)
+ field(uint8, RootHeap)
+end_outline()
+
+begin_outline(Memory, ReallocAllocSystem) // Same handling as AllocSystem, counted as a realloc-alloc.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+ field(uint32, Size)
+ field(uint8, AlignmentPow2_SizeLower)
+end_outline()
+
+begin_outline(Memory, ReallocFree) // Same handling as Free, counted as a realloc-free.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+ field(uint8, RootHeap)
+end_outline()
+
+begin_outline(Memory, ReallocFreeSystem) // Same handling as FreeSystem, counted as a realloc-free.
+ field(uint64, Address)
+ field(uint32, CallstackId)
+end_outline()
+
+begin_outline(Memory, HeapSpec) // Handled by OnHeapSpec; stored in m_Heaps keyed by Id.
+ field(uint32, Id)
+ field(uint32, ParentId)
+ field(uint16, Flags)
+ field(FieldStr, Name)
+end_outline()
+
+begin_outline(Memory, HeapMarkAlloc) // Handled by OnHeapMarkAlloc; removes the allocation from the running totals.
+ field(uint64, Address) // The only field the handler reads.
+ field(uint32, CallstackId)
+ field(uint16, Flags)
+ field(uint32, Heap)
+end_outline()
+
+begin_outline(Memory, HeapUnmarkAlloc) // Handled by OnHeapUnmarkAlloc; adds the allocation back to the running totals.
+ field(uint64, Address) // The only field the handler reads.
+ field(uint32, CallstackId)
+ field(uint32, Heap)
+end_outline()
+
+begin_outline(Memory, TagSpec) // Handled by OnTagSpec; stored in m_Tags keyed by Tag.
+ field(int32, Tag)
+ field(int32, Parent)
+ field(FieldStr, Display)
+end_outline()
+
+begin_outline(Memory, CallstackSpec) // Handled by CallstackAnalyzer::OnCallstackSpec.
+ field(uint32, CallstackId) // StoreCallstack ignores id 0.
+ field(uint64[], Frames) // Stored as-is, no decoding.
+end_outline()
+
+begin_outline(Memory, CallstackSpecDeltaVarInt) // Frames are decoded with DecodeDeltaVarInt.
+ field(uint32, CallstackId)
+ field(uint8[], CompressedFrames)
+end_outline()
+
+begin_outline(Memory, CallstackSpecDelta7bit) // Frames are decoded with DecodeDelta7bit.
+ field(uint32, CallstackId)
+ field(uint8[], CompressedFrames)
+end_outline()
+
+begin_outline(Memory, CallstackSpecXORAndRLE) // Frames are decoded with DecodeXORAndRLE.
+ field(uint32, CallstackId)
+ field(uint8[], CompressedFrames)
+end_outline()
+ // clang-format on
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Callstack decompression helpers
+
+ namespace
+{
+ // Undoes zig-zag encoding: even inputs map to non-negative values and odd
+ // inputs to negative ones, i.e. 0, 1, 2, 3 -> 0, -1, 1, -2.
+ inline int64_t ZigZagDecode(uint64_t Encoded)
+ {
+     const int64_t Magnitude = int64_t(Encoded >> 1);
+     // All-ones when the low (sign) bit is set, zero otherwise.
+     const int64_t SignMask = (Encoded & 1) ? int64_t(-1) : int64_t(0);
+     return Magnitude ^ SignMask;
+ }
+
+ // Decodes a callstack stored as zig-zag encoded frame deltas in UE VarInt form.
+ //
+ // UE VarInt: leading 1-bits in the first byte indicate total byte count.
+ //   0xxxxxxx = 1 byte  (7 value bits)
+ //   10xxxxxx = 2 bytes (14 value bits)
+ //   110xxxxx = 3 bytes (21 value bits) ...up to 9 bytes (0xFF + 8 raw bytes).
+ // The bits of the first byte after the length prefix are the most significant
+ // value bits; the remaining bytes continue the value in big-endian order.
+ //
+ // Each value is the zig-zag encoded difference to the previous frame (the
+ // first frame is relative to 0). Decoding stops at the first value that would
+ // extend past the end of the buffer; frames decoded so far are returned.
+ //
+ // Data/Size describe the compressed byte buffer. Returns the absolute frame
+ // addresses in stream order.
+ eastl::vector<uint64_t> DecodeDeltaVarInt(const uint8_t* Data, uint32_t Size)
+ {
+     eastl::vector<uint64_t> Frames;
+     uint64_t                Prev = 0;
+     const uint8_t*          Cur  = Data;
+     const uint8_t* const    End  = Data + Size;
+
+     while (Cur < End)
+     {
+         const uint8_t First = *Cur;
+
+         // Count the leading 1-bits of the first byte (capped at 8) to get the
+         // total encoded length in bytes.
+         uint32_t ByteCount = 1;
+         uint8_t  Mask      = 0x80;
+         while ((First & Mask) && ByteCount < 9)
+         {
+             ByteCount++;
+             Mask >>= 1;
+         }
+
+         // Compare against the bytes that remain instead of forming
+         // Cur + ByteCount, which may point beyond one-past-the-end.
+         if (uint64_t(End - Cur) < ByteCount)
+         {
+             break;
+         }
+
+         uint64_t Raw = 0;
+         if (ByteCount == 9)
+         {
+             // First byte is 0xFF; next 8 bytes are the raw value.
+             for (uint32_t I = 1; I <= 8; I++)
+             {
+                 Raw = (Raw << 8) | Cur[I];
+             }
+         }
+         else
+         {
+             // First byte contributes value bits after stripping the length prefix.
+             const uint8_t ValueMask = uint8_t((1u << (8 - ByteCount)) - 1);
+             Raw                     = First & ValueMask;
+             for (uint32_t I = 1; I < ByteCount; I++)
+             {
+                 Raw = (Raw << 8) | Cur[I];
+             }
+         }
+         Cur += ByteCount;
+
+         // Apply the delta with unsigned wrap-around; a signed addition would
+         // be undefined behaviour if a corrupt stream made it overflow.
+         Prev += uint64_t(ZigZagDecode(Raw));
+         Frames.push_back(Prev);
+     }
+
+     return Frames;
+ }
+
+ // Decodes a callstack stored as zig-zag encoded frame deltas in 7-bit
+ // continuation form: bit 7 = more bytes follow, bits 0-6 = value bits,
+ // least significant group first (little-endian).
+ //
+ // Robustness against malformed input:
+ //  - value bits beyond the 64th are ignored (the continuation bytes are still
+ //    consumed), so an overlong encoding never shifts by 64 or more;
+ //  - a trailing value whose last byte still has the continuation bit set is
+ //    incomplete and is dropped rather than emitted as a bogus frame.
+ //
+ // Data/Size describe the compressed byte buffer. Returns the absolute frame
+ // addresses in stream order.
+ eastl::vector<uint64_t> DecodeDelta7bit(const uint8_t* Data, uint32_t Size)
+ {
+     eastl::vector<uint64_t> Frames;
+     uint64_t                Prev = 0;
+     const uint8_t*          Cur  = Data;
+     const uint8_t* const    End  = Data + Size;
+
+     while (Cur < End)
+     {
+         uint64_t Raw      = 0;
+         uint32_t Shift    = 0;
+         bool     Complete = false;
+
+         while (Cur < End)
+         {
+             const uint8_t Byte = *Cur++;
+             if (Shift < 64)
+             {
+                 Raw |= uint64_t(Byte & 0x7F) << Shift;
+                 Shift += 7;
+             }
+             if ((Byte & 0x80) == 0)
+             {
+                 Complete = true;
+                 break;
+             }
+         }
+
+         if (!Complete)
+         {
+             // Ran out of data in the middle of a value.
+             break;
+         }
+
+         // Apply the delta with unsigned wrap-around; a signed addition would
+         // be undefined behaviour if a corrupt stream made it overflow.
+         Prev += uint64_t(ZigZagDecode(Raw));
+         Frames.push_back(Prev);
+     }
+
+     return Frames;
+ }
+
+ // XOR + RLE callstack decoding.
+ //
+ // Every frame starts with one byte holding the number of leading zero bits of
+ // (frame XOR previous frame). It is followed by ceil((64 - zeros) / 8) bytes
+ // carrying the non-zero suffix of that XOR value, little-endian. A count of 64
+ // or more means the frame equals the previous one and no payload follows.
+ // Decoding stops when a payload would run past the end of the buffer.
+ eastl::vector<uint64_t> DecodeXORAndRLE(const uint8_t* Data, uint32_t Size)
+ {
+     eastl::vector<uint64_t> Result;
+     uint64_t                Last  = 0;
+     const uint8_t* const    Limit = Data + Size;
+
+     for (const uint8_t* It = Data; It < Limit;)
+     {
+         const uint8_t ZeroBits = *It++;
+
+         if (ZeroBits < 64)
+         {
+             const uint32_t PayloadBits  = 64 - ZeroBits;
+             const uint32_t PayloadBytes = (PayloadBits + 7) / 8;
+
+             if (It + PayloadBytes > Limit)
+             {
+                 break;
+             }
+
+             // Assemble the little-endian XOR payload.
+             uint64_t Diff = 0;
+             for (uint32_t ByteIdx = 0; ByteIdx != PayloadBytes; ++ByteIdx)
+             {
+                 Diff |= uint64_t(It[ByteIdx]) << (ByteIdx * 8);
+             }
+             It += PayloadBytes;
+
+             Last ^= Diff;
+         }
+
+         // Either the updated frame or, for ZeroBits >= 64, a repeat of the previous one.
+         Result.push_back(Last);
+     }
+
+     return Result;
+ }
+
+} // anonymous namespace
+
+//////////////////////////////////////////////////////////////////////////////
+// AllocationAnalyzer implementation
+
+// Keeps the (possibly null) timing source; OnMarker skips marker handling when
+// it is null or reports a zero frequency.
+AllocationAnalyzer::AllocationAnalyzer(const TraceTiming* Timing)
+: m_Timing{Timing}
+{
+}
+
+void
+AllocationAnalyzer::subscribe(Vector<Subscription>& Subs) // Appends one subscription per Memory.* handler of this analyzer to Subs.
+{
+ Subs.emplace_back(this, &AllocationAnalyzer::OnInit); // Memory.Init: size shift and trace parameters.
+ Subs.emplace_back(this, &AllocationAnalyzer::OnMarker); // Memory.Marker: time base for timeline samples.
+ Subs.emplace_back(this, &AllocationAnalyzer::OnAlloc); // Allocations (explicit root heap, system, video).
+ Subs.emplace_back(this, &AllocationAnalyzer::OnAllocSystem);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnAllocVideo);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnFree); // Frees (explicit root heap, system, video).
+ Subs.emplace_back(this, &AllocationAnalyzer::OnFreeSystem);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnFreeVideo);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnReallocAlloc); // Realloc halves; counted separately from plain allocs/frees.
+ Subs.emplace_back(this, &AllocationAnalyzer::OnReallocAllocSystem);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnReallocFree);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnReallocFreeSystem);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnHeapSpec); // Heap metadata and heap mark/unmark of live allocations.
+ Subs.emplace_back(this, &AllocationAnalyzer::OnHeapMarkAlloc);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnHeapUnmarkAlloc);
+ Subs.emplace_back(this, &AllocationAnalyzer::OnTagSpec); // Tag metadata.
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Internal helpers
+
+// Rebuilds an allocation size from its two wire fields.
+//
+// RawSize holds the upper bits of the size (the size shifted right by the
+// SizeShift announced in Memory.Init); the low SizeShift bits of
+// AlignSizeLower hold the remaining lower bits of the size.
+//
+// SizeShift comes straight from the trace stream, so out-of-range values are
+// handled explicitly instead of shifting by more than the operand width
+// (undefined behaviour):
+//  - AlignSizeLower has only 8 bits, so any shift >= 8 keeps all of them;
+//  - a shift >= 64 cannot be represented; the upper part is then dropped.
+//
+// Returns the decoded size in bytes.
+uint64_t
+AllocationAnalyzer::DecodeAllocSize(uint32_t RawSize, uint8_t AlignSizeLower) const
+{
+    const uint32_t Shift    = m_SizeShift;
+    const uint64_t LowMask  = (Shift >= 8) ? uint64_t(0xFF) : ((uint64_t(1) << Shift) - 1);
+    const uint64_t HighPart = (Shift >= 64) ? uint64_t(0) : (uint64_t(RawSize) << Shift);
+    return HighPart | (uint64_t(AlignSizeLower) & LowMask);
+}
+
+// Records a new live allocation and updates every running statistic.
+//
+// Address     - key of the allocation in m_LiveAllocs.
+// Size        - decoded allocation size in bytes.
+// RootHeap    - root heap id; 0 feeds the system counter, 1 the video counter.
+// CallstackId - allocating callstack; 0 is excluded from churn tracking.
+// ThreadId    - allocating thread, kept with the live allocation.
+// IsRealloc   - selects which total counter (realloc-alloc vs. alloc) is bumped.
+//
+// Updates: live-allocation map, current/system/video byte counters, per-root-heap
+// stats, global peak (stamped with the last marker time), alloc counters,
+// per-callstack churn totals and the size histogram.
+void
+AllocationAnalyzer::HandleAlloc(uint64_t Address, uint64_t Size, uint8_t RootHeap, uint32_t CallstackId, uint32_t ThreadId, bool IsRealloc)
+{
+    // The record carries the current event sequence number so that HandleFree
+    // can later compute the alloc->free event distance.
+    const LiveAlloc NewAlloc{Size, CallstackId, ThreadId, m_AllocEventSeq, RootHeap, false};
+
+    // If address is already tracked (shouldn't normally happen), treat as
+    // implicit free of the old allocation so the counters stay consistent.
+    auto It = m_LiveAllocs.find(Address);
+    if (It != m_LiveAllocs.end())
+    {
+        // Heap-marked allocs were already subtracted from totals in OnHeapMarkAlloc.
+        if (!It->second.IsHeap)
+        {
+            const int64_t OldSize = int64_t(It->second.Size);
+            const uint8_t OldHeap = It->second.RootHeap;
+            m_CurrentBytes -= OldSize;
+            if (OldHeap == 0)
+            {
+                m_SystemBytes -= OldSize;
+            }
+            else if (OldHeap == 1)
+            {
+                m_VideoBytes -= OldSize;
+            }
+            auto HIt = m_RootHeapStats.find(OldHeap);
+            if (HIt != m_RootHeapStats.end())
+            {
+                HIt->second.CurrentBytes -= OldSize;
+                HIt->second.FreeCount++;
+            }
+        }
+        It->second = NewAlloc;
+    }
+    else
+    {
+        m_LiveAllocs.insert({Address, NewAlloc});
+    }
+
+    const int64_t SignedSize = int64_t(Size);
+    m_CurrentBytes += SignedSize;
+    if (RootHeap == 0)
+    {
+        m_SystemBytes += SignedSize;
+    }
+    else if (RootHeap == 1)
+    {
+        m_VideoBytes += SignedSize;
+    }
+
+    // Update per-root-heap stats
+    HeapStat& HStat = m_RootHeapStats[RootHeap];
+    HStat.HeapId    = RootHeap;
+    HStat.CurrentBytes += SignedSize;
+    HStat.AllocCount++;
+    if (HStat.CurrentBytes > HStat.PeakBytes)
+    {
+        HStat.PeakBytes = HStat.CurrentBytes;
+    }
+
+    // Track global peak
+    if (m_CurrentBytes > m_PeakBytes)
+    {
+        m_PeakBytes  = m_CurrentBytes;
+        m_PeakTimeUs = m_LastMarkerTimeUs;
+    }
+
+    if (IsRealloc)
+    {
+        m_TotalReallocAllocs++;
+    }
+    else
+    {
+        m_TotalAllocs++;
+    }
+
+    // Churn tracking
+    m_AllocEventSeq++;
+    if (CallstackId != 0)
+    {
+        ChurnAccum& Churn = m_ChurnByCallstack[CallstackId];
+        Churn.TotalAllocs++;
+        Churn.TotalBytes += Size;
+    }
+
+    // Size histogram: bucket 0 captures zero-size allocs only, bucket 1
+    // captures sizes 1 and 2, and bucket i (i >= 2) captures sizes in
+    // [2^(i-1)+1, 2^i], i.e. ceil(log2(Size)), so power-of-two sizes land on
+    // their own bucket (e.g. 16 -> bucket 4 = (8, 16]). The last bucket absorbs
+    // everything larger.
+    size_t BucketIndex = 0;
+    if (Size > 0)
+    {
+        uint64_t Shifted = Size - 1;
+        while (Shifted > 0 && BucketIndex < kSizeHistogramBuckets - 1)
+        {
+            Shifted >>= 1;
+            ++BucketIndex;
+        }
+        // ceil(log2(1)) is 0, which would file one-byte allocations under the
+        // zero-size bucket; they belong in bucket 1, which is reported as [1, 2].
+        if (BucketIndex == 0 && kSizeHistogramBuckets > 1)
+        {
+            BucketIndex = 1;
+        }
+    }
+    m_SizeHistogram[BucketIndex].Count++;
+    m_SizeHistogram[BucketIndex].Bytes += Size;
+}
+
+// Processes the release of an allocation.
+//
+// The free/realloc-free counter is bumped for every event. If the address is
+// not tracked (the allocation predates the trace) nothing else happens.
+// Otherwise the allocation is removed from the live set, its bytes are taken
+// out of the running totals (unless it was heap-marked, in which case
+// OnHeapMarkAlloc already did that), and the alloc->free event distance is
+// added to the churn accumulator of the allocating callstack.
+//
+// The RootHeap and CallstackId arguments of the free event are unused; the
+// values recorded at allocation time are used instead.
+void
+AllocationAnalyzer::HandleFree(uint64_t Address, uint8_t /*RootHeap*/, uint32_t /*CallstackId*/, bool IsRealloc)
+{
+    if (IsRealloc)
+    {
+        ++m_TotalReallocFrees;
+    }
+    else
+    {
+        ++m_TotalFrees;
+    }
+
+    auto Found = m_LiveAllocs.find(Address);
+    if (Found == m_LiveAllocs.end())
+    {
+        // Allocation happened before the trace started -- nothing to subtract.
+        return;
+    }
+
+    // Snapshot what is needed before the entry is erased.
+    const int64_t  FreedBytes   = int64_t(Found->second.Size);
+    const uint8_t  OwningHeap   = Found->second.RootHeap;
+    const bool     HeapMarked   = Found->second.IsHeap;
+    const uint32_t OwnerCsId    = Found->second.CallstackId;
+    const uint64_t AllocatedSeq = Found->second.EventSeq;
+    m_LiveAllocs.erase(Found);
+
+    if (!HeapMarked)
+    {
+        m_CurrentBytes -= FreedBytes;
+        switch (OwningHeap)
+        {
+            case 0:
+                m_SystemBytes -= FreedBytes;
+                break;
+            case 1:
+                m_VideoBytes -= FreedBytes;
+                break;
+            default:
+                break;
+        }
+
+        auto HeapIt = m_RootHeapStats.find(OwningHeap);
+        if (HeapIt != m_RootHeapStats.end())
+        {
+            HeapIt->second.CurrentBytes -= FreedBytes;
+            HeapIt->second.FreeCount++;
+        }
+    }
+
+    // Churn tracking: short alloc->free event distances indicate short-lived
+    // (churny) allocations. Callstack id 0 is not tracked.
+    if (OwnerCsId == 0)
+    {
+        return;
+    }
+    const uint64_t EventDistance = m_AllocEventSeq - AllocatedSeq;
+    auto           ChurnIt       = m_ChurnByCallstack.find(OwnerCsId);
+    if (ChurnIt != m_ChurnByCallstack.end())
+    {
+        ChurnIt->second.ChurnDistanceSum += EventDistance;
+        ChurnIt->second.ChurnAllocs++;
+        ChurnIt->second.ChurnBytes += uint64_t(FreedBytes);
+    }
+}
+
+// Appends a timeline sample of the current byte counters, but only once at
+// least kTimelineSampleIntervalUs has passed since the previous sample.
+void
+AllocationAnalyzer::MaybeEmitSample(uint32_t TimeUs)
+{
+    const bool TooSoon = TimeUs < m_LastSampleTimeUs + kTimelineSampleIntervalUs;
+    if (!TooSoon)
+    {
+        m_Timeline.push_back(MemoryTimelineSample{
+            .TimeUs              = TimeUs,
+            .TotalAllocatedBytes = m_CurrentBytes,
+            .SystemBytes         = m_SystemBytes,
+            .VideoBytes          = m_VideoBytes,
+        });
+        m_LastSampleTimeUs = TimeUs;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Event handlers
+
+// Memory.Init: remembers the size shift needed by DecodeAllocSize, flags the
+// analyzer as initialized and logs the trace parameters.
+void
+AllocationAnalyzer::OnInit(const ::Memory_Init& Ev)
+{
+    m_Initialized = true;
+    m_SizeShift   = Ev.SizeShift();
+
+    ZEN_DEBUG("Memory trace init: version={}, sizeShift={}, minAlignment={}, markerPeriod={}, pageSize={}",
+              Ev.Version(), m_SizeShift, Ev.MinAlignment(), Ev.MarkerPeriod(), Ev.PageSize());
+}
+
+// Memory.Marker: converts the marker cycle to microseconds, remembers it as the
+// latest known time and gives the timeline a chance to emit a sample.
+// Markers are ignored while no usable timing information is available.
+void
+AllocationAnalyzer::OnMarker(const ::Memory_Marker& Ev)
+{
+    const bool HaveTiming = m_Timing && m_Timing->Freq != 0;
+    if (HaveTiming)
+    {
+        const uint32_t NowUs = m_Timing->CycleToTimeUs(Ev.Cycle());
+        m_HasReceivedMarker  = true;
+        m_LastMarkerTimeUs   = NowUs;
+        MaybeEmitSample(NowUs);
+    }
+}
+
+// Memory.Alloc: allocation on the root heap named by the event.
+void
+AllocationAnalyzer::OnAlloc(const ::Memory_Alloc& Ev)
+{
+    constexpr bool kIsRealloc   = false;
+    const uint64_t DecodedBytes = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower());
+    HandleAlloc(Ev.Address(), DecodedBytes, Ev.RootHeap(), Ev.CallstackId(), Ev.get_thread_id(), kIsRealloc);
+}
+
+// Memory.AllocSystem: allocation that is always attributed to root heap 0.
+void
+AllocationAnalyzer::OnAllocSystem(const ::Memory_AllocSystem& Ev)
+{
+    constexpr uint8_t kSystemRootHeap = 0;
+    constexpr bool    kIsRealloc      = false;
+    const uint64_t    DecodedBytes    = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower());
+    HandleAlloc(Ev.Address(), DecodedBytes, kSystemRootHeap, Ev.CallstackId(), Ev.get_thread_id(), kIsRealloc);
+}
+
+// Memory.AllocVideo: allocation that is always attributed to root heap 1.
+void
+AllocationAnalyzer::OnAllocVideo(const ::Memory_AllocVideo& Ev)
+{
+    constexpr uint8_t kVideoRootHeap = 1;
+    constexpr bool    kIsRealloc     = false;
+    const uint64_t    DecodedBytes   = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower());
+    HandleAlloc(Ev.Address(), DecodedBytes, kVideoRootHeap, Ev.CallstackId(), Ev.get_thread_id(), kIsRealloc);
+}
+
+// Memory.Free: release on the root heap named by the event.
+void
+AllocationAnalyzer::OnFree(const ::Memory_Free& Ev)
+{
+    constexpr bool kIsRealloc = false;
+    HandleFree(Ev.Address(), Ev.RootHeap(), Ev.CallstackId(), kIsRealloc);
+}
+
+// Memory.FreeSystem: release forwarded with root heap 0.
+void
+AllocationAnalyzer::OnFreeSystem(const ::Memory_FreeSystem& Ev)
+{
+    constexpr uint8_t kSystemRootHeap = 0;
+    constexpr bool    kIsRealloc      = false;
+    HandleFree(Ev.Address(), kSystemRootHeap, Ev.CallstackId(), kIsRealloc);
+}
+
+// Memory.FreeVideo: release forwarded with root heap 1.
+void
+AllocationAnalyzer::OnFreeVideo(const ::Memory_FreeVideo& Ev)
+{
+    constexpr uint8_t kVideoRootHeap = 1;
+    constexpr bool    kIsRealloc     = false;
+    HandleFree(Ev.Address(), kVideoRootHeap, Ev.CallstackId(), kIsRealloc);
+}
+
+// Memory.ReallocAlloc: allocating half of a realloc on the root heap named by
+// the event; counted as a realloc-alloc rather than a plain alloc.
+void
+AllocationAnalyzer::OnReallocAlloc(const ::Memory_ReallocAlloc& Ev)
+{
+    constexpr bool kIsRealloc   = true;
+    const uint64_t DecodedBytes = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower());
+    HandleAlloc(Ev.Address(), DecodedBytes, Ev.RootHeap(), Ev.CallstackId(), Ev.get_thread_id(), kIsRealloc);
+}
+
+// Memory.ReallocAllocSystem: allocating half of a realloc, attributed to root
+// heap 0 and counted as a realloc-alloc.
+void
+AllocationAnalyzer::OnReallocAllocSystem(const ::Memory_ReallocAllocSystem& Ev)
+{
+    constexpr uint8_t kSystemRootHeap = 0;
+    constexpr bool    kIsRealloc      = true;
+    const uint64_t    DecodedBytes    = DecodeAllocSize(Ev.Size(), Ev.AlignmentPow2_SizeLower());
+    HandleAlloc(Ev.Address(), DecodedBytes, kSystemRootHeap, Ev.CallstackId(), Ev.get_thread_id(), kIsRealloc);
+}
+
+// Memory.ReallocFree: releasing half of a realloc; counted as a realloc-free.
+void
+AllocationAnalyzer::OnReallocFree(const ::Memory_ReallocFree& Ev)
+{
+    constexpr bool kIsRealloc = true;
+    HandleFree(Ev.Address(), Ev.RootHeap(), Ev.CallstackId(), kIsRealloc);
+}
+
+// Memory.ReallocFreeSystem: releasing half of a realloc, forwarded with root
+// heap 0 and counted as a realloc-free.
+void
+AllocationAnalyzer::OnReallocFreeSystem(const ::Memory_ReallocFreeSystem& Ev)
+{
+    constexpr uint8_t kSystemRootHeap = 0;
+    constexpr bool    kIsRealloc      = true;
+    HandleFree(Ev.Address(), kSystemRootHeap, Ev.CallstackId(), kIsRealloc);
+}
+
+// Memory.HeapSpec: creates or overwrites the heap description stored under the
+// event's heap id.
+void
+AllocationAnalyzer::OnHeapSpec(const ::Memory_HeapSpec& Ev)
+{
+    const uint32_t HeapId = Ev.Id();
+
+    HeapInfo& Heap = m_Heaps[HeapId];
+    Heap.Id        = HeapId;
+    Heap.ParentId  = Ev.ParentId();
+    Heap.Flags     = Ev.Flags();
+    Heap.Name      = SafeFieldStr(Ev.Name());
+}
+
+// Memory.HeapMarkAlloc: flags a tracked allocation as a heap and takes its
+// bytes out of the running totals. Heap-marked allocations represent
+// address-space reservations (e.g. module images) and should not count towards
+// the regular memory budget. Unknown addresses and allocations that are
+// already marked are ignored.
+void
+AllocationAnalyzer::OnHeapMarkAlloc(const ::Memory_HeapMarkAlloc& Ev)
+{
+    auto Found = m_LiveAllocs.find(Ev.Address());
+    if (Found == m_LiveAllocs.end() || Found->second.IsHeap)
+    {
+        return;
+    }
+
+    LiveAlloc& Marked = Found->second;
+    Marked.IsHeap     = true;
+
+    const int64_t Bytes = int64_t(Marked.Size);
+    m_CurrentBytes -= Bytes;
+    switch (Marked.RootHeap)
+    {
+        case 0:
+            m_SystemBytes -= Bytes;
+            break;
+        case 1:
+            m_VideoBytes -= Bytes;
+            break;
+        default:
+            break;
+    }
+
+    auto HeapIt = m_RootHeapStats.find(Marked.RootHeap);
+    if (HeapIt != m_RootHeapStats.end())
+    {
+        HeapIt->second.CurrentBytes -= Bytes;
+    }
+}
+
+// Memory.HeapUnmarkAlloc: clears the heap flag of a tracked allocation and adds
+// its bytes back to the running totals. Unknown addresses and allocations that
+// are not currently marked are ignored.
+void
+AllocationAnalyzer::OnHeapUnmarkAlloc(const ::Memory_HeapUnmarkAlloc& Ev)
+{
+    auto Found = m_LiveAllocs.find(Ev.Address());
+    if (Found == m_LiveAllocs.end() || !Found->second.IsHeap)
+    {
+        return;
+    }
+
+    LiveAlloc& Unmarked = Found->second;
+    Unmarked.IsHeap     = false;
+
+    const int64_t Bytes = int64_t(Unmarked.Size);
+    m_CurrentBytes += Bytes;
+    switch (Unmarked.RootHeap)
+    {
+        case 0:
+            m_SystemBytes += Bytes;
+            break;
+        case 1:
+            m_VideoBytes += Bytes;
+            break;
+        default:
+            break;
+    }
+
+    auto HeapIt = m_RootHeapStats.find(Unmarked.RootHeap);
+    if (HeapIt != m_RootHeapStats.end())
+    {
+        HeapIt->second.CurrentBytes += Bytes;
+    }
+}
+
+// Memory.TagSpec: creates or overwrites the tag description stored under the
+// event's tag id.
+void
+AllocationAnalyzer::OnTagSpec(const ::Memory_TagSpec& Ev)
+{
+    const int32_t TagId = Ev.Tag();
+
+    TagInfo& Entry = m_Tags[TagId];
+    Entry.Tag      = TagId;
+    Entry.Parent   = Ev.Parent();
+    Entry.Display  = SafeFieldStr(Ev.Display());
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Public accessors
+
+// Snapshot of the aggregate counters. Memory data is reported as present when
+// a Memory.Init event was seen or at least one plain allocation was counted.
+// LiveAllocations excludes heap-marked entries.
+AllocationSummary
+AllocationAnalyzer::Summary() const
+{
+    uint32_t NonHeapLive = 0;
+    for (const auto& Entry : m_LiveAllocs)
+    {
+        NonHeapLive += Entry.second.IsHeap ? 0u : 1u;
+    }
+
+    AllocationSummary Out;
+    Out.HasMemoryData      = m_Initialized || m_TotalAllocs > 0;
+    Out.TotalAllocs        = m_TotalAllocs;
+    Out.TotalFrees         = m_TotalFrees;
+    Out.TotalReallocAllocs = m_TotalReallocAllocs;
+    Out.TotalReallocFrees  = m_TotalReallocFrees;
+    Out.PeakBytes          = m_PeakBytes;
+    Out.PeakTimeUs         = m_PeakTimeUs;
+    Out.EndBytes           = m_CurrentBytes;
+    Out.LiveAllocations    = NonHeapLive;
+    return Out;
+}
+
+// Unconditionally appends one last timeline sample so the timeline captures
+// the terminal memory state even if no Marker arrived recently. The sample is
+// stamped with TraceEndUs, or with the last marker time when a marker was
+// received and is later than TraceEndUs. Does nothing before Memory.Init.
+void
+AllocationAnalyzer::EmitFinalSample(uint32_t TraceEndUs)
+{
+    if (m_Initialized)
+    {
+        uint32_t StampUs = TraceEndUs;
+        if (m_HasReceivedMarker && m_LastMarkerTimeUs > TraceEndUs)
+        {
+            StampUs = m_LastMarkerTimeUs;
+        }
+
+        m_Timeline.push_back(MemoryTimelineSample{
+            .TimeUs              = StampUs,
+            .TotalAllocatedBytes = m_CurrentBytes,
+            .SystemBytes         = m_SystemBytes,
+            .VideoBytes          = m_VideoBytes,
+        });
+    }
+}
+
+// Aggregates the allocations that are still live by allocating callstack.
+//
+// Allocations without a callstack (id 0) and heap-marked allocations are
+// skipped. For each remaining callstack the result holds the live byte total,
+// the live allocation count and the distinct thread ids that allocated.
+//
+// Returns the per-callstack stats sorted by live bytes, largest first.
+eastl::vector<CallstackAllocStat>
+AllocationAnalyzer::BuildCallstackStats() const
+{
+    eastl::hash_map<uint32_t, CallstackAllocStat> Map;
+    for (const auto& [Addr, Alloc] : m_LiveAllocs)
+    {
+        if (Alloc.CallstackId == 0 || Alloc.IsHeap)
+        {
+            continue;
+        }
+        CallstackAllocStat& S = Map[Alloc.CallstackId];
+        S.CallstackId         = Alloc.CallstackId;
+        S.LiveBytes += int64_t(Alloc.Size);
+        S.LiveCount++;
+        // Keep ThreadIds free of duplicates.
+        if (eastl::find(S.ThreadIds.begin(), S.ThreadIds.end(), Alloc.ThreadId) == S.ThreadIds.end())
+        {
+            S.ThreadIds.push_back(Alloc.ThreadId);
+        }
+    }
+
+    eastl::vector<CallstackAllocStat> Result;
+    Result.reserve(Map.size());
+    for (auto& [Id, Stat] : Map)
+    {
+        // The temporary map is discarded right after this loop, so move each
+        // entry out instead of deep-copying it (including its ThreadIds).
+        Result.push_back(eastl::move(Stat));
+    }
+    eastl::sort(Result.begin(), Result.end(), [](const CallstackAllocStat& A, const CallstackAllocStat& B) {
+        return A.LiveBytes > B.LiveBytes;
+    });
+    return Result;
+}
+
+// Reports the callstacks whose freed allocations were, on average, short-lived.
+//
+// Per callstack only the sum of alloc->free event distances and the number of
+// freed allocations are stored, not the individual distances. The threshold is
+// therefore applied to the mean: a callstack is reported (with all of its
+// freed allocations counted as churn) when its mean distance does not exceed
+// ChurnDistanceThreshold. This is an approximation -- a per-alloc histogram
+// would be more precise but much more expensive. Callstacks without any freed
+// allocation are left out.
+//
+// Returns the matching callstacks sorted by churn allocation count, largest first.
+eastl::vector<CallstackChurnStat>
+AllocationAnalyzer::BuildChurnStats(uint64_t ChurnDistanceThreshold) const
+{
+    const double Limit = double(ChurnDistanceThreshold);
+
+    eastl::vector<CallstackChurnStat> Churny;
+    Churny.reserve(m_ChurnByCallstack.size());
+
+    for (const auto& [CsId, Accum] : m_ChurnByCallstack)
+    {
+        if (Accum.ChurnAllocs != 0)
+        {
+            const double AverageDistance = double(Accum.ChurnDistanceSum) / double(Accum.ChurnAllocs);
+            if (!(AverageDistance > Limit))
+            {
+                CallstackChurnStat Row;
+                Row.CallstackId  = CsId;
+                Row.ChurnAllocs  = Accum.ChurnAllocs;
+                Row.ChurnBytes   = Accum.ChurnBytes;
+                Row.TotalAllocs  = Accum.TotalAllocs;
+                Row.TotalBytes   = Accum.TotalBytes;
+                Row.MeanDistance = AverageDistance;
+                Churny.push_back(Row);
+            }
+        }
+    }
+
+    eastl::sort(Churny.begin(), Churny.end(), [](const CallstackChurnStat& Lhs, const CallstackChurnStat& Rhs) {
+        return Lhs.ChurnAllocs > Rhs.ChurnAllocs;
+    });
+    return Churny;
+}
+
+// Converts the internal size histogram into labelled buckets, skipping buckets
+// that never received an allocation.
+//
+// Labels: bucket 0 is [0, 0]; bucket 1 is [1, 2]; bucket i (i >= 2) is
+// [2^(i-1) + 1, 2^i], with the upper bound saturating at the largest uint64
+// value for i >= 64.
+eastl::vector<AllocSizeBucket>
+AllocationAnalyzer::BuildSizeHistogram() const
+{
+    eastl::vector<AllocSizeBucket> Buckets;
+    Buckets.reserve(kSizeHistogramBuckets);
+
+    for (size_t Index = 0; Index < kSizeHistogramBuckets; ++Index)
+    {
+        const SizeBucketAccum& Source = m_SizeHistogram[Index];
+        if (Source.Count != 0)
+        {
+            uint64_t Lowest  = 0;
+            uint64_t Highest = 0;
+            if (Index > 0)
+            {
+                Lowest  = (Index > 1) ? ((uint64_t(1) << (Index - 1)) + 1) : 1;
+                Highest = (Index < 64) ? (uint64_t(1) << Index) : ~uint64_t(0);
+            }
+
+            AllocSizeBucket Out;
+            Out.MinSize = Lowest;
+            Out.MaxSize = Highest;
+            Out.Count   = Source.Count;
+            Out.Bytes   = Source.Bytes;
+            Buckets.push_back(Out);
+        }
+    }
+    return Buckets;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// CallstackAnalyzer implementation
+
+void
+CallstackAnalyzer::subscribe(Vector<Subscription>& Subs) // Appends one subscription per callstack event handler to Subs.
+{
+ Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpec); // Uncompressed frames.
+ Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpecDeltaVarInt); // Frames decoded with DecodeDeltaVarInt.
+ Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpecDelta7bit); // Frames decoded with DecodeDelta7bit.
+ Subs.emplace_back(this, &CallstackAnalyzer::OnCallstackSpecXORAndRLE); // Frames decoded with DecodeXORAndRLE.
+}
+
+// Stores (or replaces) the frames of callstack Id. Id 0 and empty frame lists
+// are ignored.
+void
+CallstackAnalyzer::StoreCallstack(uint32_t Id, const uint64_t* Frames, size_t Count)
+{
+    const bool Storable = (Id != 0) && (Count != 0);
+    if (Storable)
+    {
+        m_Callstacks[Id].assign(Frames, Frames + Count);
+    }
+}
+
+// Memory.CallstackSpec: frames arrive uncompressed and are stored directly.
+void
+CallstackAnalyzer::OnCallstackSpec(const ::Memory_CallstackSpec& Ev)
+{
+    Array<uint64[]> RawFrames = Ev.Frames();
+    StoreCallstack(Ev.CallstackId(), RawFrames.get(), RawFrames.get_count());
+}
+
+// Memory.CallstackSpecDeltaVarInt: decodes the compressed frames with
+// DecodeDeltaVarInt and stores the result.
+void
+CallstackAnalyzer::OnCallstackSpecDeltaVarInt(const ::Memory_CallstackSpecDeltaVarInt& Ev)
+{
+    Array<uint8[]>          Packed  = Ev.CompressedFrames();
+    eastl::vector<uint64_t> Decoded = DecodeDeltaVarInt(Packed.get(), Packed.get_size());
+    StoreCallstack(Ev.CallstackId(), Decoded.data(), Decoded.size());
+}
+
+// Memory.CallstackSpecDelta7bit: decodes the compressed frames with
+// DecodeDelta7bit and stores the result.
+void
+CallstackAnalyzer::OnCallstackSpecDelta7bit(const ::Memory_CallstackSpecDelta7bit& Ev)
+{
+    Array<uint8[]>          Packed  = Ev.CompressedFrames();
+    eastl::vector<uint64_t> Decoded = DecodeDelta7bit(Packed.get(), Packed.get_size());
+    StoreCallstack(Ev.CallstackId(), Decoded.data(), Decoded.size());
+}
+
+// Memory.CallstackSpecXORAndRLE: decodes the compressed frames with
+// DecodeXORAndRLE and stores the result.
+void
+CallstackAnalyzer::OnCallstackSpecXORAndRLE(const ::Memory_CallstackSpecXORAndRLE& Ev)
+{
+    Array<uint8[]>          Packed  = Ev.CompressedFrames();
+    eastl::vector<uint64_t> Decoded = DecodeXORAndRLE(Packed.get(), Packed.get_size());
+    StoreCallstack(Ev.CallstackId(), Decoded.data(), Decoded.size());
+}