aboutsummaryrefslogtreecommitdiff
path: root/src/zencore/memtrack/callstacktrace.cpp
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2024-11-25 09:56:23 +0100
committerGitHub Enterprise <[email protected]>2024-11-25 09:56:23 +0100
commit8b8de92e51db4cc4c1727712c736dcba5f79d369 (patch)
tree1f58edaaad389837a7652daebab246125762240e /src/zencore/memtrack/callstacktrace.cpp
parent5.5.13 (diff)
downloadzen-8b8de92e51db4cc4c1727712c736dcba5f79d369.tar.xz
zen-8b8de92e51db4cc4c1727712c736dcba5f79d369.zip
Insights-compatible memory tracking (#214)
This change introduces support for tracing of memory allocation activity. The code is ported from UE5, and Unreal Insights can be used to analyze the output. This is currently only fully supported on Windows, but will be extended to Mac/Linux in the near future. To activate full memory tracking, pass `--trace=memory` on the commandline alongside `--tracehost=<ip>` or `-tracefile=<path>`. For more control over how much detail is traced you can instead pass some combination of `callstack`, `memtag`, `memalloc` instead. In practice, `--trace=memory` is an alias for `--trace=callstack,memtag,memalloc`). For convenience we also support `--trace=memory_light` which omits call stacks. This change also introduces multiple memory allocators, which may be selected via command-line option `--malloc=<allocator>`: * `mimalloc` - mimalloc (default, same as before) * `rpmalloc` - rpmalloc is another high performance allocator for multithreaded applications which may be a better option than mimalloc (to be evaluated). Due to toolchain limitations this is currently only supported on Windows. * `stomp` - an allocator intended to be used during development/debugging to help track down memory issues such as use-after-free or out-of-bounds access. Currently only supported on Windows. * `ansi` - fallback to default system allocator
Diffstat (limited to 'src/zencore/memtrack/callstacktrace.cpp')
-rw-r--r--src/zencore/memtrack/callstacktrace.cpp1059
1 files changed, 1059 insertions, 0 deletions
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp
new file mode 100644
index 000000000..d860c05d1
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.cpp
@@ -0,0 +1,1059 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "callstacktrace.h"
+
+#include <zenbase/zenbase.h>
+#include <zencore/string.h>
+
+#if UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+// Platform implementations of back tracing
+////////////////////////////////////////////////////////////////////////////////
+void CallstackTrace_CreateInternal(FMalloc*);
+void CallstackTrace_InitializeInternal();
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(CallstackChannel)
+UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec)
+
+uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Create(class FMalloc* InMalloc)
+{
+ static auto InitOnce = [&] {
+ CallstackTrace_CreateInternal(InMalloc);
+ return true;
+ }();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Initialize()
+{
+ GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot();
+
+ static auto InitOnce = [&] {
+ CallstackTrace_InitializeInternal();
+ return true;
+ }();
+}
+
+} // namespace zen
+
+#endif
+
+#if ZEN_PLATFORM_WINDOWS
+# include "moduletrace.h"
+
+# include "growonlylockfreehash.h"
+
+# include <zencore/scopeguard.h>
+# include <zencore/thread.h>
+# include <zencore/trace.h>
+
+# include <atomic>
+# include <span>
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winnt.h>
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS
+# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0
+# endif
+
+// 0=off, 1=stats, 2=validation, 3=truth_compare
+# define BACKTRACE_DBGLVL 0
+
+# define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0))
+
+static bool GModulesAreInitialized = false;
+
// This implementation is using unwind tables which results in very fast
// stack walking. In some cases this is not suitable, and we then fall back
// to the standard stack walking implementation.
+# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES)
+# if defined(__clang__)
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0
+# else
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1
+# endif
+# endif
+
+// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms
+# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER)
+# if defined(__clang__)
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0
+# else
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1
+# endif
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB)
+// Initial size of the known set of callstacks
+# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE)
+// If disabled the known set will not grow. New callstacks will not be
+// reported if the set is full
+# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1
+# endif
+
+namespace zen {
+
+class FMalloc;
+
+UE_TRACE_CHANNEL_EXTERN(CallstackChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync)
+ UE_TRACE_EVENT_FIELD(uint32, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint64[], Frames)
+UE_TRACE_EVENT_END()
+
// Deduplicates captured callstacks. Each unique callstack (identified by a
// 64-bit hash over its frames) is emitted to the trace stream exactly once as
// a Memory.CallstackSpec event and mapped to a small integer id that callers
// attach to allocation events.
class FCallstackTracer
{
public:
    // One captured backtrace: a precomputed hash plus a borrowed (not owned)
    // array of return addresses.
    struct FBacktraceEntry
    {
        uint64_t Hash = 0;
        uint32_t FrameCount = 0;
        uint64_t* Frames;
    };

    FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {}

    // Returns the id assigned to Entry's hash, assigning a new id and tracing
    // the frames the first time this hash is seen. Returns 0 ("unknown") only
    // when growth is disabled and the set is full.
    uint32_t AddCallstack(const FBacktraceEntry& Entry)
    {
        bool bAlreadyAdded = false;

        // Our set implementation doesn't allow for zero entries (zero represents an empty element
        // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation,
        // treat it as a "1" instead, for purposes of tracking if we've seen that callstack.
        const uint64_t Hash = FMath::Max(Entry.Hash, 1ull);
        uint32_t Id;
        KnownSet.Find(Hash, &Id, &bAlreadyAdded);
        if (!bAlreadyAdded)
        {
            Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed);
            // On the first callstack reserve memory up front
            if (Id == 1)
            {
                KnownSet.Reserve(InitialReserveCount);
            }
#    if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE
            // If configured as not growable, start returning unknown id's when full.
            if (Id >= InitialReserveCount)
            {
                return 0;
            }
#    endif
            KnownSet.Emplace(Hash, Id);
            UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel)
                << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount);
        }

        return Id;
    }

private:
    // Entry layout for the lock-free hash set: 64-bit hash key, 32-bit id.
    struct FEncounteredCallstackSetEntry
    {
        std::atomic_uint64_t Key;
        std::atomic_uint32_t Value;

        inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); }
        inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); }
        // Key == 0 marks an unused slot (hence the Max(Hash, 1) above).
        inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; }
        inline void SetKeyValue(uint64_t InKey, uint32_t InValue)
        {
            // NOTE(review): Value is stored with release ordering and the key
            // with relaxed ordering; this relies on the reader side in
            // TGrowOnlyLockFreeHash pairing these loads appropriately —
            // confirm against that header.
            Value.store(InValue, std::memory_order_release);
            Key.store(InKey, std::memory_order_relaxed);
        }
        static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); }
        static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount)
        {
            memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry));
        }
    };

    typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet;

    // Initial capacity derived from the configured reservation size.
    constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024;
    constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry);

    FEncounteredCallstackSet KnownSet;
    std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack"
};
+
+# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+/*
+ * Windows' x64 binaries contain a ".pdata" section that describes the location
+ * and size of its functions and details on how to unwind them. The unwind
+ * information includes descriptions about a function's stack frame size and
+ * the non-volatile registers it pushes onto the stack. From this we can
+ * calculate where a call instruction wrote its return address. This is enough
+ * to walk the callstack and by caching this information it can be done
+ * efficiently.
+ *
+ * Some functions need a variable amount of stack (such as those that use
+ * alloc() for example) will use a frame pointer. Frame pointers involve saving
+ * and restoring the stack pointer in the function's prologue/epilogue. This
+ * frees the function up to modify the stack pointer arbitrarily. This
+ * significantly complicates establishing where a return address is, so this
+ * pdata scheme of walking the stack just doesn't support functions like this.
+ * Walking stops if it encounters such a function. Fortunately there are
+ * usually very few such functions, saving us from having to read and track
+ * non-volatile registers which adds a significant amount of work.
+ *
 * A further optimisation is to assume we are only interested in methods that
+ * are part of engine or game code. As such we only build lookup tables for
+ * such modules and never accept OS or third party modules. Backtracing stops
+ * if an address is encountered which doesn't map to a known module.
+ */
+
////////////////////////////////////////////////////////////////////////////////
static uint32_t
AddressToId(uintptr_t Address)
{
    // Module images load on 64 KB boundaries; dropping the low 16 bits
    // compresses an address into a 32-bit id.
    return static_cast<uint32_t>(Address >> 16);
}
+
// Inverse of AddressToId: reconstruct the 64 KB-aligned address from an id.
// The shifted value must stay full pointer width: the original implementation
// narrowed it through static_cast<uint32_t>, truncating any module base above
// 4 GB (the normal case for ASLR'd 64-bit images) to its low 32 bits.
static uintptr_t
IdToAddress(uint32_t Id)
{
    return uintptr_t(Id) << 16;
}
+
// Heterogeneous comparator used by the binary searches over id-sorted
// module/function tables: compares a bare 32-bit id against any element type
// exposing an `Id` member, in either argument order.
struct FIdPredicate
{
    template<class T>
    bool operator()(uint32_t Id, const T& Element) const
    {
        return Id < Element.Id;
    }

    template<class T>
    bool operator()(const T& Element, uint32_t Id) const
    {
        return Element.Id < Id;
    }
};
+
////////////////////////////////////////////////////////////////////////////////
// Mirrors the leading bitfields of the Windows x64 UNWIND_INFO record found
// via a .pdata RUNTIME_FUNCTION entry. Layout must match the OS structure
// exactly — do not reorder or resize fields.
struct FUnwindInfo
{
    uint8_t Version : 3;      // unwind info format version (only v1 handled here)
    uint8_t Flags : 5;        // UNW_FLAG_* bits (e.g. UNW_FLAG_CHAININFO)
    uint8_t PrologBytes;      // size of the function prolog in bytes
    uint8_t NumUnwindCodes;   // number of FUnwindCode slots that follow
    uint8_t FrameReg : 4;     // non-zero => function uses a frame pointer register
    uint8_t FrameRspBias : 4; // scaled offset of the frame register from RSP
};
+
// Mirrors the Windows x64 UNWIND_CODE slot array that follows UNWIND_INFO.
// The flexible array member (warning 4200) carries opcode-dependent operands.
#    pragma warning(push)
#    pragma warning(disable : 4200)
struct FUnwindCode
{
    uint8_t PrologOffset;  // offset in the prolog of the instruction described
    uint8_t OpCode : 4;    // one of the UWOP_* values below
    uint8_t OpInfo : 4;    // opcode-specific operand / register number
    uint16_t Params[];     // additional operand slots for multi-node opcodes
};
#    pragma warning(pop)
+
// Windows x64 unwind operation codes (see the x64 exception-handling docs).
// The node counts indicate how many FUnwindCode slots each opcode consumes.
// Codes 6 and 7 are version-dependent and not produced for v1 unwind info,
// which is the only version parsed here.
enum
{
    UWOP_PUSH_NONVOL = 0,     // 1 node
    UWOP_ALLOC_LARGE = 1,     // 2 or 3 nodes
    UWOP_ALLOC_SMALL = 2,     // 1 node
    UWOP_SET_FPREG = 3,       // 1 node
    UWOP_SAVE_NONVOL = 4,     // 2 nodes
    UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes
    UWOP_SAVE_XMM128 = 8,     // 2 nodes
    UWOP_SAVE_XMM128_FAR = 9, // 3 nodes
    UWOP_PUSH_MACHFRAME = 10, // 1 node
};
+
////////////////////////////////////////////////////////////////////////////////
// Singleton that walks call stacks using prebuilt per-module frame-size tables
// (derived from .pdata unwind info) and feeds the resulting backtraces to an
// FCallstackTracer for deduplication and trace emission.
class FBacktracer
{
public:
    FBacktracer(FMalloc* InMalloc);
    ~FBacktracer();
    // Returns the process-wide instance, or nullptr before creation.
    static FBacktracer* Get();
    // Builds the frame-size table for a newly loaded module image.
    void AddModule(uintptr_t Base, const char16_t* Name);
    void RemoveModule(uintptr_t Base);
    // Walks the stack starting at the given address-of-return-address and
    // returns the deduplicated callstack id (0 = unknown).
    uint32_t GetBacktraceId(void* AddressOfReturnAddress);

private:
    // Per-function record: offset of the function within its module (Id) and
    // the net RSP adjustment of its prolog (negative MSB-flagged values mark
    // frame-pointer functions that cannot be walked past).
    struct FFunction
    {
        uint32_t Id;
        int32_t RspBias;
#    if BACKTRACE_DBGLVL >= 2
        uint32_t Size;
        const FUnwindInfo* UnwindInfo;
#    endif
    };

    // Per-module record: base/size in 64 KB-page ids plus the sorted function
    // table. Kept sorted by Id so lookups can binary-search.
    struct FModule
    {
        uint32_t Id;
        uint32_t IdSize;
        uint32_t NumFunctions;
#    if BACKTRACE_DBGLVL >= 1
        uint16 NumFpTypes;
        // uint16 *padding*
#    else
        // uint32_t *padding*
#    endif
        FFunction* Functions;
    };

    // Caches the last module hit so consecutive frames in one backtrace avoid
    // repeated module searches.
    struct FLookupState
    {
        FModule Module;
    };

    struct FFunctionLookupSetEntry
    {
        // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function)
        std::atomic_uint64_t Data;

        inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; }
        // Arithmetic shift recovers the sign-extended 16-bit bias.
        inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; }
        inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; }
        inline void SetKeyValue(uint64_t Key, int32_t Value)
        {
            Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed);
        }
        static inline uint32_t KeyHash(uint64_t Key)
        {
            // 64 bit pointer to 32 bit hash
            Key = (~Key) + (Key << 21);
            Key = Key ^ (Key >> 24);
            Key = Key * 265;
            Key = Key ^ (Key >> 14);
            Key = Key * 21;
            Key = Key ^ (Key >> 28);
            Key = Key + (Key << 31);
            return static_cast<uint32_t>(Key);
        }
        static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount)
        {
            memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry));
        }
    };
    typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet;

    // Finds the FFunction record covering Address, using/updating the cached
    // module in State. Returns nullptr for addresses outside known modules.
    const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const;
    static FBacktracer* Instance;
    mutable zen::RwLock Lock; // guards Modules/ModulesNum/ModulesCapacity
    FModule* Modules;         // sorted by FModule::Id
    int32_t ModulesNum;
    int32_t ModulesCapacity;
    FMalloc* Malloc;
    FCallstackTracer CallstackTracer;
#    if BACKTRACE_LOCK_FREE
    mutable FFunctionLookupSet FunctionLookups;
    // Breaks recursion when FunctionLookups' own allocations re-enter tracing.
    mutable bool bReentranceCheck = false;
#    endif
#    if BACKTRACE_DBGLVL >= 1
    mutable uint32_t NumFpTruncations = 0;
    mutable uint32_t TotalFunctions = 0;
#    endif
};
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer* FBacktracer::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::FBacktracer(FMalloc* InMalloc)
+: Malloc(InMalloc)
+, CallstackTracer(InMalloc)
+# if BACKTRACE_LOCK_FREE
+, FunctionLookups(InMalloc)
+# endif
+{
+# if BACKTRACE_LOCK_FREE
+ FunctionLookups.Reserve(512 * 1024); // 4 MB
+# endif
+ ModulesCapacity = 8;
+ ModulesNum = 0;
+ Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity);
+
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::~FBacktracer()
+{
+ std::span<FModule> ModulesView(Modules, ModulesNum);
+ for (FModule& Module : ModulesView)
+ {
+ Malloc->Free(Module.Functions);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer*
+FBacktracer::Get()
+{
+ return Instance;
+}
+
+bool GFullBacktraces = false;
+
////////////////////////////////////////////////////////////////////////////////
// Parses a loaded module's .pdata unwind tables and builds a sorted
// function-offset -> frame-size table for it, then registers the module in the
// sorted Modules array. Unless GFullBacktraces is set, only the main ".exe"
// image is accepted (see the header comment above about restricting lookups to
// engine/game code).
void
FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name)
{
    if (!GFullBacktraces)
    {
        // Only accept modules whose name ends in ".exe".
        const size_t NameLen = StringLength(Name);
        if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe")))
        {
            return;
        }
    }

    // Walk the PE headers in-memory to locate the section table.
    const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase;
    const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew);
    const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader);

    uint32_t NumSections = FileHeader->NumberOfSections;
    const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader);

    // Find ".pdata" section
    uintptr_t PdataBase = 0;
    uintptr_t PdataEnd = 0;
    for (uint32_t i = 0; i < NumSections; ++i)
    {
        const IMAGE_SECTION_HEADER* Section = Sections + i;
        if (*(uint64_t*)(Section->Name) ==
            0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. This constant is '.pdata'
        {
            PdataBase = ModuleBase + Section->VirtualAddress;
            PdataEnd = PdataBase + Section->SizeOfRawData;
            break;
        }
    }

    // No unwind data => nothing we can build a table from.
    if (PdataBase == 0)
    {
        return;
    }

    // Count the number of functions. The assumption here is that if we have got this far then there is at least one function
    uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION);
    if (NumFunctions == 0)
    {
        return;
    }

    // Trim trailing zero-padded RUNTIME_FUNCTION entries (SizeOfRawData may
    // extend past the real table).
    const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase;
    do
    {
        const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1;
        if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress))
        {
            break;
        }

        --NumFunctions;
    } while (NumFunctions != 0);

    // Allocate some space for the module's function-to-frame-size table
    auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions);
    FFunction* OutTableCursor = OutTable;

    // Extract frame size for each function from pdata's unwind codes.
    uint32_t NumFpFuncs = 0;
    for (uint32_t i = 0; i < NumFunctions; ++i)
    {
        const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i;

        uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress;
        const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr;

        if (UnwindInfo->Version != 1)
        {
            /* some v2s have been seen in msvc. Always seem to be assembly
             * routines (memset, memcpy, etc) */
            continue;
        }

        // Accumulate the prolog's net stack adjustment (RspBias) and record
        // frame-pointer usage (FpInfo) by interpreting each unwind opcode.
        int32_t FpInfo = 0;
        int32_t RspBias = 0;

#    if BACKTRACE_DBGLVL >= 2
        uint32_t PrologVerify = UnwindInfo->PrologBytes;
#    endif

        const auto* Code = (FUnwindCode*)(UnwindInfo + 1);
        const auto* EndCode = Code + UnwindInfo->NumUnwindCodes;
        while (Code < EndCode)
        {
#    if BACKTRACE_DBGLVL >= 2
            // Unwind codes are ordered by descending prolog offset.
            if (Code->PrologOffset > PrologVerify)
            {
                PLATFORM_BREAK();
            }
            PrologVerify = Code->PrologOffset;
#    endif

            switch (Code->OpCode)
            {
                case UWOP_PUSH_NONVOL:
                    RspBias += 8;
                    Code += 1;
                    break;

                case UWOP_ALLOC_LARGE:
                    // OpInfo == 1: unscaled 32-bit size in two param slots;
                    // OpInfo == 0: 16-bit size scaled by 8 in one slot.
                    if (Code->OpInfo)
                    {
                        RspBias += *(uint32_t*)(Code->Params);
                        Code += 3;
                    }
                    else
                    {
                        RspBias += Code->Params[0] * 8;
                        Code += 2;
                    }
                    break;

                case UWOP_ALLOC_SMALL:
                    RspBias += (Code->OpInfo * 8) + 8;
                    Code += 1;
                    break;

                case UWOP_SET_FPREG:
                    // Function will adjust RSP (e.g. through use of alloca()) so it
                    // uses a frame pointer register. There's instructions like;
                    //
                    //   push FRAME_REG
                    //   lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)]
                    //   ...
                    //   add rsp, rax
                    //   ...
                    //   sub rsp, FRAME_RSP_BIAS * 16
                    //   pop FRAME_REG
                    //   ret
                    //
                    // To recover the stack frame we would need to track non-volatile
                    // registers which adds a lot of overhead for a small subset of
                    // functions. Instead we'll end backtraces at these functions.

                    // MSB is set to detect variable sized frames that we can't proceed
                    // past when back-tracing.
                    NumFpFuncs++;
                    FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23);
                    Code += 1;
                    break;

                case UWOP_PUSH_MACHFRAME:
                    RspBias = Code->OpInfo ? 48 : 40;
                    Code += 1;
                    break;

                case UWOP_SAVE_NONVOL:
                    Code += 2;
                    break; /* saves are movs instead of pushes */
                case UWOP_SAVE_NONVOL_FAR:
                    Code += 3;
                    break;
                case UWOP_SAVE_XMM128:
                    Code += 2;
                    break;
                case UWOP_SAVE_XMM128_FAR:
                    Code += 3;
                    break;

                default:
#    if BACKTRACE_DBGLVL >= 2
                    PLATFORM_BREAK();
#    endif
                    break;
            }
        }

        // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a
        // single actual function in the .text segment.
        bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO);

        RspBias /= sizeof(void*); // stack push/pops in units of one machine word
        RspBias += !bIsChained;   // and one extra push for the ret address
        RspBias |= FpInfo;        // pack in details about possible frame pointer

        if (bIsChained)
        {
            // Fold chained entries into the preceding (parent) record.
            OutTableCursor[-1].RspBias += RspBias;
#    if BACKTRACE_DBGLVL >= 2
            OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress);
#    endif
        }
        else
        {
            *OutTableCursor = {
                FunctionTable->BeginAddress,
                RspBias,
#    if BACKTRACE_DBGLVL >= 2
                FunctionTable->EndAddress - FunctionTable->BeginAddress,
                UnwindInfo,
#    endif
            };

            ++OutTableCursor;
        }
    }

    uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage;
    ModuleSize += 0xffff; // to align up to next 64K page. it'll get shifted by AddressToId()

    FModule Module = {
        AddressToId(ModuleBase),
        AddressToId(ModuleSize),
        uint32_t(uintptr_t(OutTableCursor - OutTable)),
#    if BACKTRACE_DBGLVL >= 1
        uint16(NumFpFuncs),
#    endif
        OutTable,
    };

    // Publish the module under the write lock, keeping Modules sorted by id so
    // LookupFunction can binary-search.
    {
        zen::RwLock::ExclusiveLockScope _(Lock);

        if (ModulesNum + 1 > ModulesCapacity)
        {
            ModulesCapacity += 8;
            Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity);
        }
        Modules[ModulesNum++] = Module;

        std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; });
    }

#    if BACKTRACE_DBGLVL >= 1
    NumFpTruncations += NumFpFuncs;
    TotalFunctions += NumFunctions;
#    endif
}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::RemoveModule(uintptr_t ModuleBase)
+{
+ // When Windows' RequestExit() is called it hard-terminates all threads except
+ // the main thread and then proceeds to unload the process' DLLs. This hard
+ // thread termination can result is dangling locked locks. Not an issue as
+ // the rule is "do not do anything multithreaded in DLL load/unload". And here
+ // we are, taking write locks during DLL unload which is, quite unsurprisingly,
+ // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us
+ // anything due to how DLLs and processes' address spaces work. So we will...
+# if defined PLATFORM_WINDOWS
+ ZEN_UNUSED(ModuleBase);
+
+ return;
+# else
+
+ zen::RwLock::ExclusiveLockScope _(Lock);
+
+ uint32_t ModuleId = AddressToId(ModuleBase);
+ TArrayView<FModule> ModulesView(Modules, ModulesNum);
+ int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate());
+ if (Index >= ModulesNum)
+ {
+ return;
+ }
+
+ const FModule& Module = Modules[Index];
+ if (Module.Id != ModuleId)
+ {
+ return;
+ }
+
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations -= Module.NumFpTypes;
+ TotalFunctions -= Module.NumFunctions;
+# endif
+
+ // no code should be executing at this point so we can safely free the
+ // table knowing know one is looking at it.
+ Malloc->Free(Module.Functions);
+
+ for (SIZE_T i = Index; i < ModulesNum; i++)
+ {
+ Modules[i] = Modules[i + 1];
+ }
+
+ --ModulesNum;
+# endif
+}
+
////////////////////////////////////////////////////////////////////////////////
// Maps a return address to its FFunction record (function offset + frame
// size). Returns nullptr if the address falls outside every known module.
// State caches the last module hit so consecutive frames from one backtrace
// usually skip the module search entirely.
const FBacktracer::FFunction*
FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const
{
    // This function caches the previous module look up. The theory here is that
    // a series of return address in a backtrace often cluster around one module

    FIdPredicate IdPredicate;

    // Look up the module that Address belongs to. The unsigned subtraction
    // deliberately wraps for addresses below the module base, so a single
    // >= IdSize comparison covers both "below" and "above" the module range.
    uint32_t AddressId = AddressToId(Address);
    if ((AddressId - State.Module.Id) >= State.Module.IdSize)
    {
        auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate);

        if (FindIt == Modules)
        {
            return nullptr;
        }

        // upper_bound returns the first module past AddressId; step back to
        // the candidate module that could contain it.
        State.Module = *--FindIt;
    }

    // Check that the address is within the address space of the best-found module
    const FModule* Module = &(State.Module);
    if ((AddressId - Module->Id) >= Module->IdSize)
    {
        return nullptr;
    }

    // Now we've a module we have a table of functions and their stack sizes so
    // we can get the frame size for Address
    uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id));
    std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions);
    auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate);
    if (FindIt == begin(FuncsView))
    {
        return nullptr;
    }

    const FFunction* Function = &(*--FindIt);
#    if BACKTRACE_DBGLVL >= 2
    // Validation builds also check the address lies within the function size.
    if ((FuncId - Function->Id) >= Function->Size)
    {
        PLATFORM_BREAK();
        return nullptr;
    }
#    endif
    return Function;
}
+
////////////////////////////////////////////////////////////////////////////////
// Walks the call stack starting from AddressOfReturnAddress using the
// precomputed per-function frame sizes, hashes the frames, and returns the
// deduplicated callstack id from FCallstackTracer. Walking stops at the first
// address outside known modules, at frame-pointer functions (negative bias),
// at a null return address, or after 256 frames.
uint32_t
FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
{
    FLookupState LookupState = {};
    uint64_t Frames[256];

    uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress;

#    if BACKTRACE_DBGLVL >= 3
    // Truth-compare builds capture the OS's view of the stack for validation.
    uintptr_t TruthBacktrace[1024];
    uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr);
    uintptr_t* TruthCursor = TruthBacktrace;
    for (; *TruthCursor != *StackPointer; ++TruthCursor)
        ;
#    endif

#    if BACKTRACE_DBGLVL >= 2
    struct
    {
        void* Sp;
        void* Ip;
        const FFunction* Function;
    } Backtrace[1024] = {};
    uint32_t NumBacktrace = 0;
#    endif

    uint64_t BacktraceHash = 0;
    uint32_t FrameIdx = 0;

#    if BACKTRACE_LOCK_FREE
    // When running lock free, we defer the lock until a lock free function lookup fails
    bool Locked = false;
#    else
    FScopeLock _(&Lock);
#    endif
    do
    {
        uintptr_t RetAddr = *StackPointer;

        Frames[FrameIdx++] = RetAddr;

        // This is a simple order-dependent LCG. Should be sufficient enough
        BacktraceHash += RetAddr;
        BacktraceHash *= 0x30be8efa499c249dull;

#    if BACKTRACE_LOCK_FREE
        // Fast path: consult the lock-free cache of per-address RSP biases.
        // A negative cached bias marks "stop walking here".
        int32_t RspBias;
        bool bIsAlreadyInTable;
        FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable);
        if (bIsAlreadyInTable)
        {
            if (RspBias < 0)
            {
                break;
            }
            else
            {
                StackPointer += RspBias;
                continue;
            }
        }
        if (!Locked)
        {
            Lock.AcquireExclusive();
            Locked = true;

            // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion
            // when the allocation reenters the stack trace code. We need to break out of the recursion
            // in that case, and let the allocation complete, with the assumption that we don't care
            // about call stacks for internal allocations in the memory reporting system. The "Lock()"
            // above will only fall through with this flag set if it's a second lock in the same thread.
            if (bReentranceCheck)
            {
                break;
            }
        }
#    endif // BACKTRACE_LOCK_FREE

        const FFunction* Function = LookupFunction(RetAddr, LookupState);
        if (Function == nullptr)
        {
#    if BACKTRACE_LOCK_FREE
            // LookupFunction fails when modules are not yet registered. In this case, we do not want the address
            // to be added to the lookup map, but to retry the lookup later when modules are properly registered.
            if (GModulesAreInitialized)
            {
                bReentranceCheck = true;
                auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
                FunctionLookups.Emplace(RetAddr, -1);
            }
#    endif
            break;
        }

#    if BACKTRACE_LOCK_FREE
        {
            // This conversion improves probing performance for the hash set. Additionally it is critical
            // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map.
            int32_t StoreBias = Function->RspBias < 0 ? -1 : Function->RspBias;
            bReentranceCheck = true;
            auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
            FunctionLookups.Emplace(RetAddr, StoreBias);
        }
#    endif

#    if BACKTRACE_DBGLVL >= 2
        if (NumBacktrace < 1024)
        {
            Backtrace[NumBacktrace++] = {
                StackPointer,
                (void*)RetAddr,
                Function,
            };
        }
#    endif

        if (Function->RspBias < 0)
        {
            // This is a frame with a variable-sized stack pointer. We don't
            // track enough information to proceed.
#    if BACKTRACE_DBGLVL >= 1
            NumFpTruncations++;
#    endif
            break;
        }

        // Advance past this frame: bias is in machine words and includes the
        // slot holding the return address.
        StackPointer += Function->RspBias;
    }
    // Truncate callstacks longer than MaxStackDepth
    while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames));

    // Build the backtrace entry for submission
    FCallstackTracer::FBacktraceEntry BacktraceEntry;
    BacktraceEntry.Hash = BacktraceHash;
    BacktraceEntry.FrameCount = FrameIdx;
    BacktraceEntry.Frames = Frames;

#    if BACKTRACE_DBGLVL >= 3
    for (uint32_t i = 0; i < NumBacktrace; ++i)
    {
        if ((void*)TruthCursor[i] != Backtrace[i].Ip)
        {
            PLATFORM_BREAK();
            break;
        }
    }
#    endif

#    if BACKTRACE_LOCK_FREE
    if (Locked)
    {
        Lock.ReleaseExclusive();
    }
#    endif
    // Add to queue to be processed. This might block until there is room in the
    // queue (i.e. the processing thread has caught up processing).
    return CallstackTracer.AddCallstack(BacktraceEntry);
}
+}
+
+# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+ ////////////////////////////////////////////////////////////////////////////////
+ class FBacktracer
+ {
+ public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ inline uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+ uint32_t GetBacktraceId(uint64_t ReturnAddress);
+ void AddModule(uintptr_t Base, const char16_t* Name) {}
+ void RemoveModule(uintptr_t Base) {}
+
+ private:
+ static FBacktracer* Instance;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Instance = nullptr;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::~FBacktracer() {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Get() { return Instance; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+ {
+ const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress;
+ return GetBacktraceId(ReturnAddress);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress)
+ {
+# if !UE_BUILD_SHIPPING
+ uint64_t StackFrames[256];
+ int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames));
+ if (NumStackFrames > 0)
+ {
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ uint64_t BacktraceId = 0;
+ uint32_t FrameIdx = 0;
+ bool bUseAddress = false;
+ for (int32_t Index = 0; Index < NumStackFrames; Index++)
+ {
+ if (!bUseAddress)
+ {
+ // start using backtrace only after ReturnAddress
+ if (StackFrames[Index] == (uint64_t)ReturnAddress)
+ {
+ bUseAddress = true;
+ }
+ }
+ if (bUseAddress || NumStackFrames == 1)
+ {
+ uint64_t RetAddr = StackFrames[Index];
+ StackFrames[FrameIdx++] = RetAddr;
+
+ // This is a simple order-dependent LCG. Should be sufficient enough
+ BacktraceId += RetAddr;
+ BacktraceId *= 0x30be8efa499c249dull;
+ }
+ }
+
+ // Save the collected id
+ BacktraceEntry.Hash = BacktraceId;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = StackFrames;
+
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+ }
+# endif
+
+ return 0;
+ }
+
+}
+
+# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
////////////////////////////////////////////////////////////////////////////////
// Creates the singleton FBacktracer (idempotent) and hooks module load/unload
// notifications so per-module unwind tables can be built as images load.
void
CallstackTrace_CreateInternal(FMalloc* Malloc)
{
    if (FBacktracer::Get() != nullptr)
    {
        return;
    }

    // Allocate, construct and intentionally leak backtracer
    void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer));
    new (Alloc) FBacktracer(Malloc);

    Modules_Create(Malloc);
    Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) {
        bLoad ? FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522
              : FBacktracer::Get()->RemoveModule(uintptr_t(Module));
    });
}
+
////////////////////////////////////////////////////////////////////////////////
// Enumerates the already-loaded modules and then marks module tracking as
// initialized; until this flag is set, failed function lookups are not cached
// as permanent misses (see FBacktracer::GetBacktraceId).
void
CallstackTrace_InitializeInternal()
{
    Modules_Initialize();
    GModulesAreInitialized = true;
}
+
////////////////////////////////////////////////////////////////////////////////
// Public entry point: returns the id of the caller's current callstack, or 0
// when the callstack channel is disabled or the backtracer does not exist yet.
uint32_t
CallstackTrace_GetCurrentId()
{
    if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel))
    {
        return 0;
    }

    // Depending on the platform, this yields either the address of the return
    // address (walkable) or the return address itself (stack-walk fallback).
    void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING();
    if (FBacktracer* Instance = FBacktracer::Get())
    {
#    if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER
        return Instance->GetBacktraceId(StackAddress);
#    else
        return Instance->GetBacktraceId((uint64_t)StackAddress);
#    endif
    }

    return 0;
}
+
+} // namespace zen
+
+#endif