diff options
Diffstat (limited to 'src/zencore/memtrack/callstacktrace.cpp')
| -rw-r--r-- | src/zencore/memtrack/callstacktrace.cpp | 1059 |
1 file changed, 1059 insertions, 0 deletions
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp new file mode 100644 index 000000000..d860c05d1 --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.cpp @@ -0,0 +1,1059 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "callstacktrace.h" + +#include <zenbase/zenbase.h> +#include <zencore/string.h> + +#if UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +// Platform implementations of back tracing +//////////////////////////////////////////////////////////////////////////////// +void CallstackTrace_CreateInternal(FMalloc*); +void CallstackTrace_InitializeInternal(); + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_CHANNEL_DEFINE(CallstackChannel) +UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec) + +uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot; + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Create(class FMalloc* InMalloc) +{ + static auto InitOnce = [&] { + CallstackTrace_CreateInternal(InMalloc); + return true; + }(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Initialize() +{ + GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot(); + + static auto InitOnce = [&] { + CallstackTrace_InitializeInternal(); + return true; + }(); +} + +} // namespace zen + +#endif + +#if ZEN_PLATFORM_WINDOWS +# include "moduletrace.h" + +# include "growonlylockfreehash.h" + +# include <zencore/scopeguard.h> +# include <zencore/thread.h> +# include <zencore/trace.h> + +# include <atomic> +# include <span> + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <winnt.h> +# include <winternl.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS +# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0 +# endif + +// 0=off, 1=stats, 2=validation, 3=truth_compare +# define BACKTRACE_DBGLVL 0 + +# 
define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0)) + +static bool GModulesAreInitialized = false; + +// This implementation is using unwind tables which is results in very fast +// stack walking. In some cases this is not suitable, and we then fall back +// to the standard stack walking implementation. +# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES) +# if defined(__clang__) +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0 +# else +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1 +# endif +# endif + +// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms +# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER) +# if defined(__clang__) +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0 +# else +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1 +# endif +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB) +// Initial size of the known set of callstacks +# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE) +// If disabled the known set will not grow. 
New callstacks will not be +// reported if the set is full +# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1 +# endif + +namespace zen { + +class FMalloc; + +UE_TRACE_CHANNEL_EXTERN(CallstackChannel) + +UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync) + UE_TRACE_EVENT_FIELD(uint32, CallstackId) + UE_TRACE_EVENT_FIELD(uint64[], Frames) +UE_TRACE_EVENT_END() + +class FCallstackTracer +{ +public: + struct FBacktraceEntry + { + uint64_t Hash = 0; + uint32_t FrameCount = 0; + uint64_t* Frames; + }; + + FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {} + + uint32_t AddCallstack(const FBacktraceEntry& Entry) + { + bool bAlreadyAdded = false; + + // Our set implementation doesn't allow for zero entries (zero represents an empty element + // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation, + // treat it as a "1" instead, for purposes of tracking if we've seen that callstack. + const uint64_t Hash = FMath::Max(Entry.Hash, 1ull); + uint32_t Id; + KnownSet.Find(Hash, &Id, &bAlreadyAdded); + if (!bAlreadyAdded) + { + Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed); + // On the first callstack reserve memory up front + if (Id == 1) + { + KnownSet.Reserve(InitialReserveCount); + } +# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE + // If configured as not growable, start returning unknown id's when full. 
+ if (Id >= InitialReserveCount) + { + return 0; + } +# endif + KnownSet.Emplace(Hash, Id); + UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel) + << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount); + } + + return Id; + } + +private: + struct FEncounteredCallstackSetEntry + { + std::atomic_uint64_t Key; + std::atomic_uint32_t Value; + + inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); } + inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); } + inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t InKey, uint32_t InValue) + { + Value.store(InValue, std::memory_order_release); + Key.store(InKey, std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); } + static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry)); + } + }; + + typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet; + + constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024; + constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry); + + FEncounteredCallstackSet KnownSet; + std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack" +}; + +# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +/* + * Windows' x64 binaries contain a ".pdata" section that describes the location + * and size of its functions and details on how to unwind them. The unwind + * information includes descriptions about a function's stack frame size and + * the non-volatile registers it pushes onto the stack. From this we can + * calculate where a call instruction wrote its return address. 
This is enough + * to walk the callstack and by caching this information it can be done + * efficiently. + * + * Some functions need a variable amount of stack (such as those that use + * alloc() for example) will use a frame pointer. Frame pointers involve saving + * and restoring the stack pointer in the function's prologue/epilogue. This + * frees the function up to modify the stack pointer arbitrarily. This + * significantly complicates establishing where a return address is, so this + * pdata scheme of walking the stack just doesn't support functions like this. + * Walking stops if it encounters such a function. Fortunately there are + * usually very few such functions, saving us from having to read and track + * non-volatile registers which adds a significant amount of work. + * + * A further optimisation is to to assume we are only interested methods that + * are part of engine or game code. As such we only build lookup tables for + * such modules and never accept OS or third party modules. Backtracing stops + * if an address is encountered which doesn't map to a known module. 
+ */ + +//////////////////////////////////////////////////////////////////////////////// +static uint32_t +AddressToId(uintptr_t Address) +{ + return uint32_t(Address >> 16); +} + +static uintptr_t +IdToAddress(uint32_t Id) +{ + return static_cast<uint32_t>(uintptr_t(Id) << 16); +} + +struct FIdPredicate +{ + template<class T> + bool operator()(uint32_t Id, const T& Item) const + { + return Id < Item.Id; + } + template<class T> + bool operator()(const T& Item, uint32_t Id) const + { + return Item.Id < Id; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +struct FUnwindInfo +{ + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t PrologBytes; + uint8_t NumUnwindCodes; + uint8_t FrameReg : 4; + uint8_t FrameRspBias : 4; +}; + +# pragma warning(push) +# pragma warning(disable : 4200) +struct FUnwindCode +{ + uint8_t PrologOffset; + uint8_t OpCode : 4; + uint8_t OpInfo : 4; + uint16_t Params[]; +}; +# pragma warning(pop) + +enum +{ + UWOP_PUSH_NONVOL = 0, // 1 node + UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes + UWOP_ALLOC_SMALL = 2, // 1 node + UWOP_SET_FPREG = 3, // 1 node + UWOP_SAVE_NONVOL = 4, // 2 nodes + UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes + UWOP_SAVE_XMM128 = 8, // 2 nodes + UWOP_SAVE_XMM128_FAR = 9, // 3 nodes + UWOP_PUSH_MACHFRAME = 10, // 1 node +}; + +//////////////////////////////////////////////////////////////////////////////// +class FBacktracer +{ +public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + void AddModule(uintptr_t Base, const char16_t* Name); + void RemoveModule(uintptr_t Base); + uint32_t GetBacktraceId(void* AddressOfReturnAddress); + +private: + struct FFunction + { + uint32_t Id; + int32_t RspBias; +# if BACKTRACE_DBGLVL >= 2 + uint32_t Size; + const FUnwindInfo* UnwindInfo; +# endif + }; + + struct FModule + { + uint32_t Id; + uint32_t IdSize; + uint32_t NumFunctions; +# if BACKTRACE_DBGLVL >= 1 + uint16 NumFpTypes; + // uint16 *padding* +# else + // uint32_t 
*padding* +# endif + FFunction* Functions; + }; + + struct FLookupState + { + FModule Module; + }; + + struct FFunctionLookupSetEntry + { + // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function) + std::atomic_uint64_t Data; + + inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; } + inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; } + inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t Key, int32_t Value) + { + Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) + { + // 64 bit pointer to 32 bit hash + Key = (~Key) + (Key << 21); + Key = Key ^ (Key >> 24); + Key = Key * 265; + Key = Key ^ (Key >> 14); + Key = Key * 21; + Key = Key ^ (Key >> 28); + Key = Key + (Key << 31); + return static_cast<uint32_t>(Key); + } + static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry)); + } + }; + typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet; + + const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const; + static FBacktracer* Instance; + mutable zen::RwLock Lock; + FModule* Modules; + int32_t ModulesNum; + int32_t ModulesCapacity; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; +# if BACKTRACE_LOCK_FREE + mutable FFunctionLookupSet FunctionLookups; + mutable bool bReentranceCheck = false; +# endif +# if BACKTRACE_DBGLVL >= 1 + mutable uint32_t NumFpTruncations = 0; + mutable uint32_t TotalFunctions = 0; +# endif +}; + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* FBacktracer::Instance = nullptr; + +//////////////////////////////////////////////////////////////////////////////// 
+FBacktracer::FBacktracer(FMalloc* InMalloc) +: Malloc(InMalloc) +, CallstackTracer(InMalloc) +# if BACKTRACE_LOCK_FREE +, FunctionLookups(InMalloc) +# endif +{ +# if BACKTRACE_LOCK_FREE + FunctionLookups.Reserve(512 * 1024); // 4 MB +# endif + ModulesCapacity = 8; + ModulesNum = 0; + Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity); + + Instance = this; +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer::~FBacktracer() +{ + std::span<FModule> ModulesView(Modules, ModulesNum); + for (FModule& Module : ModulesView) + { + Malloc->Free(Module.Functions); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* +FBacktracer::Get() +{ + return Instance; +} + +bool GFullBacktraces = false; + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name) +{ + if (!GFullBacktraces) + { + const size_t NameLen = StringLength(Name); + if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe"))) + { + return; + } + } + + const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase; + const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew); + const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader); + + uint32_t NumSections = FileHeader->NumberOfSections; + const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader); + + // Find ".pdata" section + uintptr_t PdataBase = 0; + uintptr_t PdataEnd = 0; + for (uint32_t i = 0; i < NumSections; ++i) + { + const IMAGE_SECTION_HEADER* Section = Sections + i; + if (*(uint64_t*)(Section->Name) == + 0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. 
This constant is '.pdata' + { + PdataBase = ModuleBase + Section->VirtualAddress; + PdataEnd = PdataBase + Section->SizeOfRawData; + break; + } + } + + if (PdataBase == 0) + { + return; + } + + // Count the number of functions. The assumption here is that if we have got this far then there is at least one function + uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION); + if (NumFunctions == 0) + { + return; + } + + const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase; + do + { + const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1; + if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress)) + { + break; + } + + --NumFunctions; + } while (NumFunctions != 0); + + // Allocate some space for the module's function-to-frame-size table + auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions); + FFunction* OutTableCursor = OutTable; + + // Extract frame size for each function from pdata's unwind codes. + uint32_t NumFpFuncs = 0; + for (uint32_t i = 0; i < NumFunctions; ++i) + { + const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i; + + uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress; + const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr; + + if (UnwindInfo->Version != 1) + { + /* some v2s have been seen in msvc. 
Always seem to be assembly + * routines (memset, memcpy, etc) */ + continue; + } + + int32_t FpInfo = 0; + int32_t RspBias = 0; + +# if BACKTRACE_DBGLVL >= 2 + uint32_t PrologVerify = UnwindInfo->PrologBytes; +# endif + + const auto* Code = (FUnwindCode*)(UnwindInfo + 1); + const auto* EndCode = Code + UnwindInfo->NumUnwindCodes; + while (Code < EndCode) + { +# if BACKTRACE_DBGLVL >= 2 + if (Code->PrologOffset > PrologVerify) + { + PLATFORM_BREAK(); + } + PrologVerify = Code->PrologOffset; +# endif + + switch (Code->OpCode) + { + case UWOP_PUSH_NONVOL: + RspBias += 8; + Code += 1; + break; + + case UWOP_ALLOC_LARGE: + if (Code->OpInfo) + { + RspBias += *(uint32_t*)(Code->Params); + Code += 3; + } + else + { + RspBias += Code->Params[0] * 8; + Code += 2; + } + break; + + case UWOP_ALLOC_SMALL: + RspBias += (Code->OpInfo * 8) + 8; + Code += 1; + break; + + case UWOP_SET_FPREG: + // Function will adjust RSP (e.g. through use of alloca()) so it + // uses a frame pointer register. There's instructions like; + // + // push FRAME_REG + // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)] + // ... + // add rsp, rax + // ... + // sub rsp, FRAME_RSP_BIAS * 16 + // pop FRAME_REG + // ret + // + // To recover the stack frame we would need to track non-volatile + // registers which adds a lot of overhead for a small subset of + // functions. Instead we'll end backtraces at these functions. + + // MSB is set to detect variable sized frames that we can't proceed + // past when back-tracing. + NumFpFuncs++; + FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23); + Code += 1; + break; + + case UWOP_PUSH_MACHFRAME: + RspBias = Code->OpInfo ? 
48 : 40; + Code += 1; + break; + + case UWOP_SAVE_NONVOL: + Code += 2; + break; /* saves are movs instead of pushes */ + case UWOP_SAVE_NONVOL_FAR: + Code += 3; + break; + case UWOP_SAVE_XMM128: + Code += 2; + break; + case UWOP_SAVE_XMM128_FAR: + Code += 3; + break; + + default: +# if BACKTRACE_DBGLVL >= 2 + PLATFORM_BREAK(); +# endif + break; + } + } + + // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a + // single actual function in the .text segment. + bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO); + + RspBias /= sizeof(void*); // stack push/popds in units of one machine word + RspBias += !bIsChained; // and one extra push for the ret address + RspBias |= FpInfo; // pack in details about possible frame pointer + + if (bIsChained) + { + OutTableCursor[-1].RspBias += RspBias; +# if BACKTRACE_DBGLVL >= 2 + OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress); +# endif + } + else + { + *OutTableCursor = { + FunctionTable->BeginAddress, + RspBias, +# if BACKTRACE_DBGLVL >= 2 + FunctionTable->EndAddress - FunctionTable->BeginAddress, + UnwindInfo, +# endif + }; + + ++OutTableCursor; + } + } + + uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage; + ModuleSize += 0xffff; // to align up to next 64K page. 
it'll get shifted by AddressToId() + + FModule Module = { + AddressToId(ModuleBase), + AddressToId(ModuleSize), + uint32_t(uintptr_t(OutTableCursor - OutTable)), +# if BACKTRACE_DBGLVL >= 1 + uint16(NumFpFuncs), +# endif + OutTable, + }; + + { + zen::RwLock::ExclusiveLockScope _(Lock); + + if (ModulesNum + 1 > ModulesCapacity) + { + ModulesCapacity += 8; + Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity); + } + Modules[ModulesNum++] = Module; + + std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; }); + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations += NumFpFuncs; + TotalFunctions += NumFunctions; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::RemoveModule(uintptr_t ModuleBase) +{ + // When Windows' RequestExit() is called it hard-terminates all threads except + // the main thread and then proceeds to unload the process' DLLs. This hard + // thread termination can result is dangling locked locks. Not an issue as + // the rule is "do not do anything multithreaded in DLL load/unload". And here + // we are, taking write locks during DLL unload which is, quite unsurprisingly, + // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us + // anything due to how DLLs and processes' address spaces work. So we will... 
+# if defined PLATFORM_WINDOWS + ZEN_UNUSED(ModuleBase); + + return; +# else + + zen::RwLock::ExclusiveLockScope _(Lock); + + uint32_t ModuleId = AddressToId(ModuleBase); + TArrayView<FModule> ModulesView(Modules, ModulesNum); + int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate()); + if (Index >= ModulesNum) + { + return; + } + + const FModule& Module = Modules[Index]; + if (Module.Id != ModuleId) + { + return; + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations -= Module.NumFpTypes; + TotalFunctions -= Module.NumFunctions; +# endif + + // no code should be executing at this point so we can safely free the + // table knowing know one is looking at it. + Malloc->Free(Module.Functions); + + for (SIZE_T i = Index; i < ModulesNum; i++) + { + Modules[i] = Modules[i + 1]; + } + + --ModulesNum; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +const FBacktracer::FFunction* +FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const +{ + // This function caches the previous module look up. The theory here is that + // a series of return address in a backtrace often cluster around one module + + FIdPredicate IdPredicate; + + // Look up the module that Address belongs to. 
+ uint32_t AddressId = AddressToId(Address); + if ((AddressId - State.Module.Id) >= State.Module.IdSize) + { + auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate); + + if (FindIt == Modules) + { + return nullptr; + } + + State.Module = *--FindIt; + } + + // Check that the address is within the address space of the best-found module + const FModule* Module = &(State.Module); + if ((AddressId - Module->Id) >= Module->IdSize) + { + return nullptr; + } + + // Now we've a module we have a table of functions and their stack sizes so + // we can get the frame size for Address + uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id)); + std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions); + auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate); + if (FindIt == begin(FuncsView)) + { + return nullptr; + } + + const FFunction* Function = &(*--FindIt); +# if BACKTRACE_DBGLVL >= 2 + if ((FuncId - Function->Id) >= Function->Size) + { + PLATFORM_BREAK(); + return nullptr; + } +# endif + return Function; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) +{ + FLookupState LookupState = {}; + uint64_t Frames[256]; + + uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress; + +# if BACKTRACE_DBGLVL >= 3 + uintptr_t TruthBacktrace[1024]; + uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr); + uintptr_t* TruthCursor = TruthBacktrace; + for (; *TruthCursor != *StackPointer; ++TruthCursor) + ; +# endif + +# if BACKTRACE_DBGLVL >= 2 + struct + { + void* Sp; + void* Ip; + const FFunction* Function; + } Backtrace[1024] = {}; + uint32_t NumBacktrace = 0; +# endif + + uint64_t BacktraceHash = 0; + uint32_t FrameIdx = 0; + +# if BACKTRACE_LOCK_FREE + // When running lock free, we defer the lock until a lock free function lookup fails + bool Locked = false; 
+# else + FScopeLock _(&Lock); +# endif + do + { + uintptr_t RetAddr = *StackPointer; + + Frames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceHash += RetAddr; + BacktraceHash *= 0x30be8efa499c249dull; + +# if BACKTRACE_LOCK_FREE + int32_t RspBias; + bool bIsAlreadyInTable; + FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable); + if (bIsAlreadyInTable) + { + if (RspBias < 0) + { + break; + } + else + { + StackPointer += RspBias; + continue; + } + } + if (!Locked) + { + Lock.AcquireExclusive(); + Locked = true; + + // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion + // when the allocation reenters the stack trace code. We need to break out of the recursion + // in that case, and let the allocation complete, with the assumption that we don't care + // about call stacks for internal allocations in the memory reporting system. The "Lock()" + // above will only fall through with this flag set if it's a second lock in the same thread. + if (bReentranceCheck) + { + break; + } + } +# endif // BACKTRACE_LOCK_FREE + + const FFunction* Function = LookupFunction(RetAddr, LookupState); + if (Function == nullptr) + { +# if BACKTRACE_LOCK_FREE + // LookupFunction fails when modules are not yet registered. In this case, we do not want the address + // to be added to the lookup map, but to retry the lookup later when modules are properly registered. + if (GModulesAreInitialized) + { + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, -1); + } +# endif + break; + } + +# if BACKTRACE_LOCK_FREE + { + // This conversion improves probing performance for the hash set. Additionally it is critical + // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map. + int32_t StoreBias = Function->RspBias < 0 ? 
-1 : Function->RspBias; + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, StoreBias); + } +# endif + +# if BACKTRACE_DBGLVL >= 2 + if (NumBacktrace < 1024) + { + Backtrace[NumBacktrace++] = { + StackPointer, + (void*)RetAddr, + Function, + }; + } +# endif + + if (Function->RspBias < 0) + { + // This is a frame with a variable-sized stack pointer. We don't + // track enough information to proceed. +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations++; +# endif + break; + } + + StackPointer += Function->RspBias; + } + // Trunkate callstacks longer than MaxStackDepth + while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames)); + + // Build the backtrace entry for submission + FCallstackTracer::FBacktraceEntry BacktraceEntry; + BacktraceEntry.Hash = BacktraceHash; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = Frames; + +# if BACKTRACE_DBGLVL >= 3 + for (uint32_t i = 0; i < NumBacktrace; ++i) + { + if ((void*)TruthCursor[i] != Backtrace[i].Ip) + { + PLATFORM_BREAK(); + break; + } + } +# endif + +# if BACKTRACE_LOCK_FREE + if (Locked) + { + Lock.ReleaseExclusive(); + } +# endif + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). 
+ return CallstackTracer.AddCallstack(BacktraceEntry); +} +} + +# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + + //////////////////////////////////////////////////////////////////////////////// + class FBacktracer + { + public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + inline uint32_t GetBacktraceId(void* AddressOfReturnAddress); + uint32_t GetBacktraceId(uint64_t ReturnAddress); + void AddModule(uintptr_t Base, const char16_t* Name) {} + void RemoveModule(uintptr_t Base) {} + + private: + static FBacktracer* Instance; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; + }; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Instance = nullptr; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; } + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::~FBacktracer() {} + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Get() { return Instance; } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) + { + const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress; + return GetBacktraceId(ReturnAddress); + } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress) + { +# if !UE_BUILD_SHIPPING + uint64_t StackFrames[256]; + int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames)); + if (NumStackFrames > 0) + { + FCallstackTracer::FBacktraceEntry BacktraceEntry; + uint64_t BacktraceId = 0; + uint32_t FrameIdx = 0; + bool bUseAddress = false; + for 
(int32_t Index = 0; Index < NumStackFrames; Index++) + { + if (!bUseAddress) + { + // start using backtrace only after ReturnAddress + if (StackFrames[Index] == (uint64_t)ReturnAddress) + { + bUseAddress = true; + } + } + if (bUseAddress || NumStackFrames == 1) + { + uint64_t RetAddr = StackFrames[Index]; + StackFrames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceId += RetAddr; + BacktraceId *= 0x30be8efa499c249dull; + } + } + + // Save the collected id + BacktraceEntry.Hash = BacktraceId; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = StackFrames; + + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). + return CallstackTracer.AddCallstack(BacktraceEntry); + } +# endif + + return 0; + } + +} + +# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_CreateInternal(FMalloc* Malloc) +{ + if (FBacktracer::Get() != nullptr) + { + return; + } + + // Allocate, construct and intentionally leak backtracer + void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer)); + new (Alloc) FBacktracer(Malloc); + + Modules_Create(Malloc); + Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) { + bLoad ? 
FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522 + : FBacktracer::Get()->RemoveModule(uintptr_t(Module)); + }); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_InitializeInternal() +{ + Modules_Initialize(); + GModulesAreInitialized = true; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +CallstackTrace_GetCurrentId() +{ + if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel)) + { + return 0; + } + + void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING(); + if (FBacktracer* Instance = FBacktracer::Get()) + { +# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER + return Instance->GetBacktraceId(StackAddress); +# else + return Instance->GetBacktraceId((uint64_t)StackAddress); +# endif + } + + return 0; +} + +} // namespace zen + +#endif |