// Copyright Epic Games, Inc. All Rights Reserved. #include "callstacktrace.h" #include #include #if UE_CALLSTACK_TRACE_ENABLED && ZEN_WITH_TRACE namespace zen { // Platform implementations of back tracing //////////////////////////////////////////////////////////////////////////////// void CallstackTrace_CreateInternal(FMalloc*); void CallstackTrace_InitializeInternal(); //////////////////////////////////////////////////////////////////////////////// UE_TRACE_CHANNEL_DEFINE(CallstackChannel) UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec) uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot; //////////////////////////////////////////////////////////////////////////////// void CallstackTrace_Create(class FMalloc* InMalloc) { static auto InitOnce = [&] { CallstackTrace_CreateInternal(InMalloc); return true; }(); } //////////////////////////////////////////////////////////////////////////////// void CallstackTrace_Initialize() { GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot(); static auto InitOnce = [&] { CallstackTrace_InitializeInternal(); return true; }(); } } // namespace zen #endif #if ZEN_PLATFORM_WINDOWS && ZEN_WITH_TRACE # include "moduletrace.h" # include "growonlylockfreehash.h" # include # include # include # include # include # include ZEN_THIRD_PARTY_INCLUDES_START # include # include ZEN_THIRD_PARTY_INCLUDES_END # ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS # define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0 # endif // 0=off, 1=stats, 2=validation, 3=truth_compare # define BACKTRACE_DBGLVL 0 # define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0)) static bool GModulesAreInitialized = false; // This implementation is using unwind tables which is results in very fast // stack walking. In some cases this is not suitable, and we then fall back // to the standard stack walking implementation. 
# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES) # if defined(__clang__) # define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0 # else # define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1 # endif # endif // stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms # if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER) # if defined(__clang__) # define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0 # else # define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1 # endif # endif # if !defined(UE_CALLSTACK_TRACE_RESERVE_MB) // Initial size of the known set of callstacks # define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks # endif # if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE) // If disabled the known set will not grow. New callstacks will not be // reported if the set is full # define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1 # endif namespace zen { class FMalloc; UE_TRACE_CHANNEL_EXTERN(CallstackChannel) UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync) UE_TRACE_EVENT_FIELD(uint32, CallstackId) UE_TRACE_EVENT_FIELD(uint64[], Frames) UE_TRACE_EVENT_END() class FCallstackTracer { public: struct FBacktraceEntry { uint64_t Hash = 0; uint32_t FrameCount = 0; uint64_t* Frames; }; FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {} uint32_t AddCallstack(const FBacktraceEntry& Entry) { bool bAlreadyAdded = false; // Our set implementation doesn't allow for zero entries (zero represents an empty element // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation, // treat it as a "1" instead, for purposes of tracking if we've seen that callstack. 
const uint64_t Hash = FMath::Max(Entry.Hash, 1ull); uint32_t Id; KnownSet.Find(Hash, &Id, &bAlreadyAdded); if (!bAlreadyAdded) { Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed); // On the first callstack reserve memory up front if (Id == 1) { KnownSet.Reserve(InitialReserveCount); } # if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE // If configured as not growable, start returning unknown id's when full. if (Id >= InitialReserveCount) { return 0; } # endif KnownSet.Emplace(Hash, Id); UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel) << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount); } return Id; } private: struct FEncounteredCallstackSetEntry { std::atomic_uint64_t Key; std::atomic_uint32_t Value; inline uint64 GetKey() const { return Key.load(std::memory_order_acquire); } inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); } inline bool IsEmpty() const { return Key.load(std::memory_order_acquire) == 0; } inline void SetKeyValue(uint64_t InKey, uint32_t InValue) { Value.store(InValue, std::memory_order_relaxed); Key.store(InKey, std::memory_order_release); } static inline uint32_t KeyHash(uint64_t Key) { return static_cast(Key); } static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount) { memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry)); } }; typedef TGrowOnlyLockFreeHash FEncounteredCallstackSet; constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024; constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry); FEncounteredCallstackSet KnownSet; std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack" }; # if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES /* * Windows' x64 binaries contain a ".pdata" section that describes the location * and size of its functions and details on how to unwind them. 
The unwind
 * information includes descriptions about a function's stack frame size and
 * the non-volatile registers it pushes onto the stack. From this we can
 * calculate where a call instruction wrote its return address. This is enough
 * to walk the callstack, and by caching this information it can be done
 * efficiently.
 *
 * Some functions that need a variable amount of stack (such as those that use
 * alloca(), for example) will use a frame pointer. Frame pointers involve
 * saving and restoring the stack pointer in the function's prologue/epilogue.
 * This frees the function up to modify the stack pointer arbitrarily. This
 * significantly complicates establishing where a return address is, so this
 * pdata scheme of walking the stack just doesn't support functions like this.
 * Walking stops if it encounters such a function. Fortunately there are
 * usually very few such functions, saving us from having to read and track
 * non-volatile registers, which would add a significant amount of work.
 *
 * A further optimisation is to assume we are only interested in methods that
 * are part of engine or game code. As such we only build lookup tables for
 * such modules and never accept OS or third party modules. Backtracing stops
 * if an address is encountered which doesn't map to a known module.
*/ //////////////////////////////////////////////////////////////////////////////// static uint32_t AddressToId(uintptr_t Address) { return uint32_t(Address >> 16); } static uintptr_t IdToAddress(uint32_t Id) { return static_cast(uintptr_t(Id) << 16); } struct FIdPredicate { template bool operator()(uint32_t Id, const T& Item) const { return Id < Item.Id; } template bool operator()(const T& Item, uint32_t Id) const { return Item.Id < Id; } }; //////////////////////////////////////////////////////////////////////////////// struct FUnwindInfo { uint8_t Version : 3; uint8_t Flags : 5; uint8_t PrologBytes; uint8_t NumUnwindCodes; uint8_t FrameReg : 4; uint8_t FrameRspBias : 4; }; # pragma warning(push) # pragma warning(disable : 4200) struct FUnwindCode { uint8_t PrologOffset; uint8_t OpCode : 4; uint8_t OpInfo : 4; uint16_t Params[]; }; # pragma warning(pop) enum { UWOP_PUSH_NONVOL = 0, // 1 node UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes UWOP_ALLOC_SMALL = 2, // 1 node UWOP_SET_FPREG = 3, // 1 node UWOP_SAVE_NONVOL = 4, // 2 nodes UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes UWOP_SAVE_XMM128 = 8, // 2 nodes UWOP_SAVE_XMM128_FAR = 9, // 3 nodes UWOP_PUSH_MACHFRAME = 10, // 1 node }; //////////////////////////////////////////////////////////////////////////////// class FBacktracer { public: FBacktracer(FMalloc* InMalloc); ~FBacktracer(); static FBacktracer* Get(); void AddModule(uintptr_t Base, const char16_t* Name); void RemoveModule(uintptr_t Base); uint32_t GetBacktraceId(void* AddressOfReturnAddress); private: struct FFunction { uint32_t Id; int32_t RspBias; # if BACKTRACE_DBGLVL >= 2 uint32_t Size; const FUnwindInfo* UnwindInfo; # endif }; struct FModule { uint32_t Id; uint32_t IdSize; uint32_t NumFunctions; # if BACKTRACE_DBGLVL >= 1 uint16 NumFpTypes; // uint16 *padding* # else // uint32_t *padding* # endif FFunction* Functions; }; struct FLookupState { FModule Module; }; struct FFunctionLookupSetEntry { // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for 
function) std::atomic_uint64_t Data; inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; } inline int32_t GetValue() const { return static_cast(Data.load(std::memory_order_relaxed)) >> 48; } inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } inline void SetKeyValue(uint64_t Key, int32_t Value) { Data.store(Key | (static_cast(Value) << 48), std::memory_order_relaxed); } static inline uint32_t KeyHash(uint64_t Key) { // 64 bit pointer to 32 bit hash Key = (~Key) + (Key << 21); Key = Key ^ (Key >> 24); Key = Key * 265; Key = Key ^ (Key >> 14); Key = Key * 21; Key = Key ^ (Key >> 28); Key = Key + (Key << 31); return static_cast(Key); } static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount) { memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry)); } }; typedef TGrowOnlyLockFreeHash FFunctionLookupSet; const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const; static FBacktracer* Instance; mutable zen::RwLock Lock; FModule* Modules; int32_t ModulesNum; int32_t ModulesCapacity; FMalloc* Malloc; FCallstackTracer CallstackTracer; # if BACKTRACE_LOCK_FREE mutable FFunctionLookupSet FunctionLookups; mutable bool bReentranceCheck = false; # endif # if BACKTRACE_DBGLVL >= 1 mutable uint32_t NumFpTruncations = 0; mutable uint32_t TotalFunctions = 0; # endif }; //////////////////////////////////////////////////////////////////////////////// FBacktracer* FBacktracer::Instance = nullptr; //////////////////////////////////////////////////////////////////////////////// FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc) , CallstackTracer(InMalloc) # if BACKTRACE_LOCK_FREE , FunctionLookups(InMalloc) # endif { # if BACKTRACE_LOCK_FREE FunctionLookups.Reserve(512 * 1024); // 4 MB # endif ModulesCapacity = 8; ModulesNum = 0; Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity); Instance = this; } 
////////////////////////////////////////////////////////////////////////////////
FBacktracer::~FBacktracer()
{
	// Free the per-module function tables; the Modules array itself is left to
	// the (intentionally leaked) singleton's allocator.
	std::span ModulesView(Modules, ModulesNum);
	for (FModule& Module : ModulesView)
	{
		Malloc->Free(Module.Functions);
	}
}

////////////////////////////////////////////////////////////////////////////////
FBacktracer*
FBacktracer::Get()
{
	return Instance;
}

// When false, AddModule only indexes the main ".exe" module.
bool GFullBacktraces = false;

////////////////////////////////////////////////////////////////////////////////
// Parses a newly loaded module's .pdata unwind data into a sorted table of
// per-function frame sizes and registers the module for address lookups.
void
FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name)
{
	if (!GFullBacktraces)
	{
		// Only index the main executable unless full callstacks were requested.
		const size_t NameLen = StringLength(Name);
		if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe")))
		{
			return;
		}
	}

	// Walk the PE headers to reach the section table.
	const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase;
	const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew);
	const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader);
	uint32_t NumSections = FileHeader->NumberOfSections;
	const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader);

	// Find ".pdata" section
	uintptr_t PdataBase = 0;
	uintptr_t PdataEnd = 0;
	for (uint32_t i = 0; i < NumSections; ++i)
	{
		const IMAGE_SECTION_HEADER* Section = Sections + i;
		// Section names are eight bytes and zero padded. This little-endian
		// constant is ".pdata" (plus two zero pad bytes).
		if (*(uint64_t*)(Section->Name) == 0x61'74'61'64'70'2eull)
		{
			PdataBase = ModuleBase + Section->VirtualAddress;
			PdataEnd = PdataBase + Section->SizeOfRawData;
			break;
		}
	}

	if (PdataBase == 0)
	{
		return;
	}

	// Count the number of functions. The assumption here is that if we have
	// got this far then there is at least one function
	uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION);
	if (NumFunctions == 0)
	{
		return;
	}

	// Trim trailing entries whose begin/end range is empty (section padding).
	const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase;
	do
	{
		const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1;
		if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress))
		{
			break;
		}
		--NumFunctions;
	} while (NumFunctions != 0);

	// Allocate some space for the module's function-to-frame-size table
	auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions);
	FFunction* OutTableCursor = OutTable;

	// Extract frame size for each function from pdata's unwind codes.
	uint32_t NumFpFuncs = 0;
	for (uint32_t i = 0; i < NumFunctions; ++i)
	{
		const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i;
		uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress;
		const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr;
		if (UnwindInfo->Version != 1)
		{
			/* some v2s have been seen in msvc. Always seem to be assembly
			 * routines (memset, memcpy, etc) */
			continue;
		}

		int32_t FpInfo = 0;
		int32_t RspBias = 0;
#	if BACKTRACE_DBGLVL >= 2
		uint32_t PrologVerify = UnwindInfo->PrologBytes;
#	endif
		// Accumulate the total prologue stack adjustment described by the
		// unwind codes (each slot is two bytes; multi-slot ops advance more).
		const auto* Code = (FUnwindCode*)(UnwindInfo + 1);
		const auto* EndCode = Code + UnwindInfo->NumUnwindCodes;
		while (Code < EndCode)
		{
#	if BACKTRACE_DBGLVL >= 2
			// Codes are ordered by descending prologue offset.
			if (Code->PrologOffset > PrologVerify)
			{
				PLATFORM_BREAK();
			}
			PrologVerify = Code->PrologOffset;
#	endif
			switch (Code->OpCode)
			{
			case UWOP_PUSH_NONVOL:
				RspBias += 8;
				Code += 1;
				break;
			case UWOP_ALLOC_LARGE:
				if (Code->OpInfo)
				{
					// OpInfo == 1: unscaled 32-bit size in the next two slots.
					RspBias += *(uint32_t*)(Code->Params);
					Code += 3;
				}
				else
				{
					// OpInfo == 0: size / 8 in the next slot.
					RspBias += Code->Params[0] * 8;
					Code += 2;
				}
				break;
			case UWOP_ALLOC_SMALL:
				RspBias += (Code->OpInfo * 8) + 8;
				Code += 1;
				break;
			case UWOP_SET_FPREG:
				// Function will adjust RSP (e.g. through use of alloca()) so it
				// uses a frame pointer register. There's instructions like;
				//
				// push FRAME_REG
				// lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)]
				// ...
				// add rsp, rax
				// ...
				// sub rsp, FRAME_RSP_BIAS * 16
				// pop FRAME_REG
				// ret
				//
				// To recover the stack frame we would need to track non-volatile
				// registers which adds a lot of overhead for a small subset of
				// functions. Instead we'll end backtraces at these functions.
				// MSB is set to detect variable sized frames that we can't proceed
				// past when back-tracing.
				NumFpFuncs++;
				FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23);
				Code += 1;
				break;
			case UWOP_PUSH_MACHFRAME:
				RspBias = Code->OpInfo ? 48 : 40;
				Code += 1;
				break;
			case UWOP_SAVE_NONVOL:
				Code += 2;
				break; /* saves are movs instead of pushes */
			case UWOP_SAVE_NONVOL_FAR:
				Code += 3;
				break;
			case UWOP_SAVE_XMM128:
				Code += 2;
				break;
			case UWOP_SAVE_XMM128_FAR:
				Code += 3;
				break;
			default:
#	if BACKTRACE_DBGLVL >= 2
				PLATFORM_BREAK();
#	endif
				break;
			}
		}

		// "Chained" simply means that multiple RUNTIME_FUNCTIONs pertain to a
		// single actual function in the .text segment.
		bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO);
		RspBias /= sizeof(void*);	// stack pushes/pops in units of one machine word
		RspBias += !bIsChained;		// and one extra push for the ret address
		RspBias |= FpInfo;			// pack in details about possible frame pointer
		if (bIsChained)
		{
			// Fold chained entries into the primary function's entry.
			OutTableCursor[-1].RspBias += RspBias;
#	if BACKTRACE_DBGLVL >= 2
			OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress);
#	endif
		}
		else
		{
			*OutTableCursor = {
				FunctionTable->BeginAddress,
				RspBias,
#	if BACKTRACE_DBGLVL >= 2
				FunctionTable->EndAddress - FunctionTable->BeginAddress,
				UnwindInfo,
#	endif
			};
			++OutTableCursor;
		}
	}

	uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage;
	ModuleSize += 0xffff;  // to align up to next 64K page. it'll get shifted by AddressToId()

	FModule Module = {
		AddressToId(ModuleBase),
		AddressToId(ModuleSize),
		uint32_t(uintptr_t(OutTableCursor - OutTable)),
#	if BACKTRACE_DBGLVL >= 1
		uint16(NumFpFuncs),
#	endif
		OutTable,
	};

	{
		zen::RwLock::ExclusiveLockScope _(Lock);
		if (ModulesNum + 1 > ModulesCapacity)
		{
			ModulesCapacity += 8;
			Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity);
		}
		Modules[ModulesNum++] = Module;
		// Keep the table sorted by base page so LookupFunction can bisect.
		std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; });
	}

#	if BACKTRACE_DBGLVL >= 1
	NumFpTruncations += NumFpFuncs;
	TotalFunctions += NumFunctions;
#	endif
}

////////////////////////////////////////////////////////////////////////////////
void
FBacktracer::RemoveModule(uintptr_t ModuleBase)
{
	// When Windows' RequestExit() is called it hard-terminates all threads except
	// the main thread and then proceeds to unload the process' DLLs. This hard
	// thread termination can result in dangling locked locks. Not an issue as
	// the rule is "do not do anything multithreaded in DLL load/unload". And here
	// we are, taking write locks during DLL unload which is, quite unsurprisingly,
	// deadlocking. In reality tracking Windows' DLL unloads doesn't tell us
	// anything due to how DLLs and processes' address spaces work. So we will...
#	if defined PLATFORM_WINDOWS
	ZEN_UNUSED(ModuleBase);
	return;
#	else
	zen::RwLock::ExclusiveLockScope _(Lock);

	uint32_t ModuleId = AddressToId(ModuleBase);
	TArrayView ModulesView(Modules, ModulesNum);
	int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate());
	if (Index >= ModulesNum)
	{
		return;
	}

	const FModule& Module = Modules[Index];
	if (Module.Id != ModuleId)
	{
		return;
	}

#	if BACKTRACE_DBGLVL >= 1
	NumFpTruncations -= Module.NumFpTypes;
	TotalFunctions -= Module.NumFunctions;
#	endif

	// no code should be executing at this point so we can safely free the
	// table knowing no one is looking at it.
Malloc->Free(Module.Functions); for (SIZE_T i = Index; i < ModulesNum; i++) { Modules[i] = Modules[i + 1]; } --ModulesNum; # endif } //////////////////////////////////////////////////////////////////////////////// const FBacktracer::FFunction* FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const { // This function caches the previous module look up. The theory here is that // a series of return address in a backtrace often cluster around one module FIdPredicate IdPredicate; // Look up the module that Address belongs to. uint32_t AddressId = AddressToId(Address); if ((AddressId - State.Module.Id) >= State.Module.IdSize) { auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate); if (FindIt == Modules) { return nullptr; } State.Module = *--FindIt; } // Check that the address is within the address space of the best-found module const FModule* Module = &(State.Module); if ((AddressId - Module->Id) >= Module->IdSize) { return nullptr; } // Now we've a module we have a table of functions and their stack sizes so // we can get the frame size for Address uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id)); std::span FuncsView(Module->Functions, Module->NumFunctions); auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate); if (FindIt == begin(FuncsView)) { return nullptr; } const FFunction* Function = &(*--FindIt); # if BACKTRACE_DBGLVL >= 2 if ((FuncId - Function->Id) >= Function->Size) { PLATFORM_BREAK(); return nullptr; } # endif return Function; } //////////////////////////////////////////////////////////////////////////////// uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) { FLookupState LookupState = {}; uint64_t Frames[256]; uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress; # if BACKTRACE_DBGLVL >= 3 uintptr_t TruthBacktrace[1024]; uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr); uintptr_t* TruthCursor = 
TruthBacktrace; for (; *TruthCursor != *StackPointer; ++TruthCursor) ; # endif # if BACKTRACE_DBGLVL >= 2 struct { void* Sp; void* Ip; const FFunction* Function; } Backtrace[1024] = {}; uint32_t NumBacktrace = 0; # endif uint64_t BacktraceHash = 0; uint32_t FrameIdx = 0; # if BACKTRACE_LOCK_FREE // When running lock free, we defer the lock until a lock free function lookup fails bool Locked = false; # else FScopeLock _(&Lock); # endif do { uintptr_t RetAddr = *StackPointer; Frames[FrameIdx++] = RetAddr; // This is a simple order-dependent LCG. Should be sufficient enough BacktraceHash += RetAddr; BacktraceHash *= 0x30be8efa499c249dull; # if BACKTRACE_LOCK_FREE int32_t RspBias; bool bIsAlreadyInTable; FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable); if (bIsAlreadyInTable) { if (RspBias < 0) { break; } else { StackPointer += RspBias; continue; } } if (!Locked) { Lock.AcquireExclusive(); Locked = true; // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion // when the allocation reenters the stack trace code. We need to break out of the recursion // in that case, and let the allocation complete, with the assumption that we don't care // about call stacks for internal allocations in the memory reporting system. The "Lock()" // above will only fall through with this flag set if it's a second lock in the same thread. if (bReentranceCheck) { break; } } # endif // BACKTRACE_LOCK_FREE const FFunction* Function = LookupFunction(RetAddr, LookupState); if (Function == nullptr) { # if BACKTRACE_LOCK_FREE // LookupFunction fails when modules are not yet registered. In this case, we do not want the address // to be added to the lookup map, but to retry the lookup later when modules are properly registered. 
if (GModulesAreInitialized) { bReentranceCheck = true; auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); FunctionLookups.Emplace(RetAddr, -1); } # endif break; } # if BACKTRACE_LOCK_FREE { // This conversion improves probing performance for the hash set. Additionally it is critical // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map. int32_t StoreBias = Function->RspBias < 0 ? -1 : Function->RspBias; bReentranceCheck = true; auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); FunctionLookups.Emplace(RetAddr, StoreBias); } # endif # if BACKTRACE_DBGLVL >= 2 if (NumBacktrace < 1024) { Backtrace[NumBacktrace++] = { StackPointer, (void*)RetAddr, Function, }; } # endif if (Function->RspBias < 0) { // This is a frame with a variable-sized stack pointer. We don't // track enough information to proceed. # if BACKTRACE_DBGLVL >= 1 NumFpTruncations++; # endif break; } StackPointer += Function->RspBias; } // Trunkate callstacks longer than MaxStackDepth while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames)); // Build the backtrace entry for submission FCallstackTracer::FBacktraceEntry BacktraceEntry; BacktraceEntry.Hash = BacktraceHash; BacktraceEntry.FrameCount = FrameIdx; BacktraceEntry.Frames = Frames; # if BACKTRACE_DBGLVL >= 3 for (uint32_t i = 0; i < NumBacktrace; ++i) { if ((void*)TruthCursor[i] != Backtrace[i].Ip) { PLATFORM_BREAK(); break; } } # endif # if BACKTRACE_LOCK_FREE if (Locked) { Lock.ReleaseExclusive(); } # endif // Add to queue to be processed. This might block until there is room in the // queue (i.e. the processing thread has caught up processing). 
	return CallstackTracer.AddCallstack(BacktraceEntry);
}

}	// namespace zen

#	else	// UE_CALLSTACK_TRACE_USE_UNWIND_TABLES

namespace zen {

////////////////////////////////////////////////////////////////////////////////
// Fallback backtracer used when the unwind-table walker is disabled (per the
// macros above, e.g. when compiling with clang): captures stacks with the
// platform stack walker instead of parsing .pdata. Module tracking is a no-op.
class FBacktracer {
public:
	FBacktracer(FMalloc* InMalloc);
	~FBacktracer();
	static FBacktracer* Get();
	inline uint32_t GetBacktraceId(void* AddressOfReturnAddress);
	uint32_t GetBacktraceId(uint64_t ReturnAddress);
	void AddModule(uintptr_t Base, const char16_t* Name) {}
	void RemoveModule(uintptr_t Base) {}

private:
	static FBacktracer* Instance;
	FMalloc* Malloc;
	FCallstackTracer CallstackTracer;
};

////////////////////////////////////////////////////////////////////////////////
FBacktracer* FBacktracer::Instance = nullptr;

////////////////////////////////////////////////////////////////////////////////
FBacktracer::FBacktracer(FMalloc* InMalloc)
: Malloc(InMalloc)
, CallstackTracer(InMalloc)
{
	Instance = this;
}

////////////////////////////////////////////////////////////////////////////////
FBacktracer::~FBacktracer()
{
}

////////////////////////////////////////////////////////////////////////////////
FBacktracer*
FBacktracer::Get()
{
	return Instance;
}

////////////////////////////////////////////////////////////////////////////////
// Convenience overload: reads the return address that AddressOfReturnAddress
// points at and forwards to the capture overload below.
uint32_t
FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
{
	const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress;
	return GetBacktraceId(ReturnAddress);
}

////////////////////////////////////////////////////////////////////////////////
// Captures the current stack with the platform walker, keeps the frames from
// ReturnAddress outward, hashes them and returns the deduplicated callstack
// id (0 in shipping builds or when no frames were captured).
uint32_t
FBacktracer::GetBacktraceId(uint64_t ReturnAddress)
{
#	if !UE_BUILD_SHIPPING
	uint64_t StackFrames[256];
	int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames));
	if (NumStackFrames > 0)
	{
		FCallstackTracer::FBacktraceEntry BacktraceEntry;
		uint64_t BacktraceId = 0;
		uint32_t FrameIdx = 0;
		bool bUseAddress = false;
		for (int32_t Index = 0; Index < NumStackFrames; Index++)
		{
			if (!bUseAddress)
			{
				// start using backtrace only after ReturnAddress
				if (StackFrames[Index] == (uint64_t)ReturnAddress)
				{
					bUseAddress = true;
				}
			}
			if (bUseAddress || NumStackFrames == 1)
			{
				// Compact the kept frames in place; FrameIdx never overtakes
				// Index, so this is safe on the same buffer.
				uint64_t RetAddr = StackFrames[Index];
				StackFrames[FrameIdx++] = RetAddr;

				// This is a simple order-dependent LCG. Should be sufficient enough
				BacktraceId += RetAddr;
				BacktraceId *= 0x30be8efa499c249dull;
			}
		}

		// Save the collected id
		BacktraceEntry.Hash = BacktraceId;
		BacktraceEntry.FrameCount = FrameIdx;
		BacktraceEntry.Frames = StackFrames;

		// Add to queue to be processed. This might block until there is room in the
		// queue (i.e. the processing thread has caught up processing).
		return CallstackTracer.AddCallstack(BacktraceEntry);
	}
#	endif
	return 0;
}

}	// namespace zen

#	endif	// UE_CALLSTACK_TRACE_USE_UNWIND_TABLES

namespace zen {

////////////////////////////////////////////////////////////////////////////////
// Creates the (intentionally leaked) backtracer singleton and hooks module
// load/unload notifications into it. Idempotent once the instance exists.
void
CallstackTrace_CreateInternal(FMalloc* Malloc)
{
	if (FBacktracer::Get() != nullptr)
	{
		return;
	}

	// Allocate, construct and intentionally leak backtracer
	void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer));
	new (Alloc) FBacktracer(Malloc);

	Modules_Create(Malloc);
	Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) {
		bLoad ? FBacktracer::Get()->AddModule(uintptr_t(Module), Name)	//-V522
			  : FBacktracer::Get()->RemoveModule(uintptr_t(Module));
	});
}

////////////////////////////////////////////////////////////////////////////////
// Starts module enumeration; afterwards GetBacktraceId may cache unresolved
// return addresses as "unknown" (see GModulesAreInitialized).
void
CallstackTrace_InitializeInternal()
{
	Modules_Initialize();
	GModulesAreInitialized = true;
}

////////////////////////////////////////////////////////////////////////////////
// Returns the callstack id for the current call site, or 0 when the callstack
// trace channel is disabled or tracing has not been created yet.
uint32_t
CallstackTrace_GetCurrentId()
{
	if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel))
	{
		return 0;
	}

	void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING();
	if (FBacktracer* Instance = FBacktracer::Get())
	{
#	if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER
		return Instance->GetBacktraceId(StackAddress);
#	else
		return Instance->GetBacktraceId((uint64_t)StackAddress);
#	endif
	}
	return 0;
}

}	// namespace zen

#endif