diff options
Diffstat (limited to 'src/zencore/memtrack')
| -rw-r--r-- | src/zencore/memtrack/callstacktrace.cpp | 1059 | ||||
| -rw-r--r-- | src/zencore/memtrack/callstacktrace.h | 151 | ||||
| -rw-r--r-- | src/zencore/memtrack/growonlylockfreehash.h | 255 | ||||
| -rw-r--r-- | src/zencore/memtrack/memorytrace.cpp | 829 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace.cpp | 296 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace.h | 11 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace_events.cpp | 16 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace_events.h | 27 | ||||
| -rw-r--r-- | src/zencore/memtrack/platformtls.h | 107 | ||||
| -rw-r--r-- | src/zencore/memtrack/tagtrace.cpp | 237 | ||||
| -rw-r--r-- | src/zencore/memtrack/tracemalloc.h | 24 | ||||
| -rw-r--r-- | src/zencore/memtrack/vatrace.cpp | 361 | ||||
| -rw-r--r-- | src/zencore/memtrack/vatrace.h | 61 |
13 files changed, 3434 insertions, 0 deletions
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp new file mode 100644 index 000000000..d860c05d1 --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.cpp @@ -0,0 +1,1059 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "callstacktrace.h" + +#include <zenbase/zenbase.h> +#include <zencore/string.h> + +#if UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +// Platform implementations of back tracing +//////////////////////////////////////////////////////////////////////////////// +void CallstackTrace_CreateInternal(FMalloc*); +void CallstackTrace_InitializeInternal(); + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_CHANNEL_DEFINE(CallstackChannel) +UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec) + +uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot; + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Create(class FMalloc* InMalloc) +{ + static auto InitOnce = [&] { + CallstackTrace_CreateInternal(InMalloc); + return true; + }(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Initialize() +{ + GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot(); + + static auto InitOnce = [&] { + CallstackTrace_InitializeInternal(); + return true; + }(); +} + +} // namespace zen + +#endif + +#if ZEN_PLATFORM_WINDOWS +# include "moduletrace.h" + +# include "growonlylockfreehash.h" + +# include <zencore/scopeguard.h> +# include <zencore/thread.h> +# include <zencore/trace.h> + +# include <atomic> +# include <span> + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <winnt.h> +# include <winternl.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS +# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0 +# endif + +// 0=off, 1=stats, 2=validation, 3=truth_compare +# define BACKTRACE_DBGLVL 0 + +# 
define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0)) + +static bool GModulesAreInitialized = false; + +// This implementation is using unwind tables which is results in very fast +// stack walking. In some cases this is not suitable, and we then fall back +// to the standard stack walking implementation. +# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES) +# if defined(__clang__) +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0 +# else +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1 +# endif +# endif + +// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms +# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER) +# if defined(__clang__) +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0 +# else +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1 +# endif +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB) +// Initial size of the known set of callstacks +# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE) +// If disabled the known set will not grow. 
New callstacks will not be +// reported if the set is full +# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1 +# endif + +namespace zen { + +class FMalloc; + +UE_TRACE_CHANNEL_EXTERN(CallstackChannel) + +UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync) + UE_TRACE_EVENT_FIELD(uint32, CallstackId) + UE_TRACE_EVENT_FIELD(uint64[], Frames) +UE_TRACE_EVENT_END() + +class FCallstackTracer +{ +public: + struct FBacktraceEntry + { + uint64_t Hash = 0; + uint32_t FrameCount = 0; + uint64_t* Frames; + }; + + FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {} + + uint32_t AddCallstack(const FBacktraceEntry& Entry) + { + bool bAlreadyAdded = false; + + // Our set implementation doesn't allow for zero entries (zero represents an empty element + // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation, + // treat it as a "1" instead, for purposes of tracking if we've seen that callstack. + const uint64_t Hash = FMath::Max(Entry.Hash, 1ull); + uint32_t Id; + KnownSet.Find(Hash, &Id, &bAlreadyAdded); + if (!bAlreadyAdded) + { + Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed); + // On the first callstack reserve memory up front + if (Id == 1) + { + KnownSet.Reserve(InitialReserveCount); + } +# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE + // If configured as not growable, start returning unknown id's when full. 
+ if (Id >= InitialReserveCount) + { + return 0; + } +# endif + KnownSet.Emplace(Hash, Id); + UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel) + << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount); + } + + return Id; + } + +private: + struct FEncounteredCallstackSetEntry + { + std::atomic_uint64_t Key; + std::atomic_uint32_t Value; + + inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); } + inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); } + inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t InKey, uint32_t InValue) + { + Value.store(InValue, std::memory_order_release); + Key.store(InKey, std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); } + static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry)); + } + }; + + typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet; + + constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024; + constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry); + + FEncounteredCallstackSet KnownSet; + std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack" +}; + +# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +/* + * Windows' x64 binaries contain a ".pdata" section that describes the location + * and size of its functions and details on how to unwind them. The unwind + * information includes descriptions about a function's stack frame size and + * the non-volatile registers it pushes onto the stack. From this we can + * calculate where a call instruction wrote its return address. 
This is enough + * to walk the callstack and by caching this information it can be done + * efficiently. + * + * Some functions need a variable amount of stack (such as those that use + * alloc() for example) will use a frame pointer. Frame pointers involve saving + * and restoring the stack pointer in the function's prologue/epilogue. This + * frees the function up to modify the stack pointer arbitrarily. This + * significantly complicates establishing where a return address is, so this + * pdata scheme of walking the stack just doesn't support functions like this. + * Walking stops if it encounters such a function. Fortunately there are + * usually very few such functions, saving us from having to read and track + * non-volatile registers which adds a significant amount of work. + * + * A further optimisation is to to assume we are only interested methods that + * are part of engine or game code. As such we only build lookup tables for + * such modules and never accept OS or third party modules. Backtracing stops + * if an address is encountered which doesn't map to a known module. 
+ */ + +//////////////////////////////////////////////////////////////////////////////// +static uint32_t +AddressToId(uintptr_t Address) +{ + return uint32_t(Address >> 16); +} + +static uintptr_t +IdToAddress(uint32_t Id) +{ + return static_cast<uint32_t>(uintptr_t(Id) << 16); +} + +struct FIdPredicate +{ + template<class T> + bool operator()(uint32_t Id, const T& Item) const + { + return Id < Item.Id; + } + template<class T> + bool operator()(const T& Item, uint32_t Id) const + { + return Item.Id < Id; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +struct FUnwindInfo +{ + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t PrologBytes; + uint8_t NumUnwindCodes; + uint8_t FrameReg : 4; + uint8_t FrameRspBias : 4; +}; + +# pragma warning(push) +# pragma warning(disable : 4200) +struct FUnwindCode +{ + uint8_t PrologOffset; + uint8_t OpCode : 4; + uint8_t OpInfo : 4; + uint16_t Params[]; +}; +# pragma warning(pop) + +enum +{ + UWOP_PUSH_NONVOL = 0, // 1 node + UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes + UWOP_ALLOC_SMALL = 2, // 1 node + UWOP_SET_FPREG = 3, // 1 node + UWOP_SAVE_NONVOL = 4, // 2 nodes + UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes + UWOP_SAVE_XMM128 = 8, // 2 nodes + UWOP_SAVE_XMM128_FAR = 9, // 3 nodes + UWOP_PUSH_MACHFRAME = 10, // 1 node +}; + +//////////////////////////////////////////////////////////////////////////////// +class FBacktracer +{ +public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + void AddModule(uintptr_t Base, const char16_t* Name); + void RemoveModule(uintptr_t Base); + uint32_t GetBacktraceId(void* AddressOfReturnAddress); + +private: + struct FFunction + { + uint32_t Id; + int32_t RspBias; +# if BACKTRACE_DBGLVL >= 2 + uint32_t Size; + const FUnwindInfo* UnwindInfo; +# endif + }; + + struct FModule + { + uint32_t Id; + uint32_t IdSize; + uint32_t NumFunctions; +# if BACKTRACE_DBGLVL >= 1 + uint16 NumFpTypes; + // uint16 *padding* +# else + // uint32_t 
*padding* +# endif + FFunction* Functions; + }; + + struct FLookupState + { + FModule Module; + }; + + struct FFunctionLookupSetEntry + { + // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function) + std::atomic_uint64_t Data; + + inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; } + inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; } + inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t Key, int32_t Value) + { + Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) + { + // 64 bit pointer to 32 bit hash + Key = (~Key) + (Key << 21); + Key = Key ^ (Key >> 24); + Key = Key * 265; + Key = Key ^ (Key >> 14); + Key = Key * 21; + Key = Key ^ (Key >> 28); + Key = Key + (Key << 31); + return static_cast<uint32_t>(Key); + } + static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry)); + } + }; + typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet; + + const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const; + static FBacktracer* Instance; + mutable zen::RwLock Lock; + FModule* Modules; + int32_t ModulesNum; + int32_t ModulesCapacity; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; +# if BACKTRACE_LOCK_FREE + mutable FFunctionLookupSet FunctionLookups; + mutable bool bReentranceCheck = false; +# endif +# if BACKTRACE_DBGLVL >= 1 + mutable uint32_t NumFpTruncations = 0; + mutable uint32_t TotalFunctions = 0; +# endif +}; + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* FBacktracer::Instance = nullptr; + +//////////////////////////////////////////////////////////////////////////////// 
+FBacktracer::FBacktracer(FMalloc* InMalloc) +: Malloc(InMalloc) +, CallstackTracer(InMalloc) +# if BACKTRACE_LOCK_FREE +, FunctionLookups(InMalloc) +# endif +{ +# if BACKTRACE_LOCK_FREE + FunctionLookups.Reserve(512 * 1024); // 4 MB +# endif + ModulesCapacity = 8; + ModulesNum = 0; + Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity); + + Instance = this; +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer::~FBacktracer() +{ + std::span<FModule> ModulesView(Modules, ModulesNum); + for (FModule& Module : ModulesView) + { + Malloc->Free(Module.Functions); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* +FBacktracer::Get() +{ + return Instance; +} + +bool GFullBacktraces = false; + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name) +{ + if (!GFullBacktraces) + { + const size_t NameLen = StringLength(Name); + if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe"))) + { + return; + } + } + + const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase; + const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew); + const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader); + + uint32_t NumSections = FileHeader->NumberOfSections; + const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader); + + // Find ".pdata" section + uintptr_t PdataBase = 0; + uintptr_t PdataEnd = 0; + for (uint32_t i = 0; i < NumSections; ++i) + { + const IMAGE_SECTION_HEADER* Section = Sections + i; + if (*(uint64_t*)(Section->Name) == + 0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. 
This constant is '.pdata' + { + PdataBase = ModuleBase + Section->VirtualAddress; + PdataEnd = PdataBase + Section->SizeOfRawData; + break; + } + } + + if (PdataBase == 0) + { + return; + } + + // Count the number of functions. The assumption here is that if we have got this far then there is at least one function + uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION); + if (NumFunctions == 0) + { + return; + } + + const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase; + do + { + const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1; + if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress)) + { + break; + } + + --NumFunctions; + } while (NumFunctions != 0); + + // Allocate some space for the module's function-to-frame-size table + auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions); + FFunction* OutTableCursor = OutTable; + + // Extract frame size for each function from pdata's unwind codes. + uint32_t NumFpFuncs = 0; + for (uint32_t i = 0; i < NumFunctions; ++i) + { + const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i; + + uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress; + const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr; + + if (UnwindInfo->Version != 1) + { + /* some v2s have been seen in msvc. 
Always seem to be assembly + * routines (memset, memcpy, etc) */ + continue; + } + + int32_t FpInfo = 0; + int32_t RspBias = 0; + +# if BACKTRACE_DBGLVL >= 2 + uint32_t PrologVerify = UnwindInfo->PrologBytes; +# endif + + const auto* Code = (FUnwindCode*)(UnwindInfo + 1); + const auto* EndCode = Code + UnwindInfo->NumUnwindCodes; + while (Code < EndCode) + { +# if BACKTRACE_DBGLVL >= 2 + if (Code->PrologOffset > PrologVerify) + { + PLATFORM_BREAK(); + } + PrologVerify = Code->PrologOffset; +# endif + + switch (Code->OpCode) + { + case UWOP_PUSH_NONVOL: + RspBias += 8; + Code += 1; + break; + + case UWOP_ALLOC_LARGE: + if (Code->OpInfo) + { + RspBias += *(uint32_t*)(Code->Params); + Code += 3; + } + else + { + RspBias += Code->Params[0] * 8; + Code += 2; + } + break; + + case UWOP_ALLOC_SMALL: + RspBias += (Code->OpInfo * 8) + 8; + Code += 1; + break; + + case UWOP_SET_FPREG: + // Function will adjust RSP (e.g. through use of alloca()) so it + // uses a frame pointer register. There's instructions like; + // + // push FRAME_REG + // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)] + // ... + // add rsp, rax + // ... + // sub rsp, FRAME_RSP_BIAS * 16 + // pop FRAME_REG + // ret + // + // To recover the stack frame we would need to track non-volatile + // registers which adds a lot of overhead for a small subset of + // functions. Instead we'll end backtraces at these functions. + + // MSB is set to detect variable sized frames that we can't proceed + // past when back-tracing. + NumFpFuncs++; + FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23); + Code += 1; + break; + + case UWOP_PUSH_MACHFRAME: + RspBias = Code->OpInfo ? 
48 : 40; + Code += 1; + break; + + case UWOP_SAVE_NONVOL: + Code += 2; + break; /* saves are movs instead of pushes */ + case UWOP_SAVE_NONVOL_FAR: + Code += 3; + break; + case UWOP_SAVE_XMM128: + Code += 2; + break; + case UWOP_SAVE_XMM128_FAR: + Code += 3; + break; + + default: +# if BACKTRACE_DBGLVL >= 2 + PLATFORM_BREAK(); +# endif + break; + } + } + + // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a + // single actual function in the .text segment. + bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO); + + RspBias /= sizeof(void*); // stack push/popds in units of one machine word + RspBias += !bIsChained; // and one extra push for the ret address + RspBias |= FpInfo; // pack in details about possible frame pointer + + if (bIsChained) + { + OutTableCursor[-1].RspBias += RspBias; +# if BACKTRACE_DBGLVL >= 2 + OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress); +# endif + } + else + { + *OutTableCursor = { + FunctionTable->BeginAddress, + RspBias, +# if BACKTRACE_DBGLVL >= 2 + FunctionTable->EndAddress - FunctionTable->BeginAddress, + UnwindInfo, +# endif + }; + + ++OutTableCursor; + } + } + + uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage; + ModuleSize += 0xffff; // to align up to next 64K page. 
it'll get shifted by AddressToId() + + FModule Module = { + AddressToId(ModuleBase), + AddressToId(ModuleSize), + uint32_t(uintptr_t(OutTableCursor - OutTable)), +# if BACKTRACE_DBGLVL >= 1 + uint16(NumFpFuncs), +# endif + OutTable, + }; + + { + zen::RwLock::ExclusiveLockScope _(Lock); + + if (ModulesNum + 1 > ModulesCapacity) + { + ModulesCapacity += 8; + Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity); + } + Modules[ModulesNum++] = Module; + + std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; }); + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations += NumFpFuncs; + TotalFunctions += NumFunctions; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::RemoveModule(uintptr_t ModuleBase) +{ + // When Windows' RequestExit() is called it hard-terminates all threads except + // the main thread and then proceeds to unload the process' DLLs. This hard + // thread termination can result is dangling locked locks. Not an issue as + // the rule is "do not do anything multithreaded in DLL load/unload". And here + // we are, taking write locks during DLL unload which is, quite unsurprisingly, + // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us + // anything due to how DLLs and processes' address spaces work. So we will... 
+# if defined PLATFORM_WINDOWS + ZEN_UNUSED(ModuleBase); + + return; +# else + + zen::RwLock::ExclusiveLockScope _(Lock); + + uint32_t ModuleId = AddressToId(ModuleBase); + TArrayView<FModule> ModulesView(Modules, ModulesNum); + int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate()); + if (Index >= ModulesNum) + { + return; + } + + const FModule& Module = Modules[Index]; + if (Module.Id != ModuleId) + { + return; + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations -= Module.NumFpTypes; + TotalFunctions -= Module.NumFunctions; +# endif + + // no code should be executing at this point so we can safely free the + // table knowing know one is looking at it. + Malloc->Free(Module.Functions); + + for (SIZE_T i = Index; i < ModulesNum; i++) + { + Modules[i] = Modules[i + 1]; + } + + --ModulesNum; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +const FBacktracer::FFunction* +FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const +{ + // This function caches the previous module look up. The theory here is that + // a series of return address in a backtrace often cluster around one module + + FIdPredicate IdPredicate; + + // Look up the module that Address belongs to. 
+ uint32_t AddressId = AddressToId(Address); + if ((AddressId - State.Module.Id) >= State.Module.IdSize) + { + auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate); + + if (FindIt == Modules) + { + return nullptr; + } + + State.Module = *--FindIt; + } + + // Check that the address is within the address space of the best-found module + const FModule* Module = &(State.Module); + if ((AddressId - Module->Id) >= Module->IdSize) + { + return nullptr; + } + + // Now we've a module we have a table of functions and their stack sizes so + // we can get the frame size for Address + uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id)); + std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions); + auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate); + if (FindIt == begin(FuncsView)) + { + return nullptr; + } + + const FFunction* Function = &(*--FindIt); +# if BACKTRACE_DBGLVL >= 2 + if ((FuncId - Function->Id) >= Function->Size) + { + PLATFORM_BREAK(); + return nullptr; + } +# endif + return Function; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) +{ + FLookupState LookupState = {}; + uint64_t Frames[256]; + + uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress; + +# if BACKTRACE_DBGLVL >= 3 + uintptr_t TruthBacktrace[1024]; + uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr); + uintptr_t* TruthCursor = TruthBacktrace; + for (; *TruthCursor != *StackPointer; ++TruthCursor) + ; +# endif + +# if BACKTRACE_DBGLVL >= 2 + struct + { + void* Sp; + void* Ip; + const FFunction* Function; + } Backtrace[1024] = {}; + uint32_t NumBacktrace = 0; +# endif + + uint64_t BacktraceHash = 0; + uint32_t FrameIdx = 0; + +# if BACKTRACE_LOCK_FREE + // When running lock free, we defer the lock until a lock free function lookup fails + bool Locked = false; 
+# else + FScopeLock _(&Lock); +# endif + do + { + uintptr_t RetAddr = *StackPointer; + + Frames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceHash += RetAddr; + BacktraceHash *= 0x30be8efa499c249dull; + +# if BACKTRACE_LOCK_FREE + int32_t RspBias; + bool bIsAlreadyInTable; + FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable); + if (bIsAlreadyInTable) + { + if (RspBias < 0) + { + break; + } + else + { + StackPointer += RspBias; + continue; + } + } + if (!Locked) + { + Lock.AcquireExclusive(); + Locked = true; + + // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion + // when the allocation reenters the stack trace code. We need to break out of the recursion + // in that case, and let the allocation complete, with the assumption that we don't care + // about call stacks for internal allocations in the memory reporting system. The "Lock()" + // above will only fall through with this flag set if it's a second lock in the same thread. + if (bReentranceCheck) + { + break; + } + } +# endif // BACKTRACE_LOCK_FREE + + const FFunction* Function = LookupFunction(RetAddr, LookupState); + if (Function == nullptr) + { +# if BACKTRACE_LOCK_FREE + // LookupFunction fails when modules are not yet registered. In this case, we do not want the address + // to be added to the lookup map, but to retry the lookup later when modules are properly registered. + if (GModulesAreInitialized) + { + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, -1); + } +# endif + break; + } + +# if BACKTRACE_LOCK_FREE + { + // This conversion improves probing performance for the hash set. Additionally it is critical + // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map. + int32_t StoreBias = Function->RspBias < 0 ? 
-1 : Function->RspBias; + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, StoreBias); + } +# endif + +# if BACKTRACE_DBGLVL >= 2 + if (NumBacktrace < 1024) + { + Backtrace[NumBacktrace++] = { + StackPointer, + (void*)RetAddr, + Function, + }; + } +# endif + + if (Function->RspBias < 0) + { + // This is a frame with a variable-sized stack pointer. We don't + // track enough information to proceed. +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations++; +# endif + break; + } + + StackPointer += Function->RspBias; + } + // Trunkate callstacks longer than MaxStackDepth + while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames)); + + // Build the backtrace entry for submission + FCallstackTracer::FBacktraceEntry BacktraceEntry; + BacktraceEntry.Hash = BacktraceHash; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = Frames; + +# if BACKTRACE_DBGLVL >= 3 + for (uint32_t i = 0; i < NumBacktrace; ++i) + { + if ((void*)TruthCursor[i] != Backtrace[i].Ip) + { + PLATFORM_BREAK(); + break; + } + } +# endif + +# if BACKTRACE_LOCK_FREE + if (Locked) + { + Lock.ReleaseExclusive(); + } +# endif + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). 
+ return CallstackTracer.AddCallstack(BacktraceEntry); +} +} + +# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + + //////////////////////////////////////////////////////////////////////////////// + class FBacktracer + { + public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + inline uint32_t GetBacktraceId(void* AddressOfReturnAddress); + uint32_t GetBacktraceId(uint64_t ReturnAddress); + void AddModule(uintptr_t Base, const char16_t* Name) {} + void RemoveModule(uintptr_t Base) {} + + private: + static FBacktracer* Instance; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; + }; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Instance = nullptr; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; } + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::~FBacktracer() {} + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Get() { return Instance; } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) + { + const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress; + return GetBacktraceId(ReturnAddress); + } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress) + { +# if !UE_BUILD_SHIPPING + uint64_t StackFrames[256]; + int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames)); + if (NumStackFrames > 0) + { + FCallstackTracer::FBacktraceEntry BacktraceEntry; + uint64_t BacktraceId = 0; + uint32_t FrameIdx = 0; + bool bUseAddress = false; + for 
(int32_t Index = 0; Index < NumStackFrames; Index++) + { + if (!bUseAddress) + { + // start using backtrace only after ReturnAddress + if (StackFrames[Index] == (uint64_t)ReturnAddress) + { + bUseAddress = true; + } + } + if (bUseAddress || NumStackFrames == 1) + { + uint64_t RetAddr = StackFrames[Index]; + StackFrames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceId += RetAddr; + BacktraceId *= 0x30be8efa499c249dull; + } + } + + // Save the collected id + BacktraceEntry.Hash = BacktraceId; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = StackFrames; + + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). + return CallstackTracer.AddCallstack(BacktraceEntry); + } +# endif + + return 0; + } + +} + +# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_CreateInternal(FMalloc* Malloc) +{ + if (FBacktracer::Get() != nullptr) + { + return; + } + + // Allocate, construct and intentionally leak backtracer + void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer)); + new (Alloc) FBacktracer(Malloc); + + Modules_Create(Malloc); + Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) { + bLoad ? 
FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522 + : FBacktracer::Get()->RemoveModule(uintptr_t(Module)); + }); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_InitializeInternal() +{ + Modules_Initialize(); + GModulesAreInitialized = true; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +CallstackTrace_GetCurrentId() +{ + if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel)) + { + return 0; + } + + void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING(); + if (FBacktracer* Instance = FBacktracer::Get()) + { +# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER + return Instance->GetBacktraceId(StackAddress); +# else + return Instance->GetBacktraceId((uint64_t)StackAddress); +# endif + } + + return 0; +} + +} // namespace zen + +#endif diff --git a/src/zencore/memtrack/callstacktrace.h b/src/zencore/memtrack/callstacktrace.h new file mode 100644 index 000000000..3e191490b --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.h @@ -0,0 +1,151 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/trace.h> + +#if ZEN_PLATFORM_WINDOWS +# include <intrin.h> + +# define PLATFORM_RETURN_ADDRESS() _ReturnAddress() +# define PLATFORM_RETURN_ADDRESS_POINTER() _AddressOfReturnAddress() +# define PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING PLATFORM_RETURN_ADDRESS_POINTER +#endif + +//////////////////////////////////////////////////////////////////////////////// +#if !defined(UE_CALLSTACK_TRACE_ENABLED) +# if UE_TRACE_ENABLED +# if ZEN_PLATFORM_WINDOWS +# define UE_CALLSTACK_TRACE_ENABLED 1 +# endif +# endif +#endif + +#if !defined(UE_CALLSTACK_TRACE_ENABLED) +# define UE_CALLSTACK_TRACE_ENABLED 0 +#endif + +//////////////////////////////////////////////////////////////////////////////// +#if UE_CALLSTACK_TRACE_ENABLED + +# include "platformtls.h" + +namespace zen { + +/** + * Creates callstack tracing. 
+ * @param Malloc Allocator instance to use. + */ +void CallstackTrace_Create(class FMalloc* Malloc); + +/** + * Initializes callstack tracing. On some platforms this has to be delayed due to initialization order. + */ +void CallstackTrace_Initialize(); + +/** + * Capture the current callstack, and trace the definition if it has not already been encountered. The returned value + * can be used in trace events and be resolved in analysis. + * @return Unique id identifying the current callstack. + */ +uint32_t CallstackTrace_GetCurrentId(); + +/** + * Callstack Trace Scoped Macro to avoid resolving the full callstack + * can be used when some external libraries are not compiled with frame pointers + * preventing us to resolve it without crashing. Instead the callstack will be + * only the caller address. + */ +# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() FCallStackTraceLimitResolveScope PREPROCESSOR_JOIN(FCTLMScope, __LINE__) + +extern uint32_t GCallStackTracingTlsSlotIndex; + +/** + * @return the fallback callstack address + */ +inline void* +CallstackTrace_GetFallbackPlatformReturnAddressData() +{ + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + return FPlatformTLS::GetTlsValue(GCallStackTracingTlsSlotIndex); + else + return nullptr; +} + +/** + * @return Needs full callstack resolve + */ +inline bool +CallstackTrace_ResolveFullCallStack() +{ + return CallstackTrace_GetFallbackPlatformReturnAddressData() == nullptr; +} + +/* + * Callstack Trace scope for override CallStack + */ +class FCallStackTraceLimitResolveScope +{ +public: + ZEN_FORCENOINLINE FCallStackTraceLimitResolveScope() + { + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + { + FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING()); + } + } + + ZEN_FORCENOINLINE ~FCallStackTraceLimitResolveScope() + { + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + { + 
FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, nullptr); + } + } +}; + +} // namespace zen + +#else // UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +inline void +CallstackTrace_Create(class FMalloc* /*Malloc*/) +{ +} + +inline void +CallstackTrace_Initialize() +{ +} + +inline uint32_t +CallstackTrace_GetCurrentId() +{ + return 0; +} + +inline void* +CallstackTrace_GetCurrentReturnAddressData() +{ + return nullptr; +} + +inline void* +CallstackTrace_GetFallbackPlatformReturnAddressData() +{ + return nullptr; +} + +inline bool +CallstackTrace_ResolveFullCallStack() +{ + return true; +} + +# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() + +} // namespace zen + +#endif // UE_CALLSTACK_TRACE_ENABLED diff --git a/src/zencore/memtrack/growonlylockfreehash.h b/src/zencore/memtrack/growonlylockfreehash.h new file mode 100644 index 000000000..d6ff4fc32 --- /dev/null +++ b/src/zencore/memtrack/growonlylockfreehash.h @@ -0,0 +1,255 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> +#include <zencore/intmath.h> +#include <zencore/thread.h> + +#include <zencore/memory/fmalloc.h> + +#include <atomic> + +namespace zen { + +// Hash table with fast lock free reads, that only supports insertion of items, and no modification of +// values. KeyType must be an integer. EntryType should be a POD with an identifiable "empty" state +// that can't occur in the table, and include the following member functions: +// +// KeyType GetKey() const; // Get the key from EntryType +// ValueType GetValue() const; // Get the value from EntryType +// bool IsEmpty() const; // Query whether EntryType is empty +// void SetKeyValue(KeyType Key, ValueType Value); // Write key and value into EntryType (ATOMICALLY! 
See below) +// static uint32 KeyHash(KeyType Key); // Convert Key to more well distributed hash +// static void ClearEntries(EntryType* Entries, int32 EntryCount); // Fill an array of entries with empty values +// +// The function "SetKeyValue" must be multi-thread safe when writing new items! This means writing the +// Key last and atomically, or writing the entire EntryType in a single write (say if the key and value +// are packed into a single integer word). Inline is recommended, since these functions are called a +// lot in the inner loop of the algorithm. A simple implementation of "KeyHash" can just return the +// Key (if it's already reasonable as a hash), or mix the bits if better distribution is required. A +// simple implementation of "ClearEntries" can just be a memset, if zero represents an empty entry. +// +// A set can be approximated by making "GetValue" a nop function, and just paying attention to the bool +// result from FindEntry, although you do need to either reserve a certain Key as invalid, or add +// space to store a valid flag as the Value. This class should only be used for small value types, as +// the values are embedded into the hash table, and not stored separately. +// +// Writes are implemented using a lock -- it would be possible to make writes lock free (or lock free +// when resizing doesn't occur), but it adds complexity. If we were to go that route, it would make +// sense to create a fully generic lock free set, which would be much more involved to implement and +// validate than this simple class, and might also offer somewhat worse read perf. Lock free containers +// that support item removal either need additional synchronization overhead on readers, so writers can +// tell if a reader is active and spin, or need graveyard markers and a garbage collection pass called +// periodically, which makes it no longer a simple standalone container. 
+
+// Lock free reads are accomplished by the reader atomically pulling the hash table pointer from the
+// class. The hash table is self contained, with its size stored in the table itself, and hash tables
+// are not freed until the class's destruction. So if the table needs to be reallocated due to a write,
+// active readers will still have valid memory. This does mean that tables leak, but worst case, you
+// end up with half of the memory being wasted. It would be possible to garbage collect the excess
+// tables, but you'd need some kind of global synchronization to make sure no readers are active.
+//
+// Besides cleanup of wasted tables, it might be useful to provide a function to clear a table. This
+// would involve clearing the Key for all the elements in the table (but leaving the memory allocated),
+// and can be done safely with active readers. It's not possible to safely remove individual items due
+// to the need to potentially move other items, which would break an active reader that has already
+// searched past a moved item. But in the case of removing all items, we don't care when a reader fails;
+// it's expected that eventually all readers will fail, regardless of where they are searching. A clear
+// function could be useful if a lot of the data you are caching is no longer used, and you want to
+// reset the cache. 
+// +template<typename EntryType, typename KeyType, typename ValueType> +class TGrowOnlyLockFreeHash +{ +public: + TGrowOnlyLockFreeHash(FMalloc* InMalloc) : Malloc(InMalloc), HashTable(nullptr) {} + + ~TGrowOnlyLockFreeHash() + { + FHashHeader* HashTableNext; + for (FHashHeader* HashTableCurrent = HashTable; HashTableCurrent; HashTableCurrent = HashTableNext) + { + HashTableNext = HashTableCurrent->Next; + + Malloc->Free(HashTableCurrent); + } + } + + /** + * Preallocate the hash table to a certain size + * @param Count - Number of EntryType elements to allocate + * @warning Can only be called once, and only before any items have been added! + */ + void Reserve(uint32_t Count) + { + zen::RwLock::ExclusiveLockScope _(WriteCriticalSection); + ZEN_ASSERT(HashTable.load(std::memory_order_relaxed) == nullptr); + + if (Count <= 0) + { + Count = DEFAULT_INITIAL_SIZE; + } + Count = uint32_t(zen::NextPow2(Count)); + FHashHeader* HashTableLocal = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (Count - 1) * sizeof(EntryType)); + + HashTableLocal->Next = nullptr; + HashTableLocal->TableSize = Count; + HashTableLocal->Used = 0; + EntryType::ClearEntries(HashTableLocal->Elements, Count); + + HashTable.store(HashTableLocal, std::memory_order_release); + } + + /** + * Find an entry in the hash table + * @param Key - Key to search for + * @param OutValue - Memory location to write result value to. Left unmodified if Key isn't found. + * @param bIsAlreadyInTable - Optional result for whether key was found in table. 
+ */ + void Find(KeyType Key, ValueType* OutValue, bool* bIsAlreadyInTable = nullptr) const + { + FHashHeader* HashTableLocal = HashTable.load(std::memory_order_acquire); + if (HashTableLocal) + { + uint32_t TableMask = HashTableLocal->TableSize - 1; + + // Linear probing + for (uint32_t TableIndex = EntryType::KeyHash(Key) & TableMask; !HashTableLocal->Elements[TableIndex].IsEmpty(); + TableIndex = (TableIndex + 1) & TableMask) + { + if (HashTableLocal->Elements[TableIndex].GetKey() == Key) + { + if (OutValue) + { + *OutValue = HashTableLocal->Elements[TableIndex].GetValue(); + } + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + } + } + + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = false; + } + } + + /** + * Add an entry with the given Key to the hash table, will do nothing if the item already exists + * @param Key - Key to add + * @param Value - Value to add for key + * @param bIsAlreadyInTable -- Optional result for whether item was already in table + */ + void Emplace(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr) + { + zen::RwLock::ExclusiveLockScope _(WriteCriticalSection); + + // After locking, check if the item is already in the hash table. + ValueType ValueIgnore; + bool bFindResult; + Find(Key, &ValueIgnore, &bFindResult); + if (bFindResult == true) + { + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + + // Check if there is space in the hash table for a new item. We resize when the hash + // table gets half full or more. @todo: allow client to specify max load factor? + FHashHeader* HashTableLocal = HashTable; + + if (!HashTableLocal || (HashTableLocal->Used >= HashTableLocal->TableSize / 2)) + { + int32_t GrowCount = HashTableLocal ? 
HashTableLocal->TableSize * 2 : DEFAULT_INITIAL_SIZE; + FHashHeader* HashTableGrow = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (GrowCount - 1) * sizeof(EntryType)); + + HashTableGrow->Next = HashTableLocal; + HashTableGrow->TableSize = GrowCount; + HashTableGrow->Used = 0; + EntryType::ClearEntries(HashTableGrow->Elements, GrowCount); + + if (HashTableLocal) + { + // Copy existing elements from the old table to the new table + for (int32_t TableIndex = 0; TableIndex < HashTableLocal->TableSize; TableIndex++) + { + EntryType& Entry = HashTableLocal->Elements[TableIndex]; + if (!Entry.IsEmpty()) + { + HashInsertInternal(HashTableGrow, Entry.GetKey(), Entry.GetValue()); + } + } + } + + HashTableLocal = HashTableGrow; + HashTable.store(HashTableGrow, std::memory_order_release); + } + + // Then add our new item + HashInsertInternal(HashTableLocal, Key, Value); + + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = false; + } + } + + void FindOrAdd(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr) + { + // Attempt to find the item lock free, before calling "Emplace", which locks the container + bool bFindResult; + ValueType IgnoreResult; + Find(Key, &IgnoreResult, &bFindResult); + if (bFindResult) + { + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + + Emplace(Key, Value, bIsAlreadyInTable); + } + +private: + struct FHashHeader + { + FHashHeader* Next; // Old buffers are stored in a linked list for cleanup + int32_t TableSize; + int32_t Used; + EntryType Elements[1]; // Variable sized + }; + + FMalloc* Malloc; + std::atomic<FHashHeader*> HashTable; + zen::RwLock WriteCriticalSection; + + static constexpr int32_t DEFAULT_INITIAL_SIZE = 1024; + + static void HashInsertInternal(FHashHeader* HashTableLocal, KeyType Key, ValueType Value) + { + int32_t TableMask = HashTableLocal->TableSize - 1; + + // Linear probing + for (int32_t TableIndex = EntryType::KeyHash(Key) & TableMask;; TableIndex = (TableIndex + 1) & TableMask) + { 
+ if (HashTableLocal->Elements[TableIndex].IsEmpty()) + { + HashTableLocal->Elements[TableIndex].SetKeyValue(Key, Value); + HashTableLocal->Used++; + break; + } + } + } +}; + +} // namespace zen diff --git a/src/zencore/memtrack/memorytrace.cpp b/src/zencore/memtrack/memorytrace.cpp new file mode 100644 index 000000000..b147aee91 --- /dev/null +++ b/src/zencore/memtrack/memorytrace.cpp @@ -0,0 +1,829 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/tagtrace.h> + +#include "callstacktrace.h" +#include "tracemalloc.h" +#include "vatrace.h" + +#include <zencore/commandline.h> +#include <zencore/enumflags.h> +#include <zencore/guardvalue.h> +#include <zencore/intmath.h> +#include <zencore/string.h> +#include <zencore/trace.h> + +#include <string.h> + +#if ZEN_PLATFORM_WINDOWS +# include <shellapi.h> +#endif + +class FMalloc; + +#if UE_TRACE_ENABLED +namespace zen { +UE_TRACE_CHANNEL_DEFINE(MemAllocChannel, "Memory allocations", true) +} +#endif + +#if UE_MEMORY_TRACE_ENABLED + +//////////////////////////////////////////////////////////////////////////////// + +namespace zen { + +void MemoryTrace_InitTags(FMalloc*); +void MemoryTrace_EnableTracePump(); + +} // namespace zen + +//////////////////////////////////////////////////////////////////////////////// +namespace { +// Controls how often time markers are emitted (default: every 4095 allocations). +constexpr uint32_t MarkerSamplePeriod = (4 << 10) - 1; + +// Number of shifted bits to SizeLower +constexpr uint32_t SizeShift = 3; + +// Counter to track when time marker is emitted +std::atomic<uint32_t> GMarkerCounter(0); + +// If enabled also pumps the Trace system itself. Used on process shutdown +// when worker thread has been killed, but memory events still occurs. +bool GDoPumpTrace; + +// Temporarily disables any internal operation that causes allocations. 
Used to +// avoid recursive behaviour when memory tracing needs to allocate memory through +// TraceMalloc. +thread_local bool GDoNotAllocateInTrace; + +// Set on initialization; on some platforms we hook allocator functions very early +// before Trace has the ability to allocate memory. +bool GTraceAllowed; +} // namespace + +//////////////////////////////////////////////////////////////////////////////// +namespace UE { namespace Trace { + TRACELOG_API void Update(); +}} // namespace UE::Trace + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_EVENT_BEGIN(Memory, Init, NoSync | Important) + UE_TRACE_EVENT_FIELD(uint64_t, PageSize) // new in UE 5.5 + UE_TRACE_EVENT_FIELD(uint32_t, MarkerPeriod) + UE_TRACE_EVENT_FIELD(uint8, Version) + UE_TRACE_EVENT_FIELD(uint8, MinAlignment) + UE_TRACE_EVENT_FIELD(uint8, SizeShift) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Marker) + UE_TRACE_EVENT_FIELD(uint64_t, Cycle) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Alloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, AllocSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, AllocVideo) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Free) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + 
+UE_TRACE_EVENT_BEGIN(Memory, FreeSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, FreeVideo) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocAllocSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocFree) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocFreeSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, MemorySwapOp) + UE_TRACE_EVENT_FIELD(uint64_t, Address) // page fault real address + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, CompressedSize) + UE_TRACE_EVENT_FIELD(uint8, SwapOp) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, HeapSpec, NoSync | Important) + UE_TRACE_EVENT_FIELD(HeapId, Id) + UE_TRACE_EVENT_FIELD(HeapId, ParentId) + UE_TRACE_EVENT_FIELD(uint16, Flags) + UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, HeapMarkAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint16, Flags) + UE_TRACE_EVENT_FIELD(HeapId, Heap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, 
HeapUnmarkAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(HeapId, Heap) +UE_TRACE_EVENT_END() + +// If the layout of the above events is changed, bump this version number. +// version 1: Initial version (UE 5.0, UE 5.1) +// version 2: Added CallstackId for Free events and also for HeapMarkAlloc, HeapUnmarkAlloc events (UE 5.2). +constexpr uint8 MemoryTraceVersion = 2; + +//////////////////////////////////////////////////////////////////////////////// +class FMallocWrapper : public FMalloc +{ +public: + FMallocWrapper(FMalloc* InMalloc); + +private: + struct FCookie + { + uint64_t Tag : 16; + uint64_t Bias : 8; + uint64_t Size : 40; + }; + + static uint32_t GetActualAlignment(SIZE_T Size, uint32_t Alignment); + + virtual void* Malloc(SIZE_T Size, uint32_t Alignment) override; + virtual void* Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) override; + virtual void Free(void* Address) override; + virtual bool GetAllocationSize(void* Address, SIZE_T& SizeOut) override { return InnerMalloc->GetAllocationSize(Address, SizeOut); } + virtual void OnMallocInitialized() override { InnerMalloc->OnMallocInitialized(); } + + FMalloc* InnerMalloc; +}; + +//////////////////////////////////////////////////////////////////////////////// +FMallocWrapper::FMallocWrapper(FMalloc* InMalloc) : InnerMalloc(InMalloc) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FMallocWrapper::GetActualAlignment(SIZE_T Size, uint32_t Alignment) +{ + // Defaults; if size is < 16 then alignment is 8 else 16. + uint32_t DefaultAlignment = 8 << uint32_t(Size >= 16); + return (Alignment < DefaultAlignment) ? 
DefaultAlignment : Alignment; +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FMallocWrapper::Malloc(SIZE_T Size, uint32_t Alignment) +{ + uint32_t ActualAlignment = GetActualAlignment(Size, Alignment); + void* Address = InnerMalloc->Malloc(Size, Alignment); + + MemoryTrace_Alloc((uint64_t)Address, Size, ActualAlignment); + + return Address; +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FMallocWrapper::Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) +{ + // This simplifies things and means reallocs trace events are true reallocs + if (PrevAddress == nullptr) + { + return Malloc(NewSize, Alignment); + } + + MemoryTrace_ReallocFree((uint64_t)PrevAddress); + + void* RetAddress = InnerMalloc->Realloc(PrevAddress, NewSize, Alignment); + + Alignment = GetActualAlignment(NewSize, Alignment); + MemoryTrace_ReallocAlloc((uint64_t)RetAddress, NewSize, Alignment); + + return RetAddress; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FMallocWrapper::Free(void* Address) +{ + if (Address == nullptr) + { + return; + } + + MemoryTrace_Free((uint64_t)Address); + + void* InnerAddress = Address; + + return InnerMalloc->Free(InnerAddress); +} + +//////////////////////////////////////////////////////////////////////////////// +template<class T> +class alignas(alignof(T)) FUndestructed +{ +public: + template<typename... ArgTypes> + void Construct(ArgTypes... 
Args) + { + ::new (Buffer) T(Args...); + bIsConstructed = true; + } + + bool IsConstructed() const { return bIsConstructed; } + + T* operator&() { return (T*)Buffer; } + T* operator->() { return (T*)Buffer; } + +protected: + uint8 Buffer[sizeof(T)]; + bool bIsConstructed; +}; + +//////////////////////////////////////////////////////////////////////////////// +static FUndestructed<FTraceMalloc> GTraceMalloc; + +//////////////////////////////////////////////////////////////////////////////// +static EMemoryTraceInit +MemoryTrace_ShouldEnable() +{ + EMemoryTraceInit Mode = EMemoryTraceInit::Disabled; + + // Process any command line trace options + // + // Note that calls can come into this function before we enter the regular main function + // and we can therefore not rely on the regular command line parsing for the application + + using namespace std::literals; + + auto ProcessTraceArg = [&](const std::string_view& Arg) { + if (Arg == "memalloc"sv) + { + Mode |= EMemoryTraceInit::AllocEvents; + } + else if (Arg == "callstack"sv) + { + Mode |= EMemoryTraceInit::Callstacks; + } + else if (Arg == "memtag"sv) + { + Mode |= EMemoryTraceInit::Tags; + } + else if (Arg == "memory"sv) + { + Mode |= EMemoryTraceInit::Full; + } + else if (Arg == "memory_light"sv) + { + Mode |= EMemoryTraceInit::Light; + } + }; + + constexpr std::string_view TraceOption = "--trace="sv; + + std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) { + if (Arg.starts_with(TraceOption)) + { + const std::string_view OptionArgs = Arg.substr(TraceOption.size()); + + IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg); + } + }; + + IterateCommandlineArgs(ProcessArg); + + return Mode; +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_CreateInternal(FMalloc* InMalloc, EMemoryTraceInit Mode) +{ + using namespace zen; + + // If allocation events are not desired we don't need to do anything, even + // if user 
has enabled only callstacks it will be enabled later. + if (!EnumHasAnyFlags(Mode, EMemoryTraceInit::AllocEvents)) + { + return InMalloc; + } + + // Some OSes (i.e. Windows) will terminate all threads except the main + // one as part of static deinit. However we may receive more memory + // trace events that would get lost as Trace's worker thread has been + // terminated. So flush the last remaining memory events trace needs + // to be updated which we will do that in response to to memory events. + // We'll use an atexit can to know when Trace is probably no longer + // getting ticked. + atexit([]() { MemoryTrace_EnableTracePump(); }); + + GTraceMalloc.Construct(InMalloc); + + // Both tag and callstack tracing need to use the wrapped trace malloc + // so we can break out tracing memory overhead (and not cause recursive behaviour). + if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Tags)) + { + MemoryTrace_InitTags(>raceMalloc); + } + + if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Callstacks)) + { + CallstackTrace_Create(>raceMalloc); + } + + static FUndestructed<FMallocWrapper> SMallocWrapper; + SMallocWrapper.Construct(InMalloc); + + return &SMallocWrapper; +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_CreateInternal(FMalloc* InMalloc) +{ + const EMemoryTraceInit Mode = MemoryTrace_ShouldEnable(); + return MemoryTrace_CreateInternal(InMalloc, Mode); +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_Create(FMalloc* InMalloc) +{ + FMalloc* OutMalloc = MemoryTrace_CreateInternal(InMalloc); + + if (OutMalloc != InMalloc) + { +# if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS + FVirtualWinApiHooks::Initialize(false); +# endif + } + + return OutMalloc; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Initialize() +{ + // At this point we initialized the system to allow tracing. 
+ GTraceAllowed = true; + + const int MIN_ALIGNMENT = 8; + + UE_TRACE_LOG(Memory, Init, MemAllocChannel) + << Init.PageSize(4096) << Init.MarkerPeriod(MarkerSamplePeriod + 1) << Init.Version(MemoryTraceVersion) + << Init.MinAlignment(uint8(MIN_ALIGNMENT)) << Init.SizeShift(uint8(SizeShift)); + + const HeapId SystemRootHeap = MemoryTrace_RootHeapSpec(u"System memory"); + ZEN_ASSERT(SystemRootHeap == EMemoryTraceRootHeap::SystemMemory); + const HeapId VideoRootHeap = MemoryTrace_RootHeapSpec(u"Video memory"); + ZEN_ASSERT(VideoRootHeap == EMemoryTraceRootHeap::VideoMemory); + + static_assert((1 << SizeShift) - 1 <= MIN_ALIGNMENT, "Not enough bits to pack size fields"); + +# if !UE_MEMORY_TRACE_LATE_INIT + // On some platforms callstack initialization cannot happen this early in the process. It is initialized + // in other locations when UE_MEMORY_TRACE_LATE_INIT is defined. Until that point allocations cannot have + // callstacks. + CallstackTrace_Initialize(); +# endif +} + +void +MemoryTrace_Shutdown() +{ + // Disable any further activity + GTraceAllowed = false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool +MemoryTrace_IsActive() +{ + return GTraceAllowed; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_EnableTracePump() +{ + GDoPumpTrace = true; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_UpdateInternal() +{ + const uint32_t TheCount = GMarkerCounter.fetch_add(1, std::memory_order_relaxed); + if ((TheCount & MarkerSamplePeriod) == 0) + { + UE_TRACE_LOG(Memory, Marker, MemAllocChannel) << Marker.Cycle(UE::Trace::Private::TimeGetTimestamp()); + } + + if (GDoPumpTrace) + { + UE::Trace::Update(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Alloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t 
ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, AllocSystem, MemAllocChannel) + << AllocSystem.Address(uint64_t(Address)) << AllocSystem.CallstackId(CallstackId) + << AllocSystem.Size(uint32_t(Size >> SizeShift)) << AllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + case EMemoryTraceRootHeap::VideoMemory: + { + UE_TRACE_LOG(Memory, AllocVideo, MemAllocChannel) + << AllocVideo.Address(uint64_t(Address)) << AllocVideo.CallstackId(CallstackId) + << AllocVideo.Size(uint32_t(Size >> SizeShift)) << AllocVideo.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + default: + { + UE_TRACE_LOG(Memory, Alloc, MemAllocChannel) + << Alloc.Address(uint64_t(Address)) << Alloc.CallstackId(CallstackId) << Alloc.Size(uint32_t(Size >> SizeShift)) + << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Free(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, FreeSystem, MemAllocChannel) + << FreeSystem.Address(uint64_t(Address)) << FreeSystem.CallstackId(CallstackId); + break; + } + case EMemoryTraceRootHeap::VideoMemory: + { + UE_TRACE_LOG(Memory, FreeVideo, MemAllocChannel) + << FreeVideo.Address(uint64_t(Address)) << FreeVideo.CallstackId(CallstackId); + break; + } + default: + { + UE_TRACE_LOG(Memory, Free, MemAllocChannel) + << Free.Address(uint64_t(Address)) << Free.CallstackId(CallstackId) << Free.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_ReallocAlloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, ReallocAllocSystem, MemAllocChannel) + << ReallocAllocSystem.Address(uint64_t(Address)) << ReallocAllocSystem.CallstackId(CallstackId) + << ReallocAllocSystem.Size(uint32_t(Size >> SizeShift)) + << ReallocAllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + default: + { + UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel) + << ReallocAlloc.Address(uint64_t(Address)) << ReallocAlloc.CallstackId(CallstackId) + << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) + << ReallocAlloc.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, ReallocFreeSystem, MemAllocChannel) + << ReallocFreeSystem.Address(uint64_t(Address)) << ReallocFreeSystem.CallstackId(CallstackId); + break; + } + + default: + { + UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel) + << ReallocFree.Address(uint64_t(Address)) << ReallocFree.CallstackId(CallstackId) + << ReallocFree.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize, uint32_t CallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + UE_TRACE_LOG(Memory, MemorySwapOp, MemAllocChannel) + << MemorySwapOp.Address(PageAddress) << MemorySwapOp.CallstackId(CallstackId) << MemorySwapOp.CompressedSize(CompressedSize) + << MemorySwapOp.SwapOp((uint8)SwapOperation); + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +HeapId +MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags) +{ + if (!GTraceAllowed) + { + return 0; + } + + static std::atomic<HeapId> HeapIdCount(EMemoryTraceRootHeap::EndReserved + 1); // Reserve indexes for root heaps + const HeapId Id = HeapIdCount.fetch_add(1); + const uint32_t NameLen = uint32_t(zen::StringLength(Name)); + const uint32_t DataSize = NameLen * sizeof(char16_t); + ZEN_ASSERT(ParentId < Id); + + UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize) + << HeapSpec.Id(Id) << HeapSpec.ParentId(ParentId) << HeapSpec.Name(Name, NameLen) << HeapSpec.Flags(uint16(Flags)); + + return Id; +} + +//////////////////////////////////////////////////////////////////////////////// +HeapId +MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags) +{ + if (!GTraceAllowed) + { + 
return 0; + } + + static std::atomic<HeapId> RootHeapCount(0); + const HeapId Id = RootHeapCount.fetch_add(1); + ZEN_ASSERT(Id <= EMemoryTraceRootHeap::EndReserved); + + const uint32_t NameLen = uint32_t(zen::StringLength(Name)); + const uint32_t DataSize = NameLen * sizeof(char16_t); + + UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize) + << HeapSpec.Id(Id) << HeapSpec.ParentId(HeapId(~0)) << HeapSpec.Name(Name, NameLen) + << HeapSpec.Flags(uint16(EMemoryTraceHeapFlags::Root | Flags)); + + return Id; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_MarkAllocAsHeap(uint64_t Address, HeapId Heap, EMemoryTraceHeapAllocationFlags Flags, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId(); + + UE_TRACE_LOG(Memory, HeapMarkAlloc, MemAllocChannel) + << HeapMarkAlloc.Address(uint64_t(Address)) << HeapMarkAlloc.CallstackId(CallstackId) + << HeapMarkAlloc.Flags(uint16(EMemoryTraceHeapAllocationFlags::Heap | Flags)) << HeapMarkAlloc.Heap(Heap); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + // Sets all flags to zero + UE_TRACE_LOG(Memory, HeapUnmarkAlloc, MemAllocChannel) + << HeapUnmarkAlloc.Address(uint64_t(Address)) << HeapUnmarkAlloc.CallstackId(CallstackId) << HeapUnmarkAlloc.Heap(Heap); +} + +} // namespace zen + +#else // UE_MEMORY_TRACE_ENABLED + +///////////////////////////////////////////////////////////////////////////// +bool +MemoryTrace_IsActive() +{ + return false; +} + +#endif // UE_MEMORY_TRACE_ENABLED + +namespace zen { + +///////////////////////////////////////////////////////////////////////////// +FTraceMalloc::FTraceMalloc(FMalloc* InMalloc) +{ + WrappedMalloc = InMalloc; +} + +///////////////////////////////////////////////////////////////////////////// +FTraceMalloc::~FTraceMalloc() +{ +} + +///////////////////////////////////////////////////////////////////////////// +void* +FTraceMalloc::Malloc(SIZE_T Count, uint32_t Alignment) +{ +#if UE_MEMORY_TRACE_ENABLED + // UE_TRACE_METADATA_CLEAR_SCOPE(); + UE_MEMSCOPE(TRACE_TAG); + + void* NewPtr; + { + zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true); + NewPtr = WrappedMalloc->Malloc(Count, Alignment); + } + + const uint64_t Size = Count; + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + + UE_TRACE_LOG(Memory, Alloc, MemAllocChannel) + << Alloc.Address(uint64_t(NewPtr)) << Alloc.CallstackId(0) << Alloc.Size(uint32_t(Size >> SizeShift)) + << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + return NewPtr; +#else + return WrappedMalloc->Malloc(Count, Alignment); +#endif // UE_MEMORY_TRACE_ENABLED +} + +///////////////////////////////////////////////////////////////////////////// +void* +FTraceMalloc::Realloc(void* Original, SIZE_T Count, uint32_t Alignment) +{ +#if UE_MEMORY_TRACE_ENABLED + // UE_TRACE_METADATA_CLEAR_SCOPE(); + 
UE_MEMSCOPE(TRACE_TAG); + + UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel) + << ReallocFree.Address(uint64_t(Original)) << ReallocFree.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + void* NewPtr; + { + zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true); + NewPtr = WrappedMalloc->Realloc(Original, Count, Alignment); + } + + const uint64_t Size = Count; + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + + UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel) + << ReallocAlloc.Address(uint64_t(NewPtr)) << ReallocAlloc.CallstackId(0) << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) + << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) + << ReallocAlloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + return NewPtr; +#else + return WrappedMalloc->Realloc(Original, Count, Alignment); +#endif // UE_MEMORY_TRACE_ENABLED +} + +///////////////////////////////////////////////////////////////////////////// +void +FTraceMalloc::Free(void* Original) +{ +#if UE_MEMORY_TRACE_ENABLED + UE_TRACE_LOG(Memory, Free, MemAllocChannel) + << Free.Address(uint64_t(Original)) << Free.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + { + zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true); + WrappedMalloc->Free(Original); + } +#else + WrappedMalloc->Free(Original); +#endif // UE_MEMORY_TRACE_ENABLED +} + +} // namespace zen diff --git a/src/zencore/memtrack/moduletrace.cpp b/src/zencore/memtrack/moduletrace.cpp new file mode 100644 index 000000000..51280ff3a --- /dev/null +++ b/src/zencore/memtrack/moduletrace.cpp @@ -0,0 +1,296 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+
+#include <zenbase/zenbase.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+// Module load/unload diagnostics are only implemented for Win32
+// (LdrRegisterDllNotification and PEB loader-list walking below).
+#if ZEN_PLATFORM_WINDOWS
+#	define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 1
+#else
+#	define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 0
+#endif
+
+#include "moduletrace_events.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
+
+#	include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#	include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#	include <zencore/trace.h>
+
+#	include <array>
+
+namespace zen {
+
+class FMalloc;
+
+typedef uint32_t HeapId;
+
+////////////////////////////////////////////////////////////////////////////////
+// Resolves a named export from ntdll.dll at construction time and lets it be
+// invoked like a function. Used for the loader-notification entry points
+// (LdrRegisterDllNotification / LdrUnregisterDllNotification) which have no
+// import-library stubs.
+// NOTE(review): LoadLibraryW bumps ntdll's refcount and is never balanced by
+// FreeLibrary; harmless for ntdll (always mapped) but worth confirming.
+struct FNtDllFunction
+{
+    FARPROC Addr;
+
+    FNtDllFunction(const char* Name)
+    {
+        HMODULE NtDll = LoadLibraryW(L"ntdll.dll");
+        ZEN_ASSERT(NtDll);
+        Addr = GetProcAddress(NtDll, Name);
+    }
+
+    // Forwards the call to the resolved export. The caller must supply
+    // arguments matching the real prototype; no type checking happens here.
+    template<typename... ArgTypes>
+    unsigned int operator()(ArgTypes... Args)
+    {
+        typedef unsigned int(NTAPI * Prototype)(ArgTypes...);
+        return (Prototype((void*)Addr))(Args...);
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Process-wide singleton that traces module (DLL/EXE) load and unload events
+// and fans the notifications out to registered subscribers.
+class FModuleTrace
+{
+public:
+    // Subscriber signature: (bLoaded, ImageBase, FullDllName-or-nullptr).
+    typedef void (*SubscribeFunc)(bool, void*, const char16_t*);
+
+    FModuleTrace(FMalloc* InMalloc);
+    ~FModuleTrace();
+    static FModuleTrace* Get();
+    void Initialize();
+    void Subscribe(SubscribeFunc Function);
+
+private:
+    void OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base);
+    void OnDllUnloaded(uintptr_t Base);
+    void OnDllNotification(unsigned int Reason, const void* DataPtr);
+    static FModuleTrace* Instance;
+    SubscribeFunc Subscribers[64]; // fixed capacity; Subscribe() asserts on overflow
+    int SubscriberCount = 0;
+    void* CallbackCookie = nullptr; // cookie from LdrRegisterDllNotification; freed in dtor
+    HeapId ProgramHeapId = 0;       // heap id used to attribute module image memory
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace* FModuleTrace::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+// Constructs the module tracer and publishes it as the process-wide singleton.
+// The allocator is currently unused (kept for interface parity with the other
+// tracers).
+FModuleTrace::FModuleTrace(FMalloc* InMalloc)
+{
+    ZEN_UNUSED(InMalloc);
+    Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Unregisters the loader notification callback if Initialize() installed one.
+FModuleTrace::~FModuleTrace()
+{
+    if (CallbackCookie)
+    {
+        FNtDllFunction UnregisterFunc("LdrUnregisterDllNotification");
+        UnregisterFunc(CallbackCookie);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace*
+FModuleTrace::Get()
+{
+    return Instance;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Announces the symbol format, installs the loader notification hook, then
+// traces every module that is already loaded when tracing starts.
+void
+FModuleTrace::Initialize()
+{
+    using namespace UE::Trace;
+
+    ProgramHeapId = MemoryTrace_HeapSpec(SystemMemory, u"Module", EMemoryTraceHeapFlags::None);
+
+    UE_TRACE_LOG(Diagnostics, ModuleInit, ModuleChannel, sizeof(char) * 3)
+        << ModuleInit.SymbolFormat("pdb", 3) << ModuleInit.ModuleBaseShift(uint8(0));
+
+    // Register for DLL load/unload notifications.
+    auto Thunk = [](ULONG Reason, const void* Data, void* Context) {
+        auto* Self = (FModuleTrace*)Context;
+        Self->OnDllNotification(Reason, Data);
+    };
+
+    typedef void(CALLBACK * ThunkType)(ULONG, const void*, void*);
+    auto ThunkImpl = ThunkType(Thunk);
+
+    FNtDllFunction RegisterFunc("LdrRegisterDllNotification");
+    RegisterFunc(0, ThunkImpl, this, &CallbackCookie);
+
+    // Enumerate already loaded modules.
+    const TEB* ThreadEnvBlock = NtCurrentTeb();
+    const PEB* ProcessEnvBlock = ThreadEnvBlock->ProcessEnvironmentBlock;
+    const LIST_ENTRY* ModuleIter = ProcessEnvBlock->Ldr->InMemoryOrderModuleList.Flink;
+    const LIST_ENTRY* ModuleIterEnd = ModuleIter->Blink;
+    do
+    {
+        // NOTE(review): stepping back one LIST_ENTRY assumes InMemoryOrderLinks
+        // is the second member of LDR_DATA_TABLE_ENTRY (as in winternl.h); the
+        // subtraction recovers the entry's base address. Confirm against the
+        // SDK headers in use.
+        const auto& ModuleData = *(LDR_DATA_TABLE_ENTRY*)(ModuleIter - 1);
+        if (ModuleData.DllBase == 0)
+        {
+            break;
+        }
+
+        OnDllLoaded(ModuleData.FullDllName, UPTRINT(ModuleData.DllBase));
+        ModuleIter = ModuleIter->Flink;
+    } while (ModuleIter != ModuleIterEnd);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Adds a load/unload callback. Asserts if the fixed subscriber table is full.
+void
+FModuleTrace::Subscribe(SubscribeFunc Function)
+{
+    ZEN_ASSERT(SubscriberCount < ZEN_ARRAY_COUNT(Subscribers));
+    Subscribers[SubscriberCount++] = Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Loader-notification dispatcher; decodes the raw payload and forwards to
+// OnDllLoaded / OnDllUnloaded.
+void
+FModuleTrace::OnDllNotification(unsigned int Reason, const void* DataPtr)
+{
+    enum
+    {
+        LDR_DLL_NOTIFICATION_REASON_LOADED = 1,
+        LDR_DLL_NOTIFICATION_REASON_UNLOADED = 2,
+    };
+
+    // NOTE(review): layout-compatible mirror of the system's
+    // LDR_DLL_NOTIFICATION_DATA; the reference members overlay the struct's
+    // UNICODE_STRING pointers. Verify against the documented layout.
+    struct FNotificationData
+    {
+        uint32_t Flags;
+        const UNICODE_STRING& FullPath;
+        const UNICODE_STRING& BaseName;
+        uintptr_t Base;
+    };
+    const auto& Data = *(FNotificationData*)DataPtr;
+
+    switch (Reason)
+    {
+        case LDR_DLL_NOTIFICATION_REASON_LOADED:
+            OnDllLoaded(Data.FullPath, Data.Base);
+            break;
+        case LDR_DLL_NOTIFICATION_REASON_UNLOADED:
+            OnDllUnloaded(Data.Base);
+            break;
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits a ModuleLoad event (with the PDB guid/age when present), accounts the
+// image's memory, then notifies subscribers.
+void
+FModuleTrace::OnDllLoaded(const UNICODE_STRING& Name, UPTRINT Base)
+{
+    const auto* DosHeader = (IMAGE_DOS_HEADER*)Base;
+    const auto* NtHeaders = (IMAGE_NT_HEADERS*)(Base + DosHeader->e_lfanew);
+    const IMAGE_OPTIONAL_HEADER& OptionalHeader = NtHeaders->OptionalHeader;
+    // 16-byte PDB guid followed by the 4-byte age.
+    // NOTE(review): left uninitialized when no RSDS/CodeView entry is found
+    // below, in which case garbage bytes are traced -- confirm this is intended.
+    uint8_t ImageId[20];
+
+    // Find the guid and age of the binary, used to match debug files
+    const IMAGE_DATA_DIRECTORY& DebugInfoEntry = OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
+    const auto* DebugEntries = (IMAGE_DEBUG_DIRECTORY*)(Base + DebugInfoEntry.VirtualAddress);
+    for (uint32_t i = 0, n = DebugInfoEntry.Size / sizeof(DebugEntries[0]); i < n; ++i)
+    {
+        const IMAGE_DEBUG_DIRECTORY& Entry = DebugEntries[i];
+        if (Entry.Type == IMAGE_DEBUG_TYPE_CODEVIEW)
+        {
+            struct FCodeView7
+            {
+                uint32_t Signature;
+                uint32_t Guid[4];
+                uint32_t Age;
+            };
+
+            if (Entry.SizeOfData < sizeof(FCodeView7))
+            {
+                continue;
+            }
+
+            const auto* CodeView7 = (FCodeView7*)(Base + Entry.AddressOfRawData);
+            // 'SDSR' is the little-endian view of the "RSDS" CodeView signature.
+            if (CodeView7->Signature != 'SDSR')
+            {
+                continue;
+            }
+
+            memcpy(ImageId, (uint8_t*)&CodeView7->Guid, sizeof(uint32_t) * 4);
+            memcpy(&ImageId[16], (uint8_t*)&CodeView7->Age, sizeof(uint32_t));
+            break;
+        }
+    }
+
+    // Note: UNICODE_STRING.Length is the size in bytes of the string buffer.
+    UE_TRACE_LOG(Diagnostics, ModuleLoad, ModuleChannel, uint32_t(Name.Length + sizeof(ImageId)))
+        << ModuleLoad.Name((const char16_t*)Name.Buffer, Name.Length / 2) << ModuleLoad.Base(uint64_t(Base))
+        << ModuleLoad.Size(OptionalHeader.SizeOfImage) << ModuleLoad.ImageId(ImageId, uint32_t(sizeof(ImageId)));
+
+#	if UE_MEMORY_TRACE_ENABLED
+    {
+        UE_MEMSCOPE(ELLMTag::ProgramSize);
+        // NOTE(review): the image appears to be traced twice on purpose: once
+        // as the allocation that is then marked as a heap, and once as an
+        // allocation inside that heap -- the unload path mirrors this. Confirm.
+        MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+        MemoryTrace_MarkAllocAsHeap(Base, ProgramHeapId);
+        MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+    }
+#	endif // UE_MEMORY_TRACE_ENABLED
+
+    for (int i = 0; i < SubscriberCount; ++i)
+    {
+        Subscribers[i](true, (void*)Base, (const char16_t*)Name.Buffer);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Mirrors OnDllLoaded: frees the inner allocation, unmarks the heap, frees the
+// outer allocation, then emits ModuleUnload and notifies subscribers.
+void
+FModuleTrace::OnDllUnloaded(UPTRINT Base)
+{
+#	if UE_MEMORY_TRACE_ENABLED
+    MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+    MemoryTrace_UnmarkAllocAsHeap(Base, ProgramHeapId);
+    MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+#	endif // UE_MEMORY_TRACE_ENABLED
+
+    UE_TRACE_LOG(Diagnostics, ModuleUnload, ModuleChannel) << ModuleUnload.Base(uint64(Base));
+
+    for (int i = 0; i < SubscriberCount; ++i)
+    {
+        Subscribers[i](false, (void*)Base, nullptr);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Creates the module tracer singleton (idempotent).
+void
+Modules_Create(FMalloc* Malloc)
+{
+    if (FModuleTrace::Get() != nullptr)
+    {
+        return;
+    }
+
+    static FModuleTrace Instance(Malloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Starts tracing; no-op if Modules_Create() has not run.
+void
+Modules_Initialize()
+{
+    if (FModuleTrace* Instance = FModuleTrace::Get())
+    {
+        Instance->Initialize();
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Registers a load/unload callback; no-op if Modules_Create() has not run.
+void
+Modules_Subscribe(void (*Function)(bool, void*, const char16_t*))
+{
+    if (FModuleTrace* Instance = FModuleTrace::Get())
+    {
+        Instance->Subscribe(Function);
+    }
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
diff --git a/src/zencore/memtrack/moduletrace.h b/src/zencore/memtrack/moduletrace.h
new file mode 100644
index 000000000..5e7374faa
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.h
@@ -0,0 +1,11 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+namespace zen {
+
+// Module (DLL/EXE) load/unload tracing entry points; see moduletrace.cpp.
+void Modules_Create(class FMalloc*);
+void Modules_Subscribe(void (*)(bool, void*, const char16_t*));
+void Modules_Initialize();
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.cpp b/src/zencore/memtrack/moduletrace_events.cpp
new file mode 100644
index 000000000..9c6a9b648
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.cpp
@@ -0,0 +1,16 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/trace.h>
+
+#include "moduletrace_events.h"
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+// Definitions for the module-diagnostics trace channel and its events; the
+// matching EXTERN declarations live in moduletrace_events.h.
+UE_TRACE_CHANNEL_DEFINE(ModuleChannel, "Module information needed for symbols resolution", true)
+
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleInit)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleLoad)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleUnload)
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.h b/src/zencore/memtrack/moduletrace_events.h
new file mode 100644
index 000000000..1bda42fe8
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.h
@@ -0,0 +1,27 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/trace.h>
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+// Trace channel + event declarations for module load/unload diagnostics.
+UE_TRACE_CHANNEL_EXTERN(ModuleChannel)
+
+// Announces the symbol format ("pdb") and the base-address shift once.
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleInit, NoSync | Important)
+	UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, SymbolFormat)
+	UE_TRACE_EVENT_FIELD(uint8, ModuleBaseShift)
+UE_TRACE_EVENT_END()
+
+// Emitted for every already-loaded module and for each subsequent DLL load.
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleLoad, NoSync | Important)
+	UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+	UE_TRACE_EVENT_FIELD(uint64, Base)
+	UE_TRACE_EVENT_FIELD(uint32, Size)
+	UE_TRACE_EVENT_FIELD(uint8[], ImageId) // Platform specific id for this image, used to match debug files where available
+UE_TRACE_EVENT_END()
+
+// Emitted when a module is unloaded; identified by its base address.
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleUnload, NoSync | Important)
+	UE_TRACE_EVENT_FIELD(uint64, Base)
+UE_TRACE_EVENT_END()
+
+} // namespace zen
diff --git a/src/zencore/memtrack/platformtls.h b/src/zencore/memtrack/platformtls.h
new file mode 100644
index 000000000..f134e68a8
--- /dev/null
+++ b/src/zencore/memtrack/platformtls.h
@@ -0,0 +1,107 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+ +#pragma once + +#include <zenbase/zenbase.h> + +/** + * It should be possible to provide a generic implementation as long as a threadID is provided. We don't do that yet. + */ +struct FGenericPlatformTLS +{ + static const uint32_t InvalidTlsSlot = 0xFFFFFFFF; + + /** + * Return false if this is an invalid TLS slot + * @param SlotIndex the TLS index to check + * @return true if this looks like a valid slot + */ + static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; } +}; + +#if ZEN_PLATFORM_WINDOWS + +# include <zencore/windows.h> + +class FWindowsPlatformTLS : public FGenericPlatformTLS +{ +public: + static uint32_t AllocTlsSlot() { return ::TlsAlloc(); } + + static void FreeTlsSlot(uint32_t SlotIndex) { ::TlsFree(SlotIndex); } + + static void SetTlsValue(uint32_t SlotIndex, void* Value) { ::TlsSetValue(SlotIndex, Value); } + + /** + * Reads the value stored at the specified TLS slot + * + * @return the value stored in the slot + */ + static void* GetTlsValue(uint32_t SlotIndex) { return ::TlsGetValue(SlotIndex); } + + /** + * Return false if this is an invalid TLS slot + * @param SlotIndex the TLS index to check + * @return true if this looks like a valid slot + */ + static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; } +}; + +typedef FWindowsPlatformTLS FPlatformTLS; + +#elif ZEN_PLATFORM_MAC + +# include <pthread.h + +/** + * Apple implementation of the TLS OS functions + **/ +struct FApplePlatformTLS : public FGenericPlatformTLS +{ + /** + * Returns the currently executing thread's id + */ + static uint32_t GetCurrentThreadId(void) { return (uint32_t)pthread_mach_thread_np(pthread_self()); } + + /** + * Allocates a thread local store slot + */ + static uint32_t AllocTlsSlot(void) + { + // allocate a per-thread mem slot + pthread_key_t SlotKey = 0; + if (pthread_key_create(&SlotKey, NULL) != 0) + { + SlotKey = InvalidTlsSlot; // matches the Windows TlsAlloc() retval. 
+ } + return SlotKey; + } + + /** + * Sets a value in the specified TLS slot + * + * @param SlotIndex the TLS index to store it in + * @param Value the value to store in the slot + */ + static void SetTlsValue(uint32_t SlotIndex, void* Value) { pthread_setspecific((pthread_key_t)SlotIndex, Value); } + + /** + * Reads the value stored at the specified TLS slot + * + * @return the value stored in the slot + */ + static void* GetTlsValue(uint32_t SlotIndex) { return pthread_getspecific((pthread_key_t)SlotIndex); } + + /** + * Frees a previously allocated TLS slot + * + * @param SlotIndex the TLS index to store it in + */ + static void FreeTlsSlot(uint32_t SlotIndex) { pthread_key_delete((pthread_key_t)SlotIndex); } +}; + +typedef FApplePlatformTLS FPlatformTLS; + +#else +# error Platform not yet supported +#endif diff --git a/src/zencore/memtrack/tagtrace.cpp b/src/zencore/memtrack/tagtrace.cpp new file mode 100644 index 000000000..15ba78ae4 --- /dev/null +++ b/src/zencore/memtrack/tagtrace.cpp @@ -0,0 +1,237 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "growonlylockfreehash.h"
+
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+#	include <zencore/string.h>
+
+namespace zen {
+////////////////////////////////////////////////////////////////////////////////
+
+UE_TRACE_CHANNEL_EXTERN(MemAllocChannel);
+
+// Announces one tag (id, parent id, display name).
+UE_TRACE_EVENT_BEGIN(Memory, TagSpec, Important | NoSync)
+	UE_TRACE_EVENT_FIELD(int32, Tag)
+	UE_TRACE_EVENT_FIELD(int32, Parent)
+	UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, Display)
+UE_TRACE_EVENT_END()
+
+// Marks entry into a tag scope on the emitting thread.
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScope, NoSync)
+	UE_TRACE_EVENT_FIELD(int32, Tag)
+UE_TRACE_EVENT_END()
+
+// Marks entry into a pointer-attributed scope on the emitting thread.
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScopePtr, NoSync)
+	UE_TRACE_EVENT_FIELD(uint64, Ptr)
+UE_TRACE_EVENT_END()
+
+////////////////////////////////////////////////////////////////////////////////
+// Per thread active tag, i.e. the top level FMemScope
+thread_local int32 GActiveTag;
+
+////////////////////////////////////////////////////////////////////////////////
+// Default-constructed scope is inert: activates nothing, restores nothing.
+FMemScope::FMemScope()
+{
+}
+
+// NOTE(review): bitwise '&' (not '&&') with the channel check looks deliberate
+// (branch-free combine of two flags) -- confirm it matches the upstream idiom.
+FMemScope::FMemScope(int32_t InTag, bool bShouldActivate /*= true*/)
+{
+	if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+	{
+		ActivateScope(InTag);
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope(ELLMTag InTag, bool bShouldActivate /*= true*/)
+{
+	if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+	{
+		ActivateScope(static_cast<int32>(InTag));
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits a MemoryScope event and makes InTag the thread's active tag, saving
+// the previous tag so the destructor can restore it.
+void
+FMemScope::ActivateScope(int32_t InTag)
+{
+	if (auto LogScope = FMemoryMemoryScopeFields::LogScopeType::ScopedEnter<FMemoryMemoryScopeFields>())
+	{
+		if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopeFields*)(&LogScope))
+		{
+			Inner.SetActive();
+			LogScope += LogScope << MemoryScope.Tag(InTag);
+			PrevTag = GActiveTag;
+			GActiveTag = InTag;
+		}
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Restores the previous thread-local tag, but only if this scope activated one.
+FMemScope::~FMemScope()
+{
+	if (Inner.bActive)
+	{
+		GActiveTag = PrevTag;
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Scope that attributes allocations to an existing allocation's address rather
+// than to a numeric tag. A null pointer leaves the scope inert.
+FMemScopePtr::FMemScopePtr(uint64_t InPtr)
+{
+	if (InPtr != 0 && TRACE_PRIVATE_CHANNELEXPR_IS_ENABLED(MemAllocChannel))
+	{
+		if (auto LogScope = FMemoryMemoryScopePtrFields::LogScopeType::ScopedEnter<FMemoryMemoryScopePtrFields>())
+		{
+			if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopePtrFields*)(&LogScope))
+			{
+				Inner.SetActive(), LogScope += LogScope << MemoryScope.Ptr(InPtr);
+			}
+		}
+	}
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::~FMemScopePtr()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Utility class that manages tracing the specification of unique LLM tags
+ * and custom name based tags.
+ */
+class FTagTrace
+{
+public:
+	FTagTrace(FMalloc* InMalloc);
+	void AnnounceGenericTags() const;
+	void AnnounceSpecialTags() const;
+	int32 AnnounceCustomTag(int32 Tag, int32 ParentTag, const ANSICHAR* Display) const;
+
+private:
+	// Lock-free hash-set entry holding one announced tag id. Id 0 doubles as
+	// the "empty" marker (NAME_None), so tag 0 can never be stored.
+	struct FTagNameSetEntry
+	{
+		std::atomic_int32_t Data;
+
+		int32_t GetKey() const { return Data.load(std::memory_order_relaxed); }
+		bool GetValue() const { return true; }
+		bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } // NAME_None is treated as empty
+		void SetKeyValue(int32_t Key, bool Value)
+		{
+			ZEN_UNUSED(Value);
+			Data.store(Key, std::memory_order_relaxed);
+		}
+		static uint32_t KeyHash(int32_t Key) { return static_cast<uint32>(Key); }
+		static void ClearEntries(FTagNameSetEntry* Entries, int32_t EntryCount)
+		{
+			memset(Entries, 0, EntryCount * sizeof(FTagNameSetEntry));
+		}
+	};
+	typedef TGrowOnlyLockFreeHash<FTagNameSetEntry, int32_t, bool> FTagNameSet;
+
+	FTagNameSet AnnouncedNames;
+	static FMalloc* Malloc;
+};
+
+FMalloc* FTagTrace::Malloc = nullptr;
+static FTagTrace* GTagTrace = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+// Announces all built-in tags up front so analysis sees names before any
+// allocation events arrive.
+FTagTrace::FTagTrace(FMalloc* InMalloc) : AnnouncedNames(InMalloc)
+{
+	Malloc = InMalloc;
+	AnnouncedNames.Reserve(1024);
+	AnnounceGenericTags();
+	AnnounceSpecialTags();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits a TagSpec event for every generic LLM tag in LLM_ENUM_GENERIC_TAGS.
+void
+FTagTrace::AnnounceGenericTags() const
+{
+#	define TRACE_TAG_SPEC(Enum, Str, ParentTag) \
+		{ \
+			const uint32_t DisplayLen = (uint32_t)StringLength(Str); \
+			UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) \
+				<< TagSpec.Tag((int32_t)ELLMTag::Enum) << TagSpec.Parent((int32_t)ParentTag) << TagSpec.Display(Str, DisplayLen); \
+		}
+	LLM_ENUM_GENERIC_TAGS(TRACE_TAG_SPEC);
+#	undef TRACE_TAG_SPEC
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Announces tags that live outside the generic LLM enum (currently only the
+// tracer's own "Trace" tag, parented to the root).
+void
+FTagTrace::AnnounceSpecialTags() const
+{
+	auto EmitTag = [](const char16_t* DisplayString, int32_t Tag, int32_t ParentTag) {
+		const uint32_t DisplayLen = (uint32_t)StringLength(DisplayString);
+		UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+			<< TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(DisplayString, DisplayLen);
+	};
+
+	EmitTag(u"Trace", TRACE_TAG, -1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits a TagSpec for a caller-defined tag and returns the tag id unchanged.
+int32_t
+FTagTrace::AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const ANSICHAR* Display) const
+{
+	const uint32_t DisplayLen = (uint32_t)StringLength(Display);
+	UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+		<< TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(Display, DisplayLen);
+	return Tag;
+}
+
+} // namespace zen
+
+#endif // UE_MEMORY_TAGS_TRACE_ENABLED
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocates and constructs the global tag tracer with the given allocator
+// (placement new, so the tracer's own storage is not tracked through itself).
+void
+MemoryTrace_InitTags(FMalloc* InMalloc)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+	GTagTrace = (FTagTrace*)InMalloc->Malloc(sizeof(FTagTrace), alignof(FTagTrace));
+	new (GTagTrace) FTagTrace(InMalloc);
+#else
+	ZEN_UNUSED(InMalloc);
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Announces a custom tag; returns the tag id, or -1 when tag tracing is
+// unavailable or not yet initialized.
+int32_t
+MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+	// todo: How do we check if tag trace is active?
+	if (GTagTrace)
+	{
+		return GTagTrace->AnnounceCustomTag(Tag, ParentTag, Display);
+	}
+#else
+	ZEN_UNUSED(Tag, ParentTag, Display);
+#endif
+	return -1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the calling thread's currently active memory tag (-1 if disabled).
+int32_t
+MemoryTrace_GetActiveTag()
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+	return GActiveTag;
+#else
+	return -1;
+#endif
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/tracemalloc.h b/src/zencore/memtrack/tracemalloc.h
new file mode 100644
index 000000000..54606ac45
--- /dev/null
+++ b/src/zencore/memtrack/tracemalloc.h
@@ -0,0 +1,24 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memorytrace.h>
+
+namespace zen {
+
+// FMalloc decorator: forwards every operation to WrappedMalloc and emits the
+// corresponding Alloc/ReallocAlloc/ReallocFree/Free trace events.
+class FTraceMalloc : public FMalloc
+{
+public:
+	FTraceMalloc(FMalloc* InMalloc);
+	virtual ~FTraceMalloc();
+
+	virtual void* Malloc(SIZE_T Count, uint32 Alignment) override;
+	virtual void* Realloc(void* Original, SIZE_T Count, uint32 Alignment) override;
+	virtual void Free(void* Original) override;
+
+	virtual void OnMallocInitialized() override { WrappedMalloc->OnMallocInitialized(); }
+
+	FMalloc* WrappedMalloc;
+};
+
+} // namespace zen
diff --git a/src/zencore/memtrack/vatrace.cpp b/src/zencore/memtrack/vatrace.cpp
new file mode 100644
index 000000000..4dea27f1b
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.cpp
@@ -0,0 +1,361 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+ +#include "vatrace.h" + +#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS + +# include <zencore/memory/memorytrace.h> + +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) +# pragma comment(lib, "mincore.lib") // VirtualAlloc2 +# endif + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +class FTextSectionEditor +{ +public: + ~FTextSectionEditor(); + template<typename T> + T* Hook(T* Target, T* HookFunction); + +private: + struct FTrampolineBlock + { + FTrampolineBlock* Next; + uint32_t Size; + uint32_t Used; + }; + + static void* GetActualAddress(void* Function); + FTrampolineBlock* AllocateTrampolineBlock(void* Reference); + uint8_t* AllocateTrampoline(void* Reference, unsigned int Size); + void* HookImpl(void* Target, void* HookFunction); + FTrampolineBlock* HeadBlock = nullptr; +}; + +//////////////////////////////////////////////////////////////////////////////// +FTextSectionEditor::~FTextSectionEditor() +{ + for (FTrampolineBlock* Block = HeadBlock; Block != nullptr; Block = Block->Next) + { + DWORD Unused; + VirtualProtect(Block, Block->Size, PAGE_EXECUTE_READ, &Unused); + } + + FlushInstructionCache(GetCurrentProcess(), nullptr, 0); +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FTextSectionEditor::GetActualAddress(void* Function) +{ + // Follow a jmp instruction (0xff/4 only for now) at function and returns + // where it would jmp to. 
+ + uint8_t* Addr = (uint8_t*)Function; + int Offset = unsigned(Addr[0] & 0xf0) == 0x40; // REX prefix + if (Addr[Offset + 0] == 0xff && Addr[Offset + 1] == 0x25) + { + Addr += Offset; + Addr = *(uint8_t**)(Addr + 6 + *(uint32_t*)(Addr + 2)); + } + return Addr; +} + +//////////////////////////////////////////////////////////////////////////////// +FTextSectionEditor::FTrampolineBlock* +FTextSectionEditor::AllocateTrampolineBlock(void* Reference) +{ + static const size_t BlockSize = 0x10000; // 64KB is Windows' canonical granularity + + // Find the start of the main allocation that mapped Reference + MEMORY_BASIC_INFORMATION MemInfo; + VirtualQuery(Reference, &MemInfo, sizeof(MemInfo)); + auto* Ptr = (uint8_t*)(MemInfo.AllocationBase); + + // Step backwards one block at a time and try and allocate that address + while (true) + { + Ptr -= BlockSize; + if (VirtualAlloc(Ptr, BlockSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) != nullptr) + { + break; + } + + uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Ptr); + if (Distance >= 1ull << 31) + { + ZEN_ASSERT(!"Failed to allocate trampoline blocks for memory tracing hooks"); + } + } + + auto* Block = (FTrampolineBlock*)Ptr; + Block->Next = HeadBlock; + Block->Size = BlockSize; + Block->Used = sizeof(FTrampolineBlock); + HeadBlock = Block; + + return Block; +} + +//////////////////////////////////////////////////////////////////////////////// +uint8_t* +FTextSectionEditor::AllocateTrampoline(void* Reference, unsigned int Size) +{ + // Try and find a block that's within 2^31 bytes before Reference + FTrampolineBlock* Block; + for (Block = HeadBlock; Block != nullptr; Block = Block->Next) + { + uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Block); + if (Distance < 1ull << 31) + { + break; + } + } + + // If we didn't find a block then we need to allocate a new one. + if (Block == nullptr) + { + Block = AllocateTrampolineBlock(Reference); + } + + // Allocate space for the trampoline. 
    // (Tail of FTextSectionEditor's trampoline-block bump allocator; the
    // function's signature is above this chunk.) Reserves Size bytes from the
    // current block by advancing its Used cursor and returns the old cursor.
    uint32_t NextUsed = Block->Used + Size;
    if (NextUsed > Block->Size)
    {
        // Block is full. We could allocate a new block here but as it is not
        // expected that so many hooks will be made this path shouldn't happen
        ZEN_ASSERT(!"Unable to allocate memory for memory tracing's hooks");
    }

    uint8_t* Out = (uint8_t*)Block + Block->Used;
    Block->Used = NextUsed;

    return Out;
}

////////////////////////////////////////////////////////////////////////////////
// Type-safe wrapper around HookImpl(): redirects Target to HookFunction and
// returns a pointer that can be called as the original (un-hooked) function.
template<typename T>
T*
FTextSectionEditor::Hook(T* Target, T* HookFunction)
{
    return (T*)HookImpl((void*)Target, (void*)HookFunction);
}

////////////////////////////////////////////////////////////////////////////////
// Installs an inline hook at Target:
//  1. measures a whole number of prologue instructions covering at least the
//     first 6 bytes of the function,
//  2. copies those instructions into a freshly allocated trampoline followed
//     by a "jmp rel32" back to the remainder of Target,
//  3. overwrites Target's prologue with an indirect jmp to HookFunction.
// Returns the trampoline entry for the relocated prologue, i.e. a pointer that
// behaves like the original function (callers store it as the *Orig pointer).
void*
FTextSectionEditor::HookImpl(void* Target, void* HookFunction)
{
    Target = GetActualAddress(Target);

    // Very rudimentary x86_64 instruction length decoding that only supports op
    // code ranges (0x80,0x8b) and (0x50,0x5f). Enough for simple prologues
    uint8_t* __restrict Start = (uint8_t*)Target;
    const uint8_t* Read = Start;
    do
    {
        Read += (Read[0] & 0xf0) == 0x40; // REX prefix
        uint8_t Inst = *Read++;
        if (unsigned(Inst - 0x80) < 0x0cu)
        {
            uint8_t ModRm = *Read++;
            Read += ((ModRm & 0300) < 0300) & ((ModRm & 0007) == 0004); // SIB
            switch (ModRm & 0300) // Disp[8|32]
            {
            case 0100:
                Read += 1;
                break;
            case 0200:
                // NOTE(review): mod=10 encodes a 4-byte disp32, so "+= 5"
                // looks one byte too large -- verify against the upstream
                // implementation before touching this self-modifying code.
                Read += 5;
                break;
            }
            Read += (Inst == 0x83); // 0x83 group also carries an imm8 operand
        }
        else if (unsigned(Inst - 0x50) >= 0x10u)
        {
            // Neither push/pop (0x50..0x5f) nor the handled 0x80..0x8b range:
            // length decoding would be wrong, so fail loudly.
            ZEN_ASSERT(!"Unknown instruction");
        }
    } while (Read - Start < 6); // need >= 6 bytes for the FF 25 jmp patch below

    static const int TrampolineSize = 24;
    int PatchSize = int(Read - Start);
    uint8_t* TrampolinePtr = AllocateTrampoline(Start, PatchSize + TrampolineSize);

    // Write the trampoline. The first pointer-sized slot holds HookFunction's
    // address; the FF 25 patch written below jumps indirectly through it.
    *(void**)TrampolinePtr = HookFunction;

    uint8_t* PatchJmp = TrampolinePtr + sizeof(void*);
    memcpy(PatchJmp, Start, PatchSize);

    // Append "jmp rel32" (0xe9) returning to the first unpatched instruction
    // of the original function; rel32 is relative to the end of the 5-byte jmp.
    PatchJmp += PatchSize;
    *PatchJmp = 0xe9;
    *(int32_t*)(PatchJmp + 1) = int32_t(intptr_t(Start + PatchSize) - intptr_t(PatchJmp)) - 5;

    // Need to make the text section writeable
    DWORD ProtPrev;
    uintptr_t ProtBase = uintptr_t(Target) & ~0x0fff; // 0x0fff is mask of VM page size
    size_t ProtSize = ((ProtBase + 16 + 0x1000) & ~0x0fff) - ProtBase; // 16 is enough for one x86 instruction
    VirtualProtect((void*)ProtBase, ProtSize, PAGE_EXECUTE_READWRITE, &ProtPrev);

    // Patch function to jmp to the hook: FF 25 <disp32> = jmp [rip+disp32],
    // where the pointed-to slot is the HookFunction pointer written above.
    uint16_t* HookJmp = (uint16_t*)Target;
    HookJmp[0] = 0x25ff;
    *(int32_t*)(HookJmp + 1) = int32_t(intptr_t(TrampolinePtr) - intptr_t(HookJmp + 3));

    // Put the protection back the way it was
    VirtualProtect((void*)ProtBase, ProtSize, ProtPrev, &ProtPrev);

    // Start of the relocated prologue == callable original function.
    return PatchJmp - PatchSize;
}

//////////////////////////////////////////////////////////////////////////

// Storage for the saved "original" function pointers returned by the hooking
// editor, plus the light-tracing flag. Declarations live in vatrace.h.
bool FVirtualWinApiHooks::bLight;
LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
PVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
# else
LPVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
# endif
BOOL(WINAPI* FVirtualWinApiHooks::VmFreeOrig)(LPVOID, SIZE_T, DWORD);
BOOL(WINAPI* FVirtualWinApiHooks::VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);

// Installs inline hooks over the Win32 virtual-memory API so reserves and
// releases are reported to the memory trace.
// @param bInLight  stored in bLight; not read in this translation unit's
//                  visible code (presumably selects a lighter tracing mode --
//                  TODO confirm at the call site).
void
FVirtualWinApiHooks::Initialize(bool bInLight)
{
    bLight = bInLight;

    FTextSectionEditor Editor;

    // Note that hooking alloc functions is done last as applying the hook can
    // allocate some memory pages.

    VmFreeOrig = Editor.Hook(VirtualFree, &FVirtualWinApiHooks::VmFree);
    VmFreeExOrig = Editor.Hook(VirtualFreeEx, &FVirtualWinApiHooks::VmFreeEx);

# if ZEN_PLATFORM_WINDOWS
#  if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
    {
        VmAlloc2Orig = Editor.Hook(VirtualAlloc2, &FVirtualWinApiHooks::VmAlloc2);
    }
#  else // NTDDI_VERSION
    {
        // Older SDKs don't declare VirtualAlloc2; resolve it dynamically and
        // only hook it when the running OS actually exports it.
        VmAlloc2Orig = nullptr;
        HINSTANCE DllInstance;
        DllInstance = LoadLibrary(TEXT("kernelbase.dll"));
        if (DllInstance != NULL)
        {
#   pragma warning(push)
#   pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'FVirtualWinApiHooks::FnVirtualAlloc2'
            VmAlloc2Orig = (FnVirtualAlloc2)GetProcAddress(DllInstance, "VirtualAlloc2");
#   pragma warning(pop)
            FreeLibrary(DllInstance);
        }
        if (VmAlloc2Orig)
        {
            VmAlloc2Orig = Editor.Hook(VmAlloc2Orig, &FVirtualWinApiHooks::VmAlloc2);
        }
    }
#  endif // NTDDI_VERSION
# endif // PLATFORM_WINDOWS

    VmAllocExOrig = Editor.Hook(VirtualAllocEx, &FVirtualWinApiHooks::VmAllocEx);
    VmAllocOrig = Editor.Hook(VirtualAlloc, &FVirtualWinApiHooks::VmAlloc);
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualAlloc: forwards to the real function, then reports new
// reservations (or top-level commits where the OS chose the address) as
// system-memory heap allocations to the trace.
LPVOID WINAPI
FVirtualWinApiHooks::VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
{
    LPVOID Ret = VmAllocOrig(Address, Size, Type, Protect);

    // Track any reserve for now. Going forward we need events to differentiate reserves/commits and
    // corresponding information on frees.
    if (Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
    {
        MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
        MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
    }

    return Ret;
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualFree: reports releases before delegating to the real
// function. With MEM_RELEASE the whole reservation starting at Address is
// freed (the API requires Size == 0 then), so Address alone identifies the
// tracked allocation.
BOOL WINAPI
FVirtualWinApiHooks::VmFree(LPVOID Address, SIZE_T Size, DWORD Type)
{
    if (Type & MEM_RELEASE)
    {
        MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
        MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
    }

    return VmFreeOrig(Address, Size, Type);
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualAllocEx: same tracking as VmAlloc, but only for allocations
// made in the current process (cross-process allocations are ignored).
LPVOID WINAPI
FVirtualWinApiHooks::VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
{
    LPVOID Ret = VmAllocExOrig(Process, Address, Size, Type, Protect);

    if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
    {
        MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
        MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
    }

    return Ret;
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualFreeEx: same tracking as VmFree, but only for releases made
// in the current process.
BOOL WINAPI
FVirtualWinApiHooks::VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type)
{
    if (Process == GetCurrentProcess() && (Type & MEM_RELEASE))
    {
        MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
        MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
    }

    return VmFreeExOrig(Address, Size, Type);
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualAlloc2 (RS4+ SDKs use the real signature; older SDKs fall
// back to an opaque void* for MEM_EXTENDED_PARAMETER). Tracking mirrors
// VmAllocEx: current-process reserves / OS-placed commits only.
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
PVOID WINAPI
FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
                              PVOID BaseAddress,
                              SIZE_T Size,
                              ULONG Type,
                              ULONG PageProtection,
                              MEM_EXTENDED_PARAMETER* ExtendedParameters,
                              ULONG ParameterCount)
# else
LPVOID WINAPI
FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
                              LPVOID BaseAddress,
                              SIZE_T Size,
                              ULONG Type,
                              ULONG PageProtection,
                              /*MEM_EXTENDED_PARAMETER* */ void* ExtendedParameters,
                              ULONG ParameterCount)
# endif
{
    LPVOID Ret = VmAlloc2Orig(Process, BaseAddress, Size, Type, PageProtection, ExtendedParameters, ParameterCount);

    if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && BaseAddress == nullptr)))
    {
        MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
        MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
    }

    return Ret;
}

} // namespace zen

#endif // PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
diff --git a/src/zencore/memtrack/vatrace.h b/src/zencore/memtrack/vatrace.h
new file mode 100644
index 000000000..59cc7fe97
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.h
@@ -0,0 +1,61 @@
// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once

#include <zenbase/zenbase.h>

// Win32 virtual-memory hooking is available on Windows unless a platform
// header has already decided otherwise.
#if ZEN_PLATFORM_WINDOWS && !defined(PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS)
# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 1
#endif

#ifndef PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 0
#endif

#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
# include <zencore/windows.h>

namespace zen {

// Installs inline hooks over the Win32 virtual-memory API (VirtualAlloc,
// VirtualAllocEx, VirtualAlloc2, VirtualFree, VirtualFreeEx) so reservations
// and releases are forwarded to the memory trace. Each Vm* hook has a matching
// Vm*Orig pointer holding the trampoline to the un-hooked function; all of
// them are filled in by Initialize(). Static-only class; never instantiated.
class FVirtualWinApiHooks
{
public:
	// Applies the hooks. bInLight is stored in bLight (lighter tracing mode --
	// TODO confirm its consumer).
	static void Initialize(bool bInLight);

private:
	FVirtualWinApiHooks(); // not implemented: prevents instantiation
	static bool bLight;
	// Hook entry points; signatures must match the Win32 originals exactly.
	static LPVOID WINAPI VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
	static LPVOID WINAPI VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
	// RS4+ SDKs declare VirtualAlloc2 and MEM_EXTENDED_PARAMETER directly.
	static PVOID WINAPI VmAlloc2(HANDLE Process,
	                             PVOID BaseAddress,
	                             SIZE_T Size,
	                             ULONG AllocationType,
	                             ULONG PageProtection,
	                             MEM_EXTENDED_PARAMETER* ExtendedParameters,
	                             ULONG ParameterCount);
	static PVOID(WINAPI* VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
	typedef PVOID(__stdcall* FnVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
# else
	// Pre-RS4 SDKs lack MEM_EXTENDED_PARAMETER; use an opaque pointer and
	// resolve VirtualAlloc2 dynamically (see FnVirtualAlloc2).
	static LPVOID WINAPI VmAlloc2(HANDLE Process,
	                              LPVOID BaseAddress,
	                              SIZE_T Size,
	                              ULONG AllocationType,
	                              ULONG PageProtection,
	                              void* ExtendedParameters,
	                              ULONG ParameterCount);
	static LPVOID(WINAPI* VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
	typedef LPVOID(__stdcall* FnVirtualAlloc2)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
# endif
	static BOOL WINAPI VmFree(LPVOID Address, SIZE_T Size, DWORD Type);
	static BOOL WINAPI VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type);
	// Trampolines to the original (un-hooked) functions, set by Initialize().
	static LPVOID(WINAPI* VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
	static LPVOID(WINAPI* VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
	static BOOL(WINAPI* VmFreeOrig)(LPVOID, SIZE_T, DWORD);
	static BOOL(WINAPI* VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
};

} // namespace zen

#endif