diff options
| author | Stefan Boberg <[email protected]> | 2024-11-25 09:56:23 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-11-25 09:56:23 +0100 |
| commit | 8b8de92e51db4cc4c1727712c736dcba5f79d369 (patch) | |
| tree | 1f58edaaad389837a7652daebab246125762240e /src/zencore/memtrack | |
| parent | 5.5.13 (diff) | |
| download | zen-8b8de92e51db4cc4c1727712c736dcba5f79d369.tar.xz zen-8b8de92e51db4cc4c1727712c736dcba5f79d369.zip | |
Insights-compatible memory tracking (#214)
This change introduces support for tracing of memory allocation activity. The code is ported from UE5, and Unreal Insights can be used to analyze the output. This is currently only fully supported on Windows, but will be extended to Mac/Linux in the near future.
To activate full memory tracking, pass `--trace=memory` on the commandline alongside `--tracehost=<ip>` or `--tracefile=<path>`. For more control over how much detail is traced you can instead pass some combination of `callstack`, `memtag`, `memalloc`. In practice, `--trace=memory` is an alias for `--trace=callstack,memtag,memalloc`. For convenience we also support `--trace=memory_light` which omits call stacks.
This change also introduces multiple memory allocators, which may be selected via command-line option `--malloc=<allocator>`:
* `mimalloc` - mimalloc (default, same as before)
* `rpmalloc` - rpmalloc is another high performance allocator for multithreaded applications which may be a better option than mimalloc (to be evaluated). Due to toolchain limitations this is currently only supported on Windows.
* `stomp` - an allocator intended to be used during development/debugging to help track down memory issues such as use-after-free or out-of-bounds access. Currently only supported on Windows.
* `ansi` - fallback to default system allocator
Diffstat (limited to 'src/zencore/memtrack')
| -rw-r--r-- | src/zencore/memtrack/callstacktrace.cpp | 1059 | ||||
| -rw-r--r-- | src/zencore/memtrack/callstacktrace.h | 151 | ||||
| -rw-r--r-- | src/zencore/memtrack/growonlylockfreehash.h | 255 | ||||
| -rw-r--r-- | src/zencore/memtrack/memorytrace.cpp | 829 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace.cpp | 296 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace.h | 11 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace_events.cpp | 16 | ||||
| -rw-r--r-- | src/zencore/memtrack/moduletrace_events.h | 27 | ||||
| -rw-r--r-- | src/zencore/memtrack/platformtls.h | 107 | ||||
| -rw-r--r-- | src/zencore/memtrack/tagtrace.cpp | 237 | ||||
| -rw-r--r-- | src/zencore/memtrack/tracemalloc.h | 24 | ||||
| -rw-r--r-- | src/zencore/memtrack/vatrace.cpp | 361 | ||||
| -rw-r--r-- | src/zencore/memtrack/vatrace.h | 61 |
13 files changed, 3434 insertions, 0 deletions
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp new file mode 100644 index 000000000..d860c05d1 --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.cpp @@ -0,0 +1,1059 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "callstacktrace.h" + +#include <zenbase/zenbase.h> +#include <zencore/string.h> + +#if UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +// Platform implementations of back tracing +//////////////////////////////////////////////////////////////////////////////// +void CallstackTrace_CreateInternal(FMalloc*); +void CallstackTrace_InitializeInternal(); + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_CHANNEL_DEFINE(CallstackChannel) +UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec) + +uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot; + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Create(class FMalloc* InMalloc) +{ + static auto InitOnce = [&] { + CallstackTrace_CreateInternal(InMalloc); + return true; + }(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Initialize() +{ + GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot(); + + static auto InitOnce = [&] { + CallstackTrace_InitializeInternal(); + return true; + }(); +} + +} // namespace zen + +#endif + +#if ZEN_PLATFORM_WINDOWS +# include "moduletrace.h" + +# include "growonlylockfreehash.h" + +# include <zencore/scopeguard.h> +# include <zencore/thread.h> +# include <zencore/trace.h> + +# include <atomic> +# include <span> + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <winnt.h> +# include <winternl.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS +# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0 +# endif + +// 0=off, 1=stats, 2=validation, 3=truth_compare +# define BACKTRACE_DBGLVL 0 + +# 
define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0)) + +static bool GModulesAreInitialized = false; + +// This implementation is using unwind tables which is results in very fast +// stack walking. In some cases this is not suitable, and we then fall back +// to the standard stack walking implementation. +# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES) +# if defined(__clang__) +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0 +# else +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1 +# endif +# endif + +// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms +# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER) +# if defined(__clang__) +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0 +# else +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1 +# endif +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB) +// Initial size of the known set of callstacks +# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE) +// If disabled the known set will not grow. 
New callstacks will not be +// reported if the set is full +# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1 +# endif + +namespace zen { + +class FMalloc; + +UE_TRACE_CHANNEL_EXTERN(CallstackChannel) + +UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync) + UE_TRACE_EVENT_FIELD(uint32, CallstackId) + UE_TRACE_EVENT_FIELD(uint64[], Frames) +UE_TRACE_EVENT_END() + +class FCallstackTracer +{ +public: + struct FBacktraceEntry + { + uint64_t Hash = 0; + uint32_t FrameCount = 0; + uint64_t* Frames; + }; + + FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {} + + uint32_t AddCallstack(const FBacktraceEntry& Entry) + { + bool bAlreadyAdded = false; + + // Our set implementation doesn't allow for zero entries (zero represents an empty element + // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation, + // treat it as a "1" instead, for purposes of tracking if we've seen that callstack. + const uint64_t Hash = FMath::Max(Entry.Hash, 1ull); + uint32_t Id; + KnownSet.Find(Hash, &Id, &bAlreadyAdded); + if (!bAlreadyAdded) + { + Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed); + // On the first callstack reserve memory up front + if (Id == 1) + { + KnownSet.Reserve(InitialReserveCount); + } +# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE + // If configured as not growable, start returning unknown id's when full. 
+ if (Id >= InitialReserveCount) + { + return 0; + } +# endif + KnownSet.Emplace(Hash, Id); + UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel) + << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount); + } + + return Id; + } + +private: + struct FEncounteredCallstackSetEntry + { + std::atomic_uint64_t Key; + std::atomic_uint32_t Value; + + inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); } + inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); } + inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t InKey, uint32_t InValue) + { + Value.store(InValue, std::memory_order_release); + Key.store(InKey, std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); } + static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry)); + } + }; + + typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet; + + constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024; + constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry); + + FEncounteredCallstackSet KnownSet; + std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack" +}; + +# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +/* + * Windows' x64 binaries contain a ".pdata" section that describes the location + * and size of its functions and details on how to unwind them. The unwind + * information includes descriptions about a function's stack frame size and + * the non-volatile registers it pushes onto the stack. From this we can + * calculate where a call instruction wrote its return address. 
This is enough + * to walk the callstack and by caching this information it can be done + * efficiently. + * + * Some functions need a variable amount of stack (such as those that use + * alloc() for example) will use a frame pointer. Frame pointers involve saving + * and restoring the stack pointer in the function's prologue/epilogue. This + * frees the function up to modify the stack pointer arbitrarily. This + * significantly complicates establishing where a return address is, so this + * pdata scheme of walking the stack just doesn't support functions like this. + * Walking stops if it encounters such a function. Fortunately there are + * usually very few such functions, saving us from having to read and track + * non-volatile registers which adds a significant amount of work. + * + * A further optimisation is to to assume we are only interested methods that + * are part of engine or game code. As such we only build lookup tables for + * such modules and never accept OS or third party modules. Backtracing stops + * if an address is encountered which doesn't map to a known module. 
+ */ + +//////////////////////////////////////////////////////////////////////////////// +static uint32_t +AddressToId(uintptr_t Address) +{ + return uint32_t(Address >> 16); +} + +static uintptr_t +IdToAddress(uint32_t Id) +{ + return static_cast<uint32_t>(uintptr_t(Id) << 16); +} + +struct FIdPredicate +{ + template<class T> + bool operator()(uint32_t Id, const T& Item) const + { + return Id < Item.Id; + } + template<class T> + bool operator()(const T& Item, uint32_t Id) const + { + return Item.Id < Id; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +struct FUnwindInfo +{ + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t PrologBytes; + uint8_t NumUnwindCodes; + uint8_t FrameReg : 4; + uint8_t FrameRspBias : 4; +}; + +# pragma warning(push) +# pragma warning(disable : 4200) +struct FUnwindCode +{ + uint8_t PrologOffset; + uint8_t OpCode : 4; + uint8_t OpInfo : 4; + uint16_t Params[]; +}; +# pragma warning(pop) + +enum +{ + UWOP_PUSH_NONVOL = 0, // 1 node + UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes + UWOP_ALLOC_SMALL = 2, // 1 node + UWOP_SET_FPREG = 3, // 1 node + UWOP_SAVE_NONVOL = 4, // 2 nodes + UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes + UWOP_SAVE_XMM128 = 8, // 2 nodes + UWOP_SAVE_XMM128_FAR = 9, // 3 nodes + UWOP_PUSH_MACHFRAME = 10, // 1 node +}; + +//////////////////////////////////////////////////////////////////////////////// +class FBacktracer +{ +public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + void AddModule(uintptr_t Base, const char16_t* Name); + void RemoveModule(uintptr_t Base); + uint32_t GetBacktraceId(void* AddressOfReturnAddress); + +private: + struct FFunction + { + uint32_t Id; + int32_t RspBias; +# if BACKTRACE_DBGLVL >= 2 + uint32_t Size; + const FUnwindInfo* UnwindInfo; +# endif + }; + + struct FModule + { + uint32_t Id; + uint32_t IdSize; + uint32_t NumFunctions; +# if BACKTRACE_DBGLVL >= 1 + uint16 NumFpTypes; + // uint16 *padding* +# else + // uint32_t 
*padding* +# endif + FFunction* Functions; + }; + + struct FLookupState + { + FModule Module; + }; + + struct FFunctionLookupSetEntry + { + // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function) + std::atomic_uint64_t Data; + + inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; } + inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; } + inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t Key, int32_t Value) + { + Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) + { + // 64 bit pointer to 32 bit hash + Key = (~Key) + (Key << 21); + Key = Key ^ (Key >> 24); + Key = Key * 265; + Key = Key ^ (Key >> 14); + Key = Key * 21; + Key = Key ^ (Key >> 28); + Key = Key + (Key << 31); + return static_cast<uint32_t>(Key); + } + static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry)); + } + }; + typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet; + + const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const; + static FBacktracer* Instance; + mutable zen::RwLock Lock; + FModule* Modules; + int32_t ModulesNum; + int32_t ModulesCapacity; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; +# if BACKTRACE_LOCK_FREE + mutable FFunctionLookupSet FunctionLookups; + mutable bool bReentranceCheck = false; +# endif +# if BACKTRACE_DBGLVL >= 1 + mutable uint32_t NumFpTruncations = 0; + mutable uint32_t TotalFunctions = 0; +# endif +}; + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* FBacktracer::Instance = nullptr; + +//////////////////////////////////////////////////////////////////////////////// 
+FBacktracer::FBacktracer(FMalloc* InMalloc) +: Malloc(InMalloc) +, CallstackTracer(InMalloc) +# if BACKTRACE_LOCK_FREE +, FunctionLookups(InMalloc) +# endif +{ +# if BACKTRACE_LOCK_FREE + FunctionLookups.Reserve(512 * 1024); // 4 MB +# endif + ModulesCapacity = 8; + ModulesNum = 0; + Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity); + + Instance = this; +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer::~FBacktracer() +{ + std::span<FModule> ModulesView(Modules, ModulesNum); + for (FModule& Module : ModulesView) + { + Malloc->Free(Module.Functions); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* +FBacktracer::Get() +{ + return Instance; +} + +bool GFullBacktraces = false; + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name) +{ + if (!GFullBacktraces) + { + const size_t NameLen = StringLength(Name); + if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe"))) + { + return; + } + } + + const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase; + const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew); + const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader); + + uint32_t NumSections = FileHeader->NumberOfSections; + const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader); + + // Find ".pdata" section + uintptr_t PdataBase = 0; + uintptr_t PdataEnd = 0; + for (uint32_t i = 0; i < NumSections; ++i) + { + const IMAGE_SECTION_HEADER* Section = Sections + i; + if (*(uint64_t*)(Section->Name) == + 0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. 
This constant is '.pdata' + { + PdataBase = ModuleBase + Section->VirtualAddress; + PdataEnd = PdataBase + Section->SizeOfRawData; + break; + } + } + + if (PdataBase == 0) + { + return; + } + + // Count the number of functions. The assumption here is that if we have got this far then there is at least one function + uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION); + if (NumFunctions == 0) + { + return; + } + + const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase; + do + { + const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1; + if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress)) + { + break; + } + + --NumFunctions; + } while (NumFunctions != 0); + + // Allocate some space for the module's function-to-frame-size table + auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions); + FFunction* OutTableCursor = OutTable; + + // Extract frame size for each function from pdata's unwind codes. + uint32_t NumFpFuncs = 0; + for (uint32_t i = 0; i < NumFunctions; ++i) + { + const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i; + + uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress; + const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr; + + if (UnwindInfo->Version != 1) + { + /* some v2s have been seen in msvc. 
Always seem to be assembly + * routines (memset, memcpy, etc) */ + continue; + } + + int32_t FpInfo = 0; + int32_t RspBias = 0; + +# if BACKTRACE_DBGLVL >= 2 + uint32_t PrologVerify = UnwindInfo->PrologBytes; +# endif + + const auto* Code = (FUnwindCode*)(UnwindInfo + 1); + const auto* EndCode = Code + UnwindInfo->NumUnwindCodes; + while (Code < EndCode) + { +# if BACKTRACE_DBGLVL >= 2 + if (Code->PrologOffset > PrologVerify) + { + PLATFORM_BREAK(); + } + PrologVerify = Code->PrologOffset; +# endif + + switch (Code->OpCode) + { + case UWOP_PUSH_NONVOL: + RspBias += 8; + Code += 1; + break; + + case UWOP_ALLOC_LARGE: + if (Code->OpInfo) + { + RspBias += *(uint32_t*)(Code->Params); + Code += 3; + } + else + { + RspBias += Code->Params[0] * 8; + Code += 2; + } + break; + + case UWOP_ALLOC_SMALL: + RspBias += (Code->OpInfo * 8) + 8; + Code += 1; + break; + + case UWOP_SET_FPREG: + // Function will adjust RSP (e.g. through use of alloca()) so it + // uses a frame pointer register. There's instructions like; + // + // push FRAME_REG + // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)] + // ... + // add rsp, rax + // ... + // sub rsp, FRAME_RSP_BIAS * 16 + // pop FRAME_REG + // ret + // + // To recover the stack frame we would need to track non-volatile + // registers which adds a lot of overhead for a small subset of + // functions. Instead we'll end backtraces at these functions. + + // MSB is set to detect variable sized frames that we can't proceed + // past when back-tracing. + NumFpFuncs++; + FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23); + Code += 1; + break; + + case UWOP_PUSH_MACHFRAME: + RspBias = Code->OpInfo ? 
48 : 40; + Code += 1; + break; + + case UWOP_SAVE_NONVOL: + Code += 2; + break; /* saves are movs instead of pushes */ + case UWOP_SAVE_NONVOL_FAR: + Code += 3; + break; + case UWOP_SAVE_XMM128: + Code += 2; + break; + case UWOP_SAVE_XMM128_FAR: + Code += 3; + break; + + default: +# if BACKTRACE_DBGLVL >= 2 + PLATFORM_BREAK(); +# endif + break; + } + } + + // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a + // single actual function in the .text segment. + bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO); + + RspBias /= sizeof(void*); // stack push/popds in units of one machine word + RspBias += !bIsChained; // and one extra push for the ret address + RspBias |= FpInfo; // pack in details about possible frame pointer + + if (bIsChained) + { + OutTableCursor[-1].RspBias += RspBias; +# if BACKTRACE_DBGLVL >= 2 + OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress); +# endif + } + else + { + *OutTableCursor = { + FunctionTable->BeginAddress, + RspBias, +# if BACKTRACE_DBGLVL >= 2 + FunctionTable->EndAddress - FunctionTable->BeginAddress, + UnwindInfo, +# endif + }; + + ++OutTableCursor; + } + } + + uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage; + ModuleSize += 0xffff; // to align up to next 64K page. 
it'll get shifted by AddressToId() + + FModule Module = { + AddressToId(ModuleBase), + AddressToId(ModuleSize), + uint32_t(uintptr_t(OutTableCursor - OutTable)), +# if BACKTRACE_DBGLVL >= 1 + uint16(NumFpFuncs), +# endif + OutTable, + }; + + { + zen::RwLock::ExclusiveLockScope _(Lock); + + if (ModulesNum + 1 > ModulesCapacity) + { + ModulesCapacity += 8; + Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity); + } + Modules[ModulesNum++] = Module; + + std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; }); + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations += NumFpFuncs; + TotalFunctions += NumFunctions; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::RemoveModule(uintptr_t ModuleBase) +{ + // When Windows' RequestExit() is called it hard-terminates all threads except + // the main thread and then proceeds to unload the process' DLLs. This hard + // thread termination can result is dangling locked locks. Not an issue as + // the rule is "do not do anything multithreaded in DLL load/unload". And here + // we are, taking write locks during DLL unload which is, quite unsurprisingly, + // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us + // anything due to how DLLs and processes' address spaces work. So we will... 
+# if defined PLATFORM_WINDOWS + ZEN_UNUSED(ModuleBase); + + return; +# else + + zen::RwLock::ExclusiveLockScope _(Lock); + + uint32_t ModuleId = AddressToId(ModuleBase); + TArrayView<FModule> ModulesView(Modules, ModulesNum); + int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate()); + if (Index >= ModulesNum) + { + return; + } + + const FModule& Module = Modules[Index]; + if (Module.Id != ModuleId) + { + return; + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations -= Module.NumFpTypes; + TotalFunctions -= Module.NumFunctions; +# endif + + // no code should be executing at this point so we can safely free the + // table knowing know one is looking at it. + Malloc->Free(Module.Functions); + + for (SIZE_T i = Index; i < ModulesNum; i++) + { + Modules[i] = Modules[i + 1]; + } + + --ModulesNum; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +const FBacktracer::FFunction* +FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const +{ + // This function caches the previous module look up. The theory here is that + // a series of return address in a backtrace often cluster around one module + + FIdPredicate IdPredicate; + + // Look up the module that Address belongs to. 
+ uint32_t AddressId = AddressToId(Address); + if ((AddressId - State.Module.Id) >= State.Module.IdSize) + { + auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate); + + if (FindIt == Modules) + { + return nullptr; + } + + State.Module = *--FindIt; + } + + // Check that the address is within the address space of the best-found module + const FModule* Module = &(State.Module); + if ((AddressId - Module->Id) >= Module->IdSize) + { + return nullptr; + } + + // Now we've a module we have a table of functions and their stack sizes so + // we can get the frame size for Address + uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id)); + std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions); + auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate); + if (FindIt == begin(FuncsView)) + { + return nullptr; + } + + const FFunction* Function = &(*--FindIt); +# if BACKTRACE_DBGLVL >= 2 + if ((FuncId - Function->Id) >= Function->Size) + { + PLATFORM_BREAK(); + return nullptr; + } +# endif + return Function; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) +{ + FLookupState LookupState = {}; + uint64_t Frames[256]; + + uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress; + +# if BACKTRACE_DBGLVL >= 3 + uintptr_t TruthBacktrace[1024]; + uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr); + uintptr_t* TruthCursor = TruthBacktrace; + for (; *TruthCursor != *StackPointer; ++TruthCursor) + ; +# endif + +# if BACKTRACE_DBGLVL >= 2 + struct + { + void* Sp; + void* Ip; + const FFunction* Function; + } Backtrace[1024] = {}; + uint32_t NumBacktrace = 0; +# endif + + uint64_t BacktraceHash = 0; + uint32_t FrameIdx = 0; + +# if BACKTRACE_LOCK_FREE + // When running lock free, we defer the lock until a lock free function lookup fails + bool Locked = false; 
+# else + FScopeLock _(&Lock); +# endif + do + { + uintptr_t RetAddr = *StackPointer; + + Frames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceHash += RetAddr; + BacktraceHash *= 0x30be8efa499c249dull; + +# if BACKTRACE_LOCK_FREE + int32_t RspBias; + bool bIsAlreadyInTable; + FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable); + if (bIsAlreadyInTable) + { + if (RspBias < 0) + { + break; + } + else + { + StackPointer += RspBias; + continue; + } + } + if (!Locked) + { + Lock.AcquireExclusive(); + Locked = true; + + // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion + // when the allocation reenters the stack trace code. We need to break out of the recursion + // in that case, and let the allocation complete, with the assumption that we don't care + // about call stacks for internal allocations in the memory reporting system. The "Lock()" + // above will only fall through with this flag set if it's a second lock in the same thread. + if (bReentranceCheck) + { + break; + } + } +# endif // BACKTRACE_LOCK_FREE + + const FFunction* Function = LookupFunction(RetAddr, LookupState); + if (Function == nullptr) + { +# if BACKTRACE_LOCK_FREE + // LookupFunction fails when modules are not yet registered. In this case, we do not want the address + // to be added to the lookup map, but to retry the lookup later when modules are properly registered. + if (GModulesAreInitialized) + { + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, -1); + } +# endif + break; + } + +# if BACKTRACE_LOCK_FREE + { + // This conversion improves probing performance for the hash set. Additionally it is critical + // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map. + int32_t StoreBias = Function->RspBias < 0 ? 
-1 : Function->RspBias; + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, StoreBias); + } +# endif + +# if BACKTRACE_DBGLVL >= 2 + if (NumBacktrace < 1024) + { + Backtrace[NumBacktrace++] = { + StackPointer, + (void*)RetAddr, + Function, + }; + } +# endif + + if (Function->RspBias < 0) + { + // This is a frame with a variable-sized stack pointer. We don't + // track enough information to proceed. +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations++; +# endif + break; + } + + StackPointer += Function->RspBias; + } + // Trunkate callstacks longer than MaxStackDepth + while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames)); + + // Build the backtrace entry for submission + FCallstackTracer::FBacktraceEntry BacktraceEntry; + BacktraceEntry.Hash = BacktraceHash; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = Frames; + +# if BACKTRACE_DBGLVL >= 3 + for (uint32_t i = 0; i < NumBacktrace; ++i) + { + if ((void*)TruthCursor[i] != Backtrace[i].Ip) + { + PLATFORM_BREAK(); + break; + } + } +# endif + +# if BACKTRACE_LOCK_FREE + if (Locked) + { + Lock.ReleaseExclusive(); + } +# endif + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). 
+ return CallstackTracer.AddCallstack(BacktraceEntry); +} +} + +# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + + //////////////////////////////////////////////////////////////////////////////// + class FBacktracer + { + public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + inline uint32_t GetBacktraceId(void* AddressOfReturnAddress); + uint32_t GetBacktraceId(uint64_t ReturnAddress); + void AddModule(uintptr_t Base, const char16_t* Name) {} + void RemoveModule(uintptr_t Base) {} + + private: + static FBacktracer* Instance; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; + }; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Instance = nullptr; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; } + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::~FBacktracer() {} + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Get() { return Instance; } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) + { + const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress; + return GetBacktraceId(ReturnAddress); + } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress) + { +# if !UE_BUILD_SHIPPING + uint64_t StackFrames[256]; + int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames)); + if (NumStackFrames > 0) + { + FCallstackTracer::FBacktraceEntry BacktraceEntry; + uint64_t BacktraceId = 0; + uint32_t FrameIdx = 0; + bool bUseAddress = false; + for 
(int32_t Index = 0; Index < NumStackFrames; Index++) + { + if (!bUseAddress) + { + // start using backtrace only after ReturnAddress + if (StackFrames[Index] == (uint64_t)ReturnAddress) + { + bUseAddress = true; + } + } + if (bUseAddress || NumStackFrames == 1) + { + uint64_t RetAddr = StackFrames[Index]; + StackFrames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceId += RetAddr; + BacktraceId *= 0x30be8efa499c249dull; + } + } + + // Save the collected id + BacktraceEntry.Hash = BacktraceId; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = StackFrames; + + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). + return CallstackTracer.AddCallstack(BacktraceEntry); + } +# endif + + return 0; + } + +} + +# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_CreateInternal(FMalloc* Malloc) +{ + if (FBacktracer::Get() != nullptr) + { + return; + } + + // Allocate, construct and intentionally leak backtracer + void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer)); + new (Alloc) FBacktracer(Malloc); + + Modules_Create(Malloc); + Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) { + bLoad ? 
FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522 + : FBacktracer::Get()->RemoveModule(uintptr_t(Module)); + }); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_InitializeInternal() +{ + Modules_Initialize(); + GModulesAreInitialized = true; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +CallstackTrace_GetCurrentId() +{ + if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel)) + { + return 0; + } + + void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING(); + if (FBacktracer* Instance = FBacktracer::Get()) + { +# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER + return Instance->GetBacktraceId(StackAddress); +# else + return Instance->GetBacktraceId((uint64_t)StackAddress); +# endif + } + + return 0; +} + +} // namespace zen + +#endif diff --git a/src/zencore/memtrack/callstacktrace.h b/src/zencore/memtrack/callstacktrace.h new file mode 100644 index 000000000..3e191490b --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.h @@ -0,0 +1,151 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/trace.h> + +#if ZEN_PLATFORM_WINDOWS +# include <intrin.h> + +# define PLATFORM_RETURN_ADDRESS() _ReturnAddress() +# define PLATFORM_RETURN_ADDRESS_POINTER() _AddressOfReturnAddress() +# define PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING PLATFORM_RETURN_ADDRESS_POINTER +#endif + +//////////////////////////////////////////////////////////////////////////////// +#if !defined(UE_CALLSTACK_TRACE_ENABLED) +# if UE_TRACE_ENABLED +# if ZEN_PLATFORM_WINDOWS +# define UE_CALLSTACK_TRACE_ENABLED 1 +# endif +# endif +#endif + +#if !defined(UE_CALLSTACK_TRACE_ENABLED) +# define UE_CALLSTACK_TRACE_ENABLED 0 +#endif + +//////////////////////////////////////////////////////////////////////////////// +#if UE_CALLSTACK_TRACE_ENABLED + +# include "platformtls.h" + +namespace zen { + +/** + * Creates callstack tracing. 
+ * @param Malloc Allocator instance to use. + */ +void CallstackTrace_Create(class FMalloc* Malloc); + +/** + * Initializes callstack tracing. On some platforms this has to be delayed due to initialization order. + */ +void CallstackTrace_Initialize(); + +/** + * Capture the current callstack, and trace the definition if it has not already been encountered. The returned value + * can be used in trace events and be resolved in analysis. + * @return Unique id identifying the current callstack. + */ +uint32_t CallstackTrace_GetCurrentId(); + +/** + * Callstack Trace Scoped Macro to avoid resolving the full callstack + * can be used when some external libraries are not compiled with frame pointers + * preventing us to resolve it without crashing. Instead the callstack will be + * only the caller address. + */ +# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() FCallStackTraceLimitResolveScope PREPROCESSOR_JOIN(FCTLMScope, __LINE__) + +extern uint32_t GCallStackTracingTlsSlotIndex; + +/** + * @return the fallback callstack address + */ +inline void* +CallstackTrace_GetFallbackPlatformReturnAddressData() +{ + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + return FPlatformTLS::GetTlsValue(GCallStackTracingTlsSlotIndex); + else + return nullptr; +} + +/** + * @return Needs full callstack resolve + */ +inline bool +CallstackTrace_ResolveFullCallStack() +{ + return CallstackTrace_GetFallbackPlatformReturnAddressData() == nullptr; +} + +/* + * Callstack Trace scope for override CallStack + */ +class FCallStackTraceLimitResolveScope +{ +public: + ZEN_FORCENOINLINE FCallStackTraceLimitResolveScope() + { + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + { + FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING()); + } + } + + ZEN_FORCENOINLINE ~FCallStackTraceLimitResolveScope() + { + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + { + 
FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, nullptr); + } + } +}; + +} // namespace zen + +#else // UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +inline void +CallstackTrace_Create(class FMalloc* /*Malloc*/) +{ +} + +inline void +CallstackTrace_Initialize() +{ +} + +inline uint32_t +CallstackTrace_GetCurrentId() +{ + return 0; +} + +inline void* +CallstackTrace_GetCurrentReturnAddressData() +{ + return nullptr; +} + +inline void* +CallstackTrace_GetFallbackPlatformReturnAddressData() +{ + return nullptr; +} + +inline bool +CallstackTrace_ResolveFullCallStack() +{ + return true; +} + +# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() + +} // namespace zen + +#endif // UE_CALLSTACK_TRACE_ENABLED diff --git a/src/zencore/memtrack/growonlylockfreehash.h b/src/zencore/memtrack/growonlylockfreehash.h new file mode 100644 index 000000000..d6ff4fc32 --- /dev/null +++ b/src/zencore/memtrack/growonlylockfreehash.h @@ -0,0 +1,255 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> +#include <zencore/intmath.h> +#include <zencore/thread.h> + +#include <zencore/memory/fmalloc.h> + +#include <atomic> + +namespace zen { + +// Hash table with fast lock free reads, that only supports insertion of items, and no modification of +// values. KeyType must be an integer. EntryType should be a POD with an identifiable "empty" state +// that can't occur in the table, and include the following member functions: +// +// KeyType GetKey() const; // Get the key from EntryType +// ValueType GetValue() const; // Get the value from EntryType +// bool IsEmpty() const; // Query whether EntryType is empty +// void SetKeyValue(KeyType Key, ValueType Value); // Write key and value into EntryType (ATOMICALLY! 
See below) +// static uint32 KeyHash(KeyType Key); // Convert Key to more well distributed hash +// static void ClearEntries(EntryType* Entries, int32 EntryCount); // Fill an array of entries with empty values +// +// The function "SetKeyValue" must be multi-thread safe when writing new items! This means writing the +// Key last and atomically, or writing the entire EntryType in a single write (say if the key and value +// are packed into a single integer word). Inline is recommended, since these functions are called a +// lot in the inner loop of the algorithm. A simple implementation of "KeyHash" can just return the +// Key (if it's already reasonable as a hash), or mix the bits if better distribution is required. A +// simple implementation of "ClearEntries" can just be a memset, if zero represents an empty entry. +// +// A set can be approximated by making "GetValue" a nop function, and just paying attention to the bool +// result from FindEntry, although you do need to either reserve a certain Key as invalid, or add +// space to store a valid flag as the Value. This class should only be used for small value types, as +// the values are embedded into the hash table, and not stored separately. +// +// Writes are implemented using a lock -- it would be possible to make writes lock free (or lock free +// when resizing doesn't occur), but it adds complexity. If we were to go that route, it would make +// sense to create a fully generic lock free set, which would be much more involved to implement and +// validate than this simple class, and might also offer somewhat worse read perf. Lock free containers +// that support item removal either need additional synchronization overhead on readers, so writers can +// tell if a reader is active and spin, or need graveyard markers and a garbage collection pass called +// periodically, which makes it no longer a simple standalone container. 
+// +// Lock free reads are accomplished by the reader atomically pulling the hash table pointer from the +// class. The hash table is self contained, with its size stored in the table itself, and hash tables +// are not freed until the class's destruction. So if the table needs to be reallocated due to a write, +// active readers will still have valid memory. This does mean that tables leak, but worst case, you +// end up with half of the memory being waste. It would be possible to garbage collect the excess +// tables, but you'd need some kind of global synchronization to make sure no readers are active. +// +// Besides cleanup of wasted tables, it might be useful to provide a function to clear a table. This +// would involve clearing the Key for all the elements in the table (but leaving the memory allocated), +// and can be done safely with active readers. It's not possible to safely remove individual items due +// to the need to potentially move other items, which would break an active reader that has already +// searched past a moved item. But in the case of removing all items, we don't care when a reader fails, +// it's expected that eventually all readers will fail, regardless of where they are searching. A clear +// function could be useful if a lot of the data you are caching is no longer used, and you want to +// reset the cache. 
+// +template<typename EntryType, typename KeyType, typename ValueType> +class TGrowOnlyLockFreeHash +{ +public: + TGrowOnlyLockFreeHash(FMalloc* InMalloc) : Malloc(InMalloc), HashTable(nullptr) {} + + ~TGrowOnlyLockFreeHash() + { + FHashHeader* HashTableNext; + for (FHashHeader* HashTableCurrent = HashTable; HashTableCurrent; HashTableCurrent = HashTableNext) + { + HashTableNext = HashTableCurrent->Next; + + Malloc->Free(HashTableCurrent); + } + } + + /** + * Preallocate the hash table to a certain size + * @param Count - Number of EntryType elements to allocate + * @warning Can only be called once, and only before any items have been added! + */ + void Reserve(uint32_t Count) + { + zen::RwLock::ExclusiveLockScope _(WriteCriticalSection); + ZEN_ASSERT(HashTable.load(std::memory_order_relaxed) == nullptr); + + if (Count <= 0) + { + Count = DEFAULT_INITIAL_SIZE; + } + Count = uint32_t(zen::NextPow2(Count)); + FHashHeader* HashTableLocal = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (Count - 1) * sizeof(EntryType)); + + HashTableLocal->Next = nullptr; + HashTableLocal->TableSize = Count; + HashTableLocal->Used = 0; + EntryType::ClearEntries(HashTableLocal->Elements, Count); + + HashTable.store(HashTableLocal, std::memory_order_release); + } + + /** + * Find an entry in the hash table + * @param Key - Key to search for + * @param OutValue - Memory location to write result value to. Left unmodified if Key isn't found. + * @param bIsAlreadyInTable - Optional result for whether key was found in table. 
+ */ + void Find(KeyType Key, ValueType* OutValue, bool* bIsAlreadyInTable = nullptr) const + { + FHashHeader* HashTableLocal = HashTable.load(std::memory_order_acquire); + if (HashTableLocal) + { + uint32_t TableMask = HashTableLocal->TableSize - 1; + + // Linear probing + for (uint32_t TableIndex = EntryType::KeyHash(Key) & TableMask; !HashTableLocal->Elements[TableIndex].IsEmpty(); + TableIndex = (TableIndex + 1) & TableMask) + { + if (HashTableLocal->Elements[TableIndex].GetKey() == Key) + { + if (OutValue) + { + *OutValue = HashTableLocal->Elements[TableIndex].GetValue(); + } + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + } + } + + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = false; + } + } + + /** + * Add an entry with the given Key to the hash table, will do nothing if the item already exists + * @param Key - Key to add + * @param Value - Value to add for key + * @param bIsAlreadyInTable -- Optional result for whether item was already in table + */ + void Emplace(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr) + { + zen::RwLock::ExclusiveLockScope _(WriteCriticalSection); + + // After locking, check if the item is already in the hash table. + ValueType ValueIgnore; + bool bFindResult; + Find(Key, &ValueIgnore, &bFindResult); + if (bFindResult == true) + { + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + + // Check if there is space in the hash table for a new item. We resize when the hash + // table gets half full or more. @todo: allow client to specify max load factor? + FHashHeader* HashTableLocal = HashTable; + + if (!HashTableLocal || (HashTableLocal->Used >= HashTableLocal->TableSize / 2)) + { + int32_t GrowCount = HashTableLocal ? 
HashTableLocal->TableSize * 2 : DEFAULT_INITIAL_SIZE; + FHashHeader* HashTableGrow = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (GrowCount - 1) * sizeof(EntryType)); + + HashTableGrow->Next = HashTableLocal; + HashTableGrow->TableSize = GrowCount; + HashTableGrow->Used = 0; + EntryType::ClearEntries(HashTableGrow->Elements, GrowCount); + + if (HashTableLocal) + { + // Copy existing elements from the old table to the new table + for (int32_t TableIndex = 0; TableIndex < HashTableLocal->TableSize; TableIndex++) + { + EntryType& Entry = HashTableLocal->Elements[TableIndex]; + if (!Entry.IsEmpty()) + { + HashInsertInternal(HashTableGrow, Entry.GetKey(), Entry.GetValue()); + } + } + } + + HashTableLocal = HashTableGrow; + HashTable.store(HashTableGrow, std::memory_order_release); + } + + // Then add our new item + HashInsertInternal(HashTableLocal, Key, Value); + + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = false; + } + } + + void FindOrAdd(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr) + { + // Attempt to find the item lock free, before calling "Emplace", which locks the container + bool bFindResult; + ValueType IgnoreResult; + Find(Key, &IgnoreResult, &bFindResult); + if (bFindResult) + { + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + + Emplace(Key, Value, bIsAlreadyInTable); + } + +private: + struct FHashHeader + { + FHashHeader* Next; // Old buffers are stored in a linked list for cleanup + int32_t TableSize; + int32_t Used; + EntryType Elements[1]; // Variable sized + }; + + FMalloc* Malloc; + std::atomic<FHashHeader*> HashTable; + zen::RwLock WriteCriticalSection; + + static constexpr int32_t DEFAULT_INITIAL_SIZE = 1024; + + static void HashInsertInternal(FHashHeader* HashTableLocal, KeyType Key, ValueType Value) + { + int32_t TableMask = HashTableLocal->TableSize - 1; + + // Linear probing + for (int32_t TableIndex = EntryType::KeyHash(Key) & TableMask;; TableIndex = (TableIndex + 1) & TableMask) + { 
+ if (HashTableLocal->Elements[TableIndex].IsEmpty()) + { + HashTableLocal->Elements[TableIndex].SetKeyValue(Key, Value); + HashTableLocal->Used++; + break; + } + } + } +}; + +} // namespace zen diff --git a/src/zencore/memtrack/memorytrace.cpp b/src/zencore/memtrack/memorytrace.cpp new file mode 100644 index 000000000..b147aee91 --- /dev/null +++ b/src/zencore/memtrack/memorytrace.cpp @@ -0,0 +1,829 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/tagtrace.h> + +#include "callstacktrace.h" +#include "tracemalloc.h" +#include "vatrace.h" + +#include <zencore/commandline.h> +#include <zencore/enumflags.h> +#include <zencore/guardvalue.h> +#include <zencore/intmath.h> +#include <zencore/string.h> +#include <zencore/trace.h> + +#include <string.h> + +#if ZEN_PLATFORM_WINDOWS +# include <shellapi.h> +#endif + +class FMalloc; + +#if UE_TRACE_ENABLED +namespace zen { +UE_TRACE_CHANNEL_DEFINE(MemAllocChannel, "Memory allocations", true) +} +#endif + +#if UE_MEMORY_TRACE_ENABLED + +//////////////////////////////////////////////////////////////////////////////// + +namespace zen { + +void MemoryTrace_InitTags(FMalloc*); +void MemoryTrace_EnableTracePump(); + +} // namespace zen + +//////////////////////////////////////////////////////////////////////////////// +namespace { +// Controls how often time markers are emitted (default: every 4095 allocations). +constexpr uint32_t MarkerSamplePeriod = (4 << 10) - 1; + +// Number of shifted bits to SizeLower +constexpr uint32_t SizeShift = 3; + +// Counter to track when time marker is emitted +std::atomic<uint32_t> GMarkerCounter(0); + +// If enabled also pumps the Trace system itself. Used on process shutdown +// when worker thread has been killed, but memory events still occurs. +bool GDoPumpTrace; + +// Temporarily disables any internal operation that causes allocations. 
Used to +// avoid recursive behaviour when memory tracing needs to allocate memory through +// TraceMalloc. +thread_local bool GDoNotAllocateInTrace; + +// Set on initialization; on some platforms we hook allocator functions very early +// before Trace has the ability to allocate memory. +bool GTraceAllowed; +} // namespace + +//////////////////////////////////////////////////////////////////////////////// +namespace UE { namespace Trace { + TRACELOG_API void Update(); +}} // namespace UE::Trace + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_EVENT_BEGIN(Memory, Init, NoSync | Important) + UE_TRACE_EVENT_FIELD(uint64_t, PageSize) // new in UE 5.5 + UE_TRACE_EVENT_FIELD(uint32_t, MarkerPeriod) + UE_TRACE_EVENT_FIELD(uint8, Version) + UE_TRACE_EVENT_FIELD(uint8, MinAlignment) + UE_TRACE_EVENT_FIELD(uint8, SizeShift) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Marker) + UE_TRACE_EVENT_FIELD(uint64_t, Cycle) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Alloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, AllocSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, AllocVideo) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Free) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + 
+UE_TRACE_EVENT_BEGIN(Memory, FreeSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, FreeVideo) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocAllocSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocFree) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocFreeSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, MemorySwapOp) + UE_TRACE_EVENT_FIELD(uint64_t, Address) // page fault real address + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, CompressedSize) + UE_TRACE_EVENT_FIELD(uint8, SwapOp) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, HeapSpec, NoSync | Important) + UE_TRACE_EVENT_FIELD(HeapId, Id) + UE_TRACE_EVENT_FIELD(HeapId, ParentId) + UE_TRACE_EVENT_FIELD(uint16, Flags) + UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, HeapMarkAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint16, Flags) + UE_TRACE_EVENT_FIELD(HeapId, Heap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, 
HeapUnmarkAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(HeapId, Heap) +UE_TRACE_EVENT_END() + +// If the layout of the above events is changed, bump this version number. +// version 1: Initial version (UE 5.0, UE 5.1) +// version 2: Added CallstackId for Free events and also for HeapMarkAlloc, HeapUnmarkAlloc events (UE 5.2). +constexpr uint8 MemoryTraceVersion = 2; + +//////////////////////////////////////////////////////////////////////////////// +class FMallocWrapper : public FMalloc +{ +public: + FMallocWrapper(FMalloc* InMalloc); + +private: + struct FCookie + { + uint64_t Tag : 16; + uint64_t Bias : 8; + uint64_t Size : 40; + }; + + static uint32_t GetActualAlignment(SIZE_T Size, uint32_t Alignment); + + virtual void* Malloc(SIZE_T Size, uint32_t Alignment) override; + virtual void* Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) override; + virtual void Free(void* Address) override; + virtual bool GetAllocationSize(void* Address, SIZE_T& SizeOut) override { return InnerMalloc->GetAllocationSize(Address, SizeOut); } + virtual void OnMallocInitialized() override { InnerMalloc->OnMallocInitialized(); } + + FMalloc* InnerMalloc; +}; + +//////////////////////////////////////////////////////////////////////////////// +FMallocWrapper::FMallocWrapper(FMalloc* InMalloc) : InnerMalloc(InMalloc) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FMallocWrapper::GetActualAlignment(SIZE_T Size, uint32_t Alignment) +{ + // Defaults; if size is < 16 then alignment is 8 else 16. + uint32_t DefaultAlignment = 8 << uint32_t(Size >= 16); + return (Alignment < DefaultAlignment) ? 
DefaultAlignment : Alignment; +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FMallocWrapper::Malloc(SIZE_T Size, uint32_t Alignment) +{ + uint32_t ActualAlignment = GetActualAlignment(Size, Alignment); + void* Address = InnerMalloc->Malloc(Size, Alignment); + + MemoryTrace_Alloc((uint64_t)Address, Size, ActualAlignment); + + return Address; +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FMallocWrapper::Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) +{ + // This simplifies things and means reallocs trace events are true reallocs + if (PrevAddress == nullptr) + { + return Malloc(NewSize, Alignment); + } + + MemoryTrace_ReallocFree((uint64_t)PrevAddress); + + void* RetAddress = InnerMalloc->Realloc(PrevAddress, NewSize, Alignment); + + Alignment = GetActualAlignment(NewSize, Alignment); + MemoryTrace_ReallocAlloc((uint64_t)RetAddress, NewSize, Alignment); + + return RetAddress; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FMallocWrapper::Free(void* Address) +{ + if (Address == nullptr) + { + return; + } + + MemoryTrace_Free((uint64_t)Address); + + void* InnerAddress = Address; + + return InnerMalloc->Free(InnerAddress); +} + +//////////////////////////////////////////////////////////////////////////////// +template<class T> +class alignas(alignof(T)) FUndestructed +{ +public: + template<typename... ArgTypes> + void Construct(ArgTypes... 
Args) + { + ::new (Buffer) T(Args...); + bIsConstructed = true; + } + + bool IsConstructed() const { return bIsConstructed; } + + T* operator&() { return (T*)Buffer; } + T* operator->() { return (T*)Buffer; } + +protected: + uint8 Buffer[sizeof(T)]; + bool bIsConstructed; +}; + +//////////////////////////////////////////////////////////////////////////////// +static FUndestructed<FTraceMalloc> GTraceMalloc; + +//////////////////////////////////////////////////////////////////////////////// +static EMemoryTraceInit +MemoryTrace_ShouldEnable() +{ + EMemoryTraceInit Mode = EMemoryTraceInit::Disabled; + + // Process any command line trace options + // + // Note that calls can come into this function before we enter the regular main function + // and we can therefore not rely on the regular command line parsing for the application + + using namespace std::literals; + + auto ProcessTraceArg = [&](const std::string_view& Arg) { + if (Arg == "memalloc"sv) + { + Mode |= EMemoryTraceInit::AllocEvents; + } + else if (Arg == "callstack"sv) + { + Mode |= EMemoryTraceInit::Callstacks; + } + else if (Arg == "memtag"sv) + { + Mode |= EMemoryTraceInit::Tags; + } + else if (Arg == "memory"sv) + { + Mode |= EMemoryTraceInit::Full; + } + else if (Arg == "memory_light"sv) + { + Mode |= EMemoryTraceInit::Light; + } + }; + + constexpr std::string_view TraceOption = "--trace="sv; + + std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) { + if (Arg.starts_with(TraceOption)) + { + const std::string_view OptionArgs = Arg.substr(TraceOption.size()); + + IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg); + } + }; + + IterateCommandlineArgs(ProcessArg); + + return Mode; +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_CreateInternal(FMalloc* InMalloc, EMemoryTraceInit Mode) +{ + using namespace zen; + + // If allocation events are not desired we don't need to do anything, even + // if user 
has enabled only callstacks it will be enabled later. + if (!EnumHasAnyFlags(Mode, EMemoryTraceInit::AllocEvents)) + { + return InMalloc; + } + + // Some OSes (i.e. Windows) will terminate all threads except the main + // one as part of static deinit. However we may receive more memory + // trace events that would get lost as Trace's worker thread has been + // terminated. So flush the last remaining memory events trace needs + // to be updated which we will do that in response to to memory events. + // We'll use an atexit can to know when Trace is probably no longer + // getting ticked. + atexit([]() { MemoryTrace_EnableTracePump(); }); + + GTraceMalloc.Construct(InMalloc); + + // Both tag and callstack tracing need to use the wrapped trace malloc + // so we can break out tracing memory overhead (and not cause recursive behaviour). + if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Tags)) + { + MemoryTrace_InitTags(>raceMalloc); + } + + if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Callstacks)) + { + CallstackTrace_Create(>raceMalloc); + } + + static FUndestructed<FMallocWrapper> SMallocWrapper; + SMallocWrapper.Construct(InMalloc); + + return &SMallocWrapper; +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_CreateInternal(FMalloc* InMalloc) +{ + const EMemoryTraceInit Mode = MemoryTrace_ShouldEnable(); + return MemoryTrace_CreateInternal(InMalloc, Mode); +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_Create(FMalloc* InMalloc) +{ + FMalloc* OutMalloc = MemoryTrace_CreateInternal(InMalloc); + + if (OutMalloc != InMalloc) + { +# if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS + FVirtualWinApiHooks::Initialize(false); +# endif + } + + return OutMalloc; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Initialize() +{ + // At this point we initialized the system to allow tracing. 
+ GTraceAllowed = true; + + const int MIN_ALIGNMENT = 8; + + UE_TRACE_LOG(Memory, Init, MemAllocChannel) + << Init.PageSize(4096) << Init.MarkerPeriod(MarkerSamplePeriod + 1) << Init.Version(MemoryTraceVersion) + << Init.MinAlignment(uint8(MIN_ALIGNMENT)) << Init.SizeShift(uint8(SizeShift)); + + const HeapId SystemRootHeap = MemoryTrace_RootHeapSpec(u"System memory"); + ZEN_ASSERT(SystemRootHeap == EMemoryTraceRootHeap::SystemMemory); + const HeapId VideoRootHeap = MemoryTrace_RootHeapSpec(u"Video memory"); + ZEN_ASSERT(VideoRootHeap == EMemoryTraceRootHeap::VideoMemory); + + static_assert((1 << SizeShift) - 1 <= MIN_ALIGNMENT, "Not enough bits to pack size fields"); + +# if !UE_MEMORY_TRACE_LATE_INIT + // On some platforms callstack initialization cannot happen this early in the process. It is initialized + // in other locations when UE_MEMORY_TRACE_LATE_INIT is defined. Until that point allocations cannot have + // callstacks. + CallstackTrace_Initialize(); +# endif +} + +void +MemoryTrace_Shutdown() +{ + // Disable any further activity + GTraceAllowed = false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool +MemoryTrace_IsActive() +{ + return GTraceAllowed; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_EnableTracePump() +{ + GDoPumpTrace = true; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_UpdateInternal() +{ + const uint32_t TheCount = GMarkerCounter.fetch_add(1, std::memory_order_relaxed); + if ((TheCount & MarkerSamplePeriod) == 0) + { + UE_TRACE_LOG(Memory, Marker, MemAllocChannel) << Marker.Cycle(UE::Trace::Private::TimeGetTimestamp()); + } + + if (GDoPumpTrace) + { + UE::Trace::Update(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Alloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t 
ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, AllocSystem, MemAllocChannel) + << AllocSystem.Address(uint64_t(Address)) << AllocSystem.CallstackId(CallstackId) + << AllocSystem.Size(uint32_t(Size >> SizeShift)) << AllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + case EMemoryTraceRootHeap::VideoMemory: + { + UE_TRACE_LOG(Memory, AllocVideo, MemAllocChannel) + << AllocVideo.Address(uint64_t(Address)) << AllocVideo.CallstackId(CallstackId) + << AllocVideo.Size(uint32_t(Size >> SizeShift)) << AllocVideo.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + default: + { + UE_TRACE_LOG(Memory, Alloc, MemAllocChannel) + << Alloc.Address(uint64_t(Address)) << Alloc.CallstackId(CallstackId) << Alloc.Size(uint32_t(Size >> SizeShift)) + << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Free(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, FreeSystem, MemAllocChannel) + << FreeSystem.Address(uint64_t(Address)) << FreeSystem.CallstackId(CallstackId); + break; + } + case EMemoryTraceRootHeap::VideoMemory: + { + UE_TRACE_LOG(Memory, FreeVideo, MemAllocChannel) + << FreeVideo.Address(uint64_t(Address)) << FreeVideo.CallstackId(CallstackId); + break; + } + default: + { + UE_TRACE_LOG(Memory, Free, MemAllocChannel) + << Free.Address(uint64_t(Address)) << Free.CallstackId(CallstackId) << Free.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_ReallocAlloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, ReallocAllocSystem, MemAllocChannel) + << ReallocAllocSystem.Address(uint64_t(Address)) << ReallocAllocSystem.CallstackId(CallstackId) + << ReallocAllocSystem.Size(uint32_t(Size >> SizeShift)) + << ReallocAllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + default: + { + UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel) + << ReallocAlloc.Address(uint64_t(Address)) << ReallocAlloc.CallstackId(CallstackId) + << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) + << ReallocAlloc.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, ReallocFreeSystem, MemAllocChannel) + << ReallocFreeSystem.Address(uint64_t(Address)) << ReallocFreeSystem.CallstackId(CallstackId); + break; + } + + default: + { + UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel) + << ReallocFree.Address(uint64_t(Address)) << ReallocFree.CallstackId(CallstackId) + << ReallocFree.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize, uint32_t CallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + UE_TRACE_LOG(Memory, MemorySwapOp, MemAllocChannel) + << MemorySwapOp.Address(PageAddress) << MemorySwapOp.CallstackId(CallstackId) << MemorySwapOp.CompressedSize(CompressedSize) + << MemorySwapOp.SwapOp((uint8)SwapOperation); + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +HeapId +MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags) +{ + if (!GTraceAllowed) + { + return 0; + } + + static std::atomic<HeapId> HeapIdCount(EMemoryTraceRootHeap::EndReserved + 1); // Reserve indexes for root heaps + const HeapId Id = HeapIdCount.fetch_add(1); + const uint32_t NameLen = uint32_t(zen::StringLength(Name)); + const uint32_t DataSize = NameLen * sizeof(char16_t); + ZEN_ASSERT(ParentId < Id); + + UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize) + << HeapSpec.Id(Id) << HeapSpec.ParentId(ParentId) << HeapSpec.Name(Name, NameLen) << HeapSpec.Flags(uint16(Flags)); + + return Id; +} + +//////////////////////////////////////////////////////////////////////////////// +HeapId +MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags) +{ + if (!GTraceAllowed) + { + 
return 0; + } + + static std::atomic<HeapId> RootHeapCount(0); + const HeapId Id = RootHeapCount.fetch_add(1); + ZEN_ASSERT(Id <= EMemoryTraceRootHeap::EndReserved); + + const uint32_t NameLen = uint32_t(zen::StringLength(Name)); + const uint32_t DataSize = NameLen * sizeof(char16_t); + + UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize) + << HeapSpec.Id(Id) << HeapSpec.ParentId(HeapId(~0)) << HeapSpec.Name(Name, NameLen) + << HeapSpec.Flags(uint16(EMemoryTraceHeapFlags::Root | Flags)); + + return Id; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_MarkAllocAsHeap(uint64_t Address, HeapId Heap, EMemoryTraceHeapAllocationFlags Flags, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId(); + + UE_TRACE_LOG(Memory, HeapMarkAlloc, MemAllocChannel) + << HeapMarkAlloc.Address(uint64_t(Address)) << HeapMarkAlloc.CallstackId(CallstackId) + << HeapMarkAlloc.Flags(uint16(EMemoryTraceHeapAllocationFlags::Heap | Flags)) << HeapMarkAlloc.Heap(Heap); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + // Sets all flags to zero + UE_TRACE_LOG(Memory, HeapUnmarkAlloc, MemAllocChannel) + << HeapUnmarkAlloc.Address(uint64_t(Address)) << HeapUnmarkAlloc.CallstackId(CallstackId) << HeapUnmarkAlloc.Heap(Heap); +} + +} // namespace zen + +#else // UE_MEMORY_TRACE_ENABLED + +///////////////////////////////////////////////////////////////////////////// +bool +MemoryTrace_IsActive() +{ + return false; +} + +#endif // UE_MEMORY_TRACE_ENABLED + +namespace zen { + +///////////////////////////////////////////////////////////////////////////// +FTraceMalloc::FTraceMalloc(FMalloc* InMalloc) +{ + WrappedMalloc = InMalloc; +} + +///////////////////////////////////////////////////////////////////////////// +FTraceMalloc::~FTraceMalloc() +{ +} + +///////////////////////////////////////////////////////////////////////////// +void* +FTraceMalloc::Malloc(SIZE_T Count, uint32_t Alignment) +{ +#if UE_MEMORY_TRACE_ENABLED + // UE_TRACE_METADATA_CLEAR_SCOPE(); + UE_MEMSCOPE(TRACE_TAG); + + void* NewPtr; + { + zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true); + NewPtr = WrappedMalloc->Malloc(Count, Alignment); + } + + const uint64_t Size = Count; + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + + UE_TRACE_LOG(Memory, Alloc, MemAllocChannel) + << Alloc.Address(uint64_t(NewPtr)) << Alloc.CallstackId(0) << Alloc.Size(uint32_t(Size >> SizeShift)) + << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + return NewPtr; +#else + return WrappedMalloc->Malloc(Count, Alignment); +#endif // UE_MEMORY_TRACE_ENABLED +} + +///////////////////////////////////////////////////////////////////////////// +void* +FTraceMalloc::Realloc(void* Original, SIZE_T Count, uint32_t Alignment) +{ +#if UE_MEMORY_TRACE_ENABLED + // UE_TRACE_METADATA_CLEAR_SCOPE(); + 
UE_MEMSCOPE(TRACE_TAG); + + UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel) + << ReallocFree.Address(uint64_t(Original)) << ReallocFree.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + void* NewPtr; + { + zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true); + NewPtr = WrappedMalloc->Realloc(Original, Count, Alignment); + } + + const uint64_t Size = Count; + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + + UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel) + << ReallocAlloc.Address(uint64_t(NewPtr)) << ReallocAlloc.CallstackId(0) << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) + << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) + << ReallocAlloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + return NewPtr; +#else + return WrappedMalloc->Realloc(Original, Count, Alignment); +#endif // UE_MEMORY_TRACE_ENABLED +} + +///////////////////////////////////////////////////////////////////////////// +void +FTraceMalloc::Free(void* Original) +{ +#if UE_MEMORY_TRACE_ENABLED + UE_TRACE_LOG(Memory, Free, MemAllocChannel) + << Free.Address(uint64_t(Original)) << Free.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory)); + + { + zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true); + WrappedMalloc->Free(Original); + } +#else + WrappedMalloc->Free(Original); +#endif // UE_MEMORY_TRACE_ENABLED +} + +} // namespace zen diff --git a/src/zencore/memtrack/moduletrace.cpp b/src/zencore/memtrack/moduletrace.cpp new file mode 100644 index 000000000..51280ff3a --- /dev/null +++ b/src/zencore/memtrack/moduletrace.cpp @@ -0,0 +1,296 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenbase/zenbase.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/tagtrace.h> + +#if ZEN_PLATFORM_WINDOWS +# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 1 +#else +# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 0 +#endif + +#include "moduletrace_events.h" + +#if PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <winternl.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# include <zencore/trace.h> + +# include <array> + +namespace zen { + +class FMalloc; + +typedef uint32_t HeapId; + +//////////////////////////////////////////////////////////////////////////////// +struct FNtDllFunction +{ + FARPROC Addr; + + FNtDllFunction(const char* Name) + { + HMODULE NtDll = LoadLibraryW(L"ntdll.dll"); + ZEN_ASSERT(NtDll); + Addr = GetProcAddress(NtDll, Name); + } + + template<typename... ArgTypes> + unsigned int operator()(ArgTypes... Args) + { + typedef unsigned int(NTAPI * Prototype)(ArgTypes...); + return (Prototype((void*)Addr))(Args...); + } +}; + +////////////////////////////////////////////////////////////////////////////////7777 +class FModuleTrace +{ +public: + typedef void (*SubscribeFunc)(bool, void*, const char16_t*); + + FModuleTrace(FMalloc* InMalloc); + ~FModuleTrace(); + static FModuleTrace* Get(); + void Initialize(); + void Subscribe(SubscribeFunc Function); + +private: + void OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base); + void OnDllUnloaded(uintptr_t Base); + void OnDllNotification(unsigned int Reason, const void* DataPtr); + static FModuleTrace* Instance; + SubscribeFunc Subscribers[64]; + int SubscriberCount = 0; + void* CallbackCookie = nullptr; + HeapId ProgramHeapId = 0; +}; + +//////////////////////////////////////////////////////////////////////////////// +FModuleTrace* FModuleTrace::Instance = nullptr; + +//////////////////////////////////////////////////////////////////////////////// 
+FModuleTrace::FModuleTrace(FMalloc* InMalloc) +{ + ZEN_UNUSED(InMalloc); + Instance = this; +} + +//////////////////////////////////////////////////////////////////////////////// +FModuleTrace::~FModuleTrace() +{ + if (CallbackCookie) + { + FNtDllFunction UnregisterFunc("LdrUnregisterDllNotification"); + UnregisterFunc(CallbackCookie); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FModuleTrace* +FModuleTrace::Get() +{ + return Instance; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::Initialize() +{ + using namespace UE::Trace; + + ProgramHeapId = MemoryTrace_HeapSpec(SystemMemory, u"Module", EMemoryTraceHeapFlags::None); + + UE_TRACE_LOG(Diagnostics, ModuleInit, ModuleChannel, sizeof(char) * 3) + << ModuleInit.SymbolFormat("pdb", 3) << ModuleInit.ModuleBaseShift(uint8(0)); + + // Register for DLL load/unload notifications. + auto Thunk = [](ULONG Reason, const void* Data, void* Context) { + auto* Self = (FModuleTrace*)Context; + Self->OnDllNotification(Reason, Data); + }; + + typedef void(CALLBACK * ThunkType)(ULONG, const void*, void*); + auto ThunkImpl = ThunkType(Thunk); + + FNtDllFunction RegisterFunc("LdrRegisterDllNotification"); + RegisterFunc(0, ThunkImpl, this, &CallbackCookie); + + // Enumerate already loaded modules. 
+ const TEB* ThreadEnvBlock = NtCurrentTeb(); + const PEB* ProcessEnvBlock = ThreadEnvBlock->ProcessEnvironmentBlock; + const LIST_ENTRY* ModuleIter = ProcessEnvBlock->Ldr->InMemoryOrderModuleList.Flink; + const LIST_ENTRY* ModuleIterEnd = ModuleIter->Blink; + do + { + const auto& ModuleData = *(LDR_DATA_TABLE_ENTRY*)(ModuleIter - 1); + if (ModuleData.DllBase == 0) + { + break; + } + + OnDllLoaded(ModuleData.FullDllName, UPTRINT(ModuleData.DllBase)); + ModuleIter = ModuleIter->Flink; + } while (ModuleIter != ModuleIterEnd); +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::Subscribe(SubscribeFunc Function) +{ + ZEN_ASSERT(SubscriberCount < ZEN_ARRAY_COUNT(Subscribers)); + Subscribers[SubscriberCount++] = Function; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::OnDllNotification(unsigned int Reason, const void* DataPtr) +{ + enum + { + LDR_DLL_NOTIFICATION_REASON_LOADED = 1, + LDR_DLL_NOTIFICATION_REASON_UNLOADED = 2, + }; + + struct FNotificationData + { + uint32_t Flags; + const UNICODE_STRING& FullPath; + const UNICODE_STRING& BaseName; + uintptr_t Base; + }; + const auto& Data = *(FNotificationData*)DataPtr; + + switch (Reason) + { + case LDR_DLL_NOTIFICATION_REASON_LOADED: + OnDllLoaded(Data.FullPath, Data.Base); + break; + case LDR_DLL_NOTIFICATION_REASON_UNLOADED: + OnDllUnloaded(Data.Base); + break; + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::OnDllLoaded(const UNICODE_STRING& Name, UPTRINT Base) +{ + const auto* DosHeader = (IMAGE_DOS_HEADER*)Base; + const auto* NtHeaders = (IMAGE_NT_HEADERS*)(Base + DosHeader->e_lfanew); + const IMAGE_OPTIONAL_HEADER& OptionalHeader = NtHeaders->OptionalHeader; + uint8_t ImageId[20]; + + // Find the guid and age of the binary, used to match debug files + const IMAGE_DATA_DIRECTORY& DebugInfoEntry = 
OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG]; + const auto* DebugEntries = (IMAGE_DEBUG_DIRECTORY*)(Base + DebugInfoEntry.VirtualAddress); + for (uint32_t i = 0, n = DebugInfoEntry.Size / sizeof(DebugEntries[0]); i < n; ++i) + { + const IMAGE_DEBUG_DIRECTORY& Entry = DebugEntries[i]; + if (Entry.Type == IMAGE_DEBUG_TYPE_CODEVIEW) + { + struct FCodeView7 + { + uint32_t Signature; + uint32_t Guid[4]; + uint32_t Age; + }; + + if (Entry.SizeOfData < sizeof(FCodeView7)) + { + continue; + } + + const auto* CodeView7 = (FCodeView7*)(Base + Entry.AddressOfRawData); + if (CodeView7->Signature != 'SDSR') + { + continue; + } + + memcpy(ImageId, (uint8_t*)&CodeView7->Guid, sizeof(uint32_t) * 4); + memcpy(&ImageId[16], (uint8_t*)&CodeView7->Age, sizeof(uint32_t)); + break; + } + } + + // Note: UNICODE_STRING.Length is the size in bytes of the string buffer. + UE_TRACE_LOG(Diagnostics, ModuleLoad, ModuleChannel, uint32_t(Name.Length + sizeof(ImageId))) + << ModuleLoad.Name((const char16_t*)Name.Buffer, Name.Length / 2) << ModuleLoad.Base(uint64_t(Base)) + << ModuleLoad.Size(OptionalHeader.SizeOfImage) << ModuleLoad.ImageId(ImageId, uint32_t(sizeof(ImageId))); + +# if UE_MEMORY_TRACE_ENABLED + { + UE_MEMSCOPE(ELLMTag::ProgramSize); + MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_MarkAllocAsHeap(Base, ProgramHeapId); + MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory); + } +# endif // UE_MEMORY_TRACE_ENABLED + + for (int i = 0; i < SubscriberCount; ++i) + { + Subscribers[i](true, (void*)Base, (const char16_t*)Name.Buffer); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::OnDllUnloaded(UPTRINT Base) +{ +# if UE_MEMORY_TRACE_ENABLED + MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_UnmarkAllocAsHeap(Base, ProgramHeapId); + MemoryTrace_Free(Base, 
EMemoryTraceRootHeap::SystemMemory); +# endif // UE_MEMORY_TRACE_ENABLED + + UE_TRACE_LOG(Diagnostics, ModuleUnload, ModuleChannel) << ModuleUnload.Base(uint64(Base)); + + for (int i = 0; i < SubscriberCount; ++i) + { + Subscribers[i](false, (void*)Base, nullptr); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +Modules_Create(FMalloc* Malloc) +{ + if (FModuleTrace::Get() != nullptr) + { + return; + } + + static FModuleTrace Instance(Malloc); +} + +//////////////////////////////////////////////////////////////////////////////// +void +Modules_Initialize() +{ + if (FModuleTrace* Instance = FModuleTrace::Get()) + { + Instance->Initialize(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +Modules_Subscribe(void (*Function)(bool, void*, const char16_t*)) +{ + if (FModuleTrace* Instance = FModuleTrace::Get()) + { + Instance->Subscribe(Function); + } +} + +} // namespace zen + +#endif // PLATFORM_SUPPORTS_WIN32_MEMORY_TRACE diff --git a/src/zencore/memtrack/moduletrace.h b/src/zencore/memtrack/moduletrace.h new file mode 100644 index 000000000..5e7374faa --- /dev/null +++ b/src/zencore/memtrack/moduletrace.h @@ -0,0 +1,11 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +namespace zen { + +void Modules_Create(class FMalloc*); +void Modules_Subscribe(void (*)(bool, void*, const char16_t*)); +void Modules_Initialize(); + +} // namespace zen diff --git a/src/zencore/memtrack/moduletrace_events.cpp b/src/zencore/memtrack/moduletrace_events.cpp new file mode 100644 index 000000000..9c6a9b648 --- /dev/null +++ b/src/zencore/memtrack/moduletrace_events.cpp @@ -0,0 +1,16 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zencore/trace.h> + +#include "moduletrace_events.h" + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_CHANNEL_DEFINE(ModuleChannel, "Module information needed for symbols resolution", true) + +UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleInit) +UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleLoad) +UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleUnload) + +} // namespace zen diff --git a/src/zencore/memtrack/moduletrace_events.h b/src/zencore/memtrack/moduletrace_events.h new file mode 100644 index 000000000..1bda42fe8 --- /dev/null +++ b/src/zencore/memtrack/moduletrace_events.h @@ -0,0 +1,27 @@ +// Copyright Epic Games, Inc. All Rights Reserved. +#pragma once + +#include <zencore/trace.h> + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_CHANNEL_EXTERN(ModuleChannel) + +UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleInit, NoSync | Important) + UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, SymbolFormat) + UE_TRACE_EVENT_FIELD(uint8, ModuleBaseShift) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleLoad, NoSync | Important) + UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name) + UE_TRACE_EVENT_FIELD(uint64, Base) + UE_TRACE_EVENT_FIELD(uint32, Size) + UE_TRACE_EVENT_FIELD(uint8[], ImageId) // Platform specific id for this image, used to match debug files were available +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleUnload, NoSync | Important) + UE_TRACE_EVENT_FIELD(uint64, Base) +UE_TRACE_EVENT_END() + +} // namespace zen diff --git a/src/zencore/memtrack/platformtls.h b/src/zencore/memtrack/platformtls.h new file mode 100644 index 000000000..f134e68a8 --- /dev/null +++ b/src/zencore/memtrack/platformtls.h @@ -0,0 +1,107 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zenbase/zenbase.h> + +/** + * It should be possible to provide a generic implementation as long as a threadID is provided. We don't do that yet. + */ +struct FGenericPlatformTLS +{ + static const uint32_t InvalidTlsSlot = 0xFFFFFFFF; + + /** + * Return false if this is an invalid TLS slot + * @param SlotIndex the TLS index to check + * @return true if this looks like a valid slot + */ + static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; } +}; + +#if ZEN_PLATFORM_WINDOWS + +# include <zencore/windows.h> + +class FWindowsPlatformTLS : public FGenericPlatformTLS +{ +public: + static uint32_t AllocTlsSlot() { return ::TlsAlloc(); } + + static void FreeTlsSlot(uint32_t SlotIndex) { ::TlsFree(SlotIndex); } + + static void SetTlsValue(uint32_t SlotIndex, void* Value) { ::TlsSetValue(SlotIndex, Value); } + + /** + * Reads the value stored at the specified TLS slot + * + * @return the value stored in the slot + */ + static void* GetTlsValue(uint32_t SlotIndex) { return ::TlsGetValue(SlotIndex); } + + /** + * Return false if this is an invalid TLS slot + * @param SlotIndex the TLS index to check + * @return true if this looks like a valid slot + */ + static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; } +}; + +typedef FWindowsPlatformTLS FPlatformTLS; + +#elif ZEN_PLATFORM_MAC + +# include <pthread.h + +/** + * Apple implementation of the TLS OS functions + **/ +struct FApplePlatformTLS : public FGenericPlatformTLS +{ + /** + * Returns the currently executing thread's id + */ + static uint32_t GetCurrentThreadId(void) { return (uint32_t)pthread_mach_thread_np(pthread_self()); } + + /** + * Allocates a thread local store slot + */ + static uint32_t AllocTlsSlot(void) + { + // allocate a per-thread mem slot + pthread_key_t SlotKey = 0; + if (pthread_key_create(&SlotKey, NULL) != 0) + { + SlotKey = InvalidTlsSlot; // matches the Windows TlsAlloc() retval. 
+ } + return SlotKey; + } + + /** + * Sets a value in the specified TLS slot + * + * @param SlotIndex the TLS index to store it in + * @param Value the value to store in the slot + */ + static void SetTlsValue(uint32_t SlotIndex, void* Value) { pthread_setspecific((pthread_key_t)SlotIndex, Value); } + + /** + * Reads the value stored at the specified TLS slot + * + * @return the value stored in the slot + */ + static void* GetTlsValue(uint32_t SlotIndex) { return pthread_getspecific((pthread_key_t)SlotIndex); } + + /** + * Frees a previously allocated TLS slot + * + * @param SlotIndex the TLS index to store it in + */ + static void FreeTlsSlot(uint32_t SlotIndex) { pthread_key_delete((pthread_key_t)SlotIndex); } +}; + +typedef FApplePlatformTLS FPlatformTLS; + +#else +# error Platform not yet supported +#endif diff --git a/src/zencore/memtrack/tagtrace.cpp b/src/zencore/memtrack/tagtrace.cpp new file mode 100644 index 000000000..15ba78ae4 --- /dev/null +++ b/src/zencore/memtrack/tagtrace.cpp @@ -0,0 +1,237 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/tagtrace.h> + +#include "growonlylockfreehash.h" + +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + +# include <zencore/string.h> + +namespace zen { +//////////////////////////////////////////////////////////////////////////////// + +UE_TRACE_CHANNEL_EXTERN(MemAllocChannel); + +UE_TRACE_EVENT_BEGIN(Memory, TagSpec, Important | NoSync) + UE_TRACE_EVENT_FIELD(int32, Tag) + UE_TRACE_EVENT_FIELD(int32, Parent) + UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, Display) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, MemoryScope, NoSync) + UE_TRACE_EVENT_FIELD(int32, Tag) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, MemoryScopePtr, NoSync) + UE_TRACE_EVENT_FIELD(uint64, Ptr) +UE_TRACE_EVENT_END() + +//////////////////////////////////////////////////////////////////////////////// +// Per thread active tag, i.e. the top level FMemScope +thread_local int32 GActiveTag; + +//////////////////////////////////////////////////////////////////////////////// +FMemScope::FMemScope() +{ +} + +FMemScope::FMemScope(int32_t InTag, bool bShouldActivate /*= true*/) +{ + if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate) + { + ActivateScope(InTag); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FMemScope::FMemScope(ELLMTag InTag, bool bShouldActivate /*= true*/) +{ + if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate) + { + ActivateScope(static_cast<int32>(InTag)); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +FMemScope::ActivateScope(int32_t InTag) +{ + if (auto LogScope = FMemoryMemoryScopeFields::LogScopeType::ScopedEnter<FMemoryMemoryScopeFields>()) + { + if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopeFields*)(&LogScope)) + { + Inner.SetActive(); + LogScope += LogScope << MemoryScope.Tag(InTag); + PrevTag = GActiveTag; + 
GActiveTag = InTag; + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +FMemScope::~FMemScope() +{ + if (Inner.bActive) + { + GActiveTag = PrevTag; + } +} + +//////////////////////////////////////////////////////////////////////////////// +FMemScopePtr::FMemScopePtr(uint64_t InPtr) +{ + if (InPtr != 0 && TRACE_PRIVATE_CHANNELEXPR_IS_ENABLED(MemAllocChannel)) + { + if (auto LogScope = FMemoryMemoryScopePtrFields::LogScopeType::ScopedEnter<FMemoryMemoryScopePtrFields>()) + { + if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopePtrFields*)(&LogScope)) + { + Inner.SetActive(), LogScope += LogScope << MemoryScope.Ptr(InPtr); + } + } + } +} + +///////////////////////////////////////////////////////////////////////////////// +FMemScopePtr::~FMemScopePtr() +{ +} + +///////////////////////////////////////////////////////////////////////////////// + +/** + * Utility class that manages tracing the specification of unique LLM tags + * and custom name based tags. 
+ */ +class FTagTrace +{ +public: + FTagTrace(FMalloc* InMalloc); + void AnnounceGenericTags() const; + void AnnounceSpecialTags() const; + int32 AnnounceCustomTag(int32 Tag, int32 ParentTag, const ANSICHAR* Display) const; + +private: + struct FTagNameSetEntry + { + std::atomic_int32_t Data; + + int32_t GetKey() const { return Data.load(std::memory_order_relaxed); } + bool GetValue() const { return true; } + bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } // NAME_None is treated as empty + void SetKeyValue(int32_t Key, bool Value) + { + ZEN_UNUSED(Value); + Data.store(Key, std::memory_order_relaxed); + } + static uint32_t KeyHash(int32_t Key) { return static_cast<uint32>(Key); } + static void ClearEntries(FTagNameSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FTagNameSetEntry)); + } + }; + typedef TGrowOnlyLockFreeHash<FTagNameSetEntry, int32_t, bool> FTagNameSet; + + FTagNameSet AnnouncedNames; + static FMalloc* Malloc; +}; + +FMalloc* FTagTrace::Malloc = nullptr; +static FTagTrace* GTagTrace = nullptr; + +//////////////////////////////////////////////////////////////////////////////// +FTagTrace::FTagTrace(FMalloc* InMalloc) : AnnouncedNames(InMalloc) +{ + Malloc = InMalloc; + AnnouncedNames.Reserve(1024); + AnnounceGenericTags(); + AnnounceSpecialTags(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +FTagTrace::AnnounceGenericTags() const +{ +# define TRACE_TAG_SPEC(Enum, Str, ParentTag) \ + { \ + const uint32_t DisplayLen = (uint32_t)StringLength(Str); \ + UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) \ + << TagSpec.Tag((int32_t)ELLMTag::Enum) << TagSpec.Parent((int32_t)ParentTag) << TagSpec.Display(Str, DisplayLen); \ + } + LLM_ENUM_GENERIC_TAGS(TRACE_TAG_SPEC); +# undef TRACE_TAG_SPEC +} + +//////////////////////////////////////////////////////////////////////////////// + +void +FTagTrace::AnnounceSpecialTags() const 
+{ + auto EmitTag = [](const char16_t* DisplayString, int32_t Tag, int32_t ParentTag) { + const uint32_t DisplayLen = (uint32_t)StringLength(DisplayString); + UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) + << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(DisplayString, DisplayLen); + }; + + EmitTag(u"Trace", TRACE_TAG, -1); +} + +//////////////////////////////////////////////////////////////////////////////// +int32_t +FTagTrace::AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const ANSICHAR* Display) const +{ + const uint32_t DisplayLen = (uint32_t)StringLength(Display); + UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) + << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(Display, DisplayLen); + return Tag; +} + +} // namespace zen + +#endif // UE_MEMORY_TAGS_TRACE_ENABLED + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_InitTags(FMalloc* InMalloc) +{ +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + GTagTrace = (FTagTrace*)InMalloc->Malloc(sizeof(FTagTrace), alignof(FTagTrace)); + new (GTagTrace) FTagTrace(InMalloc); +#else + ZEN_UNUSED(InMalloc); +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +int32_t +MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display) +{ +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + // todo: How do we check if tag trace is active? 
+ if (GTagTrace) + { + return GTagTrace->AnnounceCustomTag(Tag, ParentTag, Display); + } +#else + ZEN_UNUSED(Tag, ParentTag, Display); +#endif + return -1; +} + +//////////////////////////////////////////////////////////////////////////////// +int32_t +MemoryTrace_GetActiveTag() +{ +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + return GActiveTag; +#else + return -1; +#endif +} + +} // namespace zen diff --git a/src/zencore/memtrack/tracemalloc.h b/src/zencore/memtrack/tracemalloc.h new file mode 100644 index 000000000..54606ac45 --- /dev/null +++ b/src/zencore/memtrack/tracemalloc.h @@ -0,0 +1,24 @@ +// Copyright Epic Games, Inc. All Rights Reserved. +#pragma once + +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/memorytrace.h> + +namespace zen { + +class FTraceMalloc : public FMalloc +{ +public: + FTraceMalloc(FMalloc* InMalloc); + virtual ~FTraceMalloc(); + + virtual void* Malloc(SIZE_T Count, uint32 Alignment) override; + virtual void* Realloc(void* Original, SIZE_T Count, uint32 Alignment) override; + virtual void Free(void* Original) override; + + virtual void OnMallocInitialized() override { WrappedMalloc->OnMallocInitialized(); } + + FMalloc* WrappedMalloc; +}; + +} // namespace zen diff --git a/src/zencore/memtrack/vatrace.cpp b/src/zencore/memtrack/vatrace.cpp new file mode 100644 index 000000000..4dea27f1b --- /dev/null +++ b/src/zencore/memtrack/vatrace.cpp @@ -0,0 +1,361 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "vatrace.h" + +#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS + +# include <zencore/memory/memorytrace.h> + +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) +# pragma comment(lib, "mincore.lib") // VirtualAlloc2 +# endif + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +class FTextSectionEditor +{ +public: + ~FTextSectionEditor(); + template<typename T> + T* Hook(T* Target, T* HookFunction); + +private: + struct FTrampolineBlock + { + FTrampolineBlock* Next; + uint32_t Size; + uint32_t Used; + }; + + static void* GetActualAddress(void* Function); + FTrampolineBlock* AllocateTrampolineBlock(void* Reference); + uint8_t* AllocateTrampoline(void* Reference, unsigned int Size); + void* HookImpl(void* Target, void* HookFunction); + FTrampolineBlock* HeadBlock = nullptr; +}; + +//////////////////////////////////////////////////////////////////////////////// +FTextSectionEditor::~FTextSectionEditor() +{ + for (FTrampolineBlock* Block = HeadBlock; Block != nullptr; Block = Block->Next) + { + DWORD Unused; + VirtualProtect(Block, Block->Size, PAGE_EXECUTE_READ, &Unused); + } + + FlushInstructionCache(GetCurrentProcess(), nullptr, 0); +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FTextSectionEditor::GetActualAddress(void* Function) +{ + // Follow a jmp instruction (0xff/4 only for now) at function and returns + // where it would jmp to. 
+ + uint8_t* Addr = (uint8_t*)Function; + int Offset = unsigned(Addr[0] & 0xf0) == 0x40; // REX prefix + if (Addr[Offset + 0] == 0xff && Addr[Offset + 1] == 0x25) + { + Addr += Offset; + Addr = *(uint8_t**)(Addr + 6 + *(uint32_t*)(Addr + 2)); + } + return Addr; +} + +//////////////////////////////////////////////////////////////////////////////// +FTextSectionEditor::FTrampolineBlock* +FTextSectionEditor::AllocateTrampolineBlock(void* Reference) +{ + static const size_t BlockSize = 0x10000; // 64KB is Windows' canonical granularity + + // Find the start of the main allocation that mapped Reference + MEMORY_BASIC_INFORMATION MemInfo; + VirtualQuery(Reference, &MemInfo, sizeof(MemInfo)); + auto* Ptr = (uint8_t*)(MemInfo.AllocationBase); + + // Step backwards one block at a time and try and allocate that address + while (true) + { + Ptr -= BlockSize; + if (VirtualAlloc(Ptr, BlockSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) != nullptr) + { + break; + } + + uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Ptr); + if (Distance >= 1ull << 31) + { + ZEN_ASSERT(!"Failed to allocate trampoline blocks for memory tracing hooks"); + } + } + + auto* Block = (FTrampolineBlock*)Ptr; + Block->Next = HeadBlock; + Block->Size = BlockSize; + Block->Used = sizeof(FTrampolineBlock); + HeadBlock = Block; + + return Block; +} + +//////////////////////////////////////////////////////////////////////////////// +uint8_t* +FTextSectionEditor::AllocateTrampoline(void* Reference, unsigned int Size) +{ + // Try and find a block that's within 2^31 bytes before Reference + FTrampolineBlock* Block; + for (Block = HeadBlock; Block != nullptr; Block = Block->Next) + { + uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Block); + if (Distance < 1ull << 31) + { + break; + } + } + + // If we didn't find a block then we need to allocate a new one. + if (Block == nullptr) + { + Block = AllocateTrampolineBlock(Reference); + } + + // Allocate space for the trampoline. 
// NOTE(review): this is the tail of an allocation helper whose beginning is
// outside this chunk ('Block' and 'Size' are established there). The visible
// code is a simple bump allocation: advance 'Block->Used' by 'Size' and hand
// back the previous end of the block.
	uint32_t NextUsed = Block->Used + Size;
	if (NextUsed > Block->Size)
	{
		// Block is full. We could allocate a new block here but as it is not
		// expected that so many hooks will be made this path shouldn't happen
		ZEN_ASSERT(!"Unable to allocate memory for memory tracing's hooks");
	}

	uint8_t* Out = (uint8_t*)Block + Block->Used;
	Block->Used = NextUsed;

	return Out;
}

////////////////////////////////////////////////////////////////////////////////
// Type-safe wrapper around HookImpl(): redirects calls to 'Target' into
// 'HookFunction' and returns a pointer through which the original (un-hooked)
// function can still be invoked.
template<typename T>
T*
FTextSectionEditor::Hook(T* Target, T* HookFunction)
{
	return (T*)HookImpl((void*)Target, (void*)HookFunction);
}

////////////////////////////////////////////////////////////////////////////////
// Installs an inline hook on 'Target':
//   1. decodes at least 6 bytes of the target's prologue (room for the
//      6-byte "jmp [rip+disp32]" patch written below),
//   2. copies those prologue bytes into a freshly allocated trampoline and
//      appends a rel32 jmp back into the remainder of the original function,
//   3. overwrites the start of 'Target' with an indirect jmp that reads the
//      hook address from the pointer slot at the head of the trampoline.
// Returns a pointer to the relocated prologue, i.e. a callable stand-in for
// the original function (this is what callers store as the "...Orig" pointer).
void*
FTextSectionEditor::HookImpl(void* Target, void* HookFunction)
{
	Target = GetActualAddress(Target);

	// Very rudimentary x86_64 instruction length decoding that only supports op
	// code ranges (0x80,0x8b) and (0x50,0x5f). Enough for simple prologues
	uint8_t* __restrict Start = (uint8_t*)Target;
	const uint8_t* Read = Start;
	do
	{
		Read += (Read[0] & 0xf0) == 0x40; // REX prefix
		uint8_t Inst = *Read++;
		if (unsigned(Inst - 0x80) < 0x0cu)
		{
			uint8_t ModRm = *Read++;
			Read += ((ModRm & 0300) < 0300) & ((ModRm & 0007) == 0004); // SIB
			switch (ModRm & 0300) // Disp[8|32]
			{
			case 0100:
				Read += 1;
				break;
			case 0200:
				// NOTE(review): a mod=10 displacement is 4 bytes in x86-64
				// encoding, so '+= 5' looks like an over-count by one byte —
				// confirm against the encoding reference. Latent only, as the
				// simple prologues hooked here do not hit this case.
				Read += 5;
				break;
			}
			Read += (Inst == 0x83); // skip imm8 of "op r/m, imm8" (0x80/0x81 immediates are not handled)
		}
		else if (unsigned(Inst - 0x50) >= 0x10u)
		{
			ZEN_ASSERT(!"Unknown instruction");
		}
	} while (Read - Start < 6);

	// Trampoline layout: [void* HookFunction][relocated prologue][jmp rel32]
	static const int TrampolineSize = 24;
	int PatchSize = int(Read - Start);
	uint8_t* TrampolinePtr = AllocateTrampoline(Start, PatchSize + TrampolineSize);

	// Write the trampoline
	*(void**)TrampolinePtr = HookFunction;

	uint8_t* PatchJmp = TrampolinePtr + sizeof(void*);
	memcpy(PatchJmp, Start, PatchSize);

	// Append "jmp rel32" (0xe9) back to the first unrelocated byte of the
	// original function; rel32 is relative to the end of the 5-byte jmp.
	PatchJmp += PatchSize;
	*PatchJmp = 0xe9;
	*(int32_t*)(PatchJmp + 1) = int32_t(intptr_t(Start + PatchSize) - intptr_t(PatchJmp)) - 5;

	// Need to make the text section writeable
	DWORD ProtPrev;
	uintptr_t ProtBase = uintptr_t(Target) & ~0x0fff;                     // 0x0fff is mask of VM page size
	size_t ProtSize = ((ProtBase + 16 + 0x1000) & ~0x0fff) - ProtBase;    // 16 is enough for one x86 instruction
	VirtualProtect((void*)ProtBase, ProtSize, PAGE_EXECUTE_READWRITE, &ProtPrev);

	// Patch function to jmp to the hook. Bytes "ff 25 disp32" encode
	// "jmp [rip+disp32]": disp32 lives at byte offset 2 (HookJmp + 1 in
	// uint16_t units) and is relative to the end of the 6-byte instruction
	// (HookJmp + 3 in uint16_t units); it points at the hook-pointer slot at
	// the head of the trampoline.
	uint16_t* HookJmp = (uint16_t*)Target;
	HookJmp[0] = 0x25ff;
	*(int32_t*)(HookJmp + 1) = int32_t(intptr_t(TrampolinePtr) - intptr_t(HookJmp + 3));

	// Put the protection back the way it was
	VirtualProtect((void*)ProtBase, ProtSize, ProtPrev, &ProtPrev);

	// Start of the relocated prologue == callable "original" entry point.
	return PatchJmp - PatchSize;
}

//////////////////////////////////////////////////////////////////////////

// Static hook state: the saved "original function" pointers returned by
// FTextSectionEditor::Hook, plus the light-tracing flag.
bool FVirtualWinApiHooks::bLight;
LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
PVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
# else
LPVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
# endif
BOOL(WINAPI* FVirtualWinApiHooks::VmFreeOrig)(LPVOID, SIZE_T, DWORD);
BOOL(WINAPI* FVirtualWinApiHooks::VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);

// Installs hooks on the Win32 virtual-memory API (VirtualAlloc[Ex|2],
// VirtualFree[Ex]) so reserve/release activity is reported to the memory
// tracer. 'bInLight' is stored in 'bLight'; it is not read anywhere in this
// file's visible code.
void
FVirtualWinApiHooks::Initialize(bool bInLight)
{
	bLight = bInLight;

	FTextSectionEditor Editor;

	// Note that hooking alloc functions is done last as applying the hook can
	// allocate some memory pages.

	VmFreeOrig = Editor.Hook(VirtualFree, &FVirtualWinApiHooks::VmFree);
	VmFreeExOrig = Editor.Hook(VirtualFreeEx, &FVirtualWinApiHooks::VmFreeEx);

# if ZEN_PLATFORM_WINDOWS
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
	{
		VmAlloc2Orig = Editor.Hook(VirtualAlloc2, &FVirtualWinApiHooks::VmAlloc2);
	}
# else // NTDDI_VERSION
	{
		// Pre-RS4 SDKs have no VirtualAlloc2 prototype; resolve it dynamically
		// from kernelbase.dll and only install the hook if it exists.
		VmAlloc2Orig = nullptr;
		HINSTANCE DllInstance;
		DllInstance = LoadLibrary(TEXT("kernelbase.dll"));
		if (DllInstance != NULL)
		{
# pragma warning(push)
# pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'FVirtualWinApiHooks::FnVirtualAlloc2'
			VmAlloc2Orig = (FnVirtualAlloc2)GetProcAddress(DllInstance, "VirtualAlloc2");
# pragma warning(pop)
			// NOTE(review): FreeLibrary after taking the function pointer is
			// safe only because kernelbase.dll stays loaded for the process
			// lifetime — confirm this assumption holds for all targets.
			FreeLibrary(DllInstance);
		}
		if (VmAlloc2Orig)
		{
			VmAlloc2Orig = Editor.Hook(VmAlloc2Orig, &FVirtualWinApiHooks::VmAlloc2);
		}
	}
# endif // NTDDI_VERSION
# endif // PLATFORM_WINDOWS

	VmAllocExOrig = Editor.Hook(VirtualAllocEx, &FVirtualWinApiHooks::VmAllocEx);
	VmAllocOrig = Editor.Hook(VirtualAlloc, &FVirtualWinApiHooks::VmAlloc);
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualAlloc: forwards to the original, then reports successful
// reserves (or address-unspecified commits) to the tracer as a system-memory
// root-heap allocation.
LPVOID WINAPI
FVirtualWinApiHooks::VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
{
	LPVOID Ret = VmAllocOrig(Address, Size, Type, Protect);

	// Track any reserve for now. Going forward we need events to differentiate reserves/commits and
	// corresponding information on frees.
	if (Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
	{
		MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
		MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
	}

	return Ret;
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualFree: reports MEM_RELEASE to the tracer (unmark + free)
// before delegating to the original function.
BOOL WINAPI
FVirtualWinApiHooks::VmFree(LPVOID Address, SIZE_T Size, DWORD Type)
{
	if (Type & MEM_RELEASE)
	{
		MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
		MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
	}

	return VmFreeOrig(Address, Size, Type);
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualAllocEx: same tracing as VmAlloc, but only for allocations
// made in the current process (cross-process allocations are not traced).
LPVOID WINAPI
FVirtualWinApiHooks::VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
{
	LPVOID Ret = VmAllocExOrig(Process, Address, Size, Type, Protect);

	if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
	{
		MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
		MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
	}

	return Ret;
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualFreeEx: same tracing as VmFree, restricted to the current
// process.
BOOL WINAPI
FVirtualWinApiHooks::VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type)
{
	if (Process == GetCurrentProcess() && (Type & MEM_RELEASE))
	{
		MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
		MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
	}

	return VmFreeExOrig(Address, Size, Type) ? VmFreeExOrig(Process, Address, Size, Type) : VmFreeExOrig(Process, Address, Size, Type);
}

////////////////////////////////////////////////////////////////////////////////
// Hook for VirtualAlloc2: same tracing as VmAllocEx. The signature differs by
// SDK version (MEM_EXTENDED_PARAMETER is unavailable before NTDDI_WIN10_RS4,
// where void* is used in its place).
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
PVOID WINAPI
FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
                              PVOID BaseAddress,
                              SIZE_T Size,
                              ULONG Type,
                              ULONG PageProtection,
                              MEM_EXTENDED_PARAMETER* ExtendedParameters,
                              ULONG ParameterCount)
# else
LPVOID WINAPI
FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
                              LPVOID BaseAddress,
                              SIZE_T Size,
                              ULONG Type,
                              ULONG PageProtection,
                              /*MEM_EXTENDED_PARAMETER* */ void* ExtendedParameters,
                              ULONG ParameterCount)
# endif
{
	LPVOID Ret = VmAlloc2Orig(Process, BaseAddress, Size, Type, PageProtection, ExtendedParameters, ParameterCount);

	if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && BaseAddress == nullptr)))
	{
		MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
		MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
	}

	return Ret;
}

} // namespace zen

#endif // PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
diff --git a/src/zencore/memtrack/vatrace.h b/src/zencore/memtrack/vatrace.h
new file mode 100644
index 000000000..59cc7fe97
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.h
@@ -0,0 +1,61 @@
+// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include <zenbase/zenbase.h>

// Win32 is the only platform with virtual-memory hooks today; a build may
// pre-define the macro to opt out.
#if ZEN_PLATFORM_WINDOWS && !defined(PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS)
# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 1
#endif

#ifndef PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 0
#endif

#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
# include <zencore/windows.h>

namespace zen {

// Installs inline hooks on the Win32 virtual-memory API (VirtualAlloc,
// VirtualAllocEx, VirtualAlloc2, VirtualFree, VirtualFreeEx) so allocation
// activity can be forwarded to the memory tracer. Static-only class: the
// private constructor is declared (and never defined) to prevent
// instantiation.
class FVirtualWinApiHooks
{
public:
	// Install all hooks. 'bInLight' selects light tracing and is stored in
	// 'bLight'.
	static void Initialize(bool bInLight);

private:
	FVirtualWinApiHooks(); // not implemented — static-only class
	static bool bLight;    // light-tracing flag set by Initialize

	// Hook entry points; signatures mirror the Win32 functions they replace.
	static LPVOID WINAPI VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
	static LPVOID WINAPI VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
	static PVOID WINAPI VmAlloc2(HANDLE Process,
	                             PVOID BaseAddress,
	                             SIZE_T Size,
	                             ULONG AllocationType,
	                             ULONG PageProtection,
	                             MEM_EXTENDED_PARAMETER* ExtendedParameters,
	                             ULONG ParameterCount);
	static PVOID(WINAPI* VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
	typedef PVOID(__stdcall* FnVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
# else
	// Pre-RS4 SDKs lack MEM_EXTENDED_PARAMETER; void* stands in for it.
	static LPVOID WINAPI VmAlloc2(HANDLE Process,
	                              LPVOID BaseAddress,
	                              SIZE_T Size,
	                              ULONG AllocationType,
	                              ULONG PageProtection,
	                              void* ExtendedParameters,
	                              ULONG ParameterCount);
	static LPVOID(WINAPI* VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
	typedef LPVOID(__stdcall* FnVirtualAlloc2)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
# endif
	static BOOL WINAPI VmFree(LPVOID Address, SIZE_T Size, DWORD Type);
	static BOOL WINAPI VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type);

	// Saved "original function" pointers returned when the hooks are applied.
	static LPVOID(WINAPI* VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
	static LPVOID(WINAPI* VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
	static BOOL(WINAPI* VmFreeOrig)(LPVOID, SIZE_T, DWORD);
	static BOOL(WINAPI* VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
};

} // namespace zen

#endif