aboutsummaryrefslogtreecommitdiff
path: root/src/zencore/memtrack
diff options
context:
space:
mode:
Diffstat (limited to 'src/zencore/memtrack')
-rw-r--r--src/zencore/memtrack/callstacktrace.cpp1059
-rw-r--r--src/zencore/memtrack/callstacktrace.h151
-rw-r--r--src/zencore/memtrack/growonlylockfreehash.h255
-rw-r--r--src/zencore/memtrack/memorytrace.cpp829
-rw-r--r--src/zencore/memtrack/moduletrace.cpp296
-rw-r--r--src/zencore/memtrack/moduletrace.h11
-rw-r--r--src/zencore/memtrack/moduletrace_events.cpp16
-rw-r--r--src/zencore/memtrack/moduletrace_events.h27
-rw-r--r--src/zencore/memtrack/platformtls.h107
-rw-r--r--src/zencore/memtrack/tagtrace.cpp237
-rw-r--r--src/zencore/memtrack/tracemalloc.h24
-rw-r--r--src/zencore/memtrack/vatrace.cpp361
-rw-r--r--src/zencore/memtrack/vatrace.h61
13 files changed, 3434 insertions, 0 deletions
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp
new file mode 100644
index 000000000..d860c05d1
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.cpp
@@ -0,0 +1,1059 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "callstacktrace.h"
+
+#include <zenbase/zenbase.h>
+#include <zencore/string.h>
+
+#if UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+// Platform implementations of back tracing
+////////////////////////////////////////////////////////////////////////////////
+void CallstackTrace_CreateInternal(FMalloc*);
+void CallstackTrace_InitializeInternal();
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(CallstackChannel)
+UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec)
+
+uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Create(class FMalloc* InMalloc)
+{
+	// Run the platform create hook exactly once, no matter how many times
+	// this is called; the first caller's allocator wins.
+	static bool InitOnce = (CallstackTrace_CreateInternal(InMalloc), true);
+	(void)InitOnce;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Initialize()
+{
+	// Run-once guard: allocate the TLS slot inside the guard so repeated calls do not leak slots.
+	static auto InitOnce = [&] {
+		GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot();
+		CallstackTrace_InitializeInternal();
+		return true;
+	}();
+}
+
+} // namespace zen
+
+#endif
+
+#if ZEN_PLATFORM_WINDOWS
+# include "moduletrace.h"
+
+# include "growonlylockfreehash.h"
+
+# include <zencore/scopeguard.h>
+# include <zencore/thread.h>
+# include <zencore/trace.h>
+
+# include <atomic>
+# include <span>
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winnt.h>
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS
+# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0
+# endif
+
+// 0=off, 1=stats, 2=validation, 3=truth_compare
+# define BACKTRACE_DBGLVL 0
+
+# define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0))
+
+static bool GModulesAreInitialized = false;
+
+// This implementation is using unwind tables, which results in very fast
+// stack walking. In some cases this is not suitable, and we then fall back
+// to the standard stack walking implementation.
+# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES)
+# if defined(__clang__)
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0
+# else
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1
+# endif
+# endif
+
+// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms
+# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER)
+# if defined(__clang__)
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0
+# else
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1
+# endif
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB)
+// Initial size of the known set of callstacks
+# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE)
+// If disabled the known set will not grow. New callstacks will not be
+// reported if the set is full
+# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1
+# endif
+
+namespace zen {
+
+class FMalloc;
+
+UE_TRACE_CHANNEL_EXTERN(CallstackChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync)
+ UE_TRACE_EVENT_FIELD(uint32, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint64[], Frames)
+UE_TRACE_EVENT_END()
+
+class FCallstackTracer
+{
+public:
+ struct FBacktraceEntry
+ {
+ uint64_t Hash = 0;
+ uint32_t FrameCount = 0;
+ uint64_t* Frames;
+ };
+
+ FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {}
+
+ uint32_t AddCallstack(const FBacktraceEntry& Entry)
+ {
+ bool bAlreadyAdded = false;
+
+ // Our set implementation doesn't allow for zero entries (zero represents an empty element
+ // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation,
+ // treat it as a "1" instead, for purposes of tracking if we've seen that callstack.
+ const uint64_t Hash = FMath::Max(Entry.Hash, 1ull);
+ uint32_t Id;
+ KnownSet.Find(Hash, &Id, &bAlreadyAdded);
+ if (!bAlreadyAdded)
+ {
+ Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed);
+ // On the first callstack reserve memory up front
+ if (Id == 1)
+ {
+ KnownSet.Reserve(InitialReserveCount);
+ }
+# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE
+ // If configured as not growable, start returning unknown id's when full.
+ if (Id >= InitialReserveCount)
+ {
+ return 0;
+ }
+# endif
+ KnownSet.Emplace(Hash, Id);
+ UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel)
+ << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount);
+ }
+
+ return Id;
+ }
+
+private:
+ struct FEncounteredCallstackSetEntry
+ {
+ std::atomic_uint64_t Key;
+ std::atomic_uint32_t Value;
+
+ inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); }
+ inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); }
+ inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; }
+ inline void SetKeyValue(uint64_t InKey, uint32_t InValue)
+ {
+ Value.store(InValue, std::memory_order_release);
+ Key.store(InKey, std::memory_order_relaxed);
+ }
+ static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); }
+ static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry));
+ }
+ };
+
+ typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet;
+
+ constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024;
+ constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry);
+
+ FEncounteredCallstackSet KnownSet;
+ std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack"
+};
+
+# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+/*
+ * Windows' x64 binaries contain a ".pdata" section that describes the location
+ * and size of its functions and details on how to unwind them. The unwind
+ * information includes descriptions about a function's stack frame size and
+ * the non-volatile registers it pushes onto the stack. From this we can
+ * calculate where a call instruction wrote its return address. This is enough
+ * to walk the callstack and by caching this information it can be done
+ * efficiently.
+ *
+ * Some functions need a variable amount of stack (such as those that use
+ * alloca() for example) will use a frame pointer. Frame pointers involve saving
+ * and restoring the stack pointer in the function's prologue/epilogue. This
+ * frees the function up to modify the stack pointer arbitrarily. This
+ * significantly complicates establishing where a return address is, so this
+ * pdata scheme of walking the stack just doesn't support functions like this.
+ * Walking stops if it encounters such a function. Fortunately there are
+ * usually very few such functions, saving us from having to read and track
+ * non-volatile registers which adds a significant amount of work.
+ *
+ * A further optimisation is to assume we are only interested in methods that
+ * are part of engine or game code. As such we only build lookup tables for
+ * such modules and never accept OS or third party modules. Backtracing stops
+ * if an address is encountered which doesn't map to a known module.
+ */
+
+////////////////////////////////////////////////////////////////////////////////
+static uint32_t
+AddressToId(uintptr_t Address)
+{
+	return static_cast<uint32_t>(Address >> 16); // module ids are 64 KiB granules of the address space
+}
+
+static uintptr_t
+IdToAddress(uint32_t Id)
+{
+	return uintptr_t(Id) << 16; // keep full pointer width; truncating to 32 bits breaks bases above 4 GiB
+}
+
+struct FIdPredicate
+{
+ template<class T>
+ bool operator()(uint32_t Id, const T& Item) const
+ {
+ return Id < Item.Id;
+ }
+ template<class T>
+ bool operator()(const T& Item, uint32_t Id) const
+ {
+ return Item.Id < Id;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+struct FUnwindInfo
+{
+ uint8_t Version : 3;
+ uint8_t Flags : 5;
+ uint8_t PrologBytes;
+ uint8_t NumUnwindCodes;
+ uint8_t FrameReg : 4;
+ uint8_t FrameRspBias : 4;
+};
+
+# pragma warning(push)
+# pragma warning(disable : 4200)
+struct FUnwindCode
+{
+ uint8_t PrologOffset;
+ uint8_t OpCode : 4;
+ uint8_t OpInfo : 4;
+ uint16_t Params[];
+};
+# pragma warning(pop)
+
+enum
+{
+ UWOP_PUSH_NONVOL = 0, // 1 node
+ UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes
+ UWOP_ALLOC_SMALL = 2, // 1 node
+ UWOP_SET_FPREG = 3, // 1 node
+ UWOP_SAVE_NONVOL = 4, // 2 nodes
+ UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes
+ UWOP_SAVE_XMM128 = 8, // 2 nodes
+ UWOP_SAVE_XMM128_FAR = 9, // 3 nodes
+ UWOP_PUSH_MACHFRAME = 10, // 1 node
+};
+
+////////////////////////////////////////////////////////////////////////////////
+class FBacktracer
+{
+public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ void AddModule(uintptr_t Base, const char16_t* Name);
+ void RemoveModule(uintptr_t Base);
+ uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+
+private:
+ struct FFunction
+ {
+ uint32_t Id;
+ int32_t RspBias;
+# if BACKTRACE_DBGLVL >= 2
+ uint32_t Size;
+ const FUnwindInfo* UnwindInfo;
+# endif
+ };
+
+ struct FModule
+ {
+ uint32_t Id;
+ uint32_t IdSize;
+ uint32_t NumFunctions;
+# if BACKTRACE_DBGLVL >= 1
+ uint16 NumFpTypes;
+ // uint16 *padding*
+# else
+ // uint32_t *padding*
+# endif
+ FFunction* Functions;
+ };
+
+ struct FLookupState
+ {
+ FModule Module;
+ };
+
+ struct FFunctionLookupSetEntry
+ {
+ // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function)
+ std::atomic_uint64_t Data;
+
+ inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; }
+ inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; }
+ inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; }
+ inline void SetKeyValue(uint64_t Key, int32_t Value)
+ {
+ Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed);
+ }
+ static inline uint32_t KeyHash(uint64_t Key)
+ {
+ // 64 bit pointer to 32 bit hash
+ Key = (~Key) + (Key << 21);
+ Key = Key ^ (Key >> 24);
+ Key = Key * 265;
+ Key = Key ^ (Key >> 14);
+ Key = Key * 21;
+ Key = Key ^ (Key >> 28);
+ Key = Key + (Key << 31);
+ return static_cast<uint32_t>(Key);
+ }
+ static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry));
+ }
+ };
+ typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet;
+
+ const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const;
+ static FBacktracer* Instance;
+ mutable zen::RwLock Lock;
+ FModule* Modules;
+ int32_t ModulesNum;
+ int32_t ModulesCapacity;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+# if BACKTRACE_LOCK_FREE
+ mutable FFunctionLookupSet FunctionLookups;
+ mutable bool bReentranceCheck = false;
+# endif
+# if BACKTRACE_DBGLVL >= 1
+ mutable uint32_t NumFpTruncations = 0;
+ mutable uint32_t TotalFunctions = 0;
+# endif
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer* FBacktracer::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::FBacktracer(FMalloc* InMalloc)
+: Malloc(InMalloc)
+, CallstackTracer(InMalloc)
+# if BACKTRACE_LOCK_FREE
+, FunctionLookups(InMalloc)
+# endif
+{
+# if BACKTRACE_LOCK_FREE
+ FunctionLookups.Reserve(512 * 1024); // 4 MB
+# endif
+ ModulesCapacity = 8;
+ ModulesNum = 0;
+ Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity);
+
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::~FBacktracer()
+{
+	for (FModule& Module : std::span<FModule>(Modules, ModulesNum))
+	{
+		Malloc->Free(Module.Functions);
+	}
+	Malloc->Free(Modules); // also release the module table itself, not just the per-module function tables
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer*
+FBacktracer::Get()
+{
+ return Instance;
+}
+
+bool GFullBacktraces = false;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name)
+{
+ if (!GFullBacktraces)
+ {
+ const size_t NameLen = StringLength(Name);
+ if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe")))
+ {
+ return;
+ }
+ }
+
+ const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase;
+ const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew);
+ const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader);
+
+ uint32_t NumSections = FileHeader->NumberOfSections;
+ const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader);
+
+ // Find ".pdata" section
+ uintptr_t PdataBase = 0;
+ uintptr_t PdataEnd = 0;
+ for (uint32_t i = 0; i < NumSections; ++i)
+ {
+ const IMAGE_SECTION_HEADER* Section = Sections + i;
+ if (*(uint64_t*)(Section->Name) ==
+ 0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. This constant is '.pdata'
+ {
+ PdataBase = ModuleBase + Section->VirtualAddress;
+ PdataEnd = PdataBase + Section->SizeOfRawData;
+ break;
+ }
+ }
+
+ if (PdataBase == 0)
+ {
+ return;
+ }
+
+ // Count the number of functions. The assumption here is that if we have got this far then there is at least one function
+ uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION);
+ if (NumFunctions == 0)
+ {
+ return;
+ }
+
+ const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase;
+ do
+ {
+ const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1;
+ if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress))
+ {
+ break;
+ }
+
+ --NumFunctions;
+ } while (NumFunctions != 0);
+
+ // Allocate some space for the module's function-to-frame-size table
+ auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions);
+ FFunction* OutTableCursor = OutTable;
+
+ // Extract frame size for each function from pdata's unwind codes.
+ uint32_t NumFpFuncs = 0;
+ for (uint32_t i = 0; i < NumFunctions; ++i)
+ {
+ const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i;
+
+ uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress;
+ const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr;
+
+ if (UnwindInfo->Version != 1)
+ {
+ /* some v2s have been seen in msvc. Always seem to be assembly
+ * routines (memset, memcpy, etc) */
+ continue;
+ }
+
+ int32_t FpInfo = 0;
+ int32_t RspBias = 0;
+
+# if BACKTRACE_DBGLVL >= 2
+ uint32_t PrologVerify = UnwindInfo->PrologBytes;
+# endif
+
+ const auto* Code = (FUnwindCode*)(UnwindInfo + 1);
+ const auto* EndCode = Code + UnwindInfo->NumUnwindCodes;
+ while (Code < EndCode)
+ {
+# if BACKTRACE_DBGLVL >= 2
+ if (Code->PrologOffset > PrologVerify)
+ {
+ PLATFORM_BREAK();
+ }
+ PrologVerify = Code->PrologOffset;
+# endif
+
+ switch (Code->OpCode)
+ {
+ case UWOP_PUSH_NONVOL:
+ RspBias += 8;
+ Code += 1;
+ break;
+
+ case UWOP_ALLOC_LARGE:
+ if (Code->OpInfo)
+ {
+ RspBias += *(uint32_t*)(Code->Params);
+ Code += 3;
+ }
+ else
+ {
+ RspBias += Code->Params[0] * 8;
+ Code += 2;
+ }
+ break;
+
+ case UWOP_ALLOC_SMALL:
+ RspBias += (Code->OpInfo * 8) + 8;
+ Code += 1;
+ break;
+
+ case UWOP_SET_FPREG:
+ // Function will adjust RSP (e.g. through use of alloca()) so it
+ // uses a frame pointer register. There's instructions like;
+ //
+ // push FRAME_REG
+ // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)]
+ // ...
+ // add rsp, rax
+ // ...
+ // sub rsp, FRAME_RSP_BIAS * 16
+ // pop FRAME_REG
+ // ret
+ //
+ // To recover the stack frame we would need to track non-volatile
+ // registers which adds a lot of overhead for a small subset of
+ // functions. Instead we'll end backtraces at these functions.
+
+ // MSB is set to detect variable sized frames that we can't proceed
+ // past when back-tracing.
+ NumFpFuncs++;
+ FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23);
+ Code += 1;
+ break;
+
+ case UWOP_PUSH_MACHFRAME:
+ RspBias = Code->OpInfo ? 48 : 40;
+ Code += 1;
+ break;
+
+ case UWOP_SAVE_NONVOL:
+ Code += 2;
+ break; /* saves are movs instead of pushes */
+ case UWOP_SAVE_NONVOL_FAR:
+ Code += 3;
+ break;
+ case UWOP_SAVE_XMM128:
+ Code += 2;
+ break;
+ case UWOP_SAVE_XMM128_FAR:
+ Code += 3;
+ break;
+
+ default:
+# if BACKTRACE_DBGLVL >= 2
+ PLATFORM_BREAK();
+# endif
+ break;
+ }
+ }
+
+ // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a
+ // single actual function in the .text segment.
+ bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO);
+
+		RspBias /= sizeof(void*); // stack pushes/pops are in units of one machine word
+ RspBias += !bIsChained; // and one extra push for the ret address
+ RspBias |= FpInfo; // pack in details about possible frame pointer
+
+ if (bIsChained)
+ {
+ OutTableCursor[-1].RspBias += RspBias;
+# if BACKTRACE_DBGLVL >= 2
+ OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress);
+# endif
+ }
+ else
+ {
+ *OutTableCursor = {
+ FunctionTable->BeginAddress,
+ RspBias,
+# if BACKTRACE_DBGLVL >= 2
+ FunctionTable->EndAddress - FunctionTable->BeginAddress,
+ UnwindInfo,
+# endif
+ };
+
+ ++OutTableCursor;
+ }
+ }
+
+ uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage;
+ ModuleSize += 0xffff; // to align up to next 64K page. it'll get shifted by AddressToId()
+
+ FModule Module = {
+ AddressToId(ModuleBase),
+ AddressToId(ModuleSize),
+ uint32_t(uintptr_t(OutTableCursor - OutTable)),
+# if BACKTRACE_DBGLVL >= 1
+ uint16(NumFpFuncs),
+# endif
+ OutTable,
+ };
+
+ {
+ zen::RwLock::ExclusiveLockScope _(Lock);
+
+ if (ModulesNum + 1 > ModulesCapacity)
+ {
+ ModulesCapacity += 8;
+ Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity);
+ }
+ Modules[ModulesNum++] = Module;
+
+ std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; });
+ }
+
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations += NumFpFuncs;
+ TotalFunctions += NumFunctions;
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::RemoveModule(uintptr_t ModuleBase)
+{
+	// When Windows' RequestExit() is called it hard-terminates all threads except
+	// the main thread and then proceeds to unload the process' DLLs. This hard
+	// thread termination can result in dangling locked locks. Not an issue as
+	// the rule is "do not do anything multithreaded in DLL load/unload". And here
+	// we are, taking write locks during DLL unload which is, quite unsurprisingly,
+	// deadlocking. In reality tracking Windows' DLL unloads doesn't tell us
+	// anything due to how DLLs and processes' address spaces work. So we will...
+# if defined PLATFORM_WINDOWS
+	ZEN_UNUSED(ModuleBase);
+
+	return;
+# else
+
+	zen::RwLock::ExclusiveLockScope _(Lock);
+
+	uint32_t ModuleId = AddressToId(ModuleBase);
+	TArrayView<FModule> ModulesView(Modules, ModulesNum);
+	int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate());
+	if (Index >= ModulesNum)
+	{
+		return;
+	}
+
+	const FModule& Module = Modules[Index];
+	if (Module.Id != ModuleId)
+	{
+		return;
+	}
+
+# if BACKTRACE_DBGLVL >= 1
+	NumFpTruncations -= Module.NumFpTypes;
+	TotalFunctions -= Module.NumFunctions;
+# endif
+
+	// no code should be executing at this point so we can safely free the
+	// table knowing no one is looking at it.
+	Malloc->Free(Module.Functions);
+
+	for (int32_t i = Index; i < ModulesNum - 1; i++) // stop one short: reading Modules[ModulesNum] is OOB
+	{
+		Modules[i] = Modules[i + 1];
+	}
+
+	--ModulesNum;
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+const FBacktracer::FFunction*
+FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const
+{
+ // This function caches the previous module look up. The theory here is that
+ // a series of return address in a backtrace often cluster around one module
+
+ FIdPredicate IdPredicate;
+
+ // Look up the module that Address belongs to.
+ uint32_t AddressId = AddressToId(Address);
+ if ((AddressId - State.Module.Id) >= State.Module.IdSize)
+ {
+ auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate);
+
+ if (FindIt == Modules)
+ {
+ return nullptr;
+ }
+
+ State.Module = *--FindIt;
+ }
+
+ // Check that the address is within the address space of the best-found module
+ const FModule* Module = &(State.Module);
+ if ((AddressId - Module->Id) >= Module->IdSize)
+ {
+ return nullptr;
+ }
+
+ // Now we've a module we have a table of functions and their stack sizes so
+ // we can get the frame size for Address
+ uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id));
+ std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions);
+ auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate);
+ if (FindIt == begin(FuncsView))
+ {
+ return nullptr;
+ }
+
+ const FFunction* Function = &(*--FindIt);
+# if BACKTRACE_DBGLVL >= 2
+ if ((FuncId - Function->Id) >= Function->Size)
+ {
+ PLATFORM_BREAK();
+ return nullptr;
+ }
+# endif
+ return Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+{
+ FLookupState LookupState = {};
+ uint64_t Frames[256];
+
+ uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress;
+
+# if BACKTRACE_DBGLVL >= 3
+ uintptr_t TruthBacktrace[1024];
+ uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr);
+ uintptr_t* TruthCursor = TruthBacktrace;
+ for (; *TruthCursor != *StackPointer; ++TruthCursor)
+ ;
+# endif
+
+# if BACKTRACE_DBGLVL >= 2
+ struct
+ {
+ void* Sp;
+ void* Ip;
+ const FFunction* Function;
+ } Backtrace[1024] = {};
+ uint32_t NumBacktrace = 0;
+# endif
+
+ uint64_t BacktraceHash = 0;
+ uint32_t FrameIdx = 0;
+
+# if BACKTRACE_LOCK_FREE
+ // When running lock free, we defer the lock until a lock free function lookup fails
+ bool Locked = false;
+# else
+ FScopeLock _(&Lock);
+# endif
+ do
+ {
+ uintptr_t RetAddr = *StackPointer;
+
+ Frames[FrameIdx++] = RetAddr;
+
+		// This is a simple order-dependent LCG. Should be sufficient here.
+ BacktraceHash += RetAddr;
+ BacktraceHash *= 0x30be8efa499c249dull;
+
+# if BACKTRACE_LOCK_FREE
+ int32_t RspBias;
+ bool bIsAlreadyInTable;
+ FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable);
+ if (bIsAlreadyInTable)
+ {
+ if (RspBias < 0)
+ {
+ break;
+ }
+ else
+ {
+ StackPointer += RspBias;
+ continue;
+ }
+ }
+ if (!Locked)
+ {
+ Lock.AcquireExclusive();
+ Locked = true;
+
+ // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion
+ // when the allocation reenters the stack trace code. We need to break out of the recursion
+ // in that case, and let the allocation complete, with the assumption that we don't care
+ // about call stacks for internal allocations in the memory reporting system. The "Lock()"
+ // above will only fall through with this flag set if it's a second lock in the same thread.
+ if (bReentranceCheck)
+ {
+ break;
+ }
+ }
+# endif // BACKTRACE_LOCK_FREE
+
+ const FFunction* Function = LookupFunction(RetAddr, LookupState);
+ if (Function == nullptr)
+ {
+# if BACKTRACE_LOCK_FREE
+ // LookupFunction fails when modules are not yet registered. In this case, we do not want the address
+ // to be added to the lookup map, but to retry the lookup later when modules are properly registered.
+ if (GModulesAreInitialized)
+ {
+ bReentranceCheck = true;
+ auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
+ FunctionLookups.Emplace(RetAddr, -1);
+ }
+# endif
+ break;
+ }
+
+# if BACKTRACE_LOCK_FREE
+ {
+ // This conversion improves probing performance for the hash set. Additionally it is critical
+ // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map.
+ int32_t StoreBias = Function->RspBias < 0 ? -1 : Function->RspBias;
+ bReentranceCheck = true;
+ auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
+ FunctionLookups.Emplace(RetAddr, StoreBias);
+ }
+# endif
+
+# if BACKTRACE_DBGLVL >= 2
+ if (NumBacktrace < 1024)
+ {
+ Backtrace[NumBacktrace++] = {
+ StackPointer,
+ (void*)RetAddr,
+ Function,
+ };
+ }
+# endif
+
+ if (Function->RspBias < 0)
+ {
+ // This is a frame with a variable-sized stack pointer. We don't
+ // track enough information to proceed.
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations++;
+# endif
+ break;
+ }
+
+ StackPointer += Function->RspBias;
+ }
+	// Truncate callstacks longer than MaxStackDepth
+ while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames));
+
+ // Build the backtrace entry for submission
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ BacktraceEntry.Hash = BacktraceHash;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = Frames;
+
+# if BACKTRACE_DBGLVL >= 3
+ for (uint32_t i = 0; i < NumBacktrace; ++i)
+ {
+ if ((void*)TruthCursor[i] != Backtrace[i].Ip)
+ {
+ PLATFORM_BREAK();
+ break;
+ }
+ }
+# endif
+
+# if BACKTRACE_LOCK_FREE
+ if (Locked)
+ {
+ Lock.ReleaseExclusive();
+ }
+# endif
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+}
+}
+
+# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+ ////////////////////////////////////////////////////////////////////////////////
+ class FBacktracer
+ {
+ public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ inline uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+ uint32_t GetBacktraceId(uint64_t ReturnAddress);
+ void AddModule(uintptr_t Base, const char16_t* Name) {}
+ void RemoveModule(uintptr_t Base) {}
+
+ private:
+ static FBacktracer* Instance;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Instance = nullptr;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::~FBacktracer() {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Get() { return Instance; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+ {
+ const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress;
+ return GetBacktraceId(ReturnAddress);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress)
+ {
+# if !UE_BUILD_SHIPPING
+ uint64_t StackFrames[256];
+ int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames));
+ if (NumStackFrames > 0)
+ {
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ uint64_t BacktraceId = 0;
+ uint32_t FrameIdx = 0;
+ bool bUseAddress = false;
+ for (int32_t Index = 0; Index < NumStackFrames; Index++)
+ {
+ if (!bUseAddress)
+ {
+ // start using backtrace only after ReturnAddress
+ if (StackFrames[Index] == (uint64_t)ReturnAddress)
+ {
+ bUseAddress = true;
+ }
+ }
+ if (bUseAddress || NumStackFrames == 1)
+ {
+ uint64_t RetAddr = StackFrames[Index];
+ StackFrames[FrameIdx++] = RetAddr;
+
+			// This is a simple order-dependent LCG. Should be sufficient here.
+ BacktraceId += RetAddr;
+ BacktraceId *= 0x30be8efa499c249dull;
+ }
+ }
+
+ // Save the collected id
+ BacktraceEntry.Hash = BacktraceId;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = StackFrames;
+
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+ }
+# endif
+
+ return 0;
+ }
+
+}
+
+# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_CreateInternal(FMalloc* Malloc)
+{
+ if (FBacktracer::Get() != nullptr)
+ {
+ return;
+ }
+
+ // Allocate, construct and intentionally leak backtracer
+ void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer));
+ new (Alloc) FBacktracer(Malloc);
+
+ Modules_Create(Malloc);
+ Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) {
+ bLoad ? FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522
+ : FBacktracer::Get()->RemoveModule(uintptr_t(Module));
+ });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_InitializeInternal()
+{
+ Modules_Initialize();
+ GModulesAreInitialized = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+CallstackTrace_GetCurrentId()
+{
+ if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel))
+ {
+ return 0;
+ }
+
+ void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING();
+ if (FBacktracer* Instance = FBacktracer::Get())
+ {
+# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER
+ return Instance->GetBacktraceId(StackAddress);
+# else
+ return Instance->GetBacktraceId((uint64_t)StackAddress);
+# endif
+ }
+
+ return 0;
+}
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/memtrack/callstacktrace.h b/src/zencore/memtrack/callstacktrace.h
new file mode 100644
index 000000000..3e191490b
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.h
@@ -0,0 +1,151 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/trace.h>
+
+// Platform hooks for obtaining the caller's return address. On Windows,
+// callstack tracing uses the address OF the return-address slot (see
+// PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING below) so the stack can be
+// walked from there.
+#if ZEN_PLATFORM_WINDOWS
+#	include <intrin.h>
+
+#	define PLATFORM_RETURN_ADDRESS() _ReturnAddress()
+#	define PLATFORM_RETURN_ADDRESS_POINTER() _AddressOfReturnAddress()
+#	define PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING PLATFORM_RETURN_ADDRESS_POINTER
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Callstack tracing is opt-out: enabled whenever Trace itself is enabled and
+// the platform is supported (currently Windows only), unless explicitly
+// overridden by the build.
+#if !defined(UE_CALLSTACK_TRACE_ENABLED)
+#	if UE_TRACE_ENABLED
+#		if ZEN_PLATFORM_WINDOWS
+#			define UE_CALLSTACK_TRACE_ENABLED 1
+#		endif
+#	endif
+#endif
+
+#if !defined(UE_CALLSTACK_TRACE_ENABLED)
+#	define UE_CALLSTACK_TRACE_ENABLED 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+#if UE_CALLSTACK_TRACE_ENABLED
+
+# include "platformtls.h"
+
+namespace zen {
+
+/**
+ * Creates callstack tracing.
+ * @param Malloc Allocator instance to use.
+ */
+void CallstackTrace_Create(class FMalloc* Malloc);
+
+/**
+ * Initializes callstack tracing. On some platforms this has to be delayed due to initialization order.
+ */
+void CallstackTrace_Initialize();
+
+/**
+ * Capture the current callstack, and trace the definition if it has not already been encountered. The returned value
+ * can be used in trace events and be resolved in analysis.
+ * @return Unique id identifying the current callstack.
+ */
+uint32_t CallstackTrace_GetCurrentId();
+
+/**
+ * Scoped macro that limits callstack capture to just the caller's address
+ * instead of resolving the full callstack. Use it around calls into external
+ * libraries that are not compiled with frame pointers, where a full stack
+ * walk would crash.
+ */
+#	define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() FCallStackTraceLimitResolveScope PREPROCESSOR_JOIN(FCTLMScope, __LINE__)
+
+// TLS slot holding the per-thread fallback return-address override set by
+// FCallStackTraceLimitResolveScope (InvalidTlsSlot until tracing is created).
+extern uint32_t GCallStackTracingTlsSlotIndex;
+
+/**
+ * @return the fallback callstack address stored by an active
+ * FCallStackTraceLimitResolveScope on this thread, or nullptr when no scope
+ * is active (or the TLS slot has not been allocated yet).
+ */
+inline void*
+CallstackTrace_GetFallbackPlatformReturnAddressData()
+{
+	if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+		return FPlatformTLS::GetTlsValue(GCallStackTracingTlsSlotIndex);
+	else
+		return nullptr;
+}
+
+/**
+ * @return true when a full callstack resolve should be performed (i.e. no
+ * limit-resolve scope is active on this thread).
+ */
+inline bool
+CallstackTrace_ResolveFullCallStack()
+{
+	return CallstackTrace_GetFallbackPlatformReturnAddressData() == nullptr;
+}
+
+/*
+ * RAII scope that overrides callstack capture with just the caller's address.
+ * Constructor/destructor are forced non-inline so the captured return-address
+ * slot belongs to the code entering the scope.
+ */
+class FCallStackTraceLimitResolveScope
+{
+public:
+	ZEN_FORCENOINLINE FCallStackTraceLimitResolveScope()
+	{
+		// Publish this frame's return-address slot as the per-thread fallback.
+		if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+		{
+			FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING());
+		}
+	}
+
+	ZEN_FORCENOINLINE ~FCallStackTraceLimitResolveScope()
+	{
+		// Clear the override so subsequent captures resolve the full callstack.
+		if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+		{
+			FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, nullptr);
+		}
+	}
+};
+
+} // namespace zen
+
+#else // UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+// No-op fallbacks used when callstack tracing is compiled out
+// (UE_CALLSTACK_TRACE_ENABLED == 0). They keep call sites compiling without
+// per-site #ifdefs: ids are always 0, fallback addresses are null, and a full
+// resolve is (vacuously) always requested.
+
+inline void
+CallstackTrace_Create(class FMalloc* /*Malloc*/)
+{
+}
+
+inline void
+CallstackTrace_Initialize()
+{
+}
+
+inline uint32_t
+CallstackTrace_GetCurrentId()
+{
+	return 0;
+}
+
+inline void*
+CallstackTrace_GetCurrentReturnAddressData()
+{
+	return nullptr;
+}
+
+inline void*
+CallstackTrace_GetFallbackPlatformReturnAddressData()
+{
+	return nullptr;
+}
+
+inline bool
+CallstackTrace_ResolveFullCallStack()
+{
+	return true;
+}
+
+// Expands to nothing; the scope type only exists in the enabled build.
+#	define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE()
+
+} // namespace zen
+
+#endif // UE_CALLSTACK_TRACE_ENABLED
diff --git a/src/zencore/memtrack/growonlylockfreehash.h b/src/zencore/memtrack/growonlylockfreehash.h
new file mode 100644
index 000000000..d6ff4fc32
--- /dev/null
+++ b/src/zencore/memtrack/growonlylockfreehash.h
@@ -0,0 +1,255 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/intmath.h>
+#include <zencore/thread.h>
+
+#include <zencore/memory/fmalloc.h>
+
+#include <atomic>
+
+namespace zen {
+
+// Hash table with fast lock free reads, that only supports insertion of items, and no modification of
+// values. KeyType must be an integer. EntryType should be a POD with an identifiable "empty" state
+// that can't occur in the table, and include the following member functions:
+//
+// KeyType GetKey() const; // Get the key from EntryType
+// ValueType GetValue() const; // Get the value from EntryType
+// bool IsEmpty() const; // Query whether EntryType is empty
+// void SetKeyValue(KeyType Key, ValueType Value); // Write key and value into EntryType (ATOMICALLY! See below)
+// static uint32 KeyHash(KeyType Key); // Convert Key to more well distributed hash
+// static void ClearEntries(EntryType* Entries, int32 EntryCount); // Fill an array of entries with empty values
+//
+// The function "SetKeyValue" must be multi-thread safe when writing new items! This means writing the
+// Key last and atomically, or writing the entire EntryType in a single write (say if the key and value
+// are packed into a single integer word). Inline is recommended, since these functions are called a
+// lot in the inner loop of the algorithm. A simple implementation of "KeyHash" can just return the
+// Key (if it's already reasonable as a hash), or mix the bits if better distribution is required. A
+// simple implementation of "ClearEntries" can just be a memset, if zero represents an empty entry.
+//
+// A set can be approximated by making "GetValue" a nop function, and just paying attention to the bool
+// result from FindEntry, although you do need to either reserve a certain Key as invalid, or add
+// space to store a valid flag as the Value. This class should only be used for small value types, as
+// the values are embedded into the hash table, and not stored separately.
+//
+// Writes are implemented using a lock -- it would be possible to make writes lock free (or lock free
+// when resizing doesn't occur), but it adds complexity. If we were to go that route, it would make
+// sense to create a fully generic lock free set, which would be much more involved to implement and
+// validate than this simple class, and might also offer somewhat worse read perf. Lock free containers
+// that support item removal either need additional synchronization overhead on readers, so writers can
+// tell if a reader is active and spin, or need graveyard markers and a garbage collection pass called
+// periodically, which makes it no longer a simple standalone container.
+//
+// Lock free reads are accomplished by the reader atomically pulling the hash table pointer from the
+// class. The hash table is self contained, with its size stored in the table itself, and hash tables
+// are not freed until the class's destruction. So if the table needs to be reallocated due to a write,
+// active readers will still have valid memory. This does mean that tables leak, but worst case, you
+// end up with half of the memory being waste. It would be possible to garbage collect the excess
+// tables, but you'd need some kind of global synchronization to make sure no readers are active.
+//
+// Besides cleanup of wasted tables, it might be useful to provide a function to clear a table. This
+// would involve clearing the Key for all the elements in the table (but leaving the memory allocated),
+// and can be done safely with active readers. It's not possible to safely remove individual items due
+// to the need to potentially move other items, which would break an active reader that has already
+// searched past a moved item. But in the case of removing all items, we don't care when a reader fails,
+// it's expected that eventually all readers will fail, regardless of where they are searching. A clear
+// function could be useful if a lot of the data you are caching is no longer used, and you want to
+// reset the cache.
+//
+template<typename EntryType, typename KeyType, typename ValueType>
+class TGrowOnlyLockFreeHash
+{
+public:
+	// Table memory is owned via the supplied allocator; no table exists until
+	// Reserve() or the first Emplace().
+	TGrowOnlyLockFreeHash(FMalloc* InMalloc) : Malloc(InMalloc), HashTable(nullptr) {}
+
+	// Frees every table ever allocated. Superseded tables are chained via
+	// FHashHeader::Next precisely so they stay valid for lock-free readers
+	// until this point (see class comment above).
+	~TGrowOnlyLockFreeHash()
+	{
+		FHashHeader* HashTableNext;
+		for (FHashHeader* HashTableCurrent = HashTable; HashTableCurrent; HashTableCurrent = HashTableNext)
+		{
+			HashTableNext = HashTableCurrent->Next;
+
+			Malloc->Free(HashTableCurrent);
+		}
+	}
+
+	/**
+	 * Preallocate the hash table to a certain size
+	 * @param Count - Number of EntryType elements to allocate
+	 * @warning Can only be called once, and only before any items have been added!
+	 */
+	void Reserve(uint32_t Count)
+	{
+		zen::RwLock::ExclusiveLockScope _(WriteCriticalSection);
+		ZEN_ASSERT(HashTable.load(std::memory_order_relaxed) == nullptr);
+
+		// NOTE(review): Count is unsigned, so "Count <= 0" only catches 0 —
+		// presumably intended purely as a zero guard; confirm.
+		if (Count <= 0)
+		{
+			Count = DEFAULT_INITIAL_SIZE;
+		}
+		// Round up to a power of two so masking can replace modulo.
+		Count = uint32_t(zen::NextPow2(Count));
+		// (Count - 1) because FHashHeader already embeds one EntryType.
+		FHashHeader* HashTableLocal = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (Count - 1) * sizeof(EntryType));
+
+		HashTableLocal->Next = nullptr;
+		HashTableLocal->TableSize = Count;
+		HashTableLocal->Used = 0;
+		EntryType::ClearEntries(HashTableLocal->Elements, Count);
+
+		// Release pairs with the acquire load in Find(): readers that see the
+		// pointer also see the fully initialized table.
+		HashTable.store(HashTableLocal, std::memory_order_release);
+	}
+
+	/**
+	 * Find an entry in the hash table (lock free read path)
+	 * @param Key - Key to search for
+	 * @param OutValue - Memory location to write result value to. Left unmodified if Key isn't found.
+	 * @param bIsAlreadyInTable - Optional result for whether key was found in table.
+	 */
+	void Find(KeyType Key, ValueType* OutValue, bool* bIsAlreadyInTable = nullptr) const
+	{
+		FHashHeader* HashTableLocal = HashTable.load(std::memory_order_acquire);
+		if (HashTableLocal)
+		{
+			uint32_t TableMask = HashTableLocal->TableSize - 1;
+
+			// Linear probing; an empty slot terminates the probe chain, which
+			// is why entries can never be individually removed.
+			for (uint32_t TableIndex = EntryType::KeyHash(Key) & TableMask; !HashTableLocal->Elements[TableIndex].IsEmpty();
+				 TableIndex = (TableIndex + 1) & TableMask)
+			{
+				if (HashTableLocal->Elements[TableIndex].GetKey() == Key)
+				{
+					if (OutValue)
+					{
+						*OutValue = HashTableLocal->Elements[TableIndex].GetValue();
+					}
+					if (bIsAlreadyInTable)
+					{
+						*bIsAlreadyInTable = true;
+					}
+					return;
+				}
+			}
+		}
+
+		if (bIsAlreadyInTable)
+		{
+			*bIsAlreadyInTable = false;
+		}
+	}
+
+	/**
+	 * Add an entry with the given Key to the hash table, will do nothing if the item already exists
+	 * @param Key - Key to add
+	 * @param Value - Value to add for key
+	 * @param bIsAlreadyInTable -- Optional result for whether item was already in table
+	 */
+	void Emplace(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr)
+	{
+		zen::RwLock::ExclusiveLockScope _(WriteCriticalSection);
+
+		// After locking, check if the item is already in the hash table.
+		// (Double-checked: FindOrAdd already did a lock-free check, but a
+		// concurrent writer may have inserted the key since then.)
+		ValueType ValueIgnore;
+		bool bFindResult;
+		Find(Key, &ValueIgnore, &bFindResult);
+		if (bFindResult == true)
+		{
+			if (bIsAlreadyInTable)
+			{
+				*bIsAlreadyInTable = true;
+			}
+			return;
+		}
+
+		// Check if there is space in the hash table for a new item. We resize when the hash
+		// table gets half full or more. @todo: allow client to specify max load factor?
+		FHashHeader* HashTableLocal = HashTable;
+
+		if (!HashTableLocal || (HashTableLocal->Used >= HashTableLocal->TableSize / 2))
+		{
+			// NOTE(review): TableSize * 2 is computed in int32_t — fine for the
+			// sizes used here, but would overflow past 2^30 entries.
+			int32_t GrowCount = HashTableLocal ? HashTableLocal->TableSize * 2 : DEFAULT_INITIAL_SIZE;
+			FHashHeader* HashTableGrow = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (GrowCount - 1) * sizeof(EntryType));
+
+			// Old table is chained, not freed: active readers may still hold it.
+			HashTableGrow->Next = HashTableLocal;
+			HashTableGrow->TableSize = GrowCount;
+			HashTableGrow->Used = 0;
+			EntryType::ClearEntries(HashTableGrow->Elements, GrowCount);
+
+			if (HashTableLocal)
+			{
+				// Copy existing elements from the old table to the new table
+				for (int32_t TableIndex = 0; TableIndex < HashTableLocal->TableSize; TableIndex++)
+				{
+					EntryType& Entry = HashTableLocal->Elements[TableIndex];
+					if (!Entry.IsEmpty())
+					{
+						HashInsertInternal(HashTableGrow, Entry.GetKey(), Entry.GetValue());
+					}
+				}
+			}
+
+			HashTableLocal = HashTableGrow;
+			// Publish the fully built table (release pairs with Find's acquire).
+			HashTable.store(HashTableGrow, std::memory_order_release);
+		}
+
+		// Then add our new item
+		HashInsertInternal(HashTableLocal, Key, Value);
+
+		if (bIsAlreadyInTable)
+		{
+			*bIsAlreadyInTable = false;
+		}
+	}
+
+	// Lock-free fast path: only takes the write lock (via Emplace) when the
+	// key is not already present.
+	void FindOrAdd(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr)
+	{
+		// Attempt to find the item lock free, before calling "Emplace", which locks the container
+		bool bFindResult;
+		ValueType IgnoreResult;
+		Find(Key, &IgnoreResult, &bFindResult);
+		if (bFindResult)
+		{
+			if (bIsAlreadyInTable)
+			{
+				*bIsAlreadyInTable = true;
+			}
+			return;
+		}
+
+		Emplace(Key, Value, bIsAlreadyInTable);
+	}
+
+private:
+	struct FHashHeader
+	{
+		FHashHeader* Next; // Old buffers are stored in a linked list for cleanup
+		int32_t TableSize; // Always a power of two (enables mask-based probing)
+		int32_t Used;      // Number of occupied entries; grow at >= TableSize/2
+		EntryType Elements[1]; // Variable sized
+	};
+
+	FMalloc* Malloc;
+	std::atomic<FHashHeader*> HashTable;
+	zen::RwLock WriteCriticalSection; // Serializes all writers; readers never lock
+
+	static constexpr int32_t DEFAULT_INITIAL_SIZE = 1024;
+
+	// Insert into a specific table. Caller must hold the write lock and
+	// guarantee a free slot exists (ensured by the grow-at-half-full policy),
+	// otherwise the probe loop would never terminate.
+	static void HashInsertInternal(FHashHeader* HashTableLocal, KeyType Key, ValueType Value)
+	{
+		int32_t TableMask = HashTableLocal->TableSize - 1;
+
+		// Linear probing
+		for (int32_t TableIndex = EntryType::KeyHash(Key) & TableMask;; TableIndex = (TableIndex + 1) & TableMask)
+		{
+			if (HashTableLocal->Elements[TableIndex].IsEmpty())
+			{
+				// SetKeyValue must publish the key atomically/last (see class
+				// comment) so concurrent readers never see a torn entry.
+				HashTableLocal->Elements[TableIndex].SetKeyValue(Key, Value);
+				HashTableLocal->Used++;
+				break;
+			}
+		}
+	}
+};
+
+} // namespace zen
diff --git a/src/zencore/memtrack/memorytrace.cpp b/src/zencore/memtrack/memorytrace.cpp
new file mode 100644
index 000000000..b147aee91
--- /dev/null
+++ b/src/zencore/memtrack/memorytrace.cpp
@@ -0,0 +1,829 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "callstacktrace.h"
+#include "tracemalloc.h"
+#include "vatrace.h"
+
+#include <zencore/commandline.h>
+#include <zencore/enumflags.h>
+#include <zencore/guardvalue.h>
+#include <zencore/intmath.h>
+#include <zencore/string.h>
+#include <zencore/trace.h>
+
+#include <string.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <shellapi.h>
+#endif
+
+// Forward declaration only; the allocator is passed around by pointer here.
+class FMalloc;
+
+#if UE_TRACE_ENABLED
+namespace zen {
+// Channel gating all memory trace events below (enabled by default).
+UE_TRACE_CHANNEL_DEFINE(MemAllocChannel, "Memory allocations", true)
+}
+#endif
+
+#if UE_MEMORY_TRACE_ENABLED
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace zen {
+
+// Implemented in tagtrace.cpp / below; declared here to avoid extra headers.
+void MemoryTrace_InitTags(FMalloc*);
+void MemoryTrace_EnableTracePump();
+
+} // namespace zen
+
+////////////////////////////////////////////////////////////////////////////////
+namespace {
+// File-local tracing state.
+
+// Mask controlling how often time markers are emitted: one marker every
+// MarkerSamplePeriod + 1 (= 4096) traced memory events.
+constexpr uint32_t MarkerSamplePeriod = (4 << 10) - 1;
+
+// Number of shifted bits to SizeLower
+constexpr uint32_t SizeShift = 3;
+
+// Counter to track when time marker is emitted
+std::atomic<uint32_t> GMarkerCounter(0);
+
+// If enabled also pumps the Trace system itself. Used on process shutdown
+// when worker thread has been killed, but memory events still occurs.
+bool GDoPumpTrace;
+
+// Temporarily disables any internal operation that causes allocations. Used to
+// avoid recursive behaviour when memory tracing needs to allocate memory through
+// TraceMalloc.
+thread_local bool GDoNotAllocateInTrace;
+
+// Set on initialization; on some platforms we hook allocator functions very early
+// before Trace has the ability to allocate memory.
+bool GTraceAllowed;
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+// Forward declaration of the TraceLog pump so we can flush events during
+// shutdown (see MemoryTrace_UpdateInternal) without extra includes.
+namespace UE { namespace Trace {
+	TRACELOG_API void Update();
+}} // namespace UE::Trace
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+// Emitted once at startup; describes the encoding parameters (size shift,
+// minimum alignment, marker period) analysis needs to decode later events.
+UE_TRACE_EVENT_BEGIN(Memory, Init, NoSync | Important)
+	UE_TRACE_EVENT_FIELD(uint64_t, PageSize) // new in UE 5.5
+	UE_TRACE_EVENT_FIELD(uint32_t, MarkerPeriod)
+	UE_TRACE_EVENT_FIELD(uint8, Version)
+	UE_TRACE_EVENT_FIELD(uint8, MinAlignment)
+	UE_TRACE_EVENT_FIELD(uint8, SizeShift)
+UE_TRACE_EVENT_END()
+
+// Periodic timestamp marker, emitted every MarkerPeriod memory events so
+// analysis can place allocations on a timeline.
+UE_TRACE_EVENT_BEGIN(Memory, Marker)
+	UE_TRACE_EVENT_FIELD(uint64_t, Cycle)
+UE_TRACE_EVENT_END()
+
+// Alloc/Free variants: the System/Video forms omit the RootHeap field since
+// the heap is implied by the event type; the generic forms carry it.
+// AlignmentPow2_SizeLower packs log2(alignment) with the low SizeShift bits
+// of the size (the Size field holds the remaining high bits).
+UE_TRACE_EVENT_BEGIN(Memory, Alloc)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint32_t, Size)
+	UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+	UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, AllocSystem)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint32_t, Size)
+	UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, AllocVideo)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint32_t, Size)
+	UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Free)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, FreeSystem)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, FreeVideo)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+// Realloc events are traced as a free/alloc pair so analysis can track the
+// allocation's identity across address changes.
+UE_TRACE_EVENT_BEGIN(Memory, ReallocAlloc)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint32_t, Size)
+	UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+	UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocAllocSystem)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint32_t, Size)
+	UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocFree)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocFreeSystem)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemorySwapOp)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address) // page fault real address
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint32_t, CompressedSize)
+	UE_TRACE_EVENT_FIELD(uint8, SwapOp)
+UE_TRACE_EVENT_END()
+
+// Heap hierarchy: HeapSpec declares a heap, HeapMark/UnmarkAlloc move an
+// existing allocation in and out of a (non-root) heap.
+UE_TRACE_EVENT_BEGIN(Memory, HeapSpec, NoSync | Important)
+	UE_TRACE_EVENT_FIELD(HeapId, Id)
+	UE_TRACE_EVENT_FIELD(HeapId, ParentId)
+	UE_TRACE_EVENT_FIELD(uint16, Flags)
+	UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapMarkAlloc)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(uint16, Flags)
+	UE_TRACE_EVENT_FIELD(HeapId, Heap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapUnmarkAlloc)
+	UE_TRACE_EVENT_FIELD(uint64_t, Address)
+	UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+	UE_TRACE_EVENT_FIELD(HeapId, Heap)
+UE_TRACE_EVENT_END()
+
+// If the layout of the above events is changed, bump this version number.
+// version 1: Initial version (UE 5.0, UE 5.1)
+// version 2: Added CallstackId for Free events and also for HeapMarkAlloc, HeapUnmarkAlloc events (UE 5.2).
+constexpr uint8 MemoryTraceVersion = 2;
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocator decorator that forwards all operations to an inner FMalloc while
+// emitting MemoryTrace_* events for each alloc/realloc/free.
+class FMallocWrapper : public FMalloc
+{
+public:
+	FMallocWrapper(FMalloc* InMalloc);
+
+private:
+	// Packed per-allocation metadata layout (16-bit tag, 8-bit bias, 40-bit
+	// size). NOTE(review): not referenced by the methods visible in this file;
+	// presumably kept for layout documentation or platform variants — confirm.
+	struct FCookie
+	{
+		uint64_t Tag : 16;
+		uint64_t Bias : 8;
+		uint64_t Size : 40;
+	};
+
+	// Computes the alignment the inner allocator effectively applies when the
+	// caller passes a smaller/zero alignment (see definition below).
+	static uint32_t GetActualAlignment(SIZE_T Size, uint32_t Alignment);
+
+	virtual void* Malloc(SIZE_T Size, uint32_t Alignment) override;
+	virtual void* Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) override;
+	virtual void Free(void* Address) override;
+	// Pure pass-throughs; no trace events needed for queries/notifications.
+	virtual bool GetAllocationSize(void* Address, SIZE_T& SizeOut) override { return InnerMalloc->GetAllocationSize(Address, SizeOut); }
+	virtual void OnMallocInitialized() override { InnerMalloc->OnMallocInitialized(); }
+
+	FMalloc* InnerMalloc; // The wrapped allocator; never null after construction
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Wraps the given allocator; all work happens in the overridden operations.
+FMallocWrapper::FMallocWrapper(FMalloc* InMalloc) : InnerMalloc(InMalloc)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the alignment that will actually be used for a request: the default
+// (8 for sizes < 16, else 16) unless the caller asked for more. This is what
+// gets recorded in the trace, not the caller's raw Alignment argument.
+uint32_t
+FMallocWrapper::GetActualAlignment(SIZE_T Size, uint32_t Alignment)
+{
+	// Defaults; if size is < 16 then alignment is 8 else 16.
+	uint32_t DefaultAlignment = 8 << uint32_t(Size >= 16);
+	return (Alignment < DefaultAlignment) ? DefaultAlignment : Alignment;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocates via the inner allocator and traces the allocation. Note the inner
+// call receives the caller's original Alignment while the trace records the
+// effective (actual) alignment.
+void*
+FMallocWrapper::Malloc(SIZE_T Size, uint32_t Alignment)
+{
+	uint32_t ActualAlignment = GetActualAlignment(Size, Alignment);
+	void* Address = InnerMalloc->Malloc(Size, Alignment);
+
+	MemoryTrace_Alloc((uint64_t)Address, Size, ActualAlignment);
+
+	return Address;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Reallocates via the inner allocator, tracing the operation as a paired
+// ReallocFree/ReallocAlloc. The free side is traced BEFORE the inner realloc
+// so the trace never shows two live allocations at the old/new addresses.
+void*
+FMallocWrapper::Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment)
+{
+	// This simplifies things and means reallocs trace events are true reallocs
+	if (PrevAddress == nullptr)
+	{
+		return Malloc(NewSize, Alignment);
+	}
+
+	MemoryTrace_ReallocFree((uint64_t)PrevAddress);
+
+	void* RetAddress = InnerMalloc->Realloc(PrevAddress, NewSize, Alignment);
+
+	Alignment = GetActualAlignment(NewSize, Alignment);
+	MemoryTrace_ReallocAlloc((uint64_t)RetAddress, NewSize, Alignment);
+
+	return RetAddress;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Traces the free (before the memory is actually released, so the address is
+// still uniquely owned) and forwards to the inner allocator. Null is a no-op.
+void
+FMallocWrapper::Free(void* Address)
+{
+	if (Address == nullptr)
+	{
+		return;
+	}
+
+	MemoryTrace_Free((uint64_t)Address);
+
+	// Pass-through copy of the pointer; the inner allocator receives the
+	// caller's address unchanged.
+	void* InnerAddress = Address;
+
+	return InnerMalloc->Free(InnerAddress);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Holder that placement-constructs a T in an embedded buffer and never runs
+// its destructor. Used for process-lifetime singletons that must survive
+// static destruction order (trace events can arrive during shutdown).
+// Instances are expected to have static storage so Buffer/bIsConstructed are
+// zero-initialized before Construct() is called.
+template<class T>
+class alignas(alignof(T)) FUndestructed
+{
+public:
+	// Constructs the T in place. NOTE(review): Args are taken by value rather
+	// than perfectly forwarded — fine for the pointer arguments used in this
+	// file; confirm before reusing with move-only or heavy types.
+	template<typename... ArgTypes>
+	void Construct(ArgTypes... Args)
+	{
+		::new (Buffer) T(Args...);
+		bIsConstructed = true;
+	}
+
+	bool IsConstructed() const { return bIsConstructed; }
+
+	// Pointer-like access to the embedded object (valid only after Construct).
+	T* operator&() { return (T*)Buffer; }
+	T* operator->() { return (T*)Buffer; }
+
+protected:
+	uint8 Buffer[sizeof(T)];
+	bool bIsConstructed;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocator used for tracing's own allocations (tags, callstacks) so tracker
+// overhead can be separated from application memory; intentionally never
+// destructed (see FUndestructed).
+static FUndestructed<FTraceMalloc> GTraceMalloc;
+
+////////////////////////////////////////////////////////////////////////////////
+// Determines which memory-trace features to enable by scanning the raw
+// process command line for "--trace=" options. Runs before normal command
+// line parsing (see note below), hence the manual iteration.
+// @return Bitwise OR of the requested EMemoryTraceInit flags (Disabled if none).
+static EMemoryTraceInit
+MemoryTrace_ShouldEnable()
+{
+	EMemoryTraceInit Mode = EMemoryTraceInit::Disabled;
+
+	// Process any command line trace options
+	//
+	// Note that calls can come into this function before we enter the regular main function
+	// and we can therefore not rely on the regular command line parsing for the application
+
+	using namespace std::literals;
+
+	// Maps one comma-separated token after "--trace=" onto its feature flags.
+	auto ProcessTraceArg = [&](const std::string_view& Arg) {
+		if (Arg == "memalloc"sv)
+		{
+			Mode |= EMemoryTraceInit::AllocEvents;
+		}
+		else if (Arg == "callstack"sv)
+		{
+			Mode |= EMemoryTraceInit::Callstacks;
+		}
+		else if (Arg == "memtag"sv)
+		{
+			Mode |= EMemoryTraceInit::Tags;
+		}
+		else if (Arg == "memory"sv)
+		{
+			Mode |= EMemoryTraceInit::Full;
+		}
+		else if (Arg == "memory_light"sv)
+		{
+			Mode |= EMemoryTraceInit::Light;
+		}
+	};
+
+	constexpr std::string_view TraceOption = "--trace="sv;
+
+	std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+		if (Arg.starts_with(TraceOption))
+		{
+			const std::string_view OptionArgs = Arg.substr(TraceOption.size());
+
+			IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg);
+		}
+	};
+
+	IterateCommandlineArgs(ProcessArg);
+
+	return Mode;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Sets up memory tracing for the requested mode. Returns the allocator the
+// application should use from now on: either InMalloc unchanged (tracing of
+// alloc events not requested) or a tracing FMallocWrapper around it.
+FMalloc*
+MemoryTrace_CreateInternal(FMalloc* InMalloc, EMemoryTraceInit Mode)
+{
+	using namespace zen;
+
+	// If allocation events are not desired we don't need to do anything, even
+	// if user has enabled only callstacks it will be enabled later.
+	if (!EnumHasAnyFlags(Mode, EMemoryTraceInit::AllocEvents))
+	{
+		return InMalloc;
+	}
+
+	// Some OSes (i.e. Windows) will terminate all threads except the main
+	// one as part of static deinit. However we may receive more memory
+	// trace events that would get lost as Trace's worker thread has been
+	// terminated. So to flush the last remaining memory events trace needs
+	// to be updated, which we will do in response to memory events.
+	// We'll use an atexit call to know when Trace is probably no longer
+	// getting ticked.
+	atexit([]() { MemoryTrace_EnableTracePump(); });
+
+	GTraceMalloc.Construct(InMalloc);
+
+	// Both tag and callstack tracing need to use the wrapped trace malloc
+	// so we can break out tracing memory overhead (and not cause recursive behaviour).
+	if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Tags))
+	{
+		MemoryTrace_InitTags(&GTraceMalloc);
+	}
+
+	if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Callstacks))
+	{
+		CallstackTrace_Create(&GTraceMalloc);
+	}
+
+	// The wrapper is intentionally never destructed: frees during static
+	// deinit must still be traceable.
+	static FUndestructed<FMallocWrapper> SMallocWrapper;
+	SMallocWrapper.Construct(InMalloc);
+
+	return &SMallocWrapper;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convenience overload: derives the mode from the process command line.
+FMalloc*
+MemoryTrace_CreateInternal(FMalloc* InMalloc)
+{
+	const EMemoryTraceInit Mode = MemoryTrace_ShouldEnable();
+	return MemoryTrace_CreateInternal(InMalloc, Mode);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Public entry point: creates memory tracing and returns the allocator to
+// use. Only installs the Win32 virtual-memory API hooks when tracing was
+// actually enabled (i.e. the returned allocator is the wrapper).
+FMalloc*
+MemoryTrace_Create(FMalloc* InMalloc)
+{
+	FMalloc* OutMalloc = MemoryTrace_CreateInternal(InMalloc);
+
+	if (OutMalloc != InMalloc)
+	{
+#	if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+		FVirtualWinApiHooks::Initialize(false);
+#	endif
+	}
+
+	return OutMalloc;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Marks tracing as active and emits the one-time Init event describing the
+// encoding (version, size shift, minimum alignment, marker period) plus the
+// two reserved root heap specs. Until this runs, all MemoryTrace_* calls are
+// no-ops (GTraceAllowed is false).
+void
+MemoryTrace_Initialize()
+{
+	// At this point we initialized the system to allow tracing.
+	GTraceAllowed = true;
+
+	const int MIN_ALIGNMENT = 8;
+
+	UE_TRACE_LOG(Memory, Init, MemAllocChannel)
+		<< Init.PageSize(4096) << Init.MarkerPeriod(MarkerSamplePeriod + 1) << Init.Version(MemoryTraceVersion)
+		<< Init.MinAlignment(uint8(MIN_ALIGNMENT)) << Init.SizeShift(uint8(SizeShift));
+
+	// Root heaps must land on their reserved well-known ids.
+	const HeapId SystemRootHeap = MemoryTrace_RootHeapSpec(u"System memory");
+	ZEN_ASSERT(SystemRootHeap == EMemoryTraceRootHeap::SystemMemory);
+	const HeapId VideoRootHeap = MemoryTrace_RootHeapSpec(u"Video memory");
+	ZEN_ASSERT(VideoRootHeap == EMemoryTraceRootHeap::VideoMemory);
+
+	// The low SizeShift bits of a size are packed next to the alignment; they
+	// must fit below the minimum alignment.
+	static_assert((1 << SizeShift) - 1 <= MIN_ALIGNMENT, "Not enough bits to pack size fields");
+
+#	if !UE_MEMORY_TRACE_LATE_INIT
+	// On some platforms callstack initialization cannot happen this early in the process. It is initialized
+	// in other locations when UE_MEMORY_TRACE_LATE_INIT is defined. Until that point allocations cannot have
+	// callstacks.
+	CallstackTrace_Initialize();
+#	endif
+}
+
+// Stops all further memory trace activity (MemoryTrace_* calls become no-ops).
+void
+MemoryTrace_Shutdown()
+{
+	// Disable any further activity
+	GTraceAllowed = false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// @return true between MemoryTrace_Initialize() and MemoryTrace_Shutdown().
+bool
+MemoryTrace_IsActive()
+{
+	return GTraceAllowed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// From now on, every traced memory event also pumps UE::Trace::Update().
+// Used at process exit once Trace's worker thread is gone (see the atexit
+// registration in MemoryTrace_CreateInternal).
+void
+MemoryTrace_EnableTracePump()
+{
+	GDoPumpTrace = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Per-event housekeeping: emits a timestamp Marker every
+// MarkerSamplePeriod + 1 events, and pumps Trace itself when the shutdown
+// pump has been enabled.
+void
+MemoryTrace_UpdateInternal()
+{
+	const uint32_t TheCount = GMarkerCounter.fetch_add(1, std::memory_order_relaxed);
+	if ((TheCount & MarkerSamplePeriod) == 0)
+	{
+		UE_TRACE_LOG(Memory, Marker, MemAllocChannel) << Marker.Cycle(UE::Trace::Private::TimeGetTimestamp());
+	}
+
+	if (GDoPumpTrace)
+	{
+		UE::Trace::Update();
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Traces an allocation on the given root heap. The size is split: its low
+// SizeShift bits are packed with log2(alignment) into one byte, the high bits
+// go in the 32-bit Size field. Callstack capture is skipped while tracing is
+// doing its own internal allocations (GDoNotAllocateInTrace) unless the
+// caller supplied an id.
+void
+MemoryTrace_Alloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+	if (!GTraceAllowed)
+	{
+		return;
+	}
+
+	ZEN_ASSERT_SLOW(RootHeap < 16);
+
+	const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+	const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+	const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+	// System/Video heaps use dedicated events without a RootHeap field.
+	switch (RootHeap)
+	{
+		case EMemoryTraceRootHeap::SystemMemory:
+		{
+			UE_TRACE_LOG(Memory, AllocSystem, MemAllocChannel)
+				<< AllocSystem.Address(uint64_t(Address)) << AllocSystem.CallstackId(CallstackId)
+				<< AllocSystem.Size(uint32_t(Size >> SizeShift)) << AllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+			break;
+		}
+
+		case EMemoryTraceRootHeap::VideoMemory:
+		{
+			UE_TRACE_LOG(Memory, AllocVideo, MemAllocChannel)
+				<< AllocVideo.Address(uint64_t(Address)) << AllocVideo.CallstackId(CallstackId)
+				<< AllocVideo.Size(uint32_t(Size >> SizeShift)) << AllocVideo.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+			break;
+		}
+
+		default:
+		{
+			UE_TRACE_LOG(Memory, Alloc, MemAllocChannel)
+				<< Alloc.Address(uint64_t(Address)) << Alloc.CallstackId(CallstackId) << Alloc.Size(uint32_t(Size >> SizeShift))
+				<< Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(RootHeap));
+			break;
+		}
+	}
+
+	MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Traces a free on the given root heap. Callstack id resolution follows the
+// same rules as MemoryTrace_Alloc (explicit id > suppressed during internal
+// allocations > captured).
+void
+MemoryTrace_Free(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+	if (!GTraceAllowed)
+	{
+		return;
+	}
+
+	ZEN_ASSERT_SLOW(RootHeap < 16);
+
+	const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+	switch (RootHeap)
+	{
+		case EMemoryTraceRootHeap::SystemMemory:
+		{
+			UE_TRACE_LOG(Memory, FreeSystem, MemAllocChannel)
+				<< FreeSystem.Address(uint64_t(Address)) << FreeSystem.CallstackId(CallstackId);
+			break;
+		}
+		case EMemoryTraceRootHeap::VideoMemory:
+		{
+			UE_TRACE_LOG(Memory, FreeVideo, MemAllocChannel)
+				<< FreeVideo.Address(uint64_t(Address)) << FreeVideo.CallstackId(CallstackId);
+			break;
+		}
+		default:
+		{
+			UE_TRACE_LOG(Memory, Free, MemAllocChannel)
+				<< Free.Address(uint64_t(Address)) << Free.CallstackId(CallstackId) << Free.RootHeap(uint8(RootHeap));
+			break;
+		}
+	}
+
+	MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Traces the alloc half of a realloc (same field packing as
+// MemoryTrace_Alloc). Note there is no dedicated Video variant: video-heap
+// reallocs fall through to the generic event carrying RootHeap.
+void
+MemoryTrace_ReallocAlloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+	if (!GTraceAllowed)
+	{
+		return;
+	}
+
+	ZEN_ASSERT_SLOW(RootHeap < 16);
+
+	const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+	const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+	const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+	switch (RootHeap)
+	{
+		case EMemoryTraceRootHeap::SystemMemory:
+		{
+			UE_TRACE_LOG(Memory, ReallocAllocSystem, MemAllocChannel)
+				<< ReallocAllocSystem.Address(uint64_t(Address)) << ReallocAllocSystem.CallstackId(CallstackId)
+				<< ReallocAllocSystem.Size(uint32_t(Size >> SizeShift))
+				<< ReallocAllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+			break;
+		}
+
+		default:
+		{
+			UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
+				<< ReallocAlloc.Address(uint64_t(Address)) << ReallocAlloc.CallstackId(CallstackId)
+				<< ReallocAlloc.Size(uint32_t(Size >> SizeShift)) << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
+				<< ReallocAlloc.RootHeap(uint8(RootHeap));
+			break;
+		}
+	}
+
+	MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, ReallocFreeSystem, MemAllocChannel)
+ << ReallocFreeSystem.Address(uint64_t(Address)) << ReallocFreeSystem.CallstackId(CallstackId);
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
+ << ReallocFree.Address(uint64_t(Address)) << ReallocFree.CallstackId(CallstackId)
+ << ReallocFree.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize, uint32_t CallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ UE_TRACE_LOG(Memory, MemorySwapOp, MemAllocChannel)
+ << MemorySwapOp.Address(PageAddress) << MemorySwapOp.CallstackId(CallstackId) << MemorySwapOp.CompressedSize(CompressedSize)
+ << MemorySwapOp.SwapOp((uint8)SwapOperation);
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+HeapId
+MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags)
+{
+ if (!GTraceAllowed)
+ {
+ return 0;
+ }
+
+ static std::atomic<HeapId> HeapIdCount(EMemoryTraceRootHeap::EndReserved + 1); // Reserve indexes for root heaps
+ const HeapId Id = HeapIdCount.fetch_add(1);
+ const uint32_t NameLen = uint32_t(zen::StringLength(Name));
+ const uint32_t DataSize = NameLen * sizeof(char16_t);
+ ZEN_ASSERT(ParentId < Id);
+
+ UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize)
+ << HeapSpec.Id(Id) << HeapSpec.ParentId(ParentId) << HeapSpec.Name(Name, NameLen) << HeapSpec.Flags(uint16(Flags));
+
+ return Id;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+HeapId
+MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags)
+{
+ if (!GTraceAllowed)
+ {
+ return 0;
+ }
+
+ static std::atomic<HeapId> RootHeapCount(0);
+ const HeapId Id = RootHeapCount.fetch_add(1);
+ ZEN_ASSERT(Id <= EMemoryTraceRootHeap::EndReserved);
+
+ const uint32_t NameLen = uint32_t(zen::StringLength(Name));
+ const uint32_t DataSize = NameLen * sizeof(char16_t);
+
+ UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize)
+ << HeapSpec.Id(Id) << HeapSpec.ParentId(HeapId(~0)) << HeapSpec.Name(Name, NameLen)
+ << HeapSpec.Flags(uint16(EMemoryTraceHeapFlags::Root | Flags));
+
+ return Id;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_MarkAllocAsHeap(uint64_t Address, HeapId Heap, EMemoryTraceHeapAllocationFlags Flags, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ UE_TRACE_LOG(Memory, HeapMarkAlloc, MemAllocChannel)
+ << HeapMarkAlloc.Address(uint64_t(Address)) << HeapMarkAlloc.CallstackId(CallstackId)
+ << HeapMarkAlloc.Flags(uint16(EMemoryTraceHeapAllocationFlags::Heap | Flags)) << HeapMarkAlloc.Heap(Heap);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ // Sets all flags to zero
+ UE_TRACE_LOG(Memory, HeapUnmarkAlloc, MemAllocChannel)
+ << HeapUnmarkAlloc.Address(uint64_t(Address)) << HeapUnmarkAlloc.CallstackId(CallstackId) << HeapUnmarkAlloc.Heap(Heap);
+}
+
+} // namespace zen
+
+#else // UE_MEMORY_TRACE_ENABLED
+
+/////////////////////////////////////////////////////////////////////////////
+bool
+MemoryTrace_IsActive()
+{
+ return false;
+}
+
+#endif // UE_MEMORY_TRACE_ENABLED
+
+namespace zen {
+
+/////////////////////////////////////////////////////////////////////////////
+FTraceMalloc::FTraceMalloc(FMalloc* InMalloc)
+{
+ WrappedMalloc = InMalloc;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+FTraceMalloc::~FTraceMalloc()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////
+void*
+FTraceMalloc::Malloc(SIZE_T Count, uint32_t Alignment)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ // UE_TRACE_METADATA_CLEAR_SCOPE();
+ UE_MEMSCOPE(TRACE_TAG);
+
+ void* NewPtr;
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ NewPtr = WrappedMalloc->Malloc(Count, Alignment);
+ }
+
+ const uint64_t Size = Count;
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+
+ UE_TRACE_LOG(Memory, Alloc, MemAllocChannel)
+ << Alloc.Address(uint64_t(NewPtr)) << Alloc.CallstackId(0) << Alloc.Size(uint32_t(Size >> SizeShift))
+ << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ return NewPtr;
+#else
+ return WrappedMalloc->Malloc(Count, Alignment);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+/////////////////////////////////////////////////////////////////////////////
+void*
+FTraceMalloc::Realloc(void* Original, SIZE_T Count, uint32_t Alignment)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ // UE_TRACE_METADATA_CLEAR_SCOPE();
+ UE_MEMSCOPE(TRACE_TAG);
+
+ UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
+ << ReallocFree.Address(uint64_t(Original)) << ReallocFree.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ void* NewPtr;
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ NewPtr = WrappedMalloc->Realloc(Original, Count, Alignment);
+ }
+
+ const uint64_t Size = Count;
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+
+ UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
+ << ReallocAlloc.Address(uint64_t(NewPtr)) << ReallocAlloc.CallstackId(0) << ReallocAlloc.Size(uint32_t(Size >> SizeShift))
+ << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
+ << ReallocAlloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ return NewPtr;
+#else
+ return WrappedMalloc->Realloc(Original, Count, Alignment);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+/////////////////////////////////////////////////////////////////////////////
+void
+FTraceMalloc::Free(void* Original)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ UE_TRACE_LOG(Memory, Free, MemAllocChannel)
+ << Free.Address(uint64_t(Original)) << Free.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ WrappedMalloc->Free(Original);
+ }
+#else
+ WrappedMalloc->Free(Original);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace.cpp b/src/zencore/memtrack/moduletrace.cpp
new file mode 100644
index 000000000..51280ff3a
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.cpp
@@ -0,0 +1,296 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenbase/zenbase.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 1
+#else
+# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 0
+#endif
+
+#include "moduletrace_events.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# include <zencore/trace.h>
+
+# include <array>
+
+namespace zen {
+
+class FMalloc;
+
+typedef uint32_t HeapId;
+
+////////////////////////////////////////////////////////////////////////////////
+struct FNtDllFunction
+{
+ FARPROC Addr;
+
+ FNtDllFunction(const char* Name)
+ {
+ HMODULE NtDll = LoadLibraryW(L"ntdll.dll");
+ ZEN_ASSERT(NtDll);
+ Addr = GetProcAddress(NtDll, Name);
+ }
+
+ template<typename... ArgTypes>
+ unsigned int operator()(ArgTypes... Args)
+ {
+ typedef unsigned int(NTAPI * Prototype)(ArgTypes...);
+ return (Prototype((void*)Addr))(Args...);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+class FModuleTrace
+{
+public:
+ typedef void (*SubscribeFunc)(bool, void*, const char16_t*);
+
+ FModuleTrace(FMalloc* InMalloc);
+ ~FModuleTrace();
+ static FModuleTrace* Get();
+ void Initialize();
+ void Subscribe(SubscribeFunc Function);
+
+private:
+ void OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base);
+ void OnDllUnloaded(uintptr_t Base);
+ void OnDllNotification(unsigned int Reason, const void* DataPtr);
+ static FModuleTrace* Instance;
+ SubscribeFunc Subscribers[64];
+ int SubscriberCount = 0;
+ void* CallbackCookie = nullptr;
+ HeapId ProgramHeapId = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace* FModuleTrace::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace::FModuleTrace(FMalloc* InMalloc)
+{
+ ZEN_UNUSED(InMalloc);
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace::~FModuleTrace()
+{
+ if (CallbackCookie)
+ {
+ FNtDllFunction UnregisterFunc("LdrUnregisterDllNotification");
+ UnregisterFunc(CallbackCookie);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace*
+FModuleTrace::Get()
+{
+ return Instance;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::Initialize()
+{
+ using namespace UE::Trace;
+
+ ProgramHeapId = MemoryTrace_HeapSpec(SystemMemory, u"Module", EMemoryTraceHeapFlags::None);
+
+ UE_TRACE_LOG(Diagnostics, ModuleInit, ModuleChannel, sizeof(char) * 3)
+ << ModuleInit.SymbolFormat("pdb", 3) << ModuleInit.ModuleBaseShift(uint8(0));
+
+ // Register for DLL load/unload notifications.
+ auto Thunk = [](ULONG Reason, const void* Data, void* Context) {
+ auto* Self = (FModuleTrace*)Context;
+ Self->OnDllNotification(Reason, Data);
+ };
+
+ typedef void(CALLBACK * ThunkType)(ULONG, const void*, void*);
+ auto ThunkImpl = ThunkType(Thunk);
+
+ FNtDllFunction RegisterFunc("LdrRegisterDllNotification");
+ RegisterFunc(0, ThunkImpl, this, &CallbackCookie);
+
+ // Enumerate already loaded modules.
+ const TEB* ThreadEnvBlock = NtCurrentTeb();
+ const PEB* ProcessEnvBlock = ThreadEnvBlock->ProcessEnvironmentBlock;
+ const LIST_ENTRY* ModuleIter = ProcessEnvBlock->Ldr->InMemoryOrderModuleList.Flink;
+ const LIST_ENTRY* ModuleIterEnd = ModuleIter->Blink;
+ do
+ {
+ const auto& ModuleData = *(LDR_DATA_TABLE_ENTRY*)(ModuleIter - 1);
+ if (ModuleData.DllBase == 0)
+ {
+ break;
+ }
+
+ OnDllLoaded(ModuleData.FullDllName, UPTRINT(ModuleData.DllBase));
+ ModuleIter = ModuleIter->Flink;
+ } while (ModuleIter != ModuleIterEnd);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::Subscribe(SubscribeFunc Function)
+{
+ ZEN_ASSERT(SubscriberCount < ZEN_ARRAY_COUNT(Subscribers));
+ Subscribers[SubscriberCount++] = Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::OnDllNotification(unsigned int Reason, const void* DataPtr)
+{
+ enum
+ {
+ LDR_DLL_NOTIFICATION_REASON_LOADED = 1,
+ LDR_DLL_NOTIFICATION_REASON_UNLOADED = 2,
+ };
+
+ struct FNotificationData
+ {
+ uint32_t Flags;
+ const UNICODE_STRING& FullPath;
+ const UNICODE_STRING& BaseName;
+ uintptr_t Base;
+ };
+ const auto& Data = *(FNotificationData*)DataPtr;
+
+ switch (Reason)
+ {
+ case LDR_DLL_NOTIFICATION_REASON_LOADED:
+ OnDllLoaded(Data.FullPath, Data.Base);
+ break;
+ case LDR_DLL_NOTIFICATION_REASON_UNLOADED:
+ OnDllUnloaded(Data.Base);
+ break;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::OnDllLoaded(const UNICODE_STRING& Name, UPTRINT Base)
+{
+ const auto* DosHeader = (IMAGE_DOS_HEADER*)Base;
+ const auto* NtHeaders = (IMAGE_NT_HEADERS*)(Base + DosHeader->e_lfanew);
+ const IMAGE_OPTIONAL_HEADER& OptionalHeader = NtHeaders->OptionalHeader;
+ uint8_t ImageId[20];
+
+ // Find the guid and age of the binary, used to match debug files
+ const IMAGE_DATA_DIRECTORY& DebugInfoEntry = OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
+ const auto* DebugEntries = (IMAGE_DEBUG_DIRECTORY*)(Base + DebugInfoEntry.VirtualAddress);
+ for (uint32_t i = 0, n = DebugInfoEntry.Size / sizeof(DebugEntries[0]); i < n; ++i)
+ {
+ const IMAGE_DEBUG_DIRECTORY& Entry = DebugEntries[i];
+ if (Entry.Type == IMAGE_DEBUG_TYPE_CODEVIEW)
+ {
+ struct FCodeView7
+ {
+ uint32_t Signature;
+ uint32_t Guid[4];
+ uint32_t Age;
+ };
+
+ if (Entry.SizeOfData < sizeof(FCodeView7))
+ {
+ continue;
+ }
+
+ const auto* CodeView7 = (FCodeView7*)(Base + Entry.AddressOfRawData);
+ if (CodeView7->Signature != 'SDSR')
+ {
+ continue;
+ }
+
+ memcpy(ImageId, (uint8_t*)&CodeView7->Guid, sizeof(uint32_t) * 4);
+ memcpy(&ImageId[16], (uint8_t*)&CodeView7->Age, sizeof(uint32_t));
+ break;
+ }
+ }
+
+ // Note: UNICODE_STRING.Length is the size in bytes of the string buffer.
+ UE_TRACE_LOG(Diagnostics, ModuleLoad, ModuleChannel, uint32_t(Name.Length + sizeof(ImageId)))
+ << ModuleLoad.Name((const char16_t*)Name.Buffer, Name.Length / 2) << ModuleLoad.Base(uint64_t(Base))
+ << ModuleLoad.Size(OptionalHeader.SizeOfImage) << ModuleLoad.ImageId(ImageId, uint32_t(sizeof(ImageId)));
+
+# if UE_MEMORY_TRACE_ENABLED
+ {
+ UE_MEMSCOPE(ELLMTag::ProgramSize);
+ MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap(Base, ProgramHeapId);
+ MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+ }
+# endif // UE_MEMORY_TRACE_ENABLED
+
+ for (int i = 0; i < SubscriberCount; ++i)
+ {
+ Subscribers[i](true, (void*)Base, (const char16_t*)Name.Buffer);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::OnDllUnloaded(UPTRINT Base)
+{
+# if UE_MEMORY_TRACE_ENABLED
+ MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_UnmarkAllocAsHeap(Base, ProgramHeapId);
+ MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+# endif // UE_MEMORY_TRACE_ENABLED
+
+ UE_TRACE_LOG(Diagnostics, ModuleUnload, ModuleChannel) << ModuleUnload.Base(uint64(Base));
+
+ for (int i = 0; i < SubscriberCount; ++i)
+ {
+ Subscribers[i](false, (void*)Base, nullptr);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+Modules_Create(FMalloc* Malloc)
+{
+ if (FModuleTrace::Get() != nullptr)
+ {
+ return;
+ }
+
+ static FModuleTrace Instance(Malloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+Modules_Initialize()
+{
+ if (FModuleTrace* Instance = FModuleTrace::Get())
+ {
+ Instance->Initialize();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+Modules_Subscribe(void (*Function)(bool, void*, const char16_t*))
+{
+ if (FModuleTrace* Instance = FModuleTrace::Get())
+ {
+ Instance->Subscribe(Function);
+ }
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
diff --git a/src/zencore/memtrack/moduletrace.h b/src/zencore/memtrack/moduletrace.h
new file mode 100644
index 000000000..5e7374faa
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.h
@@ -0,0 +1,11 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+namespace zen {
+
+void Modules_Create(class FMalloc*);
+void Modules_Subscribe(void (*)(bool, void*, const char16_t*));
+void Modules_Initialize();
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.cpp b/src/zencore/memtrack/moduletrace_events.cpp
new file mode 100644
index 000000000..9c6a9b648
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.cpp
@@ -0,0 +1,16 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/trace.h>
+
+#include "moduletrace_events.h"
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(ModuleChannel, "Module information needed for symbols resolution", true)
+
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleInit)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleLoad)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleUnload)
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.h b/src/zencore/memtrack/moduletrace_events.h
new file mode 100644
index 000000000..1bda42fe8
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.h
@@ -0,0 +1,27 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/trace.h>
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_EXTERN(ModuleChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleInit, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, SymbolFormat)
+ UE_TRACE_EVENT_FIELD(uint8, ModuleBaseShift)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleLoad, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+ UE_TRACE_EVENT_FIELD(uint64, Base)
+ UE_TRACE_EVENT_FIELD(uint32, Size)
+	UE_TRACE_EVENT_FIELD(uint8[], ImageId) // Platform specific id for this image, used to match debug files where available
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleUnload, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(uint64, Base)
+UE_TRACE_EVENT_END()
+
+} // namespace zen
diff --git a/src/zencore/memtrack/platformtls.h b/src/zencore/memtrack/platformtls.h
new file mode 100644
index 000000000..f134e68a8
--- /dev/null
+++ b/src/zencore/memtrack/platformtls.h
@@ -0,0 +1,107 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+/**
+ * It should be possible to provide a generic implementation as long as a threadID is provided. We don't do that yet.
+ */
+struct FGenericPlatformTLS
+{
+ static const uint32_t InvalidTlsSlot = 0xFFFFFFFF;
+
+ /**
+ * Return false if this is an invalid TLS slot
+ * @param SlotIndex the TLS index to check
+ * @return true if this looks like a valid slot
+ */
+ static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; }
+};
+
+#if ZEN_PLATFORM_WINDOWS
+
+# include <zencore/windows.h>
+
+class FWindowsPlatformTLS : public FGenericPlatformTLS
+{
+public:
+ static uint32_t AllocTlsSlot() { return ::TlsAlloc(); }
+
+ static void FreeTlsSlot(uint32_t SlotIndex) { ::TlsFree(SlotIndex); }
+
+ static void SetTlsValue(uint32_t SlotIndex, void* Value) { ::TlsSetValue(SlotIndex, Value); }
+
+ /**
+ * Reads the value stored at the specified TLS slot
+ *
+ * @return the value stored in the slot
+ */
+ static void* GetTlsValue(uint32_t SlotIndex) { return ::TlsGetValue(SlotIndex); }
+
+ /**
+ * Return false if this is an invalid TLS slot
+ * @param SlotIndex the TLS index to check
+ * @return true if this looks like a valid slot
+ */
+ static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; }
+};
+
+typedef FWindowsPlatformTLS FPlatformTLS;
+
+#elif ZEN_PLATFORM_MAC
+
+# include <pthread.h>
+
+/**
+ * Apple implementation of the TLS OS functions
+ **/
+struct FApplePlatformTLS : public FGenericPlatformTLS
+{
+ /**
+ * Returns the currently executing thread's id
+ */
+ static uint32_t GetCurrentThreadId(void) { return (uint32_t)pthread_mach_thread_np(pthread_self()); }
+
+ /**
+ * Allocates a thread local store slot
+ */
+ static uint32_t AllocTlsSlot(void)
+ {
+ // allocate a per-thread mem slot
+ pthread_key_t SlotKey = 0;
+ if (pthread_key_create(&SlotKey, NULL) != 0)
+ {
+ SlotKey = InvalidTlsSlot; // matches the Windows TlsAlloc() retval.
+ }
+ return SlotKey;
+ }
+
+ /**
+ * Sets a value in the specified TLS slot
+ *
+ * @param SlotIndex the TLS index to store it in
+ * @param Value the value to store in the slot
+ */
+ static void SetTlsValue(uint32_t SlotIndex, void* Value) { pthread_setspecific((pthread_key_t)SlotIndex, Value); }
+
+ /**
+ * Reads the value stored at the specified TLS slot
+ *
+ * @return the value stored in the slot
+ */
+ static void* GetTlsValue(uint32_t SlotIndex) { return pthread_getspecific((pthread_key_t)SlotIndex); }
+
+ /**
+ * Frees a previously allocated TLS slot
+ *
+ * @param SlotIndex the TLS index to store it in
+ */
+ static void FreeTlsSlot(uint32_t SlotIndex) { pthread_key_delete((pthread_key_t)SlotIndex); }
+};
+
+typedef FApplePlatformTLS FPlatformTLS;
+
+#else
+# error Platform not yet supported
+#endif
diff --git a/src/zencore/memtrack/tagtrace.cpp b/src/zencore/memtrack/tagtrace.cpp
new file mode 100644
index 000000000..15ba78ae4
--- /dev/null
+++ b/src/zencore/memtrack/tagtrace.cpp
@@ -0,0 +1,237 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "growonlylockfreehash.h"
+
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+# include <zencore/string.h>
+
+namespace zen {
+////////////////////////////////////////////////////////////////////////////////
+
+UE_TRACE_CHANNEL_EXTERN(MemAllocChannel);
+
+UE_TRACE_EVENT_BEGIN(Memory, TagSpec, Important | NoSync)
+ UE_TRACE_EVENT_FIELD(int32, Tag)
+ UE_TRACE_EVENT_FIELD(int32, Parent)
+ UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, Display)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScope, NoSync)
+ UE_TRACE_EVENT_FIELD(int32, Tag)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScopePtr, NoSync)
+ UE_TRACE_EVENT_FIELD(uint64, Ptr)
+UE_TRACE_EVENT_END()
+
+////////////////////////////////////////////////////////////////////////////////
+// Per thread active tag, i.e. the top level FMemScope
+thread_local int32 GActiveTag;
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope()
+{
+}
+
+FMemScope::FMemScope(int32_t InTag, bool bShouldActivate /*= true*/)
+{
+ if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+ {
+ ActivateScope(InTag);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope(ELLMTag InTag, bool bShouldActivate /*= true*/)
+{
+ if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+ {
+ ActivateScope(static_cast<int32>(InTag));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FMemScope::ActivateScope(int32_t InTag)
+{
+ if (auto LogScope = FMemoryMemoryScopeFields::LogScopeType::ScopedEnter<FMemoryMemoryScopeFields>())
+ {
+ if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopeFields*)(&LogScope))
+ {
+ Inner.SetActive();
+ LogScope += LogScope << MemoryScope.Tag(InTag);
+ PrevTag = GActiveTag;
+ GActiveTag = InTag;
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::~FMemScope()
+{
+ if (Inner.bActive)
+ {
+ GActiveTag = PrevTag;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::FMemScopePtr(uint64_t InPtr)
+{
+ if (InPtr != 0 && TRACE_PRIVATE_CHANNELEXPR_IS_ENABLED(MemAllocChannel))
+ {
+ if (auto LogScope = FMemoryMemoryScopePtrFields::LogScopeType::ScopedEnter<FMemoryMemoryScopePtrFields>())
+ {
+ if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopePtrFields*)(&LogScope))
+ {
+ Inner.SetActive(), LogScope += LogScope << MemoryScope.Ptr(InPtr);
+ }
+ }
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::~FMemScopePtr()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Utility class that manages tracing the specification of unique LLM tags
+ * and custom name based tags.
+ */
+class FTagTrace
+{
+public:
+ FTagTrace(FMalloc* InMalloc);
+ void AnnounceGenericTags() const;
+ void AnnounceSpecialTags() const;
+ int32 AnnounceCustomTag(int32 Tag, int32 ParentTag, const ANSICHAR* Display) const;
+
+private:
+ struct FTagNameSetEntry
+ {
+ std::atomic_int32_t Data;
+
+ int32_t GetKey() const { return Data.load(std::memory_order_relaxed); }
+ bool GetValue() const { return true; }
+ bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } // NAME_None is treated as empty
+ void SetKeyValue(int32_t Key, bool Value)
+ {
+ ZEN_UNUSED(Value);
+ Data.store(Key, std::memory_order_relaxed);
+ }
+ static uint32_t KeyHash(int32_t Key) { return static_cast<uint32>(Key); }
+ static void ClearEntries(FTagNameSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FTagNameSetEntry));
+ }
+ };
+ typedef TGrowOnlyLockFreeHash<FTagNameSetEntry, int32_t, bool> FTagNameSet;
+
+ FTagNameSet AnnouncedNames;
+ static FMalloc* Malloc;
+};
+
+FMalloc* FTagTrace::Malloc = nullptr;
+static FTagTrace* GTagTrace = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FTagTrace::FTagTrace(FMalloc* InMalloc) : AnnouncedNames(InMalloc)
+{
+ Malloc = InMalloc;
+ AnnouncedNames.Reserve(1024);
+ AnnounceGenericTags();
+ AnnounceSpecialTags();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FTagTrace::AnnounceGenericTags() const
+{
+# define TRACE_TAG_SPEC(Enum, Str, ParentTag) \
+ { \
+ const uint32_t DisplayLen = (uint32_t)StringLength(Str); \
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) \
+ << TagSpec.Tag((int32_t)ELLMTag::Enum) << TagSpec.Parent((int32_t)ParentTag) << TagSpec.Display(Str, DisplayLen); \
+ }
+ LLM_ENUM_GENERIC_TAGS(TRACE_TAG_SPEC);
+# undef TRACE_TAG_SPEC
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void
+FTagTrace::AnnounceSpecialTags() const
+{
+ auto EmitTag = [](const char16_t* DisplayString, int32_t Tag, int32_t ParentTag) {
+ const uint32_t DisplayLen = (uint32_t)StringLength(DisplayString);
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+ << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(DisplayString, DisplayLen);
+ };
+
+ EmitTag(u"Trace", TRACE_TAG, -1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+FTagTrace::AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const ANSICHAR* Display) const
+{
+ const uint32_t DisplayLen = (uint32_t)StringLength(Display);
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+ << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(Display, DisplayLen);
+ return Tag;
+}
+
+} // namespace zen
+
+#endif // UE_MEMORY_TAGS_TRACE_ENABLED
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_InitTags(FMalloc* InMalloc)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ GTagTrace = (FTagTrace*)InMalloc->Malloc(sizeof(FTagTrace), alignof(FTagTrace));
+ new (GTagTrace) FTagTrace(InMalloc);
+#else
+ ZEN_UNUSED(InMalloc);
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ // todo: How do we check if tag trace is active?
+ if (GTagTrace)
+ {
+ return GTagTrace->AnnounceCustomTag(Tag, ParentTag, Display);
+ }
+#else
+ ZEN_UNUSED(Tag, ParentTag, Display);
+#endif
+ return -1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+MemoryTrace_GetActiveTag()
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ return GActiveTag;
+#else
+ return -1;
+#endif
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/tracemalloc.h b/src/zencore/memtrack/tracemalloc.h
new file mode 100644
index 000000000..54606ac45
--- /dev/null
+++ b/src/zencore/memtrack/tracemalloc.h
@@ -0,0 +1,24 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memorytrace.h>
+
+namespace zen {
+
+class FTraceMalloc : public FMalloc
+{
+public:
+ FTraceMalloc(FMalloc* InMalloc);
+ virtual ~FTraceMalloc();
+
+ virtual void* Malloc(SIZE_T Count, uint32 Alignment) override;
+ virtual void* Realloc(void* Original, SIZE_T Count, uint32 Alignment) override;
+ virtual void Free(void* Original) override;
+
+ virtual void OnMallocInitialized() override { WrappedMalloc->OnMallocInitialized(); }
+
+ FMalloc* WrappedMalloc;
+};
+
+} // namespace zen
diff --git a/src/zencore/memtrack/vatrace.cpp b/src/zencore/memtrack/vatrace.cpp
new file mode 100644
index 000000000..4dea27f1b
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.cpp
@@ -0,0 +1,361 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "vatrace.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+
+# include <zencore/memory/memorytrace.h>
+
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+# pragma comment(lib, "mincore.lib") // VirtualAlloc2
+# endif
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+// Patches functions in executable (.text) pages so they jump to a hook, and
+// builds per-hook trampolines that preserve the original prologue so the
+// unhooked behaviour can still be invoked. Trampoline memory is carved out of
+// blocks allocated near the patched code (within rel32 range); the destructor
+// seals those blocks as executable.
+class FTextSectionEditor
+{
+public:
+ ~FTextSectionEditor();
+ // Installs HookFunction over Target; returns a pointer that behaves like
+ // the original, unhooked Target.
+ template<typename T>
+ T* Hook(T* Target, T* HookFunction);
+
+private:
+ // Header at the start of each 64KB trampoline allocation; blocks form a
+ // singly-linked list headed by HeadBlock.
+ struct FTrampolineBlock
+ {
+ FTrampolineBlock* Next;
+ uint32_t Size;
+ uint32_t Used;
+ };
+
+ static void* GetActualAddress(void* Function);
+ FTrampolineBlock* AllocateTrampolineBlock(void* Reference);
+ uint8_t* AllocateTrampoline(void* Reference, unsigned int Size);
+ void* HookImpl(void* Target, void* HookFunction);
+ FTrampolineBlock* HeadBlock = nullptr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Seals all trampoline blocks once hooking is complete: they were allocated
+// read/write so trampolines could be written; here they are flipped to
+// execute+read and the instruction cache is flushed so the CPU observes the
+// freshly written code. The blocks are deliberately never freed — installed
+// trampolines must outlive this editor.
+FTextSectionEditor::~FTextSectionEditor()
+{
+ for (FTrampolineBlock* Block = HeadBlock; Block != nullptr; Block = Block->Next)
+ {
+ DWORD Unused;
+ VirtualProtect(Block, Block->Size, PAGE_EXECUTE_READ, &Unused);
+ }
+
+ // nullptr/0 flushes without naming a specific region.
+ FlushInstructionCache(GetCurrentProcess(), nullptr, 0);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Resolves import/incremental-link thunks: if Function starts with an
+// indirect "jmp [rip+disp32]" (ff /4), returns the address stored in the
+// pointed-to slot (e.g. the IAT entry); otherwise returns Function unchanged.
+void*
+FTextSectionEditor::GetActualAddress(void* Function)
+{
+ // Follow a jmp instruction (0xff/4 only for now) at function and returns
+ // where it would jmp to.
+
+ uint8_t* Addr = (uint8_t*)Function;
+ int Offset = unsigned(Addr[0] & 0xf0) == 0x40; // REX prefix
+ if (Addr[Offset + 0] == 0xff && Addr[Offset + 1] == 0x25)
+ {
+ Addr += Offset;
+ // ff 25 is 6 bytes long; disp32 sits at +2 and is relative to the next
+ // instruction (+6). Dereference that slot to get the real target.
+ Addr = *(uint8_t**)(Addr + 6 + *(uint32_t*)(Addr + 2));
+ }
+ return Addr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Reserves and commits a 64KB trampoline block at an address below Reference,
+// probing downward one block at a time so the result stays within rel32
+// (+/-2GB) range of the code being patched. The new block is pushed onto the
+// HeadBlock list.
+FTextSectionEditor::FTrampolineBlock*
+FTextSectionEditor::AllocateTrampolineBlock(void* Reference)
+{
+ static const size_t BlockSize = 0x10000; // 64KB is Windows' canonical granularity
+
+ // Find the start of the main allocation that mapped Reference
+ MEMORY_BASIC_INFORMATION MemInfo;
+ VirtualQuery(Reference, &MemInfo, sizeof(MemInfo));
+ auto* Ptr = (uint8_t*)(MemInfo.AllocationBase);
+
+ // Step backwards one block at a time and try and allocate that address
+ while (true)
+ {
+ Ptr -= BlockSize;
+ if (VirtualAlloc(Ptr, BlockSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) != nullptr)
+ {
+ break;
+ }
+
+ // Give up once we can no longer reach Reference with a 32-bit offset.
+ // NOTE(review): if ZEN_ASSERT is non-fatal in some build configs the
+ // loop keeps probing downward indefinitely — confirm it aborts.
+ uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Ptr);
+ if (Distance >= 1ull << 31)
+ {
+ ZEN_ASSERT(!"Failed to allocate trampoline blocks for memory tracing hooks");
+ }
+ }
+
+ // Initialise the block header and link it at the head of the list.
+ auto* Block = (FTrampolineBlock*)Ptr;
+ Block->Next = HeadBlock;
+ Block->Size = BlockSize;
+ Block->Used = sizeof(FTrampolineBlock);
+ HeadBlock = Block;
+
+ return Block;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Bump-allocates Size bytes of trampoline space from a block within rel32
+// range of Reference, allocating a fresh block if no existing one qualifies.
+uint8_t*
+FTextSectionEditor::AllocateTrampoline(void* Reference, unsigned int Size)
+{
+ // Try and find a block that's within 2^31 bytes before Reference
+ FTrampolineBlock* Block;
+ for (Block = HeadBlock; Block != nullptr; Block = Block->Next)
+ {
+ uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Block);
+ if (Distance < 1ull << 31)
+ {
+ break;
+ }
+ }
+
+ // If we didn't find a block then we need to allocate a new one.
+ if (Block == nullptr)
+ {
+ Block = AllocateTrampolineBlock(Reference);
+ }
+
+ // Allocate space for the trampoline.
+ uint32_t NextUsed = Block->Used + Size;
+ if (NextUsed > Block->Size)
+ {
+ // Block is full. We could allocate a new block here but as it is not
+ // expected that so many hooks will be made this path shouldn't happen
+ // NOTE(review): if ZEN_ASSERT is non-fatal in some build configs,
+ // execution continues and returns an out-of-bounds pointer — confirm
+ // the assert aborts.
+ ZEN_ASSERT(!"Unable to allocate memory for memory tracing's hooks");
+ }
+
+ // Hand out the next free slice and advance the bump pointer.
+ uint8_t* Out = (uint8_t*)Block + Block->Used;
+ Block->Used = NextUsed;
+
+ return Out;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Type-safe front end over the untyped patching routine below.
+template<typename T>
+T*
+FTextSectionEditor::Hook(T* Target, T* HookFunction)
+{
+    void* Original = HookImpl(reinterpret_cast<void*>(Target), reinterpret_cast<void*>(HookFunction));
+    return reinterpret_cast<T*>(Original);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Redirects Target to HookFunction and returns a callable pointer to the
+// original behaviour. Layout written into trampoline memory:
+//   [8-byte HookFunction pointer][copied prologue][jmp rel32 back to Target+N]
+// Target itself is overwritten with "jmp [rip+disp32]" (ff 25, 6 bytes)
+// that jumps through the pointer at the head of the trampoline.
+void*
+FTextSectionEditor::HookImpl(void* Target, void* HookFunction)
+{
+    // Resolve import thunks so we patch the real function body.
+    Target = GetActualAddress(Target);
+
+    // Very rudimentary x86_64 instruction length decoding that only supports op
+    // code ranges (0x80,0x8b) and (0x50,0x5f). Enough for simple prologues.
+    // Decode whole instructions until at least 6 bytes are covered (the size
+    // of the ff 25 patch written below) so no instruction is cut in half.
+    uint8_t* __restrict Start = (uint8_t*)Target;
+    const uint8_t* Read = Start;
+    do
+    {
+        Read += (Read[0] & 0xf0) == 0x40; // REX prefix
+        uint8_t Inst = *Read++;
+        if (unsigned(Inst - 0x80) < 0x0cu)
+        {
+            uint8_t ModRm = *Read++;
+            Read += ((ModRm & 0300) < 0300) & ((ModRm & 0007) == 0004); // SIB
+            switch (ModRm & 0300) // Disp[8|32]
+            {
+                case 0100:
+                    Read += 1;
+                    break;
+                case 0200:
+                    // NOTE(review): a mod=10 displacement is 4 bytes; the +5
+                    // here looks suspicious — confirm against the encodings
+                    // this decoder is expected to meet.
+                    Read += 5;
+                    break;
+            }
+            Read += (Inst == 0x83); // imm8 operand
+        }
+        else if (unsigned(Inst - 0x50) >= 0x10u)
+        {
+            ZEN_ASSERT(!"Unknown instruction");
+        }
+    } while (Read - Start < 6);
+
+    static const int TrampolineSize = 24;
+    int PatchSize = int(Read - Start);
+    uint8_t* TrampolinePtr = AllocateTrampoline(Start, PatchSize + TrampolineSize);
+
+    // Write the trampoline: hook pointer first, then the relocated prologue.
+    *(void**)TrampolinePtr = HookFunction;
+
+    uint8_t* PatchJmp = TrampolinePtr + sizeof(void*);
+    memcpy(PatchJmp, Start, PatchSize);
+
+    // "jmp rel32" (e9) from the end of the copied prologue back to the first
+    // unpatched instruction of the target.
+    PatchJmp += PatchSize;
+    *PatchJmp = 0xe9;
+    *(int32_t*)(PatchJmp + 1) = int32_t(intptr_t(Start + PatchSize) - intptr_t(PatchJmp)) - 5;
+
+    // Need to make the text section writeable. Round the protected range up
+    // from Target itself (not from the page base) so a patch that straddles a
+    // page boundary makes BOTH pages writable; the previous computation
+    // always produced exactly one page and faulted when Target sat within 16
+    // bytes of a page end.
+    DWORD ProtPrev;
+    uintptr_t ProtBase = uintptr_t(Target) & ~uintptr_t(0x0fff); // 0x0fff is mask of VM page size
+    size_t ProtSize = ((uintptr_t(Target) + 16 + 0x0fff) & ~uintptr_t(0x0fff)) - ProtBase; // 16 is enough for one x86 instruction
+    VirtualProtect((void*)ProtBase, ProtSize, PAGE_EXECUTE_READWRITE, &ProtPrev);
+
+    // Patch function to jmp indirectly through the pointer stored at the head
+    // of the trampoline (ff 25 = jmp [rip+disp32]; disp is relative to the
+    // end of the 6-byte instruction, i.e. HookJmp + 3 uint16s).
+    uint16_t* HookJmp = (uint16_t*)Target;
+    HookJmp[0] = 0x25ff;
+    *(int32_t*)(HookJmp + 1) = int32_t(intptr_t(TrampolinePtr) - intptr_t(HookJmp + 3));
+
+    // Put the protection back the way it was
+    VirtualProtect((void*)ProtBase, ProtSize, ProtPrev, &ProtPrev);
+
+    // The copied prologue is the "call the original" entry point.
+    return PatchJmp - PatchSize;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Static storage for FVirtualWinApiHooks: the light-mode flag and the saved
+// original entry points (trampolines) returned by FTextSectionEditor::Hook
+// for each hooked Win32 virtual-memory API.
+bool FVirtualWinApiHooks::bLight;
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+PVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+# else
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+# endif
+BOOL(WINAPI* FVirtualWinApiHooks::VmFreeOrig)(LPVOID, SIZE_T, DWORD);
+BOOL(WINAPI* FVirtualWinApiHooks::VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
+
+// Installs hooks over the Win32 virtual memory APIs (VirtualAlloc[Ex],
+// VirtualAlloc2 where available, VirtualFree[Ex]) so reserves/releases are
+// reported to the memory trace. Should be called once, early.
+// NOTE(review): bInLight is stored but not read anywhere in this translation
+// unit — confirm its consumer.
+void
+FVirtualWinApiHooks::Initialize(bool bInLight)
+{
+ bLight = bInLight;
+
+ // The editor seals the trampoline pages in its destructor at end of scope.
+ FTextSectionEditor Editor;
+
+ // Note that hooking alloc functions is done last as applying the hook can
+ // allocate some memory pages.
+
+ VmFreeOrig = Editor.Hook(VirtualFree, &FVirtualWinApiHooks::VmFree);
+ VmFreeExOrig = Editor.Hook(VirtualFreeEx, &FVirtualWinApiHooks::VmFreeEx);
+
+# if ZEN_PLATFORM_WINDOWS
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+ {
+ // SDK is new enough to link VirtualAlloc2 directly (see mincore.lib above).
+ VmAlloc2Orig = Editor.Hook(VirtualAlloc2, &FVirtualWinApiHooks::VmAlloc2);
+ }
+# else // NTDDI_VERSION
+ {
+ // Older SDK: resolve VirtualAlloc2 dynamically; it may not exist at runtime.
+ VmAlloc2Orig = nullptr;
+ HINSTANCE DllInstance;
+ DllInstance = LoadLibrary(TEXT("kernelbase.dll"));
+ if (DllInstance != NULL)
+ {
+# pragma warning(push)
+# pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'FVirtualWinApiHooks::FnVirtualAlloc2'
+ VmAlloc2Orig = (FnVirtualAlloc2)GetProcAddress(DllInstance, "VirtualAlloc2");
+# pragma warning(pop)
+ FreeLibrary(DllInstance);
+ }
+ if (VmAlloc2Orig)
+ {
+ VmAlloc2Orig = Editor.Hook(VmAlloc2Orig, &FVirtualWinApiHooks::VmAlloc2);
+ }
+ }
+# endif // NTDDI_VERSION
+# endif // ZEN_PLATFORM_WINDOWS
+
+ VmAllocExOrig = Editor.Hook(VirtualAllocEx, &FVirtualWinApiHooks::VmAllocEx);
+ VmAllocOrig = Editor.Hook(VirtualAlloc, &FVirtualWinApiHooks::VmAlloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Hook for VirtualAlloc: forwards to the original, then reports the region
+// to the memory trace when it represents a new reservation.
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
+{
+    LPVOID Result = VmAllocOrig(Address, Size, Type, Protect);
+
+    // Track any reserve for now. Going forward we need events to differentiate reserves/commits and
+    // corresponding information on frees.
+    const bool bReserves = (Type & MEM_RESERVE) != 0;
+    const bool bCommitsAnywhere = (Type & MEM_COMMIT) != 0 && Address == nullptr;
+    if (Result != nullptr && (bReserves || bCommitsAnywhere))
+    {
+        MemoryTrace_Alloc((uint64_t)Result, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+        MemoryTrace_MarkAllocAsHeap((uint64_t)Result, EMemoryTraceRootHeap::SystemMemory);
+    }
+
+    return Result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Hook for VirtualFree: reports the region to the memory trace before
+// forwarding to the original. Only whole-region releases are traced.
+BOOL WINAPI
+FVirtualWinApiHooks::VmFree(LPVOID Address, SIZE_T Size, DWORD Type)
+{
+    const bool bReleasing = (Type & MEM_RELEASE) != 0;
+    if (bReleasing)
+    {
+        MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+        MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+    }
+    return VmFreeOrig(Address, Size, Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Hook for VirtualAllocEx: same tracing as VmAlloc, but only for allocations
+// made in the current process.
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
+{
+    LPVOID Result = VmAllocExOrig(Process, Address, Size, Type, Protect);
+
+    if (Process != GetCurrentProcess() || Result == nullptr)
+    {
+        return Result;
+    }
+
+    const bool bReserves = (Type & MEM_RESERVE) != 0;
+    const bool bCommitsAnywhere = (Type & MEM_COMMIT) != 0 && Address == nullptr;
+    if (bReserves || bCommitsAnywhere)
+    {
+        MemoryTrace_Alloc((uint64_t)Result, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+        MemoryTrace_MarkAllocAsHeap((uint64_t)Result, EMemoryTraceRootHeap::SystemMemory);
+    }
+
+    return Result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Hook for VirtualFreeEx: traces whole-region releases made in the current
+// process, then forwards to the original.
+BOOL WINAPI
+FVirtualWinApiHooks::VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type)
+{
+    const bool bOwnProcess = (Process == GetCurrentProcess());
+    if (bOwnProcess && (Type & MEM_RELEASE) != 0)
+    {
+        MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+        MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+    }
+    return VmFreeExOrig(Process, Address, Size, Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Hook for VirtualAlloc2: same tracing policy as VmAllocEx. The signature is
+// split on NTDDI_VERSION because MEM_EXTENDED_PARAMETER is unavailable in
+// older SDKs (void* stands in for it there).
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+PVOID WINAPI
+FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
+ PVOID BaseAddress,
+ SIZE_T Size,
+ ULONG Type,
+ ULONG PageProtection,
+ MEM_EXTENDED_PARAMETER* ExtendedParameters,
+ ULONG ParameterCount)
+# else
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
+ LPVOID BaseAddress,
+ SIZE_T Size,
+ ULONG Type,
+ ULONG PageProtection,
+ /*MEM_EXTENDED_PARAMETER* */ void* ExtendedParameters,
+ ULONG ParameterCount)
+# endif
+{
+ LPVOID Ret = VmAlloc2Orig(Process, BaseAddress, Size, Type, PageProtection, ExtendedParameters, ParameterCount);
+
+ // Trace only same-process reservations (or commits with no base address,
+ // which implicitly reserve).
+ if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && BaseAddress == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
diff --git a/src/zencore/memtrack/vatrace.h b/src/zencore/memtrack/vatrace.h
new file mode 100644
index 000000000..59cc7fe97
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.h
@@ -0,0 +1,61 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+#if ZEN_PLATFORM_WINDOWS && !defined(PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS)
+# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 1
+#endif
+
+#ifndef PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 0
+#endif
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+# include <zencore/windows.h>
+
+namespace zen {
+
+// Static-only facade that installs hooks over the Win32 virtual memory APIs
+// so reserves and releases flow into the memory trace. Never instantiated
+// (the constructor is private and undefined).
+class FVirtualWinApiHooks
+{
+public:
+ // Installs all hooks; bInLight is stored in bLight. Call once, early.
+ static void Initialize(bool bInLight);
+
+private:
+ FVirtualWinApiHooks();
+ static bool bLight;
+ // Hook bodies: trace the operation and forward to the saved originals.
+ static LPVOID WINAPI VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
+ static LPVOID WINAPI VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
+ // VirtualAlloc2 support is signature-split: MEM_EXTENDED_PARAMETER only
+ // exists in Win10 RS4+ SDKs, so older SDKs use void* in its place and
+ // resolve the function dynamically (FnVirtualAlloc2).
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+ static PVOID WINAPI VmAlloc2(HANDLE Process,
+ PVOID BaseAddress,
+ SIZE_T Size,
+ ULONG AllocationType,
+ ULONG PageProtection,
+ MEM_EXTENDED_PARAMETER* ExtendedParameters,
+ ULONG ParameterCount);
+ static PVOID(WINAPI* VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+ typedef PVOID(__stdcall* FnVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+# else
+ static LPVOID WINAPI VmAlloc2(HANDLE Process,
+ LPVOID BaseAddress,
+ SIZE_T Size,
+ ULONG AllocationType,
+ ULONG PageProtection,
+ void* ExtendedParameters,
+ ULONG ParameterCount);
+ static LPVOID(WINAPI* VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+ typedef LPVOID(__stdcall* FnVirtualAlloc2)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+# endif
+ static BOOL WINAPI VmFree(LPVOID Address, SIZE_T Size, DWORD Type);
+ static BOOL WINAPI VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type);
+ // Saved original entry points (trampolines) for each hooked API.
+ static LPVOID(WINAPI* VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
+ static LPVOID(WINAPI* VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
+ static BOOL(WINAPI* VmFreeOrig)(LPVOID, SIZE_T, DWORD);
+ static BOOL(WINAPI* VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
+};
+
+} // namespace zen
+
+#endif