diff options
Diffstat (limited to 'src')
58 files changed, 5988 insertions, 169 deletions
diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 16f5799e0..fd58b024a 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -44,9 +44,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <gsl/gsl-lite.hpp> ZEN_THIRD_PARTY_INCLUDES_END -#if ZEN_USE_MIMALLOC -# include <mimalloc-new-delete.h> -#endif +#include <zencore/memory/newdelete.h> ////////////////////////////////////////////////////////////////////////// @@ -365,10 +363,6 @@ main(int argc, char** argv) using namespace zen; using namespace std::literals; -#if ZEN_USE_MIMALLOC - mi_version(); -#endif - zen::logging::InitializeLogging(); // Set output mode to handle virtual terminal sequences diff --git a/src/zencore-test/zencore-test.cpp b/src/zencore-test/zencore-test.cpp index 64df746e4..40cb51156 100644 --- a/src/zencore-test/zencore-test.cpp +++ b/src/zencore-test/zencore-test.cpp @@ -7,11 +7,7 @@ #include <zencore/logging.h> #include <zencore/zencore.h> -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif +#include <zencore/memory/newdelete.h> #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 @@ -21,9 +17,6 @@ ZEN_THIRD_PARTY_INCLUDES_END int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { -#if ZEN_USE_MIMALLOC - mi_version(); -#endif #if ZEN_WITH_TESTS zen::zencore_forcelinktests(); diff --git a/src/zencore/crypto.cpp b/src/zencore/crypto.cpp index 8403a35f4..78bea0c17 100644 --- a/src/zencore/crypto.cpp +++ b/src/zencore/crypto.cpp @@ -2,6 +2,7 @@ #include <zencore/crypto.h> #include <zencore/intmath.h> +#include <zencore/memory/memory.h> #include <zencore/scopeguard.h> #include <zencore/testing.h> diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index 9ca5f1131..36147c5a9 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -7,6 +7,7 @@ #include <zencore/fmtutils.h> #include <zencore/iobuffer.h> #include <zencore/logging.h> +#include <zencore/memory/memory.h> #include 
<zencore/process.h> #include <zencore/stream.h> #include <zencore/string.h> diff --git a/src/zencore/include/zencore/guardvalue.h b/src/zencore/include/zencore/guardvalue.h new file mode 100644 index 000000000..5419e882a --- /dev/null +++ b/src/zencore/include/zencore/guardvalue.h @@ -0,0 +1,40 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +namespace zen { + +/** + * exception-safe guard around saving/restoring a value. + * Commonly used to make sure a value is restored + * even if the code early outs in the future. + * Usage: + * TGuardValue<bool> GuardSomeBool(bSomeBool, false); // Sets bSomeBool to false, and restores it in dtor. + */ +template<typename RefType, typename AssignedType = RefType> +struct TGuardValue +{ + [[nodiscard]] TGuardValue(RefType& ReferenceValue, const AssignedType& NewValue) + : RefValue(ReferenceValue) + , OriginalValue(ReferenceValue) + { + RefValue = NewValue; + } + ~TGuardValue() { RefValue = OriginalValue; } + + /** + * Provides read-only access to the original value of the data being tracked by this struct + * + * @return a const reference to the original data value + */ + const AssignedType& GetOriginalValue() const { return OriginalValue; } + + TGuardValue& operator=(const TGuardValue&) = delete; + TGuardValue(const TGuardValue&) = delete; + +private: + RefType& RefValue; + AssignedType OriginalValue; +}; + +} // namespace zen diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h index 493b7375e..93a27ea58 100644 --- a/src/zencore/include/zencore/iobuffer.h +++ b/src/zencore/include/zencore/iobuffer.h @@ -99,6 +99,11 @@ public: ZENCORE_API IoBufferCore(size_t SizeBytes, size_t Alignment); ZENCORE_API ~IoBufferCore(); + void* operator new(size_t Size); + void operator delete(void* Ptr); + void* operator new[](size_t Size) = delete; + void operator delete[](void* Ptr) = delete; + // Reference counting inline uint32_t AddRef() const { return 
AtomicIncrement(const_cast<IoBufferCore*>(this)->m_RefCount); } @@ -244,7 +249,6 @@ protected: kIsExtended = 1 << 2, // Is actually a SharedBufferExtendedCore kIsMaterialized = 1 << 3, // Data pointers are valid kIsWholeFile = 1 << 5, // References an entire file - kIoBufferAlloc = 1 << 6, // Using IoBuffer allocator kIsOwnedByThis = 1 << 7, // Note that we have some extended flags defined below diff --git a/src/zencore/include/zencore/memory.h b/src/zencore/include/zencore/memory.h index fdea1a5f1..8361ab9d8 100644 --- a/src/zencore/include/zencore/memory.h +++ b/src/zencore/include/zencore/memory.h @@ -22,17 +22,10 @@ template<typename T> concept ContiguousRange = true; #endif -struct MemoryView; - -class Memory -{ -public: - ZENCORE_API static void* Alloc(size_t Size, size_t Alignment = sizeof(void*)); - ZENCORE_API static void Free(void* Ptr); -}; - ////////////////////////////////////////////////////////////////////////// +struct MemoryView; + struct MutableMemoryView { MutableMemoryView() = default; diff --git a/src/zencore/include/zencore/memory/fmalloc.h b/src/zencore/include/zencore/memory/fmalloc.h new file mode 100644 index 000000000..aeb05b651 --- /dev/null +++ b/src/zencore/include/zencore/memory/fmalloc.h @@ -0,0 +1,103 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> + +namespace zen { + +enum +{ + DEFAULT_ALIGNMENT = 0 +}; + +/** + * Inherit from FUseSystemMallocForNew if you want your objects to be placed in memory + * alloced by the system malloc routines, bypassing GMalloc. This is e.g. used by FMalloc + * itself. 
+ */ +class FUseSystemMallocForNew +{ +public: + void* operator new(size_t Size); + void operator delete(void* Ptr); + void* operator new[](size_t Size); + void operator delete[](void* Ptr); +}; + +/** Memory allocator abstraction + */ + +class FMalloc : public FUseSystemMallocForNew +{ +public: + /** + * Malloc + */ + virtual void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) = 0; + + /** + * TryMalloc - like Malloc(), but may return a nullptr result if the allocation + * request cannot be satisfied. + */ + virtual void* TryMalloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + + /** + * Realloc + */ + virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) = 0; + + /** + * TryRealloc - like Realloc(), but may return a nullptr if the allocation + * request cannot be satisfied. Note that in this case the memory + * pointed to by Original will still be valid + */ + virtual void* TryRealloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + + /** + * Free + */ + virtual void Free(void* Original) = 0; + + /** + * Malloc zeroed memory + */ + virtual void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + + /** + * TryMallocZeroed - like MallocZeroed(), but may return a nullptr result if the allocation + * request cannot be satisfied. + */ + virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + + /** + * For some allocators this will return the actual size that should be requested to eliminate + * internal fragmentation. The return value will always be >= Count. This can be used to grow + * and shrink containers to optimal sizes. + * This call is always fast and threadsafe with no locking. 
+ */ + virtual size_t QuantizeSize(size_t Count, uint32_t Alignment); + + /** + * If possible determine the size of the memory allocated at the given address + * + * @param Original - Pointer to memory we are checking the size of + * @param SizeOut - If possible, this value is set to the size of the passed in pointer + * @return true if succeeded + */ + virtual bool GetAllocationSize(void* Original, size_t& SizeOut); + + /** + * Notifies the malloc implementation that initialization of all allocators in GMalloc is complete, so it's safe to initialize any extra + * features that require "regular" allocations + */ + virtual void OnMallocInitialized(); + + virtual void Trim(bool bTrimThreadCaches); + + virtual void OutOfMemory(size_t Size, uint32_t Alignment); +}; + +extern FMalloc* GMalloc; /* Memory allocator */ + +} // namespace zen diff --git a/src/zencore/include/zencore/memory/llm.h b/src/zencore/include/zencore/memory/llm.h new file mode 100644 index 000000000..4f1c9de77 --- /dev/null +++ b/src/zencore/include/zencore/memory/llm.h @@ -0,0 +1,31 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zenbase/zenbase.h> +#include <zencore/memory/tagtrace.h> + +namespace zen { + +// clang-format off +#define LLM_ENUM_GENERIC_TAGS(macro) \ + macro(Untagged, "Untagged", -1) \ + macro(ProgramSize, "ProgramSize", -1) \ + macro(Metrics, "Metrics", -1) \ + macro(Logging, "Logging", -1) \ + macro(IoBuffer, "IoBuffer", -1) \ + macro(IoBufferMemory, "IoMemory", ELLMTag::IoBuffer) \ + macro(IoBufferCore, "IoCore", ELLMTag::IoBuffer) + +// clang-format on + +enum class ELLMTag : uint8_t +{ +#define LLM_ENUM(Enum, Str, Parent) Enum, + LLM_ENUM_GENERIC_TAGS(LLM_ENUM) +#undef LLM_ENUM + + GenericTagCount +}; + +} // namespace zen diff --git a/src/zencore/include/zencore/memory/mallocansi.h b/src/zencore/include/zencore/memory/mallocansi.h new file mode 100644 index 000000000..510695c8c --- /dev/null +++ b/src/zencore/include/zencore/memory/mallocansi.h @@ -0,0 +1,31 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "fmalloc.h" +#include "memory.h" + +namespace zen { + +void* AnsiMalloc(size_t Size, uint32_t Alignment); +void* AnsiRealloc(void* Ptr, size_t NewSize, uint32_t Alignment); +void AnsiFree(void* Ptr); + +// +// ANSI C memory allocator. 
+// + +class FMallocAnsi final : public FMalloc +{ +public: + FMallocAnsi(); + + virtual void* Malloc(size_t Size, uint32_t Alignment) override; + virtual void* TryMalloc(size_t Size, uint32_t Alignment) override; + virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override; + virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override; + virtual void Free(void* Ptr) override; + virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override; +}; + +} // namespace zen diff --git a/src/zencore/include/zencore/memory/mallocmimalloc.h b/src/zencore/include/zencore/memory/mallocmimalloc.h new file mode 100644 index 000000000..759eeb4a6 --- /dev/null +++ b/src/zencore/include/zencore/memory/mallocmimalloc.h @@ -0,0 +1,36 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/memory/fmalloc.h> + +#if ZEN_USE_MIMALLOC +# define ZEN_MIMALLOC_ENABLED 1 +#endif + +#if !defined(ZEN_MIMALLOC_ENABLED) +# define ZEN_MIMALLOC_ENABLED 0 +#endif + +#if ZEN_MIMALLOC_ENABLED + +namespace zen { + +class FMallocMimalloc final : public FMalloc +{ +public: + FMallocMimalloc(); + virtual void* Malloc(size_t Size, uint32_t Alignment) override; + virtual void* TryMalloc(size_t Size, uint32_t Alignment) override; + virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override; + virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override; + virtual void Free(void* Ptr) override; + virtual void* MallocZeroed(size_t Count, uint32_t Alignment) override; + virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment) override; + virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override; + virtual void Trim(bool bTrimThreadCaches) override; +}; + +} // namespace zen + +#endif diff --git a/src/zencore/include/zencore/memory/mallocrpmalloc.h b/src/zencore/include/zencore/memory/mallocrpmalloc.h new file mode 100644 index 000000000..be2627b2d --- /dev/null +++ 
b/src/zencore/include/zencore/memory/mallocrpmalloc.h @@ -0,0 +1,37 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/memory/fmalloc.h> + +#if ZEN_USE_RPMALLOC +# define ZEN_RPMALLOC_ENABLED 1 +#endif + +#if !defined(ZEN_RPMALLOC_ENABLED) +# define ZEN_RPMALLOC_ENABLED 0 +#endif + +#if ZEN_RPMALLOC_ENABLED + +namespace zen { + +class FMallocRpmalloc final : public FMalloc +{ +public: + FMallocRpmalloc(); + ~FMallocRpmalloc(); + virtual void* Malloc(size_t Size, uint32_t Alignment) override; + virtual void* TryMalloc(size_t Size, uint32_t Alignment) override; + virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override; + virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override; + virtual void Free(void* Ptr) override; + virtual void* MallocZeroed(size_t Count, uint32_t Alignment) override; + virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment) override; + virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override; + virtual void Trim(bool bTrimThreadCaches) override; +}; + +} // namespace zen + +#endif diff --git a/src/zencore/include/zencore/memory/mallocstomp.h b/src/zencore/include/zencore/memory/mallocstomp.h new file mode 100644 index 000000000..5d83868bb --- /dev/null +++ b/src/zencore/include/zencore/memory/mallocstomp.h @@ -0,0 +1,100 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> + +#if ZEN_PLATFORM_WINDOWS +# define ZEN_WITH_MALLOC_STOMP 1 +#endif + +#ifndef ZEN_WITH_MALLOC_STOMP +# define ZEN_WITH_MALLOC_STOMP 0 +#endif + +/** + * Stomp memory allocator support should be enabled in Core.Build.cs. + * Run-time validation should be enabled using '-stompmalloc' command line argument. + */ + +#if ZEN_WITH_MALLOC_STOMP + +# include <zencore/memory/fmalloc.h> +# include <zencore/thread.h> + +namespace zen { + +/** + * Stomp memory allocator. 
It helps find the following errors: + * - Read or writes off the end of an allocation. + * - Read or writes off the beginning of an allocation. + * - Read or writes after freeing an allocation. + */ +class FMallocStomp final : public FMalloc +{ + struct FAllocationData; + + const size_t PageSize; + + /** If it is set to true, instead of focusing on overruns the allocator will focus on underruns. */ + const bool bUseUnderrunMode; + RwLock Lock; + + uintptr_t VirtualAddressCursor = 0; + size_t VirtualAddressMax = 0; + static constexpr size_t VirtualAddressBlockSize = 1 * 1024 * 1024 * 1024; // 1 GB blocks + +public: + // FMalloc interface. + explicit FMallocStomp(const bool InUseUnderrunMode = false); + + /** + * Allocates a block of a given number of bytes of memory with the required alignment. + * In the process it allocates as many pages as necessary plus one that will be protected + * making it unaccessible and causing an exception. The actual allocation will be pushed + * to the end of the last valid unprotected page. To deal with underrun errors a sentinel + * is added right before the allocation in page which is checked on free. + * + * @param Size Size in bytes of the memory block to allocate. + * @param Alignment Alignment in bytes of the memory block to allocate. + * @return A pointer to the beginning of the memory block. + */ + virtual void* Malloc(size_t Size, uint32_t Alignment) override; + + virtual void* TryMalloc(size_t Size, uint32_t Alignment) override; + + /** + * Changes the size of the memory block pointed to by OldPtr. + * The function may move the memory block to a new location. + * + * @param OldPtr Pointer to a memory block previously allocated with Malloc. + * @param NewSize New size in bytes for the memory block. + * @param Alignment Alignment in bytes for the reallocation. + * @return A pointer to the reallocated memory block, which may be either the same as ptr or a new location. 
+ */ + virtual void* Realloc(void* InPtr, size_t NewSize, uint32_t Alignment) override; + + virtual void* TryRealloc(void* InPtr, size_t NewSize, uint32_t Alignment) override; + + /** + * Frees a memory allocation and verifies the sentinel in the process. + * + * @param InPtr Pointer of the data to free. + */ + virtual void Free(void* InPtr) override; + + /** + * If possible determine the size of the memory allocated at the given address. + * This will included all the pages that were allocated so it will be far more + * than what's set on the FAllocationData. + * + * @param Original - Pointer to memory we are checking the size of + * @param SizeOut - If possible, this value is set to the size of the passed in pointer + * @return true if succeeded + */ + virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override; +}; + +} // namespace zen + +#endif // WITH_MALLOC_STOMP diff --git a/src/zencore/include/zencore/memory/memory.h b/src/zencore/include/zencore/memory/memory.h new file mode 100644 index 000000000..2fc20def6 --- /dev/null +++ b/src/zencore/include/zencore/memory/memory.h @@ -0,0 +1,78 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <stdlib.h> +#include <zencore/memory/fmalloc.h> + +#define UE_ALLOCATION_FUNCTION(...) + +namespace zen { + +/** + * Corresponds to UE-side FMemory implementation + */ + +class Memory +{ +public: + static void Initialize(); + + // + // C style memory allocation stubs that fall back to C runtime + // + UE_ALLOCATION_FUNCTION(1) static void* SystemMalloc(size_t Size); + static void SystemFree(void* Ptr); + + // + // C style memory allocation stubs. 
+ // + + static inline void* Alloc(size_t Size, size_t Alignment = sizeof(void*)) { return Malloc(Size, uint32_t(Alignment)); } + + UE_ALLOCATION_FUNCTION(1, 2) static inline void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + UE_ALLOCATION_FUNCTION(2, 3) static inline void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + static inline void Free(void* Original); + static inline size_t GetAllocSize(void* Original); + + UE_ALLOCATION_FUNCTION(1, 2) static inline void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT); + +private: + static void GCreateMalloc(); +}; + +inline void* +Memory::Malloc(size_t Count, uint32_t Alignment) +{ + return GMalloc->TryMalloc(Count, Alignment); +} + +inline void* +Memory::Realloc(void* Original, size_t Count, uint32_t Alignment) +{ + return GMalloc->TryRealloc(Original, Count, Alignment); +} + +inline void +Memory::Free(void* Original) +{ + if (Original) + { + GMalloc->Free(Original); + } +} + +inline size_t +Memory::GetAllocSize(void* Original) +{ + size_t Size = 0; + return GMalloc->GetAllocationSize(Original, Size) ? Size : 0; +} + +inline void* +Memory::MallocZeroed(size_t Count, uint32_t Alignment) +{ + return GMalloc->TryMallocZeroed(Count, Alignment); +} + +} // namespace zen diff --git a/src/zencore/include/zencore/memory/memorytrace.h b/src/zencore/include/zencore/memory/memorytrace.h new file mode 100644 index 000000000..d1ab1f914 --- /dev/null +++ b/src/zencore/include/zencore/memory/memorytrace.h @@ -0,0 +1,251 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+#pragma once + +#include <zencore/enumflags.h> +#include <zencore/trace.h> + +#if !defined(UE_MEMORY_TRACE_AVAILABLE) +# define UE_MEMORY_TRACE_AVAILABLE 0 +#endif + +#if !defined(UE_MEMORY_TRACE_LATE_INIT) +# define UE_MEMORY_TRACE_LATE_INIT 0 +#endif + +#if !defined(PLATFORM_USES_FIXED_GMalloc_CLASS) +# define PLATFORM_USES_FIXED_GMalloc_CLASS 0 +#endif + +#if !defined(UE_MEMORY_TRACE_ENABLED) && UE_TRACE_ENABLED +# if UE_MEMORY_TRACE_AVAILABLE +# define UE_MEMORY_TRACE_ENABLED ZEN_WITH_MEMTRACK +# endif +#endif + +#if !defined(UE_MEMORY_TRACE_ENABLED) +# define UE_MEMORY_TRACE_ENABLED 0 +#endif + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +typedef uint32_t HeapId; + +//////////////////////////////////////////////////////////////////////////////// +enum EMemoryTraceRootHeap : uint8_t +{ + SystemMemory, // RAM + VideoMemory, // VRAM + EndHardcoded = VideoMemory, + EndReserved = 15 +}; + +//////////////////////////////////////////////////////////////////////////////// +// These values are traced. Do not modify existing values in order to maintain +// compatibility. +enum class EMemoryTraceHeapFlags : uint16_t +{ + None = 0, + Root = 1 << 0, + NeverFrees = 1 << 1, // The heap doesn't free (e.g. linear allocator) +}; +ENUM_CLASS_FLAGS(EMemoryTraceHeapFlags); + +//////////////////////////////////////////////////////////////////////////////// +// These values are traced. Do not modify existing values in order to maintain +// compatibility. +enum class EMemoryTraceHeapAllocationFlags : uint8_t +{ + None = 0, + Heap = 1 << 0, // Is a heap, can be used to unmark alloc as heap. 
+ Swap = 2 << 0, // Is a swap page +}; +ENUM_CLASS_FLAGS(EMemoryTraceHeapAllocationFlags); + +//////////////////////////////////////////////////////////////////////////////// +enum class EMemoryTraceSwapOperation : uint8 +{ + PageOut = 0, // Paged out to swap + PageIn = 1, // Read from swap via page fault + FreeInSwap = 2, // Freed while being paged out in swap +}; + +//////////////////////////////////////////////////////////////////////////////// + +// Internal options for early initialization of memory tracing systems. Exposed +// here due to visibility in platform implementations. +enum class EMemoryTraceInit : uint8 +{ + Disabled = 0, + AllocEvents = 1 << 0, + Callstacks = 1 << 1, + Tags = 1 << 2, + Full = AllocEvents | Callstacks | Tags, + Light = AllocEvents | Tags, +}; + +ENUM_CLASS_FLAGS(EMemoryTraceInit); + +//////////////////////////////////////////////////////////////////////////////// +#if UE_MEMORY_TRACE_ENABLED + +# define UE_MEMORY_TRACE(x) x + +UE_TRACE_CHANNEL_EXTERN(MemAllocChannel); + +//////////////////////////////////////////////////////////////////////////////// +class FMalloc* MemoryTrace_Create(class FMalloc* InMalloc); +void MemoryTrace_Initialize(); +void MemoryTrace_Shutdown(); + +/** + * Register a new heap specification (name). Use the returned value when marking heaps. + * @param ParentId Heap id of parent heap. + * @param Name Descriptive name of the heap. + * @param Flags Properties of this heap. See \ref EMemoryTraceHeapFlags + * @return Heap id to use when allocating memory + */ +HeapId MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags = EMemoryTraceHeapFlags::None); + +/** + * Register a new root heap specification (name). Use the returned value as parent to other heaps. + * @param Name Descriptive name of the root heap. + * @param Flags Properties of the this root heap. 
See \ref EMemoryTraceHeapFlags + * @return Heap id to use when allocating memory + */ +HeapId MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags = EMemoryTraceHeapFlags::None); + +/** + * Mark a traced allocation as being a heap. + * @param Address Address of the allocation + * @param Heap Heap id, see /ref MemoryTrace_HeapSpec. If no specific heap spec has been created the correct root heap needs to be given. + * @param Flags Additional properties of the heap allocation. Note that \ref EMemoryTraceHeapAllocationFlags::Heap is implicit. + * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id. + */ +void MemoryTrace_MarkAllocAsHeap(uint64 Address, + HeapId Heap, + EMemoryTraceHeapAllocationFlags Flags = EMemoryTraceHeapAllocationFlags::None, + uint32 ExternalCallstackId = 0); + +/** + * Unmark an allocation as a heap. When an allocation that has previously been used as a heap is reused as a regular + * allocation. + * @param Address Address of the allocation + * @param Heap Heap id + * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id. + */ +void MemoryTrace_UnmarkAllocAsHeap(uint64 Address, HeapId Heap, uint32 ExternalCallstackId = 0); + +/** + * Trace an allocation event. + * @param Address Address of allocation + * @param Size Size of allocation + * @param Alignment Alignment of the allocation + * @param RootHeap Which root heap this belongs to (system memory, video memory etc) + * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id. + */ +void MemoryTrace_Alloc(uint64 Address, + uint64 Size, + uint32 Alignment, + HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, + uint32 ExternalCallstackId = 0); + +/** + * Trace a free event. 
+ * @param Address Address of the allocation being freed + * @param RootHeap Which root heap this belongs to (system memory, video memory etc) + * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id. + */ +void MemoryTrace_Free(uint64 Address, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32 ExternalCallstackId = 0); + +/** + * Trace a free related to a reallocation event. + * @param Address Address of the allocation being freed + * @param RootHeap Which root heap this belongs to (system memory, video memory etc) + * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id. + */ +void MemoryTrace_ReallocFree(uint64 Address, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32 ExternalCallstackId = 0); + +/** Trace an allocation related to a reallocation event. + * @param Address Address of allocation + * @param NewSize Size of allocation + * @param Alignment Alignment of the allocation + * @param RootHeap Which root heap this belongs to (system memory, video memory etc) + * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id. + */ +void MemoryTrace_ReallocAlloc(uint64 Address, + uint64 NewSize, + uint32 Alignment, + HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, + uint32 ExternalCallstackId = 0); + +/** Trace a swap operation. Only available for system memory root heap (EMemoryTraceRootHeap::SystemMemory). + * @param PageAddress Page address for operation, in case of PageIn can be address of the page fault (not aligned to page boundary). + * @param SwapOperation Which swap operation is happening to the address. + * @param CompressedSize Compressed size of the page for page out operation. + * @param CallstackId CallstackId to use, if 0 to ignore (will not use current callstack id). 
+ */ +void MemoryTrace_SwapOp(uint64 PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32 CompressedSize = 0, uint32 CallstackId = 0); + +//////////////////////////////////////////////////////////////////////////////// +#else // UE_MEMORY_TRACE_ENABLED +// No-op stand-ins used when memory tracing is compiled out. Parameter lists and defaults mirror the traced declarations so call sites build in both configurations. +# define UE_MEMORY_TRACE(x) +inline HeapId +MemoryTrace_RootHeapSpec(const char16_t* /*Name*/, EMemoryTraceHeapFlags /*Flags*/ = EMemoryTraceHeapFlags::None) +{ + return ~0u; +} +inline HeapId +MemoryTrace_HeapSpec(HeapId /*ParentId*/, const char16_t* /*Name*/, EMemoryTraceHeapFlags /*Flags*/ = EMemoryTraceHeapFlags::None) +{ + return ~0u; +} +inline void +MemoryTrace_MarkAllocAsHeap(uint64_t /*Address*/, HeapId /*Heap*/, EMemoryTraceHeapAllocationFlags /*Flags*/ = EMemoryTraceHeapAllocationFlags::None, uint32_t /*ExternalCallstackId*/ = 0) +{ +} +inline void +MemoryTrace_UnmarkAllocAsHeap(uint64_t /*Address*/, HeapId /*Heap*/, uint32_t /*ExternalCallstackId*/ = 0) +{ +} +inline void +MemoryTrace_Alloc(uint64_t /*Address*/, + uint64_t /*Size*/, + uint32_t /*Alignment*/, + HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, + uint32_t ExternalCallstackId = 0) +{ + ZEN_UNUSED(RootHeap, ExternalCallstackId); +} +inline void +MemoryTrace_Free(uint64_t /*Address*/, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0) +{ + ZEN_UNUSED(RootHeap, ExternalCallstackId); +} +inline void +MemoryTrace_ReallocFree(uint64_t /*Address*/, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0) +{ + ZEN_UNUSED(RootHeap, ExternalCallstackId); +} +inline void +MemoryTrace_ReallocAlloc(uint64_t /*Address*/, + uint64_t /*NewSize*/, + uint32_t /*Alignment*/, + HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, + uint32_t ExternalCallstackId = 0) +{ + ZEN_UNUSED(RootHeap, ExternalCallstackId); +} +inline void +MemoryTrace_SwapOp(uint64_t /*PageAddress*/, + EMemoryTraceSwapOperation /*SwapOperation*/, + uint32_t CompressedSize = 0, + uint32_t CallstackId = 0) +{ + ZEN_UNUSED(CompressedSize, CallstackId); +} + +#endif // UE_MEMORY_TRACE_ENABLED + +} // namespace zen diff --git
a/src/zencore/include/zencore/memory/newdelete.h b/src/zencore/include/zencore/memory/newdelete.h new file mode 100644 index 000000000..d22c8604f --- /dev/null +++ b/src/zencore/include/zencore/memory/newdelete.h @@ -0,0 +1,155 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> +#include <new> + +#if defined(_MSC_VER) +# if (_MSC_VER >= 1900) && !defined(__EDG__) +# define ZEN_RESTRICT __declspec(allocator) __declspec(restrict) +# else +# define ZEN_RESTRICT __declspec(restrict) +# endif +#else +# define ZEN_RESTRICT +#endif + +////////////////////////////////////////////////////////////////////////// + +[[nodiscard]] ZEN_RESTRICT void* zen_new(size_t size); +[[nodiscard]] ZEN_RESTRICT void* zen_new_aligned(size_t size, size_t alignment); +[[nodiscard]] ZEN_RESTRICT void* zen_new_nothrow(size_t size) noexcept; +[[nodiscard]] ZEN_RESTRICT void* zen_new_aligned_nothrow(size_t size, size_t alignment) noexcept; + +void zen_free(void* p) noexcept; +void zen_free_size(void* p, size_t size) noexcept; +void zen_free_size_aligned(void* p, size_t size, size_t alignment) noexcept; +void zen_free_aligned(void* p, size_t alignment) noexcept; + +////////////////////////////////////////////////////////////////////////// + +#if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_) +# define zen_decl_new(n) [[nodiscard]] _VCRT_ALLOCATOR _Ret_notnull_ _Post_writable_byte_size_(n) +# define zen_decl_new_nothrow(n) [[nodiscard]] _VCRT_ALLOCATOR _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n) +#else +# define zen_decl_new(n) [[nodiscard]] +# define zen_decl_new_nothrow(n) [[nodiscard]] +#endif + +void +operator delete(void* p) noexcept +{ + zen_free(p); +} + +void +operator delete[](void* p) noexcept +{ + zen_free(p); +} + +void +operator delete(void* p, const std::nothrow_t&) noexcept +{ + zen_free(p); +} + +void +operator delete[](void* p, const std::nothrow_t&) noexcept +{ + 
zen_free(p); +} + +zen_decl_new(n) void* +operator new(std::size_t n) noexcept(false) +{ + return zen_new(n); +} + +zen_decl_new(n) void* +operator new[](std::size_t n) noexcept(false) +{ + return zen_new(n); +} + +zen_decl_new_nothrow(n) void* +operator new(std::size_t n, const std::nothrow_t& tag) noexcept +{ + (void)(tag); + return zen_new_nothrow(n); +} + +zen_decl_new_nothrow(n) void* +operator new[](std::size_t n, const std::nothrow_t& tag) noexcept +{ + (void)(tag); + return zen_new_nothrow(n); +} + +#if (__cplusplus >= 201402L || _MSC_VER >= 1916) +void +operator delete(void* p, std::size_t n) noexcept +{ + zen_free_size(p, n); +}; +void +operator delete[](void* p, std::size_t n) noexcept +{ + zen_free_size(p, n); +}; +#endif + +#if (__cplusplus > 201402L || defined(__cpp_aligned_new)) +void +operator delete(void* p, std::align_val_t al) noexcept +{ + zen_free_aligned(p, static_cast<size_t>(al)); +} +void +operator delete[](void* p, std::align_val_t al) noexcept +{ + zen_free_aligned(p, static_cast<size_t>(al)); +} +void +operator delete(void* p, std::size_t n, std::align_val_t al) noexcept +{ + zen_free_size_aligned(p, n, static_cast<size_t>(al)); +}; +void +operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept +{ + zen_free_size_aligned(p, n, static_cast<size_t>(al)); +}; +void +operator delete(void* p, std::align_val_t al, const std::nothrow_t&) noexcept +{ + zen_free_aligned(p, static_cast<size_t>(al)); +} +void +operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept +{ + zen_free_aligned(p, static_cast<size_t>(al)); +} + +void* +operator new(std::size_t n, std::align_val_t al) noexcept(false) +{ + return zen_new_aligned(n, static_cast<size_t>(al)); +} +void* +operator new[](std::size_t n, std::align_val_t al) noexcept(false) +{ + return zen_new_aligned(n, static_cast<size_t>(al)); +} +void* +operator new(std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept +{ + return zen_new_aligned_nothrow(n, 
static_cast<size_t>(al)); +} +void* +operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept +{ + return zen_new_aligned_nothrow(n, static_cast<size_t>(al)); +} +#endif diff --git a/src/zencore/include/zencore/memory/tagtrace.h b/src/zencore/include/zencore/memory/tagtrace.h new file mode 100644 index 000000000..f51b21466 --- /dev/null +++ b/src/zencore/include/zencore/memory/tagtrace.h @@ -0,0 +1,93 @@ +// Copyright Epic Games, Inc. All Rights Reserved. +#pragma once + +#include <zenbase/zenbase.h> +#include <zencore/trace.h> + +//////////////////////////////////////////////////////////////////////////////// + +namespace zen { + +enum class ELLMTag : uint8_t; + +int32_t MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display); +int32_t MemoryTrace_GetActiveTag(); + +inline constexpr int32_t TRACE_TAG = 257; + +} // namespace zen + +//////////////////////////////////////////////////////////////////////////////// +#if !defined(UE_MEMORY_TAGS_TRACE_ENABLED) +# define UE_MEMORY_TAGS_TRACE_ENABLED 1 +#endif + +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + +namespace zen { +//////////////////////////////////////////////////////////////////////////////// + +/** + * Used to associate any allocation within this scope to a given tag. + * + * We need to be able to convert the three types of inputs to LLM scopes: + * - ELLMTag, an uint8 with fixed categories. There are three sub ranges + Generic tags, platform and project tags. + * - FName, free form string, for example a specific asset. + * - TagData, an opaque pointer from LLM. 
+ * + */ +class FMemScope +{ +public: + FMemScope(); // Used with SetTagAndActivate + FMemScope(int32_t InTag, bool bShouldActivate = true); + FMemScope(ELLMTag InTag, bool bShouldActivate = true); + ~FMemScope(); + +private: + void ActivateScope(int32_t InTag); + UE::Trace::Private::FScopedLogScope Inner; + int32_t PrevTag; +}; + +/** + * A scope that activates in case no existing scope is active. + */ +template<typename TagType> +class FDefaultMemScope : public FMemScope +{ +public: + FDefaultMemScope(TagType InTag) : FMemScope(InTag, MemoryTrace_GetActiveTag() == 0) {} +}; + +/** + * Used order to keep the tag for memory that is being reallocated. + */ +class FMemScopePtr +{ +public: + FMemScopePtr(uint64_t InPtr); + ~FMemScopePtr(); + +private: + UE::Trace::Private::FScopedLogScope Inner; +}; + +//////////////////////////////////////////////////////////////////////////////// +# define UE_MEMSCOPE(InTag) FMemScope PREPROCESSOR_JOIN(MemScope, __LINE__)(InTag); +# define UE_MEMSCOPE_PTR(InPtr) FMemScopePtr PREPROCESSOR_JOIN(MemPtrScope, __LINE__)((uint64)InPtr); +# define UE_MEMSCOPE_DEFAULT(InTag) FDefaultMemScope PREPROCESSOR_JOIN(MemScope, __LINE__)(InTag); +# define UE_MEMSCOPE_UNINITIALIZED(Line) FMemScope PREPROCESSOR_JOIN(MemScope, Line); + +#else // UE_MEMORY_TAGS_TRACE_ENABLED + +//////////////////////////////////////////////////////////////////////////////// +# define UE_MEMSCOPE(...) +# define UE_MEMSCOPE_PTR(...) +# define UE_MEMSCOPE_DEFAULT(...) +# define UE_MEMSCOPE_UNINITIALIZED(...) +# define UE_MEMSCOPE_ACTIVATE(...) 
/**
 * Compares two null-terminated char16_t strings code unit by code unit.
 *
 * @param s1 First string, must be null-terminated
 * @param s2 Second string, must be null-terminated
 * @return 0 when the strings are identical; otherwise the difference between
 *         the first pair of differing code units (negative when s1 orders
 *         before s2, positive when it orders after)
 *
 * The result is an int so the sign of the difference survives; a bool return
 * would reduce every mismatch to `true`.
 */
inline int
StringCompare(const char16_t* s1, const char16_t* s2)
{
	char16_t c1, c2;

	while ((c1 = *s1) == (c2 = *s2))
	{
		if (c1 == 0)
		{
			// Both strings ended at the same position
			return 0;
		}

		++s1;
		++s2;
	}

	// Widen before subtracting so the difference is computed as signed int
	return int(uint16_t(c1)) - int(uint16_t(c2));
}

/**
 * Returns true when the two null-terminated char16_t strings are identical.
 */
inline bool
StringEquals(const char16_t* s1, const char16_t* s2)
{
	return StringCompare(s1, s2) == 0;
}
@@ namespace zen { void IoBufferCore::AllocateBuffer(size_t InSize, size_t Alignment) const { -#if ZEN_USE_MIMALLOC - void* Ptr = mi_aligned_alloc(Alignment, RoundUp(InSize, Alignment)); - m_Flags.fetch_or(kIoBufferAlloc, std::memory_order_relaxed); -#else + UE_MEMSCOPE(ELLMTag::IoBufferMemory); + void* Ptr = Memory::Alloc(InSize, Alignment); -#endif if (!Ptr) { ThrowOutOfMemory(fmt::format("failed allocating {:#x} bytes aligned to {:#x}", InSize, Alignment)); } + m_DataPtr = Ptr; } void IoBufferCore::FreeBuffer() { - if (!m_DataPtr) + if (m_DataPtr) { - return; + Memory::Free(const_cast<void*>(m_DataPtr)); + m_DataPtr = nullptr; } +} - const uint32_t LocalFlags = m_Flags.load(std::memory_order_relaxed); - -#if ZEN_USE_MIMALLOC - if (LocalFlags & kIoBufferAlloc) - { - return mi_free(const_cast<void*>(m_DataPtr)); - } -#endif +void* +IoBufferCore::operator new(size_t Size) +{ + UE_MEMSCOPE(ELLMTag::IoBufferCore); + return Memory::Malloc(Size); +} - ZEN_UNUSED(LocalFlags); - return Memory::Free(const_cast<void*>(m_DataPtr)); +void +IoBufferCore::operator delete(void* Ptr) +{ + Memory::Free(Ptr); } ////////////////////////////////////////////////////////////////////////// @@ -104,10 +100,9 @@ IoBufferCore::IoBufferCore(size_t InSize, size_t Alignment) IoBufferCore::~IoBufferCore() { - if (IsOwnedByThis() && m_DataPtr) + if (IsOwnedByThis()) { FreeBuffer(); - m_DataPtr = nullptr; } } @@ -567,7 +562,7 @@ IoBufferBuilder::ReadFromFileMaybe(const IoBuffer& InBuffer) Error = zen::GetLastError(); } #else - int Fd = int(intptr_t(FileRef.FileHandle)); + int Fd = int(intptr_t(FileRef.FileHandle)); ssize_t ReadResult = pread(Fd, OutBuffer.MutableData(), size_t(NumberOfBytesToRead), off_t(FileOffset)); if (ReadResult != -1) { @@ -635,7 +630,7 @@ IoBufferBuilder::MakeFromFile(const std::filesystem::path& FileName, uint64_t Of DataFile.GetSize((ULONGLONG&)FileSize); #else int Flags = O_RDONLY | O_CLOEXEC; - int Fd = open(FileName.c_str(), Flags); + int Fd = open(FileName.c_str(), 
Flags); if (Fd < 0) { return {}; @@ -704,7 +699,7 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName) Handle = DataFile.Detach(); #else - int Fd = open(FileName.native().c_str(), O_RDONLY); + int Fd = open(FileName.native().c_str(), O_RDONLY); if (Fd < 0) { return {}; diff --git a/src/zencore/logging.cpp b/src/zencore/logging.cpp index 1a0a91b3d..7bd500b3b 100644 --- a/src/zencore/logging.cpp +++ b/src/zencore/logging.cpp @@ -6,6 +6,8 @@ #include <zencore/testing.h> #include <zencore/thread.h> +#include <zencore/memory/llm.h> + ZEN_THIRD_PARTY_INCLUDES_START #include <spdlog/details/registry.h> #include <spdlog/sinks/null_sink.h> @@ -66,6 +68,7 @@ static_assert(offsetof(spdlog::source_loc, funcname) == offsetof(SourceLocation, void EmitLogMessage(LoggerRef& Logger, int LogLevel, const std::string_view Message) { + UE_MEMSCOPE(ELLMTag::Logging); const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel; Logger.SpdLogger->log(InLevel, Message); if (IsErrorLevel(LogLevel)) @@ -80,6 +83,7 @@ EmitLogMessage(LoggerRef& Logger, int LogLevel, const std::string_view Message) void EmitLogMessage(LoggerRef& Logger, int LogLevel, std::string_view Format, fmt::format_args Args) { + UE_MEMSCOPE(ELLMTag::Logging); zen::logging::LoggingContext LogCtx; fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args); zen::logging::EmitLogMessage(Logger, LogLevel, LogCtx.Message()); @@ -88,6 +92,7 @@ EmitLogMessage(LoggerRef& Logger, int LogLevel, std::string_view Format, fmt::fo void EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel, const std::string_view Message) { + UE_MEMSCOPE(ELLMTag::Logging); const spdlog::source_loc& Location = *reinterpret_cast<const spdlog::source_loc*>(&InLocation); const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel; Logger.SpdLogger->log(Location, InLevel, Message); @@ -103,6 +108,7 @@ EmitLogMessage(LoggerRef& Logger, const SourceLocation& 
InLocation, int LogLevel void EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel, std::string_view Format, fmt::format_args Args) { + UE_MEMSCOPE(ELLMTag::Logging); zen::logging::LoggingContext LogCtx; fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args); zen::logging::EmitLogMessage(Logger, InLocation, LogLevel, LogCtx.Message()); @@ -111,6 +117,7 @@ EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel void EmitConsoleLogMessage(int LogLevel, const std::string_view Message) { + UE_MEMSCOPE(ELLMTag::Logging); const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel; ConsoleLog().SpdLogger->log(InLevel, Message); } @@ -118,6 +125,7 @@ EmitConsoleLogMessage(int LogLevel, const std::string_view Message) void EmitConsoleLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) { + UE_MEMSCOPE(ELLMTag::Logging); zen::logging::LoggingContext LogCtx; fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args); zen::logging::EmitConsoleLogMessage(LogLevel, LogCtx.Message()); @@ -192,6 +200,8 @@ std::string LogLevels[level::LogLevelCount]; void ConfigureLogLevels(level::LogLevel Level, std::string_view Loggers) { + UE_MEMSCOPE(ELLMTag::Logging); + RwLock::ExclusiveLockScope _(LogLevelsLock); LogLevels[Level] = Loggers; } @@ -199,6 +209,8 @@ ConfigureLogLevels(level::LogLevel Level, std::string_view Loggers) void RefreshLogLevels(level::LogLevel* DefaultLevel) { + UE_MEMSCOPE(ELLMTag::Logging); + spdlog::details::registry::log_levels Levels; { @@ -275,6 +287,8 @@ Default() void SetDefault(std::string_view NewDefaultLoggerId) { + UE_MEMSCOPE(ELLMTag::Logging); + auto NewDefaultLogger = spdlog::get(std::string(NewDefaultLoggerId)); ZEN_ASSERT(NewDefaultLogger); @@ -293,6 +307,8 @@ ErrorLog() void SetErrorLog(std::string_view NewErrorLoggerId) { + UE_MEMSCOPE(ELLMTag::Logging); + if (NewErrorLoggerId.empty()) { TheErrorLogger = {}; @@ -310,6 +326,8 @@ 
SetErrorLog(std::string_view NewErrorLoggerId) LoggerRef Get(std::string_view Name) { + UE_MEMSCOPE(ELLMTag::Logging); + std::shared_ptr<spdlog::logger> Logger = spdlog::get(std::string(Name)); if (!Logger) @@ -339,6 +357,8 @@ SuppressConsoleLog() LoggerRef ConsoleLog() { + UE_MEMSCOPE(ELLMTag::Logging); + std::call_once(ConsoleInitFlag, [&] { if (!ConLogger) { @@ -355,6 +375,8 @@ ConsoleLog() void InitializeLogging() { + UE_MEMSCOPE(ELLMTag::Logging); + TheDefaultLogger = *spdlog::default_logger_raw(); } diff --git a/src/zencore/memory.cpp b/src/zencore/memory.cpp index a0d911786..a2fe02f3a 100644 --- a/src/zencore/memory.cpp +++ b/src/zencore/memory.cpp @@ -4,67 +4,14 @@ #include <zencore/fmtutils.h> #include <zencore/intmath.h> #include <zencore/memory.h> +#include <zencore/memory/memory.h> #include <zencore/testing.h> #include <zencore/zencore.h> #include <cstdlib> -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif - namespace zen { -////////////////////////////////////////////////////////////////////////// - -static void* -AlignedAllocImpl(size_t Size, size_t Alignment) -{ - // aligned_alloc() states that size must be a multiple of alignment. Some - // platforms return null if this requirement isn't met. 
- Size = (Size + Alignment - 1) & ~(Alignment - 1); - -#if ZEN_USE_MIMALLOC - return mi_aligned_alloc(Alignment, Size); -#elif ZEN_PLATFORM_WINDOWS - return _aligned_malloc(Size, Alignment); -#else - return std::aligned_alloc(Alignment, Size); -#endif -} - -void -AlignedFreeImpl(void* ptr) -{ - if (ptr == nullptr) - return; - -#if ZEN_USE_MIMALLOC - return mi_free(ptr); -#elif ZEN_PLATFORM_WINDOWS - _aligned_free(ptr); -#else - std::free(ptr); -#endif -} - -////////////////////////////////////////////////////////////////////////// - -void* -Memory::Alloc(size_t Size, size_t Alignment) -{ - return AlignedAllocImpl(Size, Alignment); -} - -void -Memory::Free(void* ptr) -{ - AlignedFreeImpl(ptr); -} - -////////////////////////////////////////////////////////////////////////// // // Unit tests // diff --git a/src/zencore/memory/fmalloc.cpp b/src/zencore/memory/fmalloc.cpp new file mode 100644 index 000000000..3e96003f5 --- /dev/null +++ b/src/zencore/memory/fmalloc.cpp @@ -0,0 +1,156 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <string.h> +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/memory.h> + +namespace zen { + +////////////////////////////////////////////////////////////////////////// + +class FInitialMalloc : public FMalloc +{ + virtual void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override + { + Memory::Initialize(); + return GMalloc->Malloc(Count, Alignment); + } + virtual void* TryMalloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override + { + Memory::Initialize(); + return GMalloc->TryMalloc(Count, Alignment); + } + virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override + { + Memory::Initialize(); + return GMalloc->Realloc(Original, Count, Alignment); + } + virtual void* TryRealloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override + { + Memory::Initialize(); + return GMalloc->TryRealloc(Original, Count, Alignment); + } + virtual void Free(void* Original) override + { + Memory::Initialize(); + return GMalloc->Free(Original); + } + virtual void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override + { + Memory::Initialize(); + return GMalloc->MallocZeroed(Count, Alignment); + } + + virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override + { + Memory::Initialize(); + return GMalloc->TryMallocZeroed(Count, Alignment); + } + virtual size_t QuantizeSize(size_t Count, uint32_t Alignment) override + { + Memory::Initialize(); + return GMalloc->QuantizeSize(Count, Alignment); + } + virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override + { + Memory::Initialize(); + return GMalloc->GetAllocationSize(Original, SizeOut); + } + virtual void OnMallocInitialized() override {} + virtual void Trim(bool bTrimThreadCaches) override { ZEN_UNUSED(bTrimThreadCaches); } +} GInitialMalloc; + +FMalloc* GMalloc = &GInitialMalloc; /* Memory allocator */ + 
+////////////////////////////////////////////////////////////////////////// + +void* +FUseSystemMallocForNew::operator new(size_t Size) +{ + return Memory::SystemMalloc(Size); +} + +void +FUseSystemMallocForNew::operator delete(void* Ptr) +{ + Memory::SystemFree(Ptr); +} + +void* +FUseSystemMallocForNew::operator new[](size_t Size) +{ + return Memory::SystemMalloc(Size); +} + +void +FUseSystemMallocForNew::operator delete[](void* Ptr) +{ + Memory::SystemFree(Ptr); +} + +////////////////////////////////////////////////////////////////////////// + +void* +FMalloc::TryRealloc(void* Original, size_t Count, uint32_t Alignment) +{ + return Realloc(Original, Count, Alignment); +} + +void* +FMalloc::TryMalloc(size_t Count, uint32_t Alignment) +{ + return Malloc(Count, Alignment); +} + +void* +FMalloc::TryMallocZeroed(size_t Count, uint32_t Alignment) +{ + return MallocZeroed(Count, Alignment); +} + +void* +FMalloc::MallocZeroed(size_t Count, uint32_t Alignment) +{ + void* const Memory = Malloc(Count, Alignment); + + if (Memory) + { + ::memset(Memory, 0, Count); + } + + return Memory; +} + +void +FMalloc::OutOfMemory(size_t Size, uint32_t Alignment) +{ + ZEN_UNUSED(Size, Alignment); + // no-op by default +} + +void +FMalloc::Trim(bool bTrimThreadCaches) +{ + ZEN_UNUSED(bTrimThreadCaches); +} + +void +FMalloc::OnMallocInitialized() +{ +} + +bool +FMalloc::GetAllocationSize(void* Original, size_t& SizeOut) +{ + ZEN_UNUSED(Original, SizeOut); + return false; // Generic implementation has no way of determining this +} + +size_t +FMalloc::QuantizeSize(size_t Count, uint32_t Alignment) +{ + ZEN_UNUSED(Alignment); + return Count; // Generic implementation has no way of determining this +} + +} // namespace zen diff --git a/src/zencore/memory/mallocansi.cpp b/src/zencore/memory/mallocansi.cpp new file mode 100644 index 000000000..9c3936172 --- /dev/null +++ b/src/zencore/memory/mallocansi.cpp @@ -0,0 +1,251 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zencore/memory/mallocansi.h> + +#include <zencore/intmath.h> +#include <zencore/memory/align.h> +#include <zencore/windows.h> + +#if ZEN_PLATFORM_LINUX +# define PLATFORM_USE_ANSI_POSIX_MALLOC 1 +#endif + +#if ZEN_PLATFORM_MAC +# define PLATFORM_USE_CUSTOM_MEMALIGN 1 +#endif + +#ifndef PLATFORM_USE_ANSI_MEMALIGN +# define PLATFORM_USE_ANSI_MEMALIGN 0 +#endif + +#ifndef PLATFORM_USE_ANSI_POSIX_MALLOC +# define PLATFORM_USE_ANSI_POSIX_MALLOC 0 +#endif + +#ifndef PLATFORM_USE_CUSTOM_MEMALIGN +# define PLATFORM_USE_CUSTOM_MEMALIGN 0 +#endif + +#if PLATFORM_USE_ANSI_POSIX_MALLOC +# include <malloc.h> +# include <string.h> +#endif + +#define MALLOC_ANSI_USES__ALIGNED_MALLOC ZEN_PLATFORM_WINDOWS + +namespace zen { + +////////////////////////////////////////////////////////////////////////// + +void* +AnsiMalloc(size_t Size, uint32_t Alignment) +{ +#if MALLOC_ANSI_USES__ALIGNED_MALLOC + void* Result = _aligned_malloc(Size, Alignment); +#elif PLATFORM_USE_ANSI_POSIX_MALLOC + void* Result; + if (posix_memalign(&Result, Alignment, Size) != 0) + { + Result = nullptr; + } +#elif PLATFORM_USE_ANSI_MEMALIGN + Result = reallocalign(Ptr, NewSize, Alignment); +#elif PLATFORM_USE_CUSTOM_MEMALIGN + void* Ptr = malloc(Size + Alignment + sizeof(void*) + sizeof(size_t)); + void* Result = nullptr; + if (Ptr) + { + Result = Align((uint8_t*)Ptr + sizeof(void*) + sizeof(size_t), Alignment); + *((void**)((uint8_t*)Result - sizeof(void*))) = Ptr; + *((size_t*)((uint8_t*)Result - sizeof(void*) - sizeof(size_t))) = Size; + } +#else +# error Unknown allocation path +#endif + + return Result; +} + +size_t +AnsiGetAllocationSize(void* Original) +{ +#if MALLOC_ANSI_USES__ALIGNED_MALLOC + return _aligned_msize(Original, 16, 0); // TODO: incorrectly assumes alignment of 16 +#elif PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN + return malloc_usable_size(Original); +#elif PLATFORM_USE_CUSTOM_MEMALIGN + return *((size_t*)((uint8_t*)Original - sizeof(void*) - sizeof(size_t))); 
+#else +# error Unknown allocation path +#endif +} + +void* +AnsiRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ + void* Result = nullptr; + +#if MALLOC_ANSI_USES__ALIGNED_MALLOC + if (Ptr && NewSize) + { + Result = _aligned_realloc(Ptr, NewSize, Alignment); + } + else if (Ptr == nullptr) + { + Result = _aligned_malloc(NewSize, Alignment); + } + else + { + _aligned_free(Ptr); + Result = nullptr; + } +#elif PLATFORM_USE_ANSI_POSIX_MALLOC + if (Ptr && NewSize) + { + size_t UsableSize = malloc_usable_size(Ptr); + if (posix_memalign(&Result, Alignment, NewSize) != 0) + { + Result = nullptr; + } + else if (UsableSize) + { + memcpy(Result, Ptr, Min(NewSize, UsableSize)); + } + free(Ptr); + } + else if (Ptr == nullptr) + { + if (posix_memalign(&Result, Alignment, NewSize) != 0) + { + Result = nullptr; + } + } + else + { + free(Ptr); + Result = nullptr; + } +#elif PLATFORM_USE_CUSTOM_MEMALIGN + if (Ptr && NewSize) + { + // Can't use realloc as it might screw with alignment. + Result = AnsiMalloc(NewSize, Alignment); + size_t PtrSize = AnsiGetAllocationSize(Ptr); + memcpy(Result, Ptr, Min(NewSize, PtrSize)); + AnsiFree(Ptr); + } + else if (Ptr == nullptr) + { + Result = AnsiMalloc(NewSize, Alignment); + } + else + { + free(*((void**)((uint8_t*)Ptr - sizeof(void*)))); + Result = nullptr; + } +#else +# error Unknown allocation path +#endif + + return Result; +} + +void +AnsiFree(void* Ptr) +{ +#if MALLOC_ANSI_USES__ALIGNED_MALLOC + _aligned_free(Ptr); +#elif PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN + free(Ptr); +#elif PLATFORM_USE_CUSTOM_MEMALIGN + if (Ptr) + { + free(*((void**)((uint8_t*)Ptr - sizeof(void*)))); + } +#else +# error Unknown allocation path +#endif +} + +////////////////////////////////////////////////////////////////////////// + +FMallocAnsi::FMallocAnsi() +{ +#if ZEN_PLATFORM_WINDOWS + // Enable low fragmentation heap - http://msdn2.microsoft.com/en-US/library/aa366750.aspx + intptr_t CrtHeapHandle = _get_heap_handle(); + ULONG 
EnableLFH = 2; + HeapSetInformation((void*)CrtHeapHandle, HeapCompatibilityInformation, &EnableLFH, sizeof(EnableLFH)); +#endif +} + +void* +FMallocAnsi::TryMalloc(size_t Size, uint32_t Alignment) +{ + Alignment = Max(Size >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment); + + void* Result = AnsiMalloc(Size, Alignment); + + return Result; +} + +void* +FMallocAnsi::Malloc(size_t Size, uint32_t Alignment) +{ + void* Result = TryMalloc(Size, Alignment); + + if (Result == nullptr && Size) + { + OutOfMemory(Size, Alignment); + } + + return Result; +} + +void* +FMallocAnsi::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ + Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment); + + void* Result = AnsiRealloc(Ptr, NewSize, Alignment); + + return Result; +} + +void* +FMallocAnsi::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ + void* Result = TryRealloc(Ptr, NewSize, Alignment); + + if (Result == nullptr && NewSize != 0) + { + OutOfMemory(NewSize, Alignment); + } + + return Result; +} + +void +FMallocAnsi::Free(void* Ptr) +{ + AnsiFree(Ptr); +} + +bool +FMallocAnsi::GetAllocationSize(void* Original, size_t& SizeOut) +{ + if (!Original) + { + return false; + } + +#if MALLOC_ANSI_USES__ALIGNED_MALLOC + ZEN_UNUSED(SizeOut); + return false; +#else + SizeOut = AnsiGetAllocationSize(Original); + return true; +#endif +} + +} // namespace zen diff --git a/src/zencore/memory/mallocmimalloc.cpp b/src/zencore/memory/mallocmimalloc.cpp new file mode 100644 index 000000000..1919af3bf --- /dev/null +++ b/src/zencore/memory/mallocmimalloc.cpp @@ -0,0 +1,197 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zencore/intmath.h> +#include <zencore/memory/align.h> +#include <zencore/memory/mallocmimalloc.h> + +#if ZEN_MIMALLOC_ENABLED + +# include <mimalloc.h> + +/** Value we fill a memory block with after it is free, in UE_BUILD_DEBUG **/ +# define DEBUG_FILL_FREED (0xdd) + +/** Value we fill a new memory block with, in UE_BUILD_DEBUG **/ +# define DEBUG_FILL_NEW (0xcd) + +# define ZEN_ENABLE_DEBUG_FILL 1 + +namespace zen { + +// Dramatically reduce memory zeroing and page faults during alloc intense workloads +// by keeping freed pages for a little while instead of releasing them +// right away to the OS, effectively acting like a scratch buffer +// until pages are both freed and inactive for the delay specified +// in milliseconds. +int32_t GMiMallocMemoryResetDelay = 10000; + +FMallocMimalloc::FMallocMimalloc() +{ + mi_option_set(mi_option_reset_delay, GMiMallocMemoryResetDelay); +} + +void* +FMallocMimalloc::TryMalloc(size_t Size, uint32_t Alignment) +{ + void* NewPtr = nullptr; + + if (Alignment != DEFAULT_ALIGNMENT) + { + Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment); + NewPtr = mi_malloc_aligned(Size, Alignment); + } + else + { + NewPtr = mi_malloc_aligned(Size, uint32_t(Size >= 16 ? 
16 : 8)); + } + +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + if (Size && NewPtr != nullptr) + { + memset(NewPtr, DEBUG_FILL_NEW, mi_usable_size(NewPtr)); + } +# endif + + return NewPtr; +} + +void* +FMallocMimalloc::Malloc(size_t Size, uint32_t Alignment) +{ + void* Result = TryMalloc(Size, Alignment); + + if (Result == nullptr && Size) + { + OutOfMemory(Size, Alignment); + } + + return Result; +} + +void* +FMallocMimalloc::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + size_t OldSize = 0; + if (Ptr) + { + OldSize = mi_malloc_size(Ptr); + if (NewSize < OldSize) + { + memset((uint8_t*)Ptr + NewSize, DEBUG_FILL_FREED, OldSize - NewSize); + } + } +# endif + void* NewPtr = nullptr; + + if (NewSize == 0) + { + mi_free(Ptr); + + return nullptr; + } + +# if ZEN_PLATFORM_MAC + // macOS expects all allocations to be aligned to 16 bytes, so on Mac we always have to use mi_realloc_aligned + Alignment = AlignArbitrary(Max((uint32_t)16, Alignment), (uint32_t)16); + NewPtr = mi_realloc_aligned(Ptr, NewSize, Alignment); +# else + if (Alignment != DEFAULT_ALIGNMENT) + { + Alignment = Max(NewSize >= 16 ? 
(uint32_t)16 : (uint32_t)8, Alignment); + NewPtr = mi_realloc_aligned(Ptr, NewSize, Alignment); + } + else + { + NewPtr = mi_realloc(Ptr, NewSize); + } +# endif + +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + if (NewPtr && NewSize > OldSize) + { + memset((uint8_t*)NewPtr + OldSize, DEBUG_FILL_NEW, mi_usable_size(NewPtr) - OldSize); + } +# endif + + return NewPtr; +} + +void* +FMallocMimalloc::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ + void* Result = TryRealloc(Ptr, NewSize, Alignment); + + if (Result == nullptr && NewSize) + { + OutOfMemory(NewSize, Alignment); + } + + return Result; +} + +void +FMallocMimalloc::Free(void* Ptr) +{ + if (!Ptr) + { + return; + } + +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + memset(Ptr, DEBUG_FILL_FREED, mi_usable_size(Ptr)); +# endif + + mi_free(Ptr); +} + +void* +FMallocMimalloc::MallocZeroed(size_t Size, uint32_t Alignment) +{ + void* Result = TryMallocZeroed(Size, Alignment); + + if (Result == nullptr && Size) + { + OutOfMemory(Size, Alignment); + } + + return Result; +} + +void* +FMallocMimalloc::TryMallocZeroed(size_t Size, uint32_t Alignment) +{ + void* NewPtr = nullptr; + + if (Alignment != DEFAULT_ALIGNMENT) + { + Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment); + NewPtr = mi_zalloc_aligned(Size, Alignment); + } + else + { + NewPtr = mi_zalloc_aligned(Size, uint32_t(Size >= 16 ? 16 : 8)); + } + + return NewPtr; +} + +bool +FMallocMimalloc::GetAllocationSize(void* Original, size_t& SizeOut) +{ + SizeOut = mi_malloc_size(Original); + return true; +} + +void +FMallocMimalloc::Trim(bool bTrimThreadCaches) +{ + mi_collect(bTrimThreadCaches); +} + +# undef DEBUG_FILL_FREED +# undef DEBUG_FILL_NEW + +} // namespace zen + +#endif // MIMALLOC_ENABLED diff --git a/src/zencore/memory/mallocrpmalloc.cpp b/src/zencore/memory/mallocrpmalloc.cpp new file mode 100644 index 000000000..ffced27c9 --- /dev/null +++ b/src/zencore/memory/mallocrpmalloc.cpp @@ -0,0 +1,189 @@ +// Copyright Epic Games, Inc. 
All Rights Reserved. + +#include <zencore/intmath.h> +#include <zencore/memory/align.h> +#include <zencore/memory/mallocrpmalloc.h> + +#if ZEN_RPMALLOC_ENABLED + +# include "rpmalloc.h" + +/** Value we fill a memory block with after it is free, in UE_BUILD_DEBUG **/ +# define DEBUG_FILL_FREED (0xdd) + +/** Value we fill a new memory block with, in UE_BUILD_DEBUG **/ +# define DEBUG_FILL_NEW (0xcd) + +# define ZEN_ENABLE_DEBUG_FILL 1 + +namespace zen { + +FMallocRpmalloc::FMallocRpmalloc() +{ + rpmalloc_initialize(nullptr); +} + +FMallocRpmalloc::~FMallocRpmalloc() +{ + rpmalloc_finalize(); +} + +void* +FMallocRpmalloc::TryMalloc(size_t Size, uint32_t Alignment) +{ + void* NewPtr = nullptr; + + if (Alignment != DEFAULT_ALIGNMENT) + { + Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment); + NewPtr = rpaligned_alloc(Alignment, Size); + } + else + { + NewPtr = rpaligned_alloc(uint32_t(Size >= 16 ? 16 : 8), Size); + } + +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + if (Size && NewPtr != nullptr) + { + memset(NewPtr, DEBUG_FILL_NEW, rpmalloc_usable_size(NewPtr)); + } +# endif + + return NewPtr; +} + +void* +FMallocRpmalloc::Malloc(size_t Size, uint32_t Alignment) +{ + void* Result = TryMalloc(Size, Alignment); + + if (Result == nullptr && Size) + { + OutOfMemory(Size, Alignment); + } + + return Result; +} + +void* +FMallocRpmalloc::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ + void* Result = TryRealloc(Ptr, NewSize, Alignment); + + if (Result == nullptr && NewSize) + { + OutOfMemory(NewSize, Alignment); + } + + return Result; +} + +void* +FMallocRpmalloc::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) +{ +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + size_t OldSize = 0; + if (Ptr) + { + OldSize = rpmalloc_usable_size(Ptr); + if (NewSize < OldSize) + { + memset((uint8_t*)Ptr + NewSize, DEBUG_FILL_FREED, OldSize - NewSize); + } + } +# endif + void* NewPtr = nullptr; + + if (NewSize == 0) + { + rpfree(Ptr); + + return nullptr; + } + +# if 
ZEN_PLATFORM_MAC + // macOS expects all allocations to be aligned to 16 bytes, so on Mac we always have to use mi_realloc_aligned + Alignment = AlignArbitrary(Max((uint32_t)16, Alignment), (uint32_t)16); + NewPtr = rpaligned_realloc(Ptr, Alignment, NewSize, /* OldSize */ 0, /* flags */ 0); +# else + if (Alignment != DEFAULT_ALIGNMENT) + { + Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment); + NewPtr = rpaligned_realloc(Ptr, Alignment, NewSize, /* OldSize */ 0, /* flags */ 0); + } + else + { + NewPtr = rprealloc(Ptr, NewSize); + } +# endif + +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + if (NewPtr && NewSize > OldSize) + { + memset((uint8_t*)NewPtr + OldSize, DEBUG_FILL_NEW, rpmalloc_usable_size(NewPtr) - OldSize); + } +# endif + + return NewPtr; +} + +void +FMallocRpmalloc::Free(void* Ptr) +{ + if (!Ptr) + { + return; + } + +# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL + memset(Ptr, DEBUG_FILL_FREED, rpmalloc_usable_size(Ptr)); +# endif + + rpfree(Ptr); +} + +void* +FMallocRpmalloc::MallocZeroed(size_t Size, uint32_t Alignment) +{ + void* Result = TryMallocZeroed(Size, Alignment); + + if (Result == nullptr && Size) + { + OutOfMemory(Size, Alignment); + } + + return Result; +} +void* +FMallocRpmalloc::TryMallocZeroed(size_t Size, uint32_t Alignment) +{ + void* NewPtr = nullptr; + + if (Alignment != DEFAULT_ALIGNMENT) + { + Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment); + NewPtr = rpaligned_zalloc(Alignment, Size); + } + else + { + NewPtr = rpaligned_zalloc(uint32_t(Size >= 16 ? 16 : 8), Size); + } + + return NewPtr; +} + +bool +FMallocRpmalloc::GetAllocationSize(void* Original, size_t& SizeOut) +{ + // this is not the same as the allocation size - is that ok? 
+ SizeOut = rpmalloc_usable_size(Original); + return true; +} +void +FMallocRpmalloc::Trim(bool bTrimThreadCaches) +{ + ZEN_UNUSED(bTrimThreadCaches); +} +} // namespace zen +#endif diff --git a/src/zencore/memory/mallocstomp.cpp b/src/zencore/memory/mallocstomp.cpp new file mode 100644 index 000000000..db9e1535e --- /dev/null +++ b/src/zencore/memory/mallocstomp.cpp @@ -0,0 +1,283 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zencore/memory/mallocstomp.h> + +#if ZEN_WITH_MALLOC_STOMP + +# include <zencore/memory/align.h> +# include <zencore/xxhash.h> + +# if ZEN_PLATFORM_LINUX +# include <sys/mman.h> +# endif + +# if ZEN_PLATFORM_WINDOWS +# include <zencore/windows.h> +# endif + +# if ZEN_PLATFORM_WINDOWS +// MallocStomp can keep virtual address range reserved after memory block is freed, while releasing the physical memory. +// This dramatically increases accuracy of use-after-free detection, but consumes significant amount of memory for the OS page table. +// Virtual memory limit for a process on Win10 is 128 TB, which means we can afford to keep virtual memory reserved for a very long time. +// Running Infiltrator demo consumes ~700MB of virtual address space per second. +# define MALLOC_STOMP_KEEP_VIRTUAL_MEMORY 1 +# else +# define MALLOC_STOMP_KEEP_VIRTUAL_MEMORY 0 +# endif + +// 64-bit ABIs on x86_64 expect a 16-byte alignment +# define STOMPALIGNMENT 16U + +namespace zen { + +struct FMallocStomp::FAllocationData +{ + /** Pointer to the full allocation. Needed so the OS knows what to free. */ + void* FullAllocationPointer; + /** Full size of the allocation including the extra page. */ + size_t FullSize; + /** Size of the allocation requested. */ + size_t Size; + /** Sentinel used to check for underrun. */ + size_t Sentinel; + + /** Calculate the expected sentinel value for this allocation data. 
*/ + size_t CalculateSentinel() const + { + XXH3_128 Xxh = XXH3_128::HashMemory(this, offsetof(FAllocationData, Sentinel)); + + size_t Hash; + memcpy(&Hash, Xxh.Hash, sizeof(Hash)); + + return Hash; + } +}; + +FMallocStomp::FMallocStomp(const bool InUseUnderrunMode) : PageSize(4096 /* TODO: make dynamic */), bUseUnderrunMode(InUseUnderrunMode) +{ +} + +void* +FMallocStomp::Malloc(size_t Size, uint32_t Alignment) +{ + void* Result = TryMalloc(Size, Alignment); + + if (Result == nullptr) + { + OutOfMemory(Size, Alignment); + } + + return Result; +} + +void* +FMallocStomp::TryMalloc(size_t Size, uint32_t Alignment) +{ + if (Size == 0U) + { + Size = 1U; + } + + Alignment = Max<uint32_t>(Alignment, STOMPALIGNMENT); + + constexpr static size_t AllocationDataSize = sizeof(FAllocationData); + + const size_t AlignedSize = Alignment ? ((Size + Alignment - 1) & -(int32_t)Alignment) : Size; + const size_t AlignmentSize = Alignment > PageSize ? Alignment - PageSize : 0; + const size_t AllocFullPageSize = (AlignedSize + AlignmentSize + AllocationDataSize + PageSize - 1) & ~(PageSize - 1); + const size_t TotalAllocationSize = AllocFullPageSize + PageSize; + +# if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + void* FullAllocationPointer = mmap(nullptr, TotalAllocationSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); +# elif ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY + // Allocate virtual address space from current block using linear allocation strategy. + // If there is not enough space, try to allocate new block from OS. Report OOM if block allocation fails. 
+ void* FullAllocationPointer = nullptr; + + { + RwLock::ExclusiveLockScope _(Lock); + + if (VirtualAddressCursor + TotalAllocationSize <= VirtualAddressMax) + { + FullAllocationPointer = (void*)(VirtualAddressCursor); + } + else + { + const size_t ReserveSize = Max(VirtualAddressBlockSize, TotalAllocationSize); + + // Reserve a new block of virtual address space that will be linearly sub-allocated + // We intentionally don't keep track of reserved blocks, as we never need to explicitly release them. + FullAllocationPointer = VirtualAlloc(nullptr, ReserveSize, MEM_RESERVE, PAGE_NOACCESS); + + VirtualAddressCursor = uintptr_t(FullAllocationPointer); + VirtualAddressMax = VirtualAddressCursor + ReserveSize; + } + + VirtualAddressCursor += TotalAllocationSize; + } +# else + void* FullAllocationPointer = FPlatformMemory::BinnedAllocFromOS(TotalAllocationSize); +# endif // PLATFORM_UNIX || PLATFORM_MAC + + if (!FullAllocationPointer) + { + return nullptr; + } + + void* ReturnedPointer = nullptr; + + ZEN_ASSERT_SLOW(IsAligned(FullAllocationPointer, PageSize)); + + if (bUseUnderrunMode) + { + ReturnedPointer = Align((uint8_t*)FullAllocationPointer + PageSize + AllocationDataSize, Alignment); + void* AllocDataPointerStart = static_cast<FAllocationData*>(ReturnedPointer) - 1; + ZEN_ASSERT_SLOW(AllocDataPointerStart >= FullAllocationPointer); + +# if ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY + // Commit physical pages to the used range, leaving the first page unmapped. + void* CommittedMemory = VirtualAlloc(AllocDataPointerStart, AllocationDataSize + AlignedSize, MEM_COMMIT, PAGE_READWRITE); + if (!CommittedMemory) + { + // Failed to allocate and commit physical memory pages. + return nullptr; + } + ZEN_ASSERT(CommittedMemory == AlignDown(AllocDataPointerStart, PageSize)); +# else + // Page protect the first page, this will cause the exception in case there is an underrun. 
+ FPlatformMemory::PageProtect((uint8*)AlignDown(AllocDataPointerStart, PageSize) - PageSize, PageSize, false, false); +# endif + } //-V773 + else + { + ReturnedPointer = AlignDown((uint8_t*)FullAllocationPointer + AllocFullPageSize - AlignedSize, Alignment); + void* ReturnedPointerEnd = (uint8_t*)ReturnedPointer + AlignedSize; + ZEN_ASSERT_SLOW(IsAligned(ReturnedPointerEnd, PageSize)); + + void* AllocDataPointerStart = static_cast<FAllocationData*>(ReturnedPointer) - 1; + ZEN_ASSERT_SLOW(AllocDataPointerStart >= FullAllocationPointer); + +# if ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY + // Commit physical pages to the used range, leaving the last page unmapped. + void* CommitPointerStart = AlignDown(AllocDataPointerStart, PageSize); + void* CommittedMemory = VirtualAlloc(CommitPointerStart, + size_t((uint8_t*)ReturnedPointerEnd - (uint8_t*)CommitPointerStart), + MEM_COMMIT, + PAGE_READWRITE); + if (!CommittedMemory) + { + // Failed to allocate and commit physical memory pages. + return nullptr; + } + ZEN_ASSERT(CommittedMemory == CommitPointerStart); +# else + // Page protect the last page, this will cause the exception in case there is an overrun. 
+ FPlatformMemory::PageProtect(ReturnedPointerEnd, PageSize, false, false); +# endif + } //-V773 + + ZEN_ASSERT_SLOW(IsAligned(FullAllocationPointer, PageSize)); + ZEN_ASSERT_SLOW(IsAligned(TotalAllocationSize, PageSize)); + ZEN_ASSERT_SLOW(IsAligned(ReturnedPointer, Alignment)); + ZEN_ASSERT_SLOW((uint8_t*)ReturnedPointer + AlignedSize <= (uint8_t*)FullAllocationPointer + TotalAllocationSize); + + FAllocationData& AllocationData = static_cast<FAllocationData*>(ReturnedPointer)[-1]; + AllocationData = {FullAllocationPointer, TotalAllocationSize, AlignedSize, 0}; + AllocationData.Sentinel = AllocationData.CalculateSentinel(); + + return ReturnedPointer; +} + +void* +FMallocStomp::Realloc(void* InPtr, size_t NewSize, uint32_t Alignment) +{ + void* Result = TryRealloc(InPtr, NewSize, Alignment); + + if (Result == nullptr && NewSize) + { + OutOfMemory(NewSize, Alignment); + } + + return Result; +} + +void* +FMallocStomp::TryRealloc(void* InPtr, size_t NewSize, uint32_t Alignment) +{ + if (NewSize == 0U) + { + Free(InPtr); + return nullptr; + } + + void* ReturnPtr = nullptr; + + if (InPtr != nullptr) + { + ReturnPtr = TryMalloc(NewSize, Alignment); + + if (ReturnPtr != nullptr) + { + FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(reinterpret_cast<uint8_t*>(InPtr) - sizeof(FAllocationData)); + memcpy(ReturnPtr, InPtr, Min(AllocDataPtr->Size, NewSize)); + Free(InPtr); + } + } + else + { + ReturnPtr = TryMalloc(NewSize, Alignment); + } + + return ReturnPtr; +} + +void +FMallocStomp::Free(void* InPtr) +{ + if (InPtr == nullptr) + { + return; + } + + FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(InPtr); + AllocDataPtr--; + + // Check the sentinel to verify that the allocation data is intact. + if (AllocDataPtr->Sentinel != AllocDataPtr->CalculateSentinel()) + { + // There was a memory underrun related to this allocation. 
+ ZEN_DEBUG_BREAK(); + } + +# if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + munmap(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize); +# elif ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY + // Unmap physical memory, but keep virtual address range reserved to catch use-after-free errors. + + VirtualFree(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize, MEM_DECOMMIT); + +# else + FPlatformMemory::BinnedFreeToOS(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize); +# endif // PLATFORM_UNIX || PLATFORM_MAC +} + +bool +FMallocStomp::GetAllocationSize(void* Original, size_t& SizeOut) +{ + if (Original == nullptr) + { + SizeOut = 0U; + } + else + { + FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(Original); + AllocDataPtr--; + SizeOut = AllocDataPtr->Size; + } + + return true; +} + +} // namespace zen + +#endif // WITH_MALLOC_STOMP diff --git a/src/zencore/memory/memory.cpp b/src/zencore/memory/memory.cpp new file mode 100644 index 000000000..f236796ad --- /dev/null +++ b/src/zencore/memory/memory.cpp @@ -0,0 +1,281 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zencore/commandline.h> +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/mallocansi.h> +#include <zencore/memory/mallocmimalloc.h> +#include <zencore/memory/mallocrpmalloc.h> +#include <zencore/memory/mallocstomp.h> +#include <zencore/memory/memory.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/string.h> + +#if ZEN_PLATFORM_WINDOWS +# include <zencore/windows.h> +ZEN_THIRD_PARTY_INCLUDES_START +# include <shellapi.h> // For command line parsing +ZEN_THIRD_PARTY_INCLUDES_END +#endif + +#if ZEN_PLATFORM_LINUX +# include <stdio.h> +#endif + +namespace zen { + +enum class MallocImpl +{ + None = 0, + Ansi, + Stomp, + Mimalloc, + Rpmalloc +}; + +static int +InitGMalloc() +{ + MallocImpl Malloc = MallocImpl::None; + FMalloc* InitMalloc = GMalloc; + + // Pick a default base allocator based on availability/platform + +#if ZEN_MIMALLOC_ENABLED + if (Malloc == MallocImpl::None) + { + Malloc = MallocImpl::Mimalloc; + } +#endif + +#if ZEN_RPMALLOC_ENABLED + if (Malloc == MallocImpl::None) + { + Malloc = MallocImpl::Rpmalloc; + } +#endif + + // Process any command line overrides + // + // Note that calls can come into this function before we enter the regular main function + // and we can therefore not rely on the regular command line parsing for the application + + using namespace std::literals; + + auto ProcessMallocArg = [&](const std::string_view& Arg) { +#if ZEN_RPMALLOC_ENABLED + if (Arg == "rpmalloc"sv) + { + Malloc = MallocImpl::Rpmalloc; + } +#endif + +#if ZEN_MIMALLOC_ENABLED + if (Arg == "mimalloc"sv) + { + Malloc = MallocImpl::Mimalloc; + } +#endif + + if (Arg == "ansi"sv) + { + Malloc = MallocImpl::Ansi; + } + + if (Arg == "stomp"sv) + { + Malloc = MallocImpl::Stomp; + } + }; + + constexpr std::string_view MallocOption = "--malloc="sv; + + std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) { + if (Arg.starts_with(MallocOption)) + { + const std::string_view OptionArgs = 
Arg.substr(MallocOption.size()); + + IterateCommaSeparatedValue(OptionArgs, ProcessMallocArg); + } + }; + + IterateCommandlineArgs(ProcessArg); + + switch (Malloc) + { +#if ZEN_WITH_MALLOC_STOMP + case MallocImpl::Stomp: + GMalloc = new FMallocStomp(); + break; +#endif + +#if ZEN_RPMALLOC_ENABLED + case MallocImpl::Rpmalloc: + GMalloc = new FMallocRpmalloc(); + break; +#endif + +#if ZEN_MIMALLOC_ENABLED + case MallocImpl::Mimalloc: + GMalloc = new FMallocMimalloc(); + break; +#endif + default: + break; + } + + if (GMalloc == InitMalloc) + { + GMalloc = new FMallocAnsi(); + } + + return 1; +} + +void +Memory::GCreateMalloc() +{ + static int InitFlag = InitGMalloc(); +} + +void +Memory::Initialize() +{ + GCreateMalloc(); +} + +////////////////////////////////////////////////////////////////////////// + +void* +Memory::SystemMalloc(size_t Size) +{ + void* Ptr = ::malloc(Size); + MemoryTrace_Alloc(uint64_t(Ptr), Size, 0, EMemoryTraceRootHeap::SystemMemory); + return Ptr; +} + +void +Memory::SystemFree(void* Ptr) +{ + MemoryTrace_Free(uint64_t(Ptr), EMemoryTraceRootHeap::SystemMemory); + ::free(Ptr); +} + +} // namespace zen + +////////////////////////////////////////////////////////////////////////// + +static ZEN_NOINLINE bool +InvokeNewHandler(bool NoThrow) +{ + std::new_handler h = std::get_new_handler(); + + if (!h) + { +#if defined(_CPPUNWIND) || defined(__cpp_exceptions) + if (NoThrow == false) + throw std::bad_alloc(); +#else + ZEN_UNUSED(NoThrow); +#endif + return false; + } + else + { + h(); + return true; + } +} + +////////////////////////////////////////////////////////////////////////// + +ZEN_NOINLINE void* +RetryNew(size_t Size, bool NoThrow) +{ + void* Ptr = nullptr; + while (!Ptr && InvokeNewHandler(NoThrow)) + { + Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT); + } + return Ptr; +} + +void* +zen_new(size_t Size) +{ + void* Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT); + + if (!Ptr) [[unlikely]] + { + const bool NoThrow = false; + 
return RetryNew(Size, NoThrow); + } + + return Ptr; +} + +void* +zen_new_nothrow(size_t Size) noexcept +{ + void* Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT); + + if (!Ptr) [[unlikely]] + { + const bool NoThrow = true; + return RetryNew(Size, NoThrow); + } + + return Ptr; +} + +void* +zen_new_aligned(size_t Size, size_t Alignment) +{ + void* Ptr; + + do + { + Ptr = zen::Memory::Malloc(Size, uint32_t(Alignment)); + } while (!Ptr && InvokeNewHandler(/* NoThrow */ false)); + + return Ptr; +} + +void* +zen_new_aligned_nothrow(size_t Size, size_t Alignment) noexcept +{ + void* Ptr; + + do + { + Ptr = zen::Memory::Malloc(Size, uint32_t(Alignment)); + } while (!Ptr && InvokeNewHandler(/* NoThrow */ true)); + + return Ptr; +} + +void +zen_free(void* Ptr) noexcept +{ + zen::Memory::Free(Ptr); +} + +void +zen_free_size(void* Ptr, size_t Size) noexcept +{ + ZEN_UNUSED(Size); + zen::Memory::Free(Ptr); +} + +void +zen_free_size_aligned(void* Ptr, size_t Size, size_t Alignment) noexcept +{ + ZEN_UNUSED(Size, Alignment); + zen::Memory::Free(Ptr); +} + +void +zen_free_aligned(void* Ptr, size_t Alignment) noexcept +{ + ZEN_UNUSED(Alignment); + zen::Memory::Free(Ptr); +} diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp new file mode 100644 index 000000000..d860c05d1 --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.cpp @@ -0,0 +1,1059 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "callstacktrace.h" + +#include <zenbase/zenbase.h> +#include <zencore/string.h> + +#if UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +// Platform implementations of back tracing +//////////////////////////////////////////////////////////////////////////////// +void CallstackTrace_CreateInternal(FMalloc*); +void CallstackTrace_InitializeInternal(); + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_CHANNEL_DEFINE(CallstackChannel) +UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec) + +uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot; + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Create(class FMalloc* InMalloc) +{ + static auto InitOnce = [&] { + CallstackTrace_CreateInternal(InMalloc); + return true; + }(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_Initialize() +{ + GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot(); + + static auto InitOnce = [&] { + CallstackTrace_InitializeInternal(); + return true; + }(); +} + +} // namespace zen + +#endif + +#if ZEN_PLATFORM_WINDOWS +# include "moduletrace.h" + +# include "growonlylockfreehash.h" + +# include <zencore/scopeguard.h> +# include <zencore/thread.h> +# include <zencore/trace.h> + +# include <atomic> +# include <span> + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <winnt.h> +# include <winternl.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS +# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0 +# endif + +// 0=off, 1=stats, 2=validation, 3=truth_compare +# define BACKTRACE_DBGLVL 0 + +# define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0)) + +static bool GModulesAreInitialized = false; + +// This implementation is using unwind tables which is results in very fast +// stack walking. 
In some cases this is not suitable, and we then fall back +// to the standard stack walking implementation. +# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES) +# if defined(__clang__) +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0 +# else +# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1 +# endif +# endif + +// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms +# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER) +# if defined(__clang__) +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0 +# else +# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1 +# endif +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB) +// Initial size of the known set of callstacks +# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks +# endif + +# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE) +// If disabled the known set will not grow. New callstacks will not be +// reported if the set is full +# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1 +# endif + +namespace zen { + +class FMalloc; + +UE_TRACE_CHANNEL_EXTERN(CallstackChannel) + +UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync) + UE_TRACE_EVENT_FIELD(uint32, CallstackId) + UE_TRACE_EVENT_FIELD(uint64[], Frames) +UE_TRACE_EVENT_END() + +class FCallstackTracer +{ +public: + struct FBacktraceEntry + { + uint64_t Hash = 0; + uint32_t FrameCount = 0; + uint64_t* Frames; + }; + + FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {} + + uint32_t AddCallstack(const FBacktraceEntry& Entry) + { + bool bAlreadyAdded = false; + + // Our set implementation doesn't allow for zero entries (zero represents an empty element + // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation, + // treat it as a "1" instead, for purposes of tracking if we've seen that callstack. 
+ const uint64_t Hash = FMath::Max(Entry.Hash, 1ull); + uint32_t Id; + KnownSet.Find(Hash, &Id, &bAlreadyAdded); + if (!bAlreadyAdded) + { + Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed); + // On the first callstack reserve memory up front + if (Id == 1) + { + KnownSet.Reserve(InitialReserveCount); + } +# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE + // If configured as not growable, start returning unknown id's when full. + if (Id >= InitialReserveCount) + { + return 0; + } +# endif + KnownSet.Emplace(Hash, Id); + UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel) + << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount); + } + + return Id; + } + +private: + struct FEncounteredCallstackSetEntry + { + std::atomic_uint64_t Key; + std::atomic_uint32_t Value; + + inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); } + inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); } + inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t InKey, uint32_t InValue) + { + Value.store(InValue, std::memory_order_release); + Key.store(InKey, std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); } + static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry)); + } + }; + + typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet; + + constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024; + constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry); + + FEncounteredCallstackSet KnownSet; + std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack" +}; + +# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +/* 
+ * Windows' x64 binaries contain a ".pdata" section that describes the location + * and size of its functions and details on how to unwind them. The unwind + * information includes descriptions about a function's stack frame size and + * the non-volatile registers it pushes onto the stack. From this we can + * calculate where a call instruction wrote its return address. This is enough + * to walk the callstack and by caching this information it can be done + * efficiently. + * + * Some functions need a variable amount of stack (such as those that use + * alloc() for example) will use a frame pointer. Frame pointers involve saving + * and restoring the stack pointer in the function's prologue/epilogue. This + * frees the function up to modify the stack pointer arbitrarily. This + * significantly complicates establishing where a return address is, so this + * pdata scheme of walking the stack just doesn't support functions like this. + * Walking stops if it encounters such a function. Fortunately there are + * usually very few such functions, saving us from having to read and track + * non-volatile registers which adds a significant amount of work. + * + * A further optimisation is to to assume we are only interested methods that + * are part of engine or game code. As such we only build lookup tables for + * such modules and never accept OS or third party modules. Backtracing stops + * if an address is encountered which doesn't map to a known module. 
+ */ + +//////////////////////////////////////////////////////////////////////////////// +static uint32_t +AddressToId(uintptr_t Address) +{ + return uint32_t(Address >> 16); +} + +static uintptr_t +IdToAddress(uint32_t Id) +{ + return static_cast<uint32_t>(uintptr_t(Id) << 16); +} + +struct FIdPredicate +{ + template<class T> + bool operator()(uint32_t Id, const T& Item) const + { + return Id < Item.Id; + } + template<class T> + bool operator()(const T& Item, uint32_t Id) const + { + return Item.Id < Id; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +struct FUnwindInfo +{ + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t PrologBytes; + uint8_t NumUnwindCodes; + uint8_t FrameReg : 4; + uint8_t FrameRspBias : 4; +}; + +# pragma warning(push) +# pragma warning(disable : 4200) +struct FUnwindCode +{ + uint8_t PrologOffset; + uint8_t OpCode : 4; + uint8_t OpInfo : 4; + uint16_t Params[]; +}; +# pragma warning(pop) + +enum +{ + UWOP_PUSH_NONVOL = 0, // 1 node + UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes + UWOP_ALLOC_SMALL = 2, // 1 node + UWOP_SET_FPREG = 3, // 1 node + UWOP_SAVE_NONVOL = 4, // 2 nodes + UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes + UWOP_SAVE_XMM128 = 8, // 2 nodes + UWOP_SAVE_XMM128_FAR = 9, // 3 nodes + UWOP_PUSH_MACHFRAME = 10, // 1 node +}; + +//////////////////////////////////////////////////////////////////////////////// +class FBacktracer +{ +public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + void AddModule(uintptr_t Base, const char16_t* Name); + void RemoveModule(uintptr_t Base); + uint32_t GetBacktraceId(void* AddressOfReturnAddress); + +private: + struct FFunction + { + uint32_t Id; + int32_t RspBias; +# if BACKTRACE_DBGLVL >= 2 + uint32_t Size; + const FUnwindInfo* UnwindInfo; +# endif + }; + + struct FModule + { + uint32_t Id; + uint32_t IdSize; + uint32_t NumFunctions; +# if BACKTRACE_DBGLVL >= 1 + uint16 NumFpTypes; + // uint16 *padding* +# else + // uint32_t 
*padding* +# endif + FFunction* Functions; + }; + + struct FLookupState + { + FModule Module; + }; + + struct FFunctionLookupSetEntry + { + // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function) + std::atomic_uint64_t Data; + + inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; } + inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; } + inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } + inline void SetKeyValue(uint64_t Key, int32_t Value) + { + Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed); + } + static inline uint32_t KeyHash(uint64_t Key) + { + // 64 bit pointer to 32 bit hash + Key = (~Key) + (Key << 21); + Key = Key ^ (Key >> 24); + Key = Key * 265; + Key = Key ^ (Key >> 14); + Key = Key * 21; + Key = Key ^ (Key >> 28); + Key = Key + (Key << 31); + return static_cast<uint32_t>(Key); + } + static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry)); + } + }; + typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet; + + const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const; + static FBacktracer* Instance; + mutable zen::RwLock Lock; + FModule* Modules; + int32_t ModulesNum; + int32_t ModulesCapacity; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; +# if BACKTRACE_LOCK_FREE + mutable FFunctionLookupSet FunctionLookups; + mutable bool bReentranceCheck = false; +# endif +# if BACKTRACE_DBGLVL >= 1 + mutable uint32_t NumFpTruncations = 0; + mutable uint32_t TotalFunctions = 0; +# endif +}; + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* FBacktracer::Instance = nullptr; + +//////////////////////////////////////////////////////////////////////////////// 
+FBacktracer::FBacktracer(FMalloc* InMalloc) +: Malloc(InMalloc) +, CallstackTracer(InMalloc) +# if BACKTRACE_LOCK_FREE +, FunctionLookups(InMalloc) +# endif +{ +# if BACKTRACE_LOCK_FREE + FunctionLookups.Reserve(512 * 1024); // 4 MB +# endif + ModulesCapacity = 8; + ModulesNum = 0; + Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity); + + Instance = this; +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer::~FBacktracer() +{ + std::span<FModule> ModulesView(Modules, ModulesNum); + for (FModule& Module : ModulesView) + { + Malloc->Free(Module.Functions); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FBacktracer* +FBacktracer::Get() +{ + return Instance; +} + +bool GFullBacktraces = false; + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name) +{ + if (!GFullBacktraces) + { + const size_t NameLen = StringLength(Name); + if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe"))) + { + return; + } + } + + const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase; + const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew); + const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader); + + uint32_t NumSections = FileHeader->NumberOfSections; + const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader); + + // Find ".pdata" section + uintptr_t PdataBase = 0; + uintptr_t PdataEnd = 0; + for (uint32_t i = 0; i < NumSections; ++i) + { + const IMAGE_SECTION_HEADER* Section = Sections + i; + if (*(uint64_t*)(Section->Name) == + 0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. 
This constant is '.pdata' + { + PdataBase = ModuleBase + Section->VirtualAddress; + PdataEnd = PdataBase + Section->SizeOfRawData; + break; + } + } + + if (PdataBase == 0) + { + return; + } + + // Count the number of functions. The assumption here is that if we have got this far then there is at least one function + uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION); + if (NumFunctions == 0) + { + return; + } + + const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase; + do + { + const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1; + if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress)) + { + break; + } + + --NumFunctions; + } while (NumFunctions != 0); + + // Allocate some space for the module's function-to-frame-size table + auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions); + FFunction* OutTableCursor = OutTable; + + // Extract frame size for each function from pdata's unwind codes. + uint32_t NumFpFuncs = 0; + for (uint32_t i = 0; i < NumFunctions; ++i) + { + const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i; + + uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress; + const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr; + + if (UnwindInfo->Version != 1) + { + /* some v2s have been seen in msvc. 
Always seem to be assembly + * routines (memset, memcpy, etc) */ + continue; + } + + int32_t FpInfo = 0; + int32_t RspBias = 0; + +# if BACKTRACE_DBGLVL >= 2 + uint32_t PrologVerify = UnwindInfo->PrologBytes; +# endif + + const auto* Code = (FUnwindCode*)(UnwindInfo + 1); + const auto* EndCode = Code + UnwindInfo->NumUnwindCodes; + while (Code < EndCode) + { +# if BACKTRACE_DBGLVL >= 2 + if (Code->PrologOffset > PrologVerify) + { + PLATFORM_BREAK(); + } + PrologVerify = Code->PrologOffset; +# endif + + switch (Code->OpCode) + { + case UWOP_PUSH_NONVOL: + RspBias += 8; + Code += 1; + break; + + case UWOP_ALLOC_LARGE: + if (Code->OpInfo) + { + RspBias += *(uint32_t*)(Code->Params); + Code += 3; + } + else + { + RspBias += Code->Params[0] * 8; + Code += 2; + } + break; + + case UWOP_ALLOC_SMALL: + RspBias += (Code->OpInfo * 8) + 8; + Code += 1; + break; + + case UWOP_SET_FPREG: + // Function will adjust RSP (e.g. through use of alloca()) so it + // uses a frame pointer register. There's instructions like; + // + // push FRAME_REG + // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)] + // ... + // add rsp, rax + // ... + // sub rsp, FRAME_RSP_BIAS * 16 + // pop FRAME_REG + // ret + // + // To recover the stack frame we would need to track non-volatile + // registers which adds a lot of overhead for a small subset of + // functions. Instead we'll end backtraces at these functions. + + // MSB is set to detect variable sized frames that we can't proceed + // past when back-tracing. + NumFpFuncs++; + FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23); + Code += 1; + break; + + case UWOP_PUSH_MACHFRAME: + RspBias = Code->OpInfo ? 
48 : 40; + Code += 1; + break; + + case UWOP_SAVE_NONVOL: + Code += 2; + break; /* saves are movs instead of pushes */ + case UWOP_SAVE_NONVOL_FAR: + Code += 3; + break; + case UWOP_SAVE_XMM128: + Code += 2; + break; + case UWOP_SAVE_XMM128_FAR: + Code += 3; + break; + + default: +# if BACKTRACE_DBGLVL >= 2 + PLATFORM_BREAK(); +# endif + break; + } + } + + // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a + // single actual function in the .text segment. + bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO); + + RspBias /= sizeof(void*); // stack push/popds in units of one machine word + RspBias += !bIsChained; // and one extra push for the ret address + RspBias |= FpInfo; // pack in details about possible frame pointer + + if (bIsChained) + { + OutTableCursor[-1].RspBias += RspBias; +# if BACKTRACE_DBGLVL >= 2 + OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress); +# endif + } + else + { + *OutTableCursor = { + FunctionTable->BeginAddress, + RspBias, +# if BACKTRACE_DBGLVL >= 2 + FunctionTable->EndAddress - FunctionTable->BeginAddress, + UnwindInfo, +# endif + }; + + ++OutTableCursor; + } + } + + uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage; + ModuleSize += 0xffff; // to align up to next 64K page. 
it'll get shifted by AddressToId() + + FModule Module = { + AddressToId(ModuleBase), + AddressToId(ModuleSize), + uint32_t(uintptr_t(OutTableCursor - OutTable)), +# if BACKTRACE_DBGLVL >= 1 + uint16(NumFpFuncs), +# endif + OutTable, + }; + + { + zen::RwLock::ExclusiveLockScope _(Lock); + + if (ModulesNum + 1 > ModulesCapacity) + { + ModulesCapacity += 8; + Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity); + } + Modules[ModulesNum++] = Module; + + std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; }); + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations += NumFpFuncs; + TotalFunctions += NumFunctions; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +void +FBacktracer::RemoveModule(uintptr_t ModuleBase) +{ + // When Windows' RequestExit() is called it hard-terminates all threads except + // the main thread and then proceeds to unload the process' DLLs. This hard + // thread termination can result is dangling locked locks. Not an issue as + // the rule is "do not do anything multithreaded in DLL load/unload". And here + // we are, taking write locks during DLL unload which is, quite unsurprisingly, + // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us + // anything due to how DLLs and processes' address spaces work. So we will... 
+# if defined PLATFORM_WINDOWS + ZEN_UNUSED(ModuleBase); + + return; +# else + + zen::RwLock::ExclusiveLockScope _(Lock); + + uint32_t ModuleId = AddressToId(ModuleBase); + TArrayView<FModule> ModulesView(Modules, ModulesNum); + int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate()); + if (Index >= ModulesNum) + { + return; + } + + const FModule& Module = Modules[Index]; + if (Module.Id != ModuleId) + { + return; + } + +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations -= Module.NumFpTypes; + TotalFunctions -= Module.NumFunctions; +# endif + + // no code should be executing at this point so we can safely free the + // table knowing know one is looking at it. + Malloc->Free(Module.Functions); + + for (SIZE_T i = Index; i < ModulesNum; i++) + { + Modules[i] = Modules[i + 1]; + } + + --ModulesNum; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +const FBacktracer::FFunction* +FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const +{ + // This function caches the previous module look up. The theory here is that + // a series of return address in a backtrace often cluster around one module + + FIdPredicate IdPredicate; + + // Look up the module that Address belongs to. 
+ uint32_t AddressId = AddressToId(Address); + if ((AddressId - State.Module.Id) >= State.Module.IdSize) + { + auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate); + + if (FindIt == Modules) + { + return nullptr; + } + + State.Module = *--FindIt; + } + + // Check that the address is within the address space of the best-found module + const FModule* Module = &(State.Module); + if ((AddressId - Module->Id) >= Module->IdSize) + { + return nullptr; + } + + // Now we've a module we have a table of functions and their stack sizes so + // we can get the frame size for Address + uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id)); + std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions); + auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate); + if (FindIt == begin(FuncsView)) + { + return nullptr; + } + + const FFunction* Function = &(*--FindIt); +# if BACKTRACE_DBGLVL >= 2 + if ((FuncId - Function->Id) >= Function->Size) + { + PLATFORM_BREAK(); + return nullptr; + } +# endif + return Function; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) +{ + FLookupState LookupState = {}; + uint64_t Frames[256]; + + uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress; + +# if BACKTRACE_DBGLVL >= 3 + uintptr_t TruthBacktrace[1024]; + uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr); + uintptr_t* TruthCursor = TruthBacktrace; + for (; *TruthCursor != *StackPointer; ++TruthCursor) + ; +# endif + +# if BACKTRACE_DBGLVL >= 2 + struct + { + void* Sp; + void* Ip; + const FFunction* Function; + } Backtrace[1024] = {}; + uint32_t NumBacktrace = 0; +# endif + + uint64_t BacktraceHash = 0; + uint32_t FrameIdx = 0; + +# if BACKTRACE_LOCK_FREE + // When running lock free, we defer the lock until a lock free function lookup fails + bool Locked = false; 
+# else + FScopeLock _(&Lock); +# endif + do + { + uintptr_t RetAddr = *StackPointer; + + Frames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceHash += RetAddr; + BacktraceHash *= 0x30be8efa499c249dull; + +# if BACKTRACE_LOCK_FREE + int32_t RspBias; + bool bIsAlreadyInTable; + FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable); + if (bIsAlreadyInTable) + { + if (RspBias < 0) + { + break; + } + else + { + StackPointer += RspBias; + continue; + } + } + if (!Locked) + { + Lock.AcquireExclusive(); + Locked = true; + + // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion + // when the allocation reenters the stack trace code. We need to break out of the recursion + // in that case, and let the allocation complete, with the assumption that we don't care + // about call stacks for internal allocations in the memory reporting system. The "Lock()" + // above will only fall through with this flag set if it's a second lock in the same thread. + if (bReentranceCheck) + { + break; + } + } +# endif // BACKTRACE_LOCK_FREE + + const FFunction* Function = LookupFunction(RetAddr, LookupState); + if (Function == nullptr) + { +# if BACKTRACE_LOCK_FREE + // LookupFunction fails when modules are not yet registered. In this case, we do not want the address + // to be added to the lookup map, but to retry the lookup later when modules are properly registered. + if (GModulesAreInitialized) + { + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, -1); + } +# endif + break; + } + +# if BACKTRACE_LOCK_FREE + { + // This conversion improves probing performance for the hash set. Additionally it is critical + // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map. + int32_t StoreBias = Function->RspBias < 0 ? 
-1 : Function->RspBias; + bReentranceCheck = true; + auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; }); + FunctionLookups.Emplace(RetAddr, StoreBias); + } +# endif + +# if BACKTRACE_DBGLVL >= 2 + if (NumBacktrace < 1024) + { + Backtrace[NumBacktrace++] = { + StackPointer, + (void*)RetAddr, + Function, + }; + } +# endif + + if (Function->RspBias < 0) + { + // This is a frame with a variable-sized stack pointer. We don't + // track enough information to proceed. +# if BACKTRACE_DBGLVL >= 1 + NumFpTruncations++; +# endif + break; + } + + StackPointer += Function->RspBias; + } + // Truncate callstacks longer than MaxStackDepth + while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames)); + + // Build the backtrace entry for submission + FCallstackTracer::FBacktraceEntry BacktraceEntry; + BacktraceEntry.Hash = BacktraceHash; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = Frames; + +# if BACKTRACE_DBGLVL >= 3 + for (uint32_t i = 0; i < NumBacktrace; ++i) + { + if ((void*)TruthCursor[i] != Backtrace[i].Ip) + { + PLATFORM_BREAK(); + break; + } + } +# endif + +# if BACKTRACE_LOCK_FREE + if (Locked) + { + Lock.ReleaseExclusive(); + } +# endif + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). 
+ return CallstackTracer.AddCallstack(BacktraceEntry); +} +} + +# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + + //////////////////////////////////////////////////////////////////////////////// + class FBacktracer + { + public: + FBacktracer(FMalloc* InMalloc); + ~FBacktracer(); + static FBacktracer* Get(); + inline uint32_t GetBacktraceId(void* AddressOfReturnAddress); + uint32_t GetBacktraceId(uint64_t ReturnAddress); + void AddModule(uintptr_t Base, const char16_t* Name) {} + void RemoveModule(uintptr_t Base) {} + + private: + static FBacktracer* Instance; + FMalloc* Malloc; + FCallstackTracer CallstackTracer; + }; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Instance = nullptr; + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; } + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer::~FBacktracer() {} + + //////////////////////////////////////////////////////////////////////////////// + FBacktracer* FBacktracer::Get() { return Instance; } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress) + { + const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress; + return GetBacktraceId(ReturnAddress); + } + + //////////////////////////////////////////////////////////////////////////////// + uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress) + { +# if !UE_BUILD_SHIPPING + uint64_t StackFrames[256]; + int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames)); + if (NumStackFrames > 0) + { + FCallstackTracer::FBacktraceEntry BacktraceEntry; + uint64_t BacktraceId = 0; + uint32_t FrameIdx = 0; + bool bUseAddress = false; + for 
(int32_t Index = 0; Index < NumStackFrames; Index++) + { + if (!bUseAddress) + { + // start using backtrace only after ReturnAddress + if (StackFrames[Index] == (uint64_t)ReturnAddress) + { + bUseAddress = true; + } + } + if (bUseAddress || NumStackFrames == 1) + { + uint64_t RetAddr = StackFrames[Index]; + StackFrames[FrameIdx++] = RetAddr; + + // This is a simple order-dependent LCG. Should be sufficient enough + BacktraceId += RetAddr; + BacktraceId *= 0x30be8efa499c249dull; + } + } + + // Save the collected id + BacktraceEntry.Hash = BacktraceId; + BacktraceEntry.FrameCount = FrameIdx; + BacktraceEntry.Frames = StackFrames; + + // Add to queue to be processed. This might block until there is room in the + // queue (i.e. the processing thread has caught up processing). + return CallstackTracer.AddCallstack(BacktraceEntry); + } +# endif + + return 0; + } + +} + +# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_CreateInternal(FMalloc* Malloc) +{ + if (FBacktracer::Get() != nullptr) + { + return; + } + + // Allocate, construct and intentionally leak backtracer + void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer)); + new (Alloc) FBacktracer(Malloc); + + Modules_Create(Malloc); + Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) { + bLoad ? 
FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522 + : FBacktracer::Get()->RemoveModule(uintptr_t(Module)); + }); +} + +//////////////////////////////////////////////////////////////////////////////// +void +CallstackTrace_InitializeInternal() +{ + Modules_Initialize(); + GModulesAreInitialized = true; +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +CallstackTrace_GetCurrentId() +{ + if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel)) + { + return 0; + } + + void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING(); + if (FBacktracer* Instance = FBacktracer::Get()) + { +# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER + return Instance->GetBacktraceId(StackAddress); +# else + return Instance->GetBacktraceId((uint64_t)StackAddress); +# endif + } + + return 0; +} + +} // namespace zen + +#endif diff --git a/src/zencore/memtrack/callstacktrace.h b/src/zencore/memtrack/callstacktrace.h new file mode 100644 index 000000000..3e191490b --- /dev/null +++ b/src/zencore/memtrack/callstacktrace.h @@ -0,0 +1,151 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/trace.h> + +#if ZEN_PLATFORM_WINDOWS +# include <intrin.h> + +# define PLATFORM_RETURN_ADDRESS() _ReturnAddress() +# define PLATFORM_RETURN_ADDRESS_POINTER() _AddressOfReturnAddress() +# define PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING PLATFORM_RETURN_ADDRESS_POINTER +#endif + +//////////////////////////////////////////////////////////////////////////////// +#if !defined(UE_CALLSTACK_TRACE_ENABLED) +# if UE_TRACE_ENABLED +# if ZEN_PLATFORM_WINDOWS +# define UE_CALLSTACK_TRACE_ENABLED 1 +# endif +# endif +#endif + +#if !defined(UE_CALLSTACK_TRACE_ENABLED) +# define UE_CALLSTACK_TRACE_ENABLED 0 +#endif + +//////////////////////////////////////////////////////////////////////////////// +#if UE_CALLSTACK_TRACE_ENABLED + +# include "platformtls.h" + +namespace zen { + +/** + * Creates callstack tracing. 
+ * @param Malloc Allocator instance to use. + */ +void CallstackTrace_Create(class FMalloc* Malloc); + +/** + * Initializes callstack tracing. On some platforms this has to be delayed due to initialization order. + */ +void CallstackTrace_Initialize(); + +/** + * Capture the current callstack, and trace the definition if it has not already been encountered. The returned value + * can be used in trace events and be resolved in analysis. + * @return Unique id identifying the current callstack. + */ +uint32_t CallstackTrace_GetCurrentId(); + +/** + * Callstack Trace Scoped Macro to avoid resolving the full callstack + * can be used when some external libraries are not compiled with frame pointers + * preventing us to resolve it without crashing. Instead the callstack will be + * only the caller address. + */ +# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() FCallStackTraceLimitResolveScope PREPROCESSOR_JOIN(FCTLMScope, __LINE__) + +extern uint32_t GCallStackTracingTlsSlotIndex; + +/** + * @return the fallback callstack address + */ +inline void* +CallstackTrace_GetFallbackPlatformReturnAddressData() +{ + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + return FPlatformTLS::GetTlsValue(GCallStackTracingTlsSlotIndex); + else + return nullptr; +} + +/** + * @return Needs full callstack resolve + */ +inline bool +CallstackTrace_ResolveFullCallStack() +{ + return CallstackTrace_GetFallbackPlatformReturnAddressData() == nullptr; +} + +/* + * Callstack Trace scope for override CallStack + */ +class FCallStackTraceLimitResolveScope +{ +public: + ZEN_FORCENOINLINE FCallStackTraceLimitResolveScope() + { + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + { + FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING()); + } + } + + ZEN_FORCENOINLINE ~FCallStackTraceLimitResolveScope() + { + if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex)) + { + 
FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, nullptr); + } + } +}; + +} // namespace zen + +#else // UE_CALLSTACK_TRACE_ENABLED + +namespace zen { + +inline void +CallstackTrace_Create(class FMalloc* /*Malloc*/) +{ +} + +inline void +CallstackTrace_Initialize() +{ +} + +inline uint32_t +CallstackTrace_GetCurrentId() +{ + return 0; +} + +inline void* +CallstackTrace_GetCurrentReturnAddressData() +{ + return nullptr; +} + +inline void* +CallstackTrace_GetFallbackPlatformReturnAddressData() +{ + return nullptr; +} + +inline bool +CallstackTrace_ResolveFullCallStack() +{ + return true; +} + +# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() + +} // namespace zen + +#endif // UE_CALLSTACK_TRACE_ENABLED diff --git a/src/zencore/memtrack/growonlylockfreehash.h b/src/zencore/memtrack/growonlylockfreehash.h new file mode 100644 index 000000000..d6ff4fc32 --- /dev/null +++ b/src/zencore/memtrack/growonlylockfreehash.h @@ -0,0 +1,255 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenbase/zenbase.h> +#include <zencore/intmath.h> +#include <zencore/thread.h> + +#include <zencore/memory/fmalloc.h> + +#include <atomic> + +namespace zen { + +// Hash table with fast lock free reads, that only supports insertion of items, and no modification of +// values. KeyType must be an integer. EntryType should be a POD with an identifiable "empty" state +// that can't occur in the table, and include the following member functions: +// +// KeyType GetKey() const; // Get the key from EntryType +// ValueType GetValue() const; // Get the value from EntryType +// bool IsEmpty() const; // Query whether EntryType is empty +// void SetKeyValue(KeyType Key, ValueType Value); // Write key and value into EntryType (ATOMICALLY! 
See below) +// static uint32 KeyHash(KeyType Key); // Convert Key to more well distributed hash +// static void ClearEntries(EntryType* Entries, int32 EntryCount); // Fill an array of entries with empty values +// +// The function "SetKeyValue" must be multi-thread safe when writing new items! This means writing the +// Key last and atomically, or writing the entire EntryType in a single write (say if the key and value +// are packed into a single integer word). Inline is recommended, since these functions are called a +// lot in the inner loop of the algorithm. A simple implementation of "KeyHash" can just return the +// Key (if it's already reasonable as a hash), or mix the bits if better distribution is required. A +// simple implementation of "ClearEntries" can just be a memset, if zero represents an empty entry. +// +// A set can be approximated by making "GetValue" a nop function, and just paying attention to the bool +// result from FindEntry, although you do need to either reserve a certain Key as invalid, or add +// space to store a valid flag as the Value. This class should only be used for small value types, as +// the values are embedded into the hash table, and not stored separately. +// +// Writes are implemented using a lock -- it would be possible to make writes lock free (or lock free +// when resizing doesn't occur), but it adds complexity. If we were to go that route, it would make +// sense to create a fully generic lock free set, which would be much more involved to implement and +// validate than this simple class, and might also offer somewhat worse read perf. Lock free containers +// that support item removal either need additional synchronization overhead on readers, so writers can +// tell if a reader is active and spin, or need graveyard markers and a garbage collection pass called +// periodically, which makes it no longer a simple standalone container. 
+// +// Lock free reads are accomplished by the reader atomically pulling the hash table pointer from the +// class. The hash table is self contained, with its size stored in the table itself, and hash tables +// are not freed until the class's destruction. So if the table needs to be reallocated due to a write, +// active readers will still have valid memory. This does mean that tables leak, but worst case, you +// end up with half of the memory being waste. It would be possible to garbage collect the excess +// tables, but you'd need some kind of global synchronization to make sure no readers are active. +// +// Besides cleanup of wasted tables, it might be useful to provide a function to clear a table. This +// would involve clearing the Key for all the elements in the table (but leaving the memory allocated), +// and can be done safely with active readers. It's not possible to safely remove individual items due +// to the need to potentially move other items, which would break an active reader that has already +// searched past a moved item. But in the case of removing all items, we don't care when a reader fails, +// it's expected that eventually all readers will fail, regardless of where they are searching. A clear +// function could be useful if a lot of the data you are caching is no longer used, and you want to +// reset the cache. 
+// +template<typename EntryType, typename KeyType, typename ValueType> +class TGrowOnlyLockFreeHash +{ +public: + TGrowOnlyLockFreeHash(FMalloc* InMalloc) : Malloc(InMalloc), HashTable(nullptr) {} + + ~TGrowOnlyLockFreeHash() + { + FHashHeader* HashTableNext; + for (FHashHeader* HashTableCurrent = HashTable; HashTableCurrent; HashTableCurrent = HashTableNext) + { + HashTableNext = HashTableCurrent->Next; + + Malloc->Free(HashTableCurrent); + } + } + + /** + * Preallocate the hash table to a certain size + * @param Count - Number of EntryType elements to allocate + * @warning Can only be called once, and only before any items have been added! + */ + void Reserve(uint32_t Count) + { + zen::RwLock::ExclusiveLockScope _(WriteCriticalSection); + ZEN_ASSERT(HashTable.load(std::memory_order_relaxed) == nullptr); + + if (Count <= 0) + { + Count = DEFAULT_INITIAL_SIZE; + } + Count = uint32_t(zen::NextPow2(Count)); + FHashHeader* HashTableLocal = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (Count - 1) * sizeof(EntryType)); + + HashTableLocal->Next = nullptr; + HashTableLocal->TableSize = Count; + HashTableLocal->Used = 0; + EntryType::ClearEntries(HashTableLocal->Elements, Count); + + HashTable.store(HashTableLocal, std::memory_order_release); + } + + /** + * Find an entry in the hash table + * @param Key - Key to search for + * @param OutValue - Memory location to write result value to. Left unmodified if Key isn't found. + * @param bIsAlreadyInTable - Optional result for whether key was found in table. 
+ */ + void Find(KeyType Key, ValueType* OutValue, bool* bIsAlreadyInTable = nullptr) const + { + FHashHeader* HashTableLocal = HashTable.load(std::memory_order_acquire); + if (HashTableLocal) + { + uint32_t TableMask = HashTableLocal->TableSize - 1; + + // Linear probing + for (uint32_t TableIndex = EntryType::KeyHash(Key) & TableMask; !HashTableLocal->Elements[TableIndex].IsEmpty(); + TableIndex = (TableIndex + 1) & TableMask) + { + if (HashTableLocal->Elements[TableIndex].GetKey() == Key) + { + if (OutValue) + { + *OutValue = HashTableLocal->Elements[TableIndex].GetValue(); + } + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + } + } + + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = false; + } + } + + /** + * Add an entry with the given Key to the hash table, will do nothing if the item already exists + * @param Key - Key to add + * @param Value - Value to add for key + * @param bIsAlreadyInTable -- Optional result for whether item was already in table + */ + void Emplace(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr) + { + zen::RwLock::ExclusiveLockScope _(WriteCriticalSection); + + // After locking, check if the item is already in the hash table. + ValueType ValueIgnore; + bool bFindResult; + Find(Key, &ValueIgnore, &bFindResult); + if (bFindResult == true) + { + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + + // Check if there is space in the hash table for a new item. We resize when the hash + // table gets half full or more. @todo: allow client to specify max load factor? + FHashHeader* HashTableLocal = HashTable; + + if (!HashTableLocal || (HashTableLocal->Used >= HashTableLocal->TableSize / 2)) + { + int32_t GrowCount = HashTableLocal ? 
HashTableLocal->TableSize * 2 : DEFAULT_INITIAL_SIZE; + FHashHeader* HashTableGrow = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (GrowCount - 1) * sizeof(EntryType)); + + HashTableGrow->Next = HashTableLocal; + HashTableGrow->TableSize = GrowCount; + HashTableGrow->Used = 0; + EntryType::ClearEntries(HashTableGrow->Elements, GrowCount); + + if (HashTableLocal) + { + // Copy existing elements from the old table to the new table + for (int32_t TableIndex = 0; TableIndex < HashTableLocal->TableSize; TableIndex++) + { + EntryType& Entry = HashTableLocal->Elements[TableIndex]; + if (!Entry.IsEmpty()) + { + HashInsertInternal(HashTableGrow, Entry.GetKey(), Entry.GetValue()); + } + } + } + + HashTableLocal = HashTableGrow; + HashTable.store(HashTableGrow, std::memory_order_release); + } + + // Then add our new item + HashInsertInternal(HashTableLocal, Key, Value); + + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = false; + } + } + + void FindOrAdd(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr) + { + // Attempt to find the item lock free, before calling "Emplace", which locks the container + bool bFindResult; + ValueType IgnoreResult; + Find(Key, &IgnoreResult, &bFindResult); + if (bFindResult) + { + if (bIsAlreadyInTable) + { + *bIsAlreadyInTable = true; + } + return; + } + + Emplace(Key, Value, bIsAlreadyInTable); + } + +private: + struct FHashHeader + { + FHashHeader* Next; // Old buffers are stored in a linked list for cleanup + int32_t TableSize; + int32_t Used; + EntryType Elements[1]; // Variable sized + }; + + FMalloc* Malloc; + std::atomic<FHashHeader*> HashTable; + zen::RwLock WriteCriticalSection; + + static constexpr int32_t DEFAULT_INITIAL_SIZE = 1024; + + static void HashInsertInternal(FHashHeader* HashTableLocal, KeyType Key, ValueType Value) + { + int32_t TableMask = HashTableLocal->TableSize - 1; + + // Linear probing + for (int32_t TableIndex = EntryType::KeyHash(Key) & TableMask;; TableIndex = (TableIndex + 1) & TableMask) + { 
+ if (HashTableLocal->Elements[TableIndex].IsEmpty()) + { + HashTableLocal->Elements[TableIndex].SetKeyValue(Key, Value); + HashTableLocal->Used++; + break; + } + } + } +}; + +} // namespace zen diff --git a/src/zencore/memtrack/memorytrace.cpp b/src/zencore/memtrack/memorytrace.cpp new file mode 100644 index 000000000..b147aee91 --- /dev/null +++ b/src/zencore/memtrack/memorytrace.cpp @@ -0,0 +1,829 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/tagtrace.h> + +#include "callstacktrace.h" +#include "tracemalloc.h" +#include "vatrace.h" + +#include <zencore/commandline.h> +#include <zencore/enumflags.h> +#include <zencore/guardvalue.h> +#include <zencore/intmath.h> +#include <zencore/string.h> +#include <zencore/trace.h> + +#include <string.h> + +#if ZEN_PLATFORM_WINDOWS +# include <shellapi.h> +#endif + +class FMalloc; + +#if UE_TRACE_ENABLED +namespace zen { +UE_TRACE_CHANNEL_DEFINE(MemAllocChannel, "Memory allocations", true) +} +#endif + +#if UE_MEMORY_TRACE_ENABLED + +//////////////////////////////////////////////////////////////////////////////// + +namespace zen { + +void MemoryTrace_InitTags(FMalloc*); +void MemoryTrace_EnableTracePump(); + +} // namespace zen + +//////////////////////////////////////////////////////////////////////////////// +namespace { +// Controls how often time markers are emitted (default: every 4095 allocations). +constexpr uint32_t MarkerSamplePeriod = (4 << 10) - 1; + +// Number of shifted bits to SizeLower +constexpr uint32_t SizeShift = 3; + +// Counter to track when time marker is emitted +std::atomic<uint32_t> GMarkerCounter(0); + +// If enabled also pumps the Trace system itself. Used on process shutdown +// when worker thread has been killed, but memory events still occurs. +bool GDoPumpTrace; + +// Temporarily disables any internal operation that causes allocations. 
Used to +// avoid recursive behaviour when memory tracing needs to allocate memory through +// TraceMalloc. +thread_local bool GDoNotAllocateInTrace; + +// Set on initialization; on some platforms we hook allocator functions very early +// before Trace has the ability to allocate memory. +bool GTraceAllowed; +} // namespace + +//////////////////////////////////////////////////////////////////////////////// +namespace UE { namespace Trace { + TRACELOG_API void Update(); +}} // namespace UE::Trace + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +UE_TRACE_EVENT_BEGIN(Memory, Init, NoSync | Important) + UE_TRACE_EVENT_FIELD(uint64_t, PageSize) // new in UE 5.5 + UE_TRACE_EVENT_FIELD(uint32_t, MarkerPeriod) + UE_TRACE_EVENT_FIELD(uint8, Version) + UE_TRACE_EVENT_FIELD(uint8, MinAlignment) + UE_TRACE_EVENT_FIELD(uint8, SizeShift) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Marker) + UE_TRACE_EVENT_FIELD(uint64_t, Cycle) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Alloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, AllocSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, AllocVideo) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, Free) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + 
+UE_TRACE_EVENT_BEGIN(Memory, FreeSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, FreeVideo) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocAllocSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, Size) + UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocFree) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint8, RootHeap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, ReallocFreeSystem) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, MemorySwapOp) + UE_TRACE_EVENT_FIELD(uint64_t, Address) // page fault real address + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint32_t, CompressedSize) + UE_TRACE_EVENT_FIELD(uint8, SwapOp) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, HeapSpec, NoSync | Important) + UE_TRACE_EVENT_FIELD(HeapId, Id) + UE_TRACE_EVENT_FIELD(HeapId, ParentId) + UE_TRACE_EVENT_FIELD(uint16, Flags) + UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, HeapMarkAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(uint16, Flags) + UE_TRACE_EVENT_FIELD(HeapId, Heap) +UE_TRACE_EVENT_END() + +UE_TRACE_EVENT_BEGIN(Memory, 
HeapUnmarkAlloc) + UE_TRACE_EVENT_FIELD(uint64_t, Address) + UE_TRACE_EVENT_FIELD(uint32_t, CallstackId) + UE_TRACE_EVENT_FIELD(HeapId, Heap) +UE_TRACE_EVENT_END() + +// If the layout of the above events is changed, bump this version number. +// version 1: Initial version (UE 5.0, UE 5.1) +// version 2: Added CallstackId for Free events and also for HeapMarkAlloc, HeapUnmarkAlloc events (UE 5.2). +constexpr uint8 MemoryTraceVersion = 2; + +//////////////////////////////////////////////////////////////////////////////// +class FMallocWrapper : public FMalloc +{ +public: + FMallocWrapper(FMalloc* InMalloc); + +private: + struct FCookie + { + uint64_t Tag : 16; + uint64_t Bias : 8; + uint64_t Size : 40; + }; + + static uint32_t GetActualAlignment(SIZE_T Size, uint32_t Alignment); + + virtual void* Malloc(SIZE_T Size, uint32_t Alignment) override; + virtual void* Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) override; + virtual void Free(void* Address) override; + virtual bool GetAllocationSize(void* Address, SIZE_T& SizeOut) override { return InnerMalloc->GetAllocationSize(Address, SizeOut); } + virtual void OnMallocInitialized() override { InnerMalloc->OnMallocInitialized(); } + + FMalloc* InnerMalloc; +}; + +//////////////////////////////////////////////////////////////////////////////// +FMallocWrapper::FMallocWrapper(FMalloc* InMalloc) : InnerMalloc(InMalloc) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +uint32_t +FMallocWrapper::GetActualAlignment(SIZE_T Size, uint32_t Alignment) +{ + // Defaults; if size is < 16 then alignment is 8 else 16. + uint32_t DefaultAlignment = 8 << uint32_t(Size >= 16); + return (Alignment < DefaultAlignment) ? 
DefaultAlignment : Alignment; +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FMallocWrapper::Malloc(SIZE_T Size, uint32_t Alignment) +{ + uint32_t ActualAlignment = GetActualAlignment(Size, Alignment); + void* Address = InnerMalloc->Malloc(Size, Alignment); + + MemoryTrace_Alloc((uint64_t)Address, Size, ActualAlignment); + + return Address; +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FMallocWrapper::Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) +{ + // This simplifies things and means reallocs trace events are true reallocs + if (PrevAddress == nullptr) + { + return Malloc(NewSize, Alignment); + } + + MemoryTrace_ReallocFree((uint64_t)PrevAddress); + + void* RetAddress = InnerMalloc->Realloc(PrevAddress, NewSize, Alignment); + + Alignment = GetActualAlignment(NewSize, Alignment); + MemoryTrace_ReallocAlloc((uint64_t)RetAddress, NewSize, Alignment); + + return RetAddress; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FMallocWrapper::Free(void* Address) +{ + if (Address == nullptr) + { + return; + } + + MemoryTrace_Free((uint64_t)Address); + + void* InnerAddress = Address; + + return InnerMalloc->Free(InnerAddress); +} + +//////////////////////////////////////////////////////////////////////////////// +template<class T> +class alignas(alignof(T)) FUndestructed +{ +public: + template<typename... ArgTypes> + void Construct(ArgTypes... 
Args) + { + ::new (Buffer) T(Args...); + bIsConstructed = true; + } + + bool IsConstructed() const { return bIsConstructed; } + + T* operator&() { return (T*)Buffer; } + T* operator->() { return (T*)Buffer; } + +protected: + uint8 Buffer[sizeof(T)]; + bool bIsConstructed; +}; + +//////////////////////////////////////////////////////////////////////////////// +static FUndestructed<FTraceMalloc> GTraceMalloc; + +//////////////////////////////////////////////////////////////////////////////// +static EMemoryTraceInit +MemoryTrace_ShouldEnable() +{ + EMemoryTraceInit Mode = EMemoryTraceInit::Disabled; + + // Process any command line trace options + // + // Note that calls can come into this function before we enter the regular main function + // and we can therefore not rely on the regular command line parsing for the application + + using namespace std::literals; + + auto ProcessTraceArg = [&](const std::string_view& Arg) { + if (Arg == "memalloc"sv) + { + Mode |= EMemoryTraceInit::AllocEvents; + } + else if (Arg == "callstack"sv) + { + Mode |= EMemoryTraceInit::Callstacks; + } + else if (Arg == "memtag"sv) + { + Mode |= EMemoryTraceInit::Tags; + } + else if (Arg == "memory"sv) + { + Mode |= EMemoryTraceInit::Full; + } + else if (Arg == "memory_light"sv) + { + Mode |= EMemoryTraceInit::Light; + } + }; + + constexpr std::string_view TraceOption = "--trace="sv; + + std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) { + if (Arg.starts_with(TraceOption)) + { + const std::string_view OptionArgs = Arg.substr(TraceOption.size()); + + IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg); + } + }; + + IterateCommandlineArgs(ProcessArg); + + return Mode; +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_CreateInternal(FMalloc* InMalloc, EMemoryTraceInit Mode) +{ + using namespace zen; + + // If allocation events are not desired we don't need to do anything, even + // if user 
has enabled only callstacks it will be enabled later. + if (!EnumHasAnyFlags(Mode, EMemoryTraceInit::AllocEvents)) + { + return InMalloc; + } + + // Some OSes (i.e. Windows) will terminate all threads except the main + // one as part of static deinit. However we may receive more memory + // trace events that would get lost as Trace's worker thread has been + // terminated. So to flush the last remaining memory events, Trace needs + // to be updated, which we will do in response to memory events. + // We'll use an atexit callback to know when Trace is probably no longer + // getting ticked. + atexit([]() { MemoryTrace_EnableTracePump(); }); + + GTraceMalloc.Construct(InMalloc); + + // Both tag and callstack tracing need to use the wrapped trace malloc + // so we can break out tracing memory overhead (and not cause recursive behaviour). + if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Tags)) + { + MemoryTrace_InitTags(&GTraceMalloc); + } + + if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Callstacks)) + { + CallstackTrace_Create(&GTraceMalloc); + } + + static FUndestructed<FMallocWrapper> SMallocWrapper; + SMallocWrapper.Construct(InMalloc); + + return &SMallocWrapper; +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_CreateInternal(FMalloc* InMalloc) +{ + const EMemoryTraceInit Mode = MemoryTrace_ShouldEnable(); + return MemoryTrace_CreateInternal(InMalloc, Mode); +} + +//////////////////////////////////////////////////////////////////////////////// +FMalloc* +MemoryTrace_Create(FMalloc* InMalloc) +{ + FMalloc* OutMalloc = MemoryTrace_CreateInternal(InMalloc); + + if (OutMalloc != InMalloc) + { +# if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS + FVirtualWinApiHooks::Initialize(false); +# endif + } + + return OutMalloc; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Initialize() +{ + // At this point we initialized the system to allow tracing. 
+ GTraceAllowed = true; + + const int MIN_ALIGNMENT = 8; + + UE_TRACE_LOG(Memory, Init, MemAllocChannel) + << Init.PageSize(4096) << Init.MarkerPeriod(MarkerSamplePeriod + 1) << Init.Version(MemoryTraceVersion) + << Init.MinAlignment(uint8(MIN_ALIGNMENT)) << Init.SizeShift(uint8(SizeShift)); + + const HeapId SystemRootHeap = MemoryTrace_RootHeapSpec(u"System memory"); + ZEN_ASSERT(SystemRootHeap == EMemoryTraceRootHeap::SystemMemory); + const HeapId VideoRootHeap = MemoryTrace_RootHeapSpec(u"Video memory"); + ZEN_ASSERT(VideoRootHeap == EMemoryTraceRootHeap::VideoMemory); + + static_assert((1 << SizeShift) - 1 <= MIN_ALIGNMENT, "Not enough bits to pack size fields"); + +# if !UE_MEMORY_TRACE_LATE_INIT + // On some platforms callstack initialization cannot happen this early in the process. It is initialized + // in other locations when UE_MEMORY_TRACE_LATE_INIT is defined. Until that point allocations cannot have + // callstacks. + CallstackTrace_Initialize(); +# endif +} + +void +MemoryTrace_Shutdown() +{ + // Disable any further activity + GTraceAllowed = false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool +MemoryTrace_IsActive() +{ + return GTraceAllowed; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_EnableTracePump() +{ + GDoPumpTrace = true; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_UpdateInternal() +{ + const uint32_t TheCount = GMarkerCounter.fetch_add(1, std::memory_order_relaxed); + if ((TheCount & MarkerSamplePeriod) == 0) + { + UE_TRACE_LOG(Memory, Marker, MemAllocChannel) << Marker.Cycle(UE::Trace::Private::TimeGetTimestamp()); + } + + if (GDoPumpTrace) + { + UE::Trace::Update(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Alloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t 
ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, AllocSystem, MemAllocChannel) + << AllocSystem.Address(uint64_t(Address)) << AllocSystem.CallstackId(CallstackId) + << AllocSystem.Size(uint32_t(Size >> SizeShift)) << AllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + case EMemoryTraceRootHeap::VideoMemory: + { + UE_TRACE_LOG(Memory, AllocVideo, MemAllocChannel) + << AllocVideo.Address(uint64_t(Address)) << AllocVideo.CallstackId(CallstackId) + << AllocVideo.Size(uint32_t(Size >> SizeShift)) << AllocVideo.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + default: + { + UE_TRACE_LOG(Memory, Alloc, MemAllocChannel) + << Alloc.Address(uint64_t(Address)) << Alloc.CallstackId(CallstackId) << Alloc.Size(uint32_t(Size >> SizeShift)) + << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_Free(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, FreeSystem, MemAllocChannel) + << FreeSystem.Address(uint64_t(Address)) << FreeSystem.CallstackId(CallstackId); + break; + } + case EMemoryTraceRootHeap::VideoMemory: + { + UE_TRACE_LOG(Memory, FreeVideo, MemAllocChannel) + << FreeVideo.Address(uint64_t(Address)) << FreeVideo.CallstackId(CallstackId); + break; + } + default: + { + UE_TRACE_LOG(Memory, Free, MemAllocChannel) + << Free.Address(uint64_t(Address)) << Free.CallstackId(CallstackId) << Free.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_ReallocAlloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment)); + const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1)); + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, ReallocAllocSystem, MemAllocChannel) + << ReallocAllocSystem.Address(uint64_t(Address)) << ReallocAllocSystem.CallstackId(CallstackId) + << ReallocAllocSystem.Size(uint32_t(Size >> SizeShift)) + << ReallocAllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)); + break; + } + + default: + { + UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel) + << ReallocAlloc.Address(uint64_t(Address)) << ReallocAlloc.CallstackId(CallstackId) + << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) + << ReallocAlloc.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + ZEN_ASSERT_SLOW(RootHeap < 16); + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
0 : CallstackTrace_GetCurrentId(); + + switch (RootHeap) + { + case EMemoryTraceRootHeap::SystemMemory: + { + UE_TRACE_LOG(Memory, ReallocFreeSystem, MemAllocChannel) + << ReallocFreeSystem.Address(uint64_t(Address)) << ReallocFreeSystem.CallstackId(CallstackId); + break; + } + + default: + { + UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel) + << ReallocFree.Address(uint64_t(Address)) << ReallocFree.CallstackId(CallstackId) + << ReallocFree.RootHeap(uint8(RootHeap)); + break; + } + } + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize, uint32_t CallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + UE_TRACE_LOG(Memory, MemorySwapOp, MemAllocChannel) + << MemorySwapOp.Address(PageAddress) << MemorySwapOp.CallstackId(CallstackId) << MemorySwapOp.CompressedSize(CompressedSize) + << MemorySwapOp.SwapOp((uint8)SwapOperation); + + MemoryTrace_UpdateInternal(); +} + +//////////////////////////////////////////////////////////////////////////////// +HeapId +MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags) +{ + if (!GTraceAllowed) + { + return 0; + } + + static std::atomic<HeapId> HeapIdCount(EMemoryTraceRootHeap::EndReserved + 1); // Reserve indexes for root heaps + const HeapId Id = HeapIdCount.fetch_add(1); + const uint32_t NameLen = uint32_t(zen::StringLength(Name)); + const uint32_t DataSize = NameLen * sizeof(char16_t); + ZEN_ASSERT(ParentId < Id); + + UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize) + << HeapSpec.Id(Id) << HeapSpec.ParentId(ParentId) << HeapSpec.Name(Name, NameLen) << HeapSpec.Flags(uint16(Flags)); + + return Id; +} + +//////////////////////////////////////////////////////////////////////////////// +HeapId +MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags) +{ + if (!GTraceAllowed) + { + 
return 0; + } + + static std::atomic<HeapId> RootHeapCount(0); + const HeapId Id = RootHeapCount.fetch_add(1); + ZEN_ASSERT(Id <= EMemoryTraceRootHeap::EndReserved); + + const uint32_t NameLen = uint32_t(zen::StringLength(Name)); + const uint32_t DataSize = NameLen * sizeof(char16_t); + + UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize) + << HeapSpec.Id(Id) << HeapSpec.ParentId(HeapId(~0)) << HeapSpec.Name(Name, NameLen) + << HeapSpec.Flags(uint16(EMemoryTraceHeapFlags::Root | Flags)); + + return Id; +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_MarkAllocAsHeap(uint64_t Address, HeapId Heap, EMemoryTraceHeapAllocationFlags Flags, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId(); + + UE_TRACE_LOG(Memory, HeapMarkAlloc, MemAllocChannel) + << HeapMarkAlloc.Address(uint64_t(Address)) << HeapMarkAlloc.CallstackId(CallstackId) + << HeapMarkAlloc.Flags(uint16(EMemoryTraceHeapAllocationFlags::Heap | Flags)) << HeapMarkAlloc.Heap(Heap); +} + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId) +{ + if (!GTraceAllowed) + { + return; + } + + const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 
/////////////////////////////////////////////////////////////////////////////
// Reallocates through WrappedMalloc and, when memory tracing is compiled in,
// brackets the call with ReallocFree / ReallocAlloc trace events.
//
// Original  - previous address, passed through unchanged to the wrapped allocator
// Count     - requested size in bytes
// Alignment - requested alignment, passed through unchanged
//
// Returns whatever WrappedMalloc->Realloc returns.
void*
FTraceMalloc::Realloc(void* Original, SIZE_T Count, uint32_t Alignment)
{
#if UE_MEMORY_TRACE_ENABLED
    // UE_TRACE_METADATA_CLEAR_SCOPE();
    UE_MEMSCOPE(TRACE_TAG);

    // The old address is reported before the wrapped allocator is called.
    // Unlike MemoryTrace_ReallocFree, no CallstackId field is written here.
    UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
        << ReallocFree.Address(uint64_t(Original)) << ReallocFree.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));

    void* NewPtr;
    {
        // GDoNotAllocateInTrace is true only for the duration of the wrapped
        // call (TGuardValue presumably restores the old value on scope exit —
        // TODO confirm). While it is set, the MemoryTrace_* functions skip the
        // callstack lookup.
        zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
        NewPtr = WrappedMalloc->Realloc(Original, Count, Alignment);
    }

    // Same packing as MemoryTrace_ReallocAlloc: log2(alignment) above the low
    // SizeShift bits of the size.
    const uint64_t Size                = Count;
    const uint32_t AlignmentPow2       = uint32_t(zen::CountTrailingZeros64(Alignment));
    const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));

    // The new address is always reported with callstack id 0 on the system root heap.
    UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
        << ReallocAlloc.Address(uint64_t(NewPtr)) << ReallocAlloc.CallstackId(0) << ReallocAlloc.Size(uint32_t(Size >> SizeShift))
        << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
        << ReallocAlloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));

    return NewPtr;
#else
    // Tracing compiled out: plain pass-through.
    return WrappedMalloc->Realloc(Original, Count, Alignment);
#endif  // UE_MEMORY_TRACE_ENABLED
}
+ +#include <zenbase/zenbase.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/tagtrace.h> + +#if ZEN_PLATFORM_WINDOWS +# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 1 +#else +# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 0 +#endif + +#include "moduletrace_events.h" + +#if PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS + +# include <zencore/windows.h> + +ZEN_THIRD_PARTY_INCLUDES_START +# include <winternl.h> +ZEN_THIRD_PARTY_INCLUDES_END + +# include <zencore/trace.h> + +# include <array> + +namespace zen { + +class FMalloc; + +typedef uint32_t HeapId; + +//////////////////////////////////////////////////////////////////////////////// +struct FNtDllFunction +{ + FARPROC Addr; + + FNtDllFunction(const char* Name) + { + HMODULE NtDll = LoadLibraryW(L"ntdll.dll"); + ZEN_ASSERT(NtDll); + Addr = GetProcAddress(NtDll, Name); + } + + template<typename... ArgTypes> + unsigned int operator()(ArgTypes... Args) + { + typedef unsigned int(NTAPI * Prototype)(ArgTypes...); + return (Prototype((void*)Addr))(Args...); + } +}; + +////////////////////////////////////////////////////////////////////////////////7777 +class FModuleTrace +{ +public: + typedef void (*SubscribeFunc)(bool, void*, const char16_t*); + + FModuleTrace(FMalloc* InMalloc); + ~FModuleTrace(); + static FModuleTrace* Get(); + void Initialize(); + void Subscribe(SubscribeFunc Function); + +private: + void OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base); + void OnDllUnloaded(uintptr_t Base); + void OnDllNotification(unsigned int Reason, const void* DataPtr); + static FModuleTrace* Instance; + SubscribeFunc Subscribers[64]; + int SubscriberCount = 0; + void* CallbackCookie = nullptr; + HeapId ProgramHeapId = 0; +}; + +//////////////////////////////////////////////////////////////////////////////// +FModuleTrace* FModuleTrace::Instance = nullptr; + +//////////////////////////////////////////////////////////////////////////////// 
+FModuleTrace::FModuleTrace(FMalloc* InMalloc) +{ + ZEN_UNUSED(InMalloc); + Instance = this; +} + +//////////////////////////////////////////////////////////////////////////////// +FModuleTrace::~FModuleTrace() +{ + if (CallbackCookie) + { + FNtDllFunction UnregisterFunc("LdrUnregisterDllNotification"); + UnregisterFunc(CallbackCookie); + } +} + +//////////////////////////////////////////////////////////////////////////////// +FModuleTrace* +FModuleTrace::Get() +{ + return Instance; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::Initialize() +{ + using namespace UE::Trace; + + ProgramHeapId = MemoryTrace_HeapSpec(SystemMemory, u"Module", EMemoryTraceHeapFlags::None); + + UE_TRACE_LOG(Diagnostics, ModuleInit, ModuleChannel, sizeof(char) * 3) + << ModuleInit.SymbolFormat("pdb", 3) << ModuleInit.ModuleBaseShift(uint8(0)); + + // Register for DLL load/unload notifications. + auto Thunk = [](ULONG Reason, const void* Data, void* Context) { + auto* Self = (FModuleTrace*)Context; + Self->OnDllNotification(Reason, Data); + }; + + typedef void(CALLBACK * ThunkType)(ULONG, const void*, void*); + auto ThunkImpl = ThunkType(Thunk); + + FNtDllFunction RegisterFunc("LdrRegisterDllNotification"); + RegisterFunc(0, ThunkImpl, this, &CallbackCookie); + + // Enumerate already loaded modules. 
+ const TEB* ThreadEnvBlock = NtCurrentTeb(); + const PEB* ProcessEnvBlock = ThreadEnvBlock->ProcessEnvironmentBlock; + const LIST_ENTRY* ModuleIter = ProcessEnvBlock->Ldr->InMemoryOrderModuleList.Flink; + const LIST_ENTRY* ModuleIterEnd = ModuleIter->Blink; + do + { + const auto& ModuleData = *(LDR_DATA_TABLE_ENTRY*)(ModuleIter - 1); + if (ModuleData.DllBase == 0) + { + break; + } + + OnDllLoaded(ModuleData.FullDllName, UPTRINT(ModuleData.DllBase)); + ModuleIter = ModuleIter->Flink; + } while (ModuleIter != ModuleIterEnd); +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::Subscribe(SubscribeFunc Function) +{ + ZEN_ASSERT(SubscriberCount < ZEN_ARRAY_COUNT(Subscribers)); + Subscribers[SubscriberCount++] = Function; +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::OnDllNotification(unsigned int Reason, const void* DataPtr) +{ + enum + { + LDR_DLL_NOTIFICATION_REASON_LOADED = 1, + LDR_DLL_NOTIFICATION_REASON_UNLOADED = 2, + }; + + struct FNotificationData + { + uint32_t Flags; + const UNICODE_STRING& FullPath; + const UNICODE_STRING& BaseName; + uintptr_t Base; + }; + const auto& Data = *(FNotificationData*)DataPtr; + + switch (Reason) + { + case LDR_DLL_NOTIFICATION_REASON_LOADED: + OnDllLoaded(Data.FullPath, Data.Base); + break; + case LDR_DLL_NOTIFICATION_REASON_UNLOADED: + OnDllUnloaded(Data.Base); + break; + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::OnDllLoaded(const UNICODE_STRING& Name, UPTRINT Base) +{ + const auto* DosHeader = (IMAGE_DOS_HEADER*)Base; + const auto* NtHeaders = (IMAGE_NT_HEADERS*)(Base + DosHeader->e_lfanew); + const IMAGE_OPTIONAL_HEADER& OptionalHeader = NtHeaders->OptionalHeader; + uint8_t ImageId[20]; + + // Find the guid and age of the binary, used to match debug files + const IMAGE_DATA_DIRECTORY& DebugInfoEntry = 
OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG]; + const auto* DebugEntries = (IMAGE_DEBUG_DIRECTORY*)(Base + DebugInfoEntry.VirtualAddress); + for (uint32_t i = 0, n = DebugInfoEntry.Size / sizeof(DebugEntries[0]); i < n; ++i) + { + const IMAGE_DEBUG_DIRECTORY& Entry = DebugEntries[i]; + if (Entry.Type == IMAGE_DEBUG_TYPE_CODEVIEW) + { + struct FCodeView7 + { + uint32_t Signature; + uint32_t Guid[4]; + uint32_t Age; + }; + + if (Entry.SizeOfData < sizeof(FCodeView7)) + { + continue; + } + + const auto* CodeView7 = (FCodeView7*)(Base + Entry.AddressOfRawData); + if (CodeView7->Signature != 'SDSR') + { + continue; + } + + memcpy(ImageId, (uint8_t*)&CodeView7->Guid, sizeof(uint32_t) * 4); + memcpy(&ImageId[16], (uint8_t*)&CodeView7->Age, sizeof(uint32_t)); + break; + } + } + + // Note: UNICODE_STRING.Length is the size in bytes of the string buffer. + UE_TRACE_LOG(Diagnostics, ModuleLoad, ModuleChannel, uint32_t(Name.Length + sizeof(ImageId))) + << ModuleLoad.Name((const char16_t*)Name.Buffer, Name.Length / 2) << ModuleLoad.Base(uint64_t(Base)) + << ModuleLoad.Size(OptionalHeader.SizeOfImage) << ModuleLoad.ImageId(ImageId, uint32_t(sizeof(ImageId))); + +# if UE_MEMORY_TRACE_ENABLED + { + UE_MEMSCOPE(ELLMTag::ProgramSize); + MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_MarkAllocAsHeap(Base, ProgramHeapId); + MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory); + } +# endif // UE_MEMORY_TRACE_ENABLED + + for (int i = 0; i < SubscriberCount; ++i) + { + Subscribers[i](true, (void*)Base, (const char16_t*)Name.Buffer); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +FModuleTrace::OnDllUnloaded(UPTRINT Base) +{ +# if UE_MEMORY_TRACE_ENABLED + MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_UnmarkAllocAsHeap(Base, ProgramHeapId); + MemoryTrace_Free(Base, 
EMemoryTraceRootHeap::SystemMemory); +# endif // UE_MEMORY_TRACE_ENABLED + + UE_TRACE_LOG(Diagnostics, ModuleUnload, ModuleChannel) << ModuleUnload.Base(uint64(Base)); + + for (int i = 0; i < SubscriberCount; ++i) + { + Subscribers[i](false, (void*)Base, nullptr); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +Modules_Create(FMalloc* Malloc) +{ + if (FModuleTrace::Get() != nullptr) + { + return; + } + + static FModuleTrace Instance(Malloc); +} + +//////////////////////////////////////////////////////////////////////////////// +void +Modules_Initialize() +{ + if (FModuleTrace* Instance = FModuleTrace::Get()) + { + Instance->Initialize(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void +Modules_Subscribe(void (*Function)(bool, void*, const char16_t*)) +{ + if (FModuleTrace* Instance = FModuleTrace::Get()) + { + Instance->Subscribe(Function); + } +} + +} // namespace zen + +#endif // PLATFORM_SUPPORTS_WIN32_MEMORY_TRACE diff --git a/src/zencore/memtrack/moduletrace.h b/src/zencore/memtrack/moduletrace.h new file mode 100644 index 000000000..5e7374faa --- /dev/null +++ b/src/zencore/memtrack/moduletrace.h @@ -0,0 +1,11 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +namespace zen { + +void Modules_Create(class FMalloc*); +void Modules_Subscribe(void (*)(bool, void*, const char16_t*)); +void Modules_Initialize(); + +} // namespace zen diff --git a/src/zencore/memtrack/moduletrace_events.cpp b/src/zencore/memtrack/moduletrace_events.cpp new file mode 100644 index 000000000..9c6a9b648 --- /dev/null +++ b/src/zencore/memtrack/moduletrace_events.cpp @@ -0,0 +1,16 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
// Trace event describing one loaded module: its name, load address, image
// size and an image id.
UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleLoad, NoSync | Important)
    UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
    UE_TRACE_EVENT_FIELD(uint64, Base)
    UE_TRACE_EVENT_FIELD(uint32, Size)
    UE_TRACE_EVENT_FIELD(uint8[], ImageId)  // Platform specific id for this image, used to match debug files where available
UE_TRACE_EVENT_END()
+ +#pragma once + +#include <zenbase/zenbase.h> + +/** + * It should be possible to provide a generic implementation as long as a threadID is provided. We don't do that yet. + */ +struct FGenericPlatformTLS +{ + static const uint32_t InvalidTlsSlot = 0xFFFFFFFF; + + /** + * Return false if this is an invalid TLS slot + * @param SlotIndex the TLS index to check + * @return true if this looks like a valid slot + */ + static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; } +}; + +#if ZEN_PLATFORM_WINDOWS + +# include <zencore/windows.h> + +class FWindowsPlatformTLS : public FGenericPlatformTLS +{ +public: + static uint32_t AllocTlsSlot() { return ::TlsAlloc(); } + + static void FreeTlsSlot(uint32_t SlotIndex) { ::TlsFree(SlotIndex); } + + static void SetTlsValue(uint32_t SlotIndex, void* Value) { ::TlsSetValue(SlotIndex, Value); } + + /** + * Reads the value stored at the specified TLS slot + * + * @return the value stored in the slot + */ + static void* GetTlsValue(uint32_t SlotIndex) { return ::TlsGetValue(SlotIndex); } + + /** + * Return false if this is an invalid TLS slot + * @param SlotIndex the TLS index to check + * @return true if this looks like a valid slot + */ + static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; } +}; + +typedef FWindowsPlatformTLS FPlatformTLS; + +#elif ZEN_PLATFORM_MAC + +# include <pthread.h + +/** + * Apple implementation of the TLS OS functions + **/ +struct FApplePlatformTLS : public FGenericPlatformTLS +{ + /** + * Returns the currently executing thread's id + */ + static uint32_t GetCurrentThreadId(void) { return (uint32_t)pthread_mach_thread_np(pthread_self()); } + + /** + * Allocates a thread local store slot + */ + static uint32_t AllocTlsSlot(void) + { + // allocate a per-thread mem slot + pthread_key_t SlotKey = 0; + if (pthread_key_create(&SlotKey, NULL) != 0) + { + SlotKey = InvalidTlsSlot; // matches the Windows TlsAlloc() retval. 
+ } + return SlotKey; + } + + /** + * Sets a value in the specified TLS slot + * + * @param SlotIndex the TLS index to store it in + * @param Value the value to store in the slot + */ + static void SetTlsValue(uint32_t SlotIndex, void* Value) { pthread_setspecific((pthread_key_t)SlotIndex, Value); } + + /** + * Reads the value stored at the specified TLS slot + * + * @return the value stored in the slot + */ + static void* GetTlsValue(uint32_t SlotIndex) { return pthread_getspecific((pthread_key_t)SlotIndex); } + + /** + * Frees a previously allocated TLS slot + * + * @param SlotIndex the TLS index to store it in + */ + static void FreeTlsSlot(uint32_t SlotIndex) { pthread_key_delete((pthread_key_t)SlotIndex); } +}; + +typedef FApplePlatformTLS FPlatformTLS; + +#else +# error Platform not yet supported +#endif diff --git a/src/zencore/memtrack/tagtrace.cpp b/src/zencore/memtrack/tagtrace.cpp new file mode 100644 index 000000000..15ba78ae4 --- /dev/null +++ b/src/zencore/memtrack/tagtrace.cpp @@ -0,0 +1,237 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
////////////////////////////////////////////////////////////////////////////////
// Emits a MemoryScope trace event for `InTag` and makes it the calling
// thread's active tag.
//
// If either of the two checks below fails, nothing is changed. On success the
// previous thread-local tag is saved in PrevTag so that the destructor can
// restore it (the destructor does so only when Inner.bActive is set).
void
FMemScope::ActivateScope(int32_t InTag)
{
    // NOTE(review): this looks like a hand-expanded scoped trace-log macro;
    // the exact semantics of ScopedEnter and of `+=` / `<<` on the log scope
    // are defined by the trace library — confirm before restructuring.
    if (auto LogScope = FMemoryMemoryScopeFields::LogScopeType::ScopedEnter<FMemoryMemoryScopeFields>())
    {
        if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopeFields*)(&LogScope))
        {
            Inner.SetActive();
            LogScope += LogScope << MemoryScope.Tag(InTag);
            // Remember the tag being replaced, then install the new one.
            PrevTag    = GActiveTag;
            GActiveTag = InTag;
        }
    }
}
+ */ +class FTagTrace +{ +public: + FTagTrace(FMalloc* InMalloc); + void AnnounceGenericTags() const; + void AnnounceSpecialTags() const; + int32 AnnounceCustomTag(int32 Tag, int32 ParentTag, const ANSICHAR* Display) const; + +private: + struct FTagNameSetEntry + { + std::atomic_int32_t Data; + + int32_t GetKey() const { return Data.load(std::memory_order_relaxed); } + bool GetValue() const { return true; } + bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } // NAME_None is treated as empty + void SetKeyValue(int32_t Key, bool Value) + { + ZEN_UNUSED(Value); + Data.store(Key, std::memory_order_relaxed); + } + static uint32_t KeyHash(int32_t Key) { return static_cast<uint32>(Key); } + static void ClearEntries(FTagNameSetEntry* Entries, int32_t EntryCount) + { + memset(Entries, 0, EntryCount * sizeof(FTagNameSetEntry)); + } + }; + typedef TGrowOnlyLockFreeHash<FTagNameSetEntry, int32_t, bool> FTagNameSet; + + FTagNameSet AnnouncedNames; + static FMalloc* Malloc; +}; + +FMalloc* FTagTrace::Malloc = nullptr; +static FTagTrace* GTagTrace = nullptr; + +//////////////////////////////////////////////////////////////////////////////// +FTagTrace::FTagTrace(FMalloc* InMalloc) : AnnouncedNames(InMalloc) +{ + Malloc = InMalloc; + AnnouncedNames.Reserve(1024); + AnnounceGenericTags(); + AnnounceSpecialTags(); +} + +//////////////////////////////////////////////////////////////////////////////// +void +FTagTrace::AnnounceGenericTags() const +{ +# define TRACE_TAG_SPEC(Enum, Str, ParentTag) \ + { \ + const uint32_t DisplayLen = (uint32_t)StringLength(Str); \ + UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) \ + << TagSpec.Tag((int32_t)ELLMTag::Enum) << TagSpec.Parent((int32_t)ParentTag) << TagSpec.Display(Str, DisplayLen); \ + } + LLM_ENUM_GENERIC_TAGS(TRACE_TAG_SPEC); +# undef TRACE_TAG_SPEC +} + +//////////////////////////////////////////////////////////////////////////////// + +void +FTagTrace::AnnounceSpecialTags() const 
+{ + auto EmitTag = [](const char16_t* DisplayString, int32_t Tag, int32_t ParentTag) { + const uint32_t DisplayLen = (uint32_t)StringLength(DisplayString); + UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) + << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(DisplayString, DisplayLen); + }; + + EmitTag(u"Trace", TRACE_TAG, -1); +} + +//////////////////////////////////////////////////////////////////////////////// +int32_t +FTagTrace::AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const ANSICHAR* Display) const +{ + const uint32_t DisplayLen = (uint32_t)StringLength(Display); + UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) + << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(Display, DisplayLen); + return Tag; +} + +} // namespace zen + +#endif // UE_MEMORY_TAGS_TRACE_ENABLED + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +void +MemoryTrace_InitTags(FMalloc* InMalloc) +{ +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + GTagTrace = (FTagTrace*)InMalloc->Malloc(sizeof(FTagTrace), alignof(FTagTrace)); + new (GTagTrace) FTagTrace(InMalloc); +#else + ZEN_UNUSED(InMalloc); +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +int32_t +MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display) +{ +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + // todo: How do we check if tag trace is active? 
+ if (GTagTrace) + { + return GTagTrace->AnnounceCustomTag(Tag, ParentTag, Display); + } +#else + ZEN_UNUSED(Tag, ParentTag, Display); +#endif + return -1; +} + +//////////////////////////////////////////////////////////////////////////////// +int32_t +MemoryTrace_GetActiveTag() +{ +#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED + return GActiveTag; +#else + return -1; +#endif +} + +} // namespace zen diff --git a/src/zencore/memtrack/tracemalloc.h b/src/zencore/memtrack/tracemalloc.h new file mode 100644 index 000000000..54606ac45 --- /dev/null +++ b/src/zencore/memtrack/tracemalloc.h @@ -0,0 +1,24 @@ +// Copyright Epic Games, Inc. All Rights Reserved. +#pragma once + +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/memorytrace.h> + +namespace zen { + +class FTraceMalloc : public FMalloc +{ +public: + FTraceMalloc(FMalloc* InMalloc); + virtual ~FTraceMalloc(); + + virtual void* Malloc(SIZE_T Count, uint32 Alignment) override; + virtual void* Realloc(void* Original, SIZE_T Count, uint32 Alignment) override; + virtual void Free(void* Original) override; + + virtual void OnMallocInitialized() override { WrappedMalloc->OnMallocInitialized(); } + + FMalloc* WrappedMalloc; +}; + +} // namespace zen diff --git a/src/zencore/memtrack/vatrace.cpp b/src/zencore/memtrack/vatrace.cpp new file mode 100644 index 000000000..4dea27f1b --- /dev/null +++ b/src/zencore/memtrack/vatrace.cpp @@ -0,0 +1,361 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "vatrace.h" + +#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS + +# include <zencore/memory/memorytrace.h> + +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) +# pragma comment(lib, "mincore.lib") // VirtualAlloc2 +# endif + +namespace zen { + +//////////////////////////////////////////////////////////////////////////////// +class FTextSectionEditor +{ +public: + ~FTextSectionEditor(); + template<typename T> + T* Hook(T* Target, T* HookFunction); + +private: + struct FTrampolineBlock + { + FTrampolineBlock* Next; + uint32_t Size; + uint32_t Used; + }; + + static void* GetActualAddress(void* Function); + FTrampolineBlock* AllocateTrampolineBlock(void* Reference); + uint8_t* AllocateTrampoline(void* Reference, unsigned int Size); + void* HookImpl(void* Target, void* HookFunction); + FTrampolineBlock* HeadBlock = nullptr; +}; + +//////////////////////////////////////////////////////////////////////////////// +FTextSectionEditor::~FTextSectionEditor() +{ + for (FTrampolineBlock* Block = HeadBlock; Block != nullptr; Block = Block->Next) + { + DWORD Unused; + VirtualProtect(Block, Block->Size, PAGE_EXECUTE_READ, &Unused); + } + + FlushInstructionCache(GetCurrentProcess(), nullptr, 0); +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FTextSectionEditor::GetActualAddress(void* Function) +{ + // Follow a jmp instruction (0xff/4 only for now) at function and returns + // where it would jmp to. 
+ + uint8_t* Addr = (uint8_t*)Function; + int Offset = unsigned(Addr[0] & 0xf0) == 0x40; // REX prefix + if (Addr[Offset + 0] == 0xff && Addr[Offset + 1] == 0x25) + { + Addr += Offset; + Addr = *(uint8_t**)(Addr + 6 + *(uint32_t*)(Addr + 2)); + } + return Addr; +} + +//////////////////////////////////////////////////////////////////////////////// +FTextSectionEditor::FTrampolineBlock* +FTextSectionEditor::AllocateTrampolineBlock(void* Reference) +{ + static const size_t BlockSize = 0x10000; // 64KB is Windows' canonical granularity + + // Find the start of the main allocation that mapped Reference + MEMORY_BASIC_INFORMATION MemInfo; + VirtualQuery(Reference, &MemInfo, sizeof(MemInfo)); + auto* Ptr = (uint8_t*)(MemInfo.AllocationBase); + + // Step backwards one block at a time and try and allocate that address + while (true) + { + Ptr -= BlockSize; + if (VirtualAlloc(Ptr, BlockSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) != nullptr) + { + break; + } + + uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Ptr); + if (Distance >= 1ull << 31) + { + ZEN_ASSERT(!"Failed to allocate trampoline blocks for memory tracing hooks"); + } + } + + auto* Block = (FTrampolineBlock*)Ptr; + Block->Next = HeadBlock; + Block->Size = BlockSize; + Block->Used = sizeof(FTrampolineBlock); + HeadBlock = Block; + + return Block; +} + +//////////////////////////////////////////////////////////////////////////////// +uint8_t* +FTextSectionEditor::AllocateTrampoline(void* Reference, unsigned int Size) +{ + // Try and find a block that's within 2^31 bytes before Reference + FTrampolineBlock* Block; + for (Block = HeadBlock; Block != nullptr; Block = Block->Next) + { + uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Block); + if (Distance < 1ull << 31) + { + break; + } + } + + // If we didn't find a block then we need to allocate a new one. + if (Block == nullptr) + { + Block = AllocateTrampolineBlock(Reference); + } + + // Allocate space for the trampoline. 
+ uint32_t NextUsed = Block->Used + Size; + if (NextUsed > Block->Size) + { + // Block is full. We could allocate a new block here but as it is not + // expected that so many hooks will be made this path shouldn't happen + ZEN_ASSERT(!"Unable to allocate memory for memory tracing's hooks"); + } + + uint8_t* Out = (uint8_t*)Block + Block->Used; + Block->Used = NextUsed; + + return Out; +} + +//////////////////////////////////////////////////////////////////////////////// +template<typename T> +T* +FTextSectionEditor::Hook(T* Target, T* HookFunction) +{ + return (T*)HookImpl((void*)Target, (void*)HookFunction); +} + +//////////////////////////////////////////////////////////////////////////////// +void* +FTextSectionEditor::HookImpl(void* Target, void* HookFunction) +{ + Target = GetActualAddress(Target); + + // Very rudimentary x86_64 instruction length decoding that only supports op + // code ranges (0x80,0x8b) and (0x50,0x5f). Enough for simple prologues + uint8_t* __restrict Start = (uint8_t*)Target; + const uint8_t* Read = Start; + do + { + Read += (Read[0] & 0xf0) == 0x40; // REX prefix + uint8_t Inst = *Read++; + if (unsigned(Inst - 0x80) < 0x0cu) + { + uint8_t ModRm = *Read++; + Read += ((ModRm & 0300) < 0300) & ((ModRm & 0007) == 0004); // SIB + switch (ModRm & 0300) // Disp[8|32] + { + case 0100: + Read += 1; + break; + case 0200: + Read += 5; + break; + } + Read += (Inst == 0x83); + } + else if (unsigned(Inst - 0x50) >= 0x10u) + { + ZEN_ASSERT(!"Unknown instruction"); + } + } while (Read - Start < 6); + + static const int TrampolineSize = 24; + int PatchSize = int(Read - Start); + uint8_t* TrampolinePtr = AllocateTrampoline(Start, PatchSize + TrampolineSize); + + // Write the trampoline + *(void**)TrampolinePtr = HookFunction; + + uint8_t* PatchJmp = TrampolinePtr + sizeof(void*); + memcpy(PatchJmp, Start, PatchSize); + + PatchJmp += PatchSize; + *PatchJmp = 0xe9; + *(int32_t*)(PatchJmp + 1) = int32_t(intptr_t(Start + PatchSize) - intptr_t(PatchJmp)) - 5; 
+ + // Need to make the text section writeable + DWORD ProtPrev; + uintptr_t ProtBase = uintptr_t(Target) & ~0x0fff; // 0x0fff is mask of VM page size + size_t ProtSize = ((ProtBase + 16 + 0x1000) & ~0x0fff) - ProtBase; // 16 is enough for one x86 instruction + VirtualProtect((void*)ProtBase, ProtSize, PAGE_EXECUTE_READWRITE, &ProtPrev); + + // Patch function to jmp to the hook + uint16_t* HookJmp = (uint16_t*)Target; + HookJmp[0] = 0x25ff; + *(int32_t*)(HookJmp + 1) = int32_t(intptr_t(TrampolinePtr) - intptr_t(HookJmp + 3)); + + // Put the protection back the way it was + VirtualProtect((void*)ProtBase, ProtSize, ProtPrev, &ProtPrev); + + return PatchJmp - PatchSize; +} + +////////////////////////////////////////////////////////////////////////// + +bool FVirtualWinApiHooks::bLight; +LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD); +LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD); +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) +PVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); +# else +LPVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG); +# endif +BOOL(WINAPI* FVirtualWinApiHooks::VmFreeOrig)(LPVOID, SIZE_T, DWORD); +BOOL(WINAPI* FVirtualWinApiHooks::VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD); + +void +FVirtualWinApiHooks::Initialize(bool bInLight) +{ + bLight = bInLight; + + FTextSectionEditor Editor; + + // Note that hooking alloc functions is done last as applying the hook can + // allocate some memory pages. 
+ + VmFreeOrig = Editor.Hook(VirtualFree, &FVirtualWinApiHooks::VmFree); + VmFreeExOrig = Editor.Hook(VirtualFreeEx, &FVirtualWinApiHooks::VmFreeEx); + +# if ZEN_PLATFORM_WINDOWS +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) + { + VmAlloc2Orig = Editor.Hook(VirtualAlloc2, &FVirtualWinApiHooks::VmAlloc2); + } +# else // NTDDI_VERSION + { + VmAlloc2Orig = nullptr; + HINSTANCE DllInstance; + DllInstance = LoadLibrary(TEXT("kernelbase.dll")); + if (DllInstance != NULL) + { +# pragma warning(push) +# pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'FVirtualWinApiHooks::FnVirtualAlloc2' + VmAlloc2Orig = (FnVirtualAlloc2)GetProcAddress(DllInstance, "VirtualAlloc2"); +# pragma warning(pop) + FreeLibrary(DllInstance); + } + if (VmAlloc2Orig) + { + VmAlloc2Orig = Editor.Hook(VmAlloc2Orig, &FVirtualWinApiHooks::VmAlloc2); + } + } +# endif // NTDDI_VERSION +# endif // PLATFORM_WINDOWS + + VmAllocExOrig = Editor.Hook(VirtualAllocEx, &FVirtualWinApiHooks::VmAllocEx); + VmAllocOrig = Editor.Hook(VirtualAlloc, &FVirtualWinApiHooks::VmAlloc); +} + +//////////////////////////////////////////////////////////////////////////////// +LPVOID WINAPI +FVirtualWinApiHooks::VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect) +{ + LPVOID Ret = VmAllocOrig(Address, Size, Type, Protect); + + // Track any reserve for now. Going forward we need events to differentiate reserves/commits and + // corresponding information on frees. 
+ if (Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr))) + { + MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory); + } + + return Ret; +} + +//////////////////////////////////////////////////////////////////////////////// +BOOL WINAPI +FVirtualWinApiHooks::VmFree(LPVOID Address, SIZE_T Size, DWORD Type) +{ + if (Type & MEM_RELEASE) + { + MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory); + } + + return VmFreeOrig(Address, Size, Type); +} + +//////////////////////////////////////////////////////////////////////////////// +LPVOID WINAPI +FVirtualWinApiHooks::VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect) +{ + LPVOID Ret = VmAllocExOrig(Process, Address, Size, Type, Protect); + + if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr))) + { + MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory); + } + + return Ret; +} + +//////////////////////////////////////////////////////////////////////////////// +BOOL WINAPI +FVirtualWinApiHooks::VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type) +{ + if (Process == GetCurrentProcess() && (Type & MEM_RELEASE)) + { + MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory); + } + + return VmFreeExOrig(Process, Address, Size, Type); +} + +//////////////////////////////////////////////////////////////////////////////// +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) +PVOID WINAPI +FVirtualWinApiHooks::VmAlloc2(HANDLE Process, + PVOID BaseAddress, + SIZE_T Size, + 
ULONG Type, + ULONG PageProtection, + MEM_EXTENDED_PARAMETER* ExtendedParameters, + ULONG ParameterCount) +# else +LPVOID WINAPI +FVirtualWinApiHooks::VmAlloc2(HANDLE Process, + LPVOID BaseAddress, + SIZE_T Size, + ULONG Type, + ULONG PageProtection, + /*MEM_EXTENDED_PARAMETER* */ void* ExtendedParameters, + ULONG ParameterCount) +# endif +{ + LPVOID Ret = VmAlloc2Orig(Process, BaseAddress, Size, Type, PageProtection, ExtendedParameters, ParameterCount); + + if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && BaseAddress == nullptr))) + { + MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory); + MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory); + } + + return Ret; +} + +} // namespace zen + +#endif // PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS diff --git a/src/zencore/memtrack/vatrace.h b/src/zencore/memtrack/vatrace.h new file mode 100644 index 000000000..59cc7fe97 --- /dev/null +++ b/src/zencore/memtrack/vatrace.h @@ -0,0 +1,61 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zenbase/zenbase.h> + +#if ZEN_PLATFORM_WINDOWS && !defined(PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS) +# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 1 +#endif + +#ifndef PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS +# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 0 +#endif + +#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS +# include <zencore/windows.h> + +namespace zen { + +class FVirtualWinApiHooks +{ +public: + static void Initialize(bool bInLight); + +private: + FVirtualWinApiHooks(); + static bool bLight; + static LPVOID WINAPI VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect); + static LPVOID WINAPI VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect); +# if (NTDDI_VERSION >= NTDDI_WIN10_RS4) + static PVOID WINAPI VmAlloc2(HANDLE Process, + PVOID BaseAddress, + SIZE_T Size, + ULONG AllocationType, + ULONG PageProtection, + MEM_EXTENDED_PARAMETER* ExtendedParameters, + ULONG ParameterCount); + static PVOID(WINAPI* VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); + typedef PVOID(__stdcall* FnVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); +# else + static LPVOID WINAPI VmAlloc2(HANDLE Process, + LPVOID BaseAddress, + SIZE_T Size, + ULONG AllocationType, + ULONG PageProtection, + void* ExtendedParameters, + ULONG ParameterCount); + static LPVOID(WINAPI* VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG); + typedef LPVOID(__stdcall* FnVirtualAlloc2)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +# endif + static BOOL WINAPI VmFree(LPVOID Address, SIZE_T Size, DWORD Type); + static BOOL WINAPI VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type); + static LPVOID(WINAPI* VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD); + static LPVOID(WINAPI* VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, 
DWORD); + static BOOL(WINAPI* VmFreeOrig)(LPVOID, SIZE_T, DWORD); + static BOOL(WINAPI* VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD); +}; + +} // namespace zen + +#endif diff --git a/src/zencore/sharedbuffer.cpp b/src/zencore/sharedbuffer.cpp index 993ca40e6..78efb9d42 100644 --- a/src/zencore/sharedbuffer.cpp +++ b/src/zencore/sharedbuffer.cpp @@ -2,6 +2,7 @@ #include <zencore/except.h> #include <zencore/fmtutils.h> +#include <zencore/memory/memory.h> #include <zencore/sharedbuffer.h> #include <zencore/testing.h> diff --git a/src/zencore/stats.cpp b/src/zencore/stats.cpp index 7c1a9e086..6be16688b 100644 --- a/src/zencore/stats.cpp +++ b/src/zencore/stats.cpp @@ -3,9 +3,11 @@ #include "zencore/stats.h" #include <zencore/compactbinarybuilder.h> -#include "zencore/intmath.h" -#include "zencore/thread.h" -#include "zencore/timer.h" +#include <zencore/intmath.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/tagtrace.h> +#include <zencore/thread.h> +#include <zencore/timer.h> #include <cmath> #include <gsl/gsl-lite.hpp> @@ -222,8 +224,10 @@ thread_local xoshiro256 ThreadLocalRng; ////////////////////////////////////////////////////////////////////////// -UniformSample::UniformSample(uint32_t ReservoirSize) : m_Values(ReservoirSize) +UniformSample::UniformSample(uint32_t ReservoirSize) { + UE_MEMSCOPE(ELLMTag::Metrics); + m_Values = std::vector<std::atomic<int64_t>>(ReservoirSize); } UniformSample::~UniformSample() @@ -273,6 +277,8 @@ UniformSample::Update(int64_t Value) SampleSnapshot UniformSample::Snapshot() const { + UE_MEMSCOPE(ELLMTag::Metrics); + uint64_t ValuesSize = Size(); std::vector<double> Values(ValuesSize); diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp index ad6ee78fc..263c49f7e 100644 --- a/src/zencore/string.cpp +++ b/src/zencore/string.cpp @@ -1,6 +1,7 @@ // Copyright Epic Games, Inc. All Rights Reserved. 
#include <zencore/memory.h> +#include <zencore/memory/memory.h> #include <zencore/string.h> #include <zencore/testing.h> diff --git a/src/zencore/system.cpp b/src/zencore/system.cpp index f51273e0d..f37bdf423 100644 --- a/src/zencore/system.cpp +++ b/src/zencore/system.cpp @@ -4,6 +4,7 @@ #include <zencore/compactbinarybuilder.h> #include <zencore/except.h> +#include <zencore/memory/memory.h> #include <zencore/string.h> #if ZEN_PLATFORM_WINDOWS diff --git a/src/zencore/trace.cpp b/src/zencore/trace.cpp index f7e4c4b68..ef7cbf596 100644 --- a/src/zencore/trace.cpp +++ b/src/zencore/trace.cpp @@ -4,10 +4,86 @@ # include <zencore/config.h> # include <zencore/zencore.h> +# include <zencore/commandline.h> +# include <zencore/string.h> +# include <zencore/logging.h> # define TRACE_IMPLEMENT 1 # include <zencore/trace.h> +# include <zencore/memory/memorytrace.h> + +namespace zen { + +void +TraceConfigure() +{ + // Configure channels based on command line options + + using namespace std::literals; + + constexpr std::string_view TraceOption = "--trace="sv; + + std::function<void(const std::string_view&)> ProcessChannelList; + + auto ProcessTraceArg = [&](const std::string_view& Arg) { + if (Arg == "default"sv) + { + ProcessChannelList("cpu,log"sv); + } + else if (Arg == "memory"sv) + { + ProcessChannelList("memtag,memalloc,callstack,module"sv); + } + else if (Arg == "memory_light"sv) + { + ProcessChannelList("memtag,memalloc"sv); + } + else if (Arg == "memtag"sv) + { + // memtag actually traces to the memalloc channel + ProcessChannelList("memalloc"sv); + } + else + { + // Presume that the argument is a trace channel name + + StringBuilder<128> AnsiChannel; + AnsiChannel << Arg; + + const bool IsEnabled = trace::ToggleChannel(AnsiChannel.c_str(), true); + + if (IsEnabled == false) + { + // Logging here could be iffy, but we might want some other feedback mechanism here + // to indicate to users that they're not getting what they might be expecting + } + } + }; + + 
ProcessChannelList = [&](const std::string_view& OptionArgs) { IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg); }; + + bool TraceOptionPresent = false; + + std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) { + if (Arg.starts_with(TraceOption)) + { + const std::string_view OptionArgs = Arg.substr(TraceOption.size()); + + TraceOptionPresent = true; + + ProcessChannelList(OptionArgs); + } + }; + + IterateCommandlineArgs(ProcessArg); + + if (!TraceOptionPresent) + { + ProcessTraceArg("default"sv); + } +} + void TraceInit(std::string_view ProgramName) { @@ -38,6 +114,16 @@ TraceInit(std::string_view ProgramName) # endif CommandLineString, ZEN_CFG_VERSION_BUILD_STRING); + + atexit([] { +# if ZEN_WITH_MEMTRACK + zen::MemoryTrace_Shutdown(); +# endif + trace::Update(); + TraceShutdown(); + }); + + TraceConfigure(); } void @@ -70,13 +156,11 @@ TraceStart(std::string_view ProgramName, const char* HostOrPath, TraceType Type) case TraceType::None: break; } - trace::ToggleChannel("cpu", true); } bool TraceStop() { - trace::ToggleChannel("cpu", false); if (trace::Stop()) { return true; @@ -84,4 +168,6 @@ TraceStop() return false; } +} // namespace zen + #endif // ZEN_WITH_TRACE diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua index 5f2d95e16..21b47b484 100644 --- a/src/zencore/xmake.lua +++ b/src/zencore/xmake.lua @@ -3,6 +3,7 @@ target('zencore') set_kind("static") set_group("libs") + add_options("zentrace", "zenmimalloc", "zenrpmalloc") add_headerfiles("**.h") add_configfiles("include/zencore/config.h.in") on_load(function (target) @@ -12,10 +13,25 @@ target('zencore') end) set_configdir("include/zencore") add_files("**.cpp") + + if has_config("zenrpmalloc") then + set_languages("c17", "cxx20") + if is_os("windows") then + add_cflags("/experimental:c11atomics") + end + add_defines("RPMALLOC_FIRST_CLASS_HEAPS=1", "ENABLE_STATISTICS=1", "ENABLE_OVERRIDE=0") + add_files("$(projectdir)/thirdparty/rpmalloc/rpmalloc.c") + 
end + + if has_config("zenmimalloc") then + add_packages("vcpkg::mimalloc") + end + add_includedirs("include", {public=true}) add_includedirs("$(projectdir)/thirdparty/utfcpp/source") add_includedirs("$(projectdir)/thirdparty/Oodle/include") add_includedirs("$(projectdir)/thirdparty/trace", {public=true}) + add_includedirs("$(projectdir)/thirdparty/rpmalloc") if is_os("windows") then add_linkdirs("$(projectdir)/thirdparty/Oodle/lib/Win64") add_links("oo2core_win64") @@ -27,14 +43,12 @@ target('zencore') add_linkdirs("$(projectdir)/thirdparty/Oodle/lib/Mac_x64") add_links("oo2coremac64") end - add_options("zentrace") add_deps("zenbase") add_packages( "vcpkg::blake3", "vcpkg::json11", "vcpkg::ryml", "vcpkg::c4core", - "vcpkg::mimalloc", "vcpkg::openssl", -- required for crypto "vcpkg::spdlog") diff --git a/src/zenhttp-test/zenhttp-test.cpp b/src/zenhttp-test/zenhttp-test.cpp index 440e85a9f..49db1ba54 100644 --- a/src/zenhttp-test/zenhttp-test.cpp +++ b/src/zenhttp-test/zenhttp-test.cpp @@ -2,14 +2,9 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> +#include <zencore/memory/newdelete.h> #include <zenhttp/zenhttp.h> -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif - #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 # include <zencore/testing.h> @@ -18,9 +13,6 @@ ZEN_THIRD_PARTY_INCLUDES_END int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { -#if ZEN_USE_MIMALLOC - mi_version(); -#endif #if ZEN_WITH_TESTS zen::zenhttp_forcelinktests(); diff --git a/src/zennet-test/zennet-test.cpp b/src/zennet-test/zennet-test.cpp index f7f54e6ad..482d3c617 100644 --- a/src/zennet-test/zennet-test.cpp +++ b/src/zennet-test/zennet-test.cpp @@ -4,11 +4,7 @@ #include <zencore/logging.h> #include <zennet/zennet.h> -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif +#include <zencore/memory/newdelete.h> #if 
ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 @@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END int main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) { -#if ZEN_USE_MIMALLOC - mi_version(); -#endif #if ZEN_WITH_TESTS zen::zennet_forcelinktests(); diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp index ca2257361..e3f701be1 100644 --- a/src/zenserver-test/zenserver-test.cpp +++ b/src/zenserver-test/zenserver-test.cpp @@ -54,11 +54,7 @@ ZEN_THIRD_PARTY_INCLUDES_END # include <process.h> #endif -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif +#include <zencore/memory/newdelete.h> ////////////////////////////////////////////////////////////////////////// @@ -101,9 +97,6 @@ zen::ZenServerEnvironment TestEnv; int main(int argc, char** argv) { -# if ZEN_USE_MIMALLOC - mi_version(); -# endif using namespace std::literals; using namespace zen; diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp index bedab7049..0108e8b9f 100644 --- a/src/zenserver/config.cpp +++ b/src/zenserver/config.cpp @@ -593,6 +593,9 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) options.add_options()("detach", "Indicate whether zenserver should detach from parent process group", cxxopts::value<bool>(ServerOptions.Detach)->default_value("true")); + options.add_options()("malloc", + "Configure memory allocator subsystem", + cxxopts::value(ServerOptions.MemoryOptions)->default_value("mimalloc")); // clang-format off options.add_options("logging") @@ -713,6 +716,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) #if ZEN_WITH_TRACE options.add_option("ue-trace", "", + "trace", + "Specify which trace channels should be enabled", + cxxopts::value<std::string>(ServerOptions.TraceChannels)->default_value(""), + ""); + + options.add_option("ue-trace", + "", "tracehost", "Hostname to send the trace to", 
cxxopts::value<std::string>(ServerOptions.TraceHost)->default_value(""), diff --git a/src/zenserver/config.h b/src/zenserver/config.h index 5c56695f3..c7781aada 100644 --- a/src/zenserver/config.h +++ b/src/zenserver/config.h @@ -176,9 +176,11 @@ struct ZenServerOptions std::string Loggers[zen::logging::level::LogLevelCount]; std::string ScrubOptions; #if ZEN_WITH_TRACE - std::string TraceHost; // Host name or IP address to send trace data to - std::string TraceFile; // Path of a file to write a trace + std::string TraceChannels; // Trace channels to enable + std::string TraceHost; // Host name or IP address to send trace data to + std::string TraceFile; // Path of a file to write a trace #endif + std::string MemoryOptions; // Memory allocation options std::string CommandLine; }; diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp index 595be70cb..0d96cd8d6 100644 --- a/src/zenserver/diag/logging.cpp +++ b/src/zenserver/diag/logging.cpp @@ -6,6 +6,7 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> +#include <zencore/memory/llm.h> #include <zencore/session.h> #include <zencore/string.h> #include <zenutil/logging.h> @@ -20,6 +21,8 @@ namespace zen { void InitializeServerLogging(const ZenServerOptions& InOptions) { + UE_MEMSCOPE(ELLMTag::Logging); + const LoggingOptions LogOptions = {.IsDebug = InOptions.IsDebug, .IsVerbose = false, .IsTest = InOptions.IsTest, @@ -79,6 +82,8 @@ InitializeServerLogging(const ZenServerOptions& InOptions) void ShutdownServerLogging() { + UE_MEMSCOPE(ELLMTag::Logging); + zen::ShutdownLogging(); } diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index 2fb01ebf1..4444241cc 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -17,16 +17,15 @@ #include <zencore/trace.h> #include <zenhttp/httpserver.h> +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/memory.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/newdelete.h> + #include "config.h" 
#include "diag/logging.h" #include "sentryintegration.h" -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif - #if ZEN_PLATFORM_WINDOWS # include <zencore/windows.h> # include "windows/service.h" @@ -354,9 +353,6 @@ test_main(int argc, char** argv) int main(int argc, char* argv[]) { -#if ZEN_USE_MIMALLOC - mi_version(); -#endif using namespace zen; if (argc >= 2) @@ -433,9 +429,17 @@ main(int argc, char* argv[]) { TraceInit("zenserver"); } - atexit(TraceShutdown); #endif // ZEN_WITH_TRACE +#if ZEN_WITH_MEMTRACK + FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc); + if (TraceMalloc != GMalloc) + { + GMalloc = TraceMalloc; + MemoryTrace_Initialize(); + } +#endif + #if ZEN_PLATFORM_WINDOWS if (ServerOptions.InstallService) { diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp index 3ad9e620b..e5b312984 100644 --- a/src/zenstore-test/zenstore-test.cpp +++ b/src/zenstore-test/zenstore-test.cpp @@ -4,11 +4,7 @@ #include <zencore/logging.h> #include <zenstore/zenstore.h> -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif +#include <zencore/memory/newdelete.h> #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 @@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { -#if ZEN_USE_MIMALLOC - mi_version(); -#endif #if ZEN_WITH_TESTS zen::zenstore_forcelinktests(); diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 2031804c9..62ed44bbb 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -7,7 +7,7 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> -#include <zencore/memory.h> +#include <zencore/memory/memory.h> #include <zencore/scopeguard.h> #include <zencore/string.h> #include <zencore/testing.h> diff --git a/src/zenutil-test/zenutil-test.cpp 
b/src/zenutil-test/zenutil-test.cpp index f95b7e888..fadaf0995 100644 --- a/src/zenutil-test/zenutil-test.cpp +++ b/src/zenutil-test/zenutil-test.cpp @@ -4,11 +4,7 @@ #include <zencore/logging.h> #include <zenutil/zenutil.h> -#if ZEN_USE_MIMALLOC -ZEN_THIRD_PARTY_INCLUDES_START -# include <mimalloc-new-delete.h> -ZEN_THIRD_PARTY_INCLUDES_END -#endif +#include <zencore/memory/newdelete.h> #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 @@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { -#if ZEN_USE_MIMALLOC - mi_version(); -#endif #if ZEN_WITH_TESTS zen::zenutil_forcelinktests(); diff --git a/src/zenutil/basicfile.cpp b/src/zenutil/basicfile.cpp index 73f27b587..391c150c6 100644 --- a/src/zenutil/basicfile.cpp +++ b/src/zenutil/basicfile.cpp @@ -6,6 +6,7 @@ #include <zencore/except.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> +#include <zencore/memory/memory.h> #include <zencore/testing.h> #include <zencore/testutils.h> |