aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.clang-format3
-rw-r--r--.pre-commit-config.yaml1
-rw-r--r--CHANGELOG.md4
-rw-r--r--src/zen/zen.cpp8
-rw-r--r--src/zencore-test/zencore-test.cpp9
-rw-r--r--src/zencore/crypto.cpp1
-rw-r--r--src/zencore/filesystem.cpp1
-rw-r--r--src/zencore/include/zencore/guardvalue.h40
-rw-r--r--src/zencore/include/zencore/iobuffer.h6
-rw-r--r--src/zencore/include/zencore/memory.h11
-rw-r--r--src/zencore/include/zencore/memory/fmalloc.h103
-rw-r--r--src/zencore/include/zencore/memory/llm.h31
-rw-r--r--src/zencore/include/zencore/memory/mallocansi.h31
-rw-r--r--src/zencore/include/zencore/memory/mallocmimalloc.h36
-rw-r--r--src/zencore/include/zencore/memory/mallocrpmalloc.h37
-rw-r--r--src/zencore/include/zencore/memory/mallocstomp.h100
-rw-r--r--src/zencore/include/zencore/memory/memory.h78
-rw-r--r--src/zencore/include/zencore/memory/memorytrace.h251
-rw-r--r--src/zencore/include/zencore/memory/newdelete.h155
-rw-r--r--src/zencore/include/zencore/memory/tagtrace.h93
-rw-r--r--src/zencore/include/zencore/string.h24
-rw-r--r--src/zencore/include/zencore/trace.h4
-rw-r--r--src/zencore/iobuffer.cpp51
-rw-r--r--src/zencore/logging.cpp22
-rw-r--r--src/zencore/memory.cpp55
-rw-r--r--src/zencore/memory/fmalloc.cpp156
-rw-r--r--src/zencore/memory/mallocansi.cpp251
-rw-r--r--src/zencore/memory/mallocmimalloc.cpp197
-rw-r--r--src/zencore/memory/mallocrpmalloc.cpp189
-rw-r--r--src/zencore/memory/mallocstomp.cpp283
-rw-r--r--src/zencore/memory/memory.cpp281
-rw-r--r--src/zencore/memtrack/callstacktrace.cpp1059
-rw-r--r--src/zencore/memtrack/callstacktrace.h151
-rw-r--r--src/zencore/memtrack/growonlylockfreehash.h255
-rw-r--r--src/zencore/memtrack/memorytrace.cpp829
-rw-r--r--src/zencore/memtrack/moduletrace.cpp296
-rw-r--r--src/zencore/memtrack/moduletrace.h11
-rw-r--r--src/zencore/memtrack/moduletrace_events.cpp16
-rw-r--r--src/zencore/memtrack/moduletrace_events.h27
-rw-r--r--src/zencore/memtrack/platformtls.h107
-rw-r--r--src/zencore/memtrack/tagtrace.cpp237
-rw-r--r--src/zencore/memtrack/tracemalloc.h24
-rw-r--r--src/zencore/memtrack/vatrace.cpp361
-rw-r--r--src/zencore/memtrack/vatrace.h61
-rw-r--r--src/zencore/sharedbuffer.cpp1
-rw-r--r--src/zencore/stats.cpp14
-rw-r--r--src/zencore/string.cpp1
-rw-r--r--src/zencore/system.cpp1
-rw-r--r--src/zencore/trace.cpp90
-rw-r--r--src/zencore/xmake.lua18
-rw-r--r--src/zenhttp-test/zenhttp-test.cpp10
-rw-r--r--src/zennet-test/zennet-test.cpp9
-rw-r--r--src/zenserver-test/zenserver-test.cpp9
-rw-r--r--src/zenserver/config.cpp10
-rw-r--r--src/zenserver/config.h6
-rw-r--r--src/zenserver/diag/logging.cpp5
-rw-r--r--src/zenserver/main.cpp24
-rw-r--r--src/zenstore-test/zenstore-test.cpp9
-rw-r--r--src/zenstore/filecas.cpp2
-rw-r--r--src/zenutil-test/zenutil-test.cpp9
-rw-r--r--src/zenutil/basicfile.cpp1
-rw-r--r--thirdparty/rpmalloc/malloc.c367
-rw-r--r--thirdparty/rpmalloc/rpmalloc.c2341
-rw-r--r--thirdparty/rpmalloc/rpmalloc.h396
-rw-r--r--thirdparty/rpmalloc/rpnew.h111
-rw-r--r--xmake.lua19
66 files changed, 9230 insertions, 169 deletions
diff --git a/.clang-format b/.clang-format
index 4688f60ae..46f378928 100644
--- a/.clang-format
+++ b/.clang-format
@@ -147,3 +147,6 @@ UseTab: Always
WhitespaceSensitiveMacros:
- STRINGIZE
...
+---
+Language: JavaScript
+DisableFormat: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b624a6733..c71fc6877 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,6 +9,7 @@ repos:
#- id: check-added-large-files
- id: mixed-line-ending
- id: check-yaml
+ args: [--allow-multiple-documents]
- id: check-case-conflict
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.3.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b9cae3e6b..36b91f6cf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
##
+- Feature: Added `--malloc` option for selecting which memory allocator should be used. Currently the available options are `--malloc=mimalloc`, `--malloc=rpmalloc`, `--malloc=stomp` or `--malloc=ansi`. Some of these options are currently not available on all targets, but all are available on Windows. `rpmalloc` is currently not supported on Linux or Mac due to toolchain limitations.
+- Feature: Added support for generating Unreal Insights-compatible traces for memory usage analysis. Currently only supported for Windows. Activate memory tracing by passing `--trace=memory` on the command line, alongside one of `--tracehost=<ip>` or `--tracefile=<path>` to enable tracing over network or to a file.
+
+## 5.5.13
- Bugfix: Fix inconsistencies in filecas due to failing to remove payload file during GC causing "Missing Chunk" errors
- Bugfix: Fixed crash on corrupt attachment block when doing oplog import
- Bugfix: Fixed off-by-one in GetPidStatus (Linux) which could cause spurious errors
diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp
index 16f5799e0..fd58b024a 100644
--- a/src/zen/zen.cpp
+++ b/src/zen/zen.cpp
@@ -44,9 +44,7 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <gsl/gsl-lite.hpp>
ZEN_THIRD_PARTY_INCLUDES_END
-#if ZEN_USE_MIMALLOC
-# include <mimalloc-new-delete.h>
-#endif
+#include <zencore/memory/newdelete.h>
//////////////////////////////////////////////////////////////////////////
@@ -365,10 +363,6 @@ main(int argc, char** argv)
using namespace zen;
using namespace std::literals;
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
-
zen::logging::InitializeLogging();
// Set output mode to handle virtual terminal sequences
diff --git a/src/zencore-test/zencore-test.cpp b/src/zencore-test/zencore-test.cpp
index 64df746e4..40cb51156 100644
--- a/src/zencore-test/zencore-test.cpp
+++ b/src/zencore-test/zencore-test.cpp
@@ -7,11 +7,7 @@
#include <zencore/logging.h>
#include <zencore/zencore.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -21,9 +17,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zencore_forcelinktests();
diff --git a/src/zencore/crypto.cpp b/src/zencore/crypto.cpp
index 8403a35f4..78bea0c17 100644
--- a/src/zencore/crypto.cpp
+++ b/src/zencore/crypto.cpp
@@ -2,6 +2,7 @@
#include <zencore/crypto.h>
#include <zencore/intmath.h>
+#include <zencore/memory/memory.h>
#include <zencore/scopeguard.h>
#include <zencore/testing.h>
diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp
index 9ca5f1131..36147c5a9 100644
--- a/src/zencore/filesystem.cpp
+++ b/src/zencore/filesystem.cpp
@@ -7,6 +7,7 @@
#include <zencore/fmtutils.h>
#include <zencore/iobuffer.h>
#include <zencore/logging.h>
+#include <zencore/memory/memory.h>
#include <zencore/process.h>
#include <zencore/stream.h>
#include <zencore/string.h>
diff --git a/src/zencore/include/zencore/guardvalue.h b/src/zencore/include/zencore/guardvalue.h
new file mode 100644
index 000000000..5419e882a
--- /dev/null
+++ b/src/zencore/include/zencore/guardvalue.h
@@ -0,0 +1,40 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+namespace zen {
+
+/**
+ * exception-safe guard around saving/restoring a value.
+ * Commonly used to make sure a value is restored
+ * even if the code early outs in the future.
+ * Usage:
+ * TGuardValue<bool> GuardSomeBool(bSomeBool, false); // Sets bSomeBool to false, and restores it in dtor.
+ */
+template<typename RefType, typename AssignedType = RefType>
+struct TGuardValue
+{
+ [[nodiscard]] TGuardValue(RefType& ReferenceValue, const AssignedType& NewValue)
+ : RefValue(ReferenceValue)
+ , OriginalValue(ReferenceValue)
+ {
+ RefValue = NewValue;
+ }
+ ~TGuardValue() { RefValue = OriginalValue; }
+
+ /**
+ * Provides read-only access to the original value of the data being tracked by this struct
+ *
+ * @return a const reference to the original data value
+ */
+ const AssignedType& GetOriginalValue() const { return OriginalValue; }
+
+ TGuardValue& operator=(const TGuardValue&) = delete;
+ TGuardValue(const TGuardValue&) = delete;
+
+private:
+ RefType& RefValue;
+ AssignedType OriginalValue;
+};
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h
index 493b7375e..93a27ea58 100644
--- a/src/zencore/include/zencore/iobuffer.h
+++ b/src/zencore/include/zencore/iobuffer.h
@@ -99,6 +99,11 @@ public:
ZENCORE_API IoBufferCore(size_t SizeBytes, size_t Alignment);
ZENCORE_API ~IoBufferCore();
+ void* operator new(size_t Size);
+ void operator delete(void* Ptr);
+ void* operator new[](size_t Size) = delete;
+ void operator delete[](void* Ptr) = delete;
+
// Reference counting
inline uint32_t AddRef() const { return AtomicIncrement(const_cast<IoBufferCore*>(this)->m_RefCount); }
@@ -244,7 +249,6 @@ protected:
kIsExtended = 1 << 2, // Is actually a SharedBufferExtendedCore
kIsMaterialized = 1 << 3, // Data pointers are valid
kIsWholeFile = 1 << 5, // References an entire file
- kIoBufferAlloc = 1 << 6, // Using IoBuffer allocator
kIsOwnedByThis = 1 << 7,
// Note that we have some extended flags defined below
diff --git a/src/zencore/include/zencore/memory.h b/src/zencore/include/zencore/memory.h
index fdea1a5f1..8361ab9d8 100644
--- a/src/zencore/include/zencore/memory.h
+++ b/src/zencore/include/zencore/memory.h
@@ -22,17 +22,10 @@ template<typename T>
concept ContiguousRange = true;
#endif
-struct MemoryView;
-
-class Memory
-{
-public:
- ZENCORE_API static void* Alloc(size_t Size, size_t Alignment = sizeof(void*));
- ZENCORE_API static void Free(void* Ptr);
-};
-
//////////////////////////////////////////////////////////////////////////
+struct MemoryView;
+
struct MutableMemoryView
{
MutableMemoryView() = default;
diff --git a/src/zencore/include/zencore/memory/fmalloc.h b/src/zencore/include/zencore/memory/fmalloc.h
new file mode 100644
index 000000000..aeb05b651
--- /dev/null
+++ b/src/zencore/include/zencore/memory/fmalloc.h
@@ -0,0 +1,103 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+namespace zen {
+
+enum
+{
+ DEFAULT_ALIGNMENT = 0
+};
+
+/**
+ * Inherit from FUseSystemMallocForNew if you want your objects to be placed in memory
+ * alloced by the system malloc routines, bypassing GMalloc. This is e.g. used by FMalloc
+ * itself.
+ */
+class FUseSystemMallocForNew
+{
+public:
+ void* operator new(size_t Size);
+ void operator delete(void* Ptr);
+ void* operator new[](size_t Size);
+ void operator delete[](void* Ptr);
+};
+
+/** Memory allocator abstraction
+ */
+
+class FMalloc : public FUseSystemMallocForNew
+{
+public:
+ /**
+ * Malloc
+ */
+ virtual void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) = 0;
+
+ /**
+ * TryMalloc - like Malloc(), but may return a nullptr result if the allocation
+ * request cannot be satisfied.
+ */
+ virtual void* TryMalloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * Realloc
+ */
+ virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) = 0;
+
+ /**
+ * TryRealloc - like Realloc(), but may return a nullptr if the allocation
+ * request cannot be satisfied. Note that in this case the memory
+ * pointed to by Original will still be valid
+ */
+ virtual void* TryRealloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * Free
+ */
+ virtual void Free(void* Original) = 0;
+
+ /**
+ * Malloc zeroed memory
+ */
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * TryMallocZeroed - like MallocZeroed(), but may return a nullptr result if the allocation
+ * request cannot be satisfied.
+ */
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * For some allocators this will return the actual size that should be requested to eliminate
+ * internal fragmentation. The return value will always be >= Count. This can be used to grow
+ * and shrink containers to optimal sizes.
+ * This call is always fast and threadsafe with no locking.
+ */
+ virtual size_t QuantizeSize(size_t Count, uint32_t Alignment);
+
+ /**
+ * If possible determine the size of the memory allocated at the given address
+ *
+ * @param Original - Pointer to memory we are checking the size of
+ * @param SizeOut - If possible, this value is set to the size of the passed in pointer
+ * @return true if succeeded
+ */
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut);
+
+ /**
+ * Notifies the malloc implementation that initialization of all allocators in GMalloc is complete, so it's safe to initialize any extra
+ * features that require "regular" allocations
+ */
+ virtual void OnMallocInitialized();
+
+ virtual void Trim(bool bTrimThreadCaches);
+
+ virtual void OutOfMemory(size_t Size, uint32_t Alignment);
+};
+
+extern FMalloc* GMalloc; /* Memory allocator */
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/memory/llm.h b/src/zencore/include/zencore/memory/llm.h
new file mode 100644
index 000000000..4f1c9de77
--- /dev/null
+++ b/src/zencore/include/zencore/memory/llm.h
@@ -0,0 +1,31 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/memory/tagtrace.h>
+
+namespace zen {
+
+// clang-format off
+#define LLM_ENUM_GENERIC_TAGS(macro) \
+ macro(Untagged, "Untagged", -1) \
+ macro(ProgramSize, "ProgramSize", -1) \
+ macro(Metrics, "Metrics", -1) \
+ macro(Logging, "Logging", -1) \
+ macro(IoBuffer, "IoBuffer", -1) \
+ macro(IoBufferMemory, "IoMemory", ELLMTag::IoBuffer) \
+ macro(IoBufferCore, "IoCore", ELLMTag::IoBuffer)
+
+// clang-format on
+
+enum class ELLMTag : uint8_t
+{
+#define LLM_ENUM(Enum, Str, Parent) Enum,
+ LLM_ENUM_GENERIC_TAGS(LLM_ENUM)
+#undef LLM_ENUM
+
+ GenericTagCount
+};
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/memory/mallocansi.h b/src/zencore/include/zencore/memory/mallocansi.h
new file mode 100644
index 000000000..510695c8c
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocansi.h
@@ -0,0 +1,31 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include "fmalloc.h"
+#include "memory.h"
+
+namespace zen {
+
+void* AnsiMalloc(size_t Size, uint32_t Alignment);
+void* AnsiRealloc(void* Ptr, size_t NewSize, uint32_t Alignment);
+void AnsiFree(void* Ptr);
+
+//
+// ANSI C memory allocator.
+//
+
+class FMallocAnsi final : public FMalloc
+{
+public:
+ FMallocAnsi();
+
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Ptr) override;
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+};
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/memory/mallocmimalloc.h b/src/zencore/include/zencore/memory/mallocmimalloc.h
new file mode 100644
index 000000000..759eeb4a6
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocmimalloc.h
@@ -0,0 +1,36 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+
+#if ZEN_USE_MIMALLOC
+# define ZEN_MIMALLOC_ENABLED 1
+#endif
+
+#if !defined(ZEN_MIMALLOC_ENABLED)
+# define ZEN_MIMALLOC_ENABLED 0
+#endif
+
+#if ZEN_MIMALLOC_ENABLED
+
+namespace zen {
+
+class FMallocMimalloc final : public FMalloc
+{
+public:
+ FMallocMimalloc();
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Ptr) override;
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+ virtual void Trim(bool bTrimThreadCaches) override;
+};
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/include/zencore/memory/mallocrpmalloc.h b/src/zencore/include/zencore/memory/mallocrpmalloc.h
new file mode 100644
index 000000000..be2627b2d
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocrpmalloc.h
@@ -0,0 +1,37 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+
+#if ZEN_USE_RPMALLOC
+# define ZEN_RPMALLOC_ENABLED 1
+#endif
+
+#if !defined(ZEN_RPMALLOC_ENABLED)
+# define ZEN_RPMALLOC_ENABLED 0
+#endif
+
+#if ZEN_RPMALLOC_ENABLED
+
+namespace zen {
+
+class FMallocRpmalloc final : public FMalloc
+{
+public:
+ FMallocRpmalloc();
+ ~FMallocRpmalloc();
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Ptr) override;
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+ virtual void Trim(bool bTrimThreadCaches) override;
+};
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/include/zencore/memory/mallocstomp.h b/src/zencore/include/zencore/memory/mallocstomp.h
new file mode 100644
index 000000000..5d83868bb
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocstomp.h
@@ -0,0 +1,100 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# define ZEN_WITH_MALLOC_STOMP 1
+#endif
+
+#ifndef ZEN_WITH_MALLOC_STOMP
+# define ZEN_WITH_MALLOC_STOMP 0
+#endif
+
+/**
+ * Stomp memory allocator support is compiled in on platforms where ZEN_WITH_MALLOC_STOMP is set.
+ * Run-time validation should be enabled using the '--malloc=stomp' command line argument.
+ */
+
+#if ZEN_WITH_MALLOC_STOMP
+
+# include <zencore/memory/fmalloc.h>
+# include <zencore/thread.h>
+
+namespace zen {
+
+/**
+ * Stomp memory allocator. It helps find the following errors:
+ * - Read or writes off the end of an allocation.
+ * - Read or writes off the beginning of an allocation.
+ * - Read or writes after freeing an allocation.
+ */
+class FMallocStomp final : public FMalloc
+{
+ struct FAllocationData;
+
+ const size_t PageSize;
+
+ /** If it is set to true, instead of focusing on overruns the allocator will focus on underruns. */
+ const bool bUseUnderrunMode;
+ RwLock Lock;
+
+ uintptr_t VirtualAddressCursor = 0;
+ size_t VirtualAddressMax = 0;
+ static constexpr size_t VirtualAddressBlockSize = 1 * 1024 * 1024 * 1024; // 1 GB blocks
+
+public:
+ // FMalloc interface.
+ explicit FMallocStomp(const bool InUseUnderrunMode = false);
+
+ /**
+ * Allocates a block of a given number of bytes of memory with the required alignment.
+ * In the process it allocates as many pages as necessary plus one that will be protected
+ * making it unaccessible and causing an exception. The actual allocation will be pushed
+ * to the end of the last valid unprotected page. To deal with underrun errors a sentinel
+ * is added right before the allocation in page which is checked on free.
+ *
+ * @param Size Size in bytes of the memory block to allocate.
+ * @param Alignment Alignment in bytes of the memory block to allocate.
+ * @return A pointer to the beginning of the memory block.
+ */
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+
+ /**
+ * Changes the size of the memory block pointed to by OldPtr.
+ * The function may move the memory block to a new location.
+ *
+ * @param OldPtr Pointer to a memory block previously allocated with Malloc.
+ * @param NewSize New size in bytes for the memory block.
+ * @param Alignment Alignment in bytes for the reallocation.
+ * @return A pointer to the reallocated memory block, which may be either the same as ptr or a new location.
+ */
+ virtual void* Realloc(void* InPtr, size_t NewSize, uint32_t Alignment) override;
+
+ virtual void* TryRealloc(void* InPtr, size_t NewSize, uint32_t Alignment) override;
+
+ /**
+ * Frees a memory allocation and verifies the sentinel in the process.
+ *
+ * @param InPtr Pointer of the data to free.
+ */
+ virtual void Free(void* InPtr) override;
+
+ /**
+ * If possible determine the size of the memory allocated at the given address.
+ * This will include all the pages that were allocated so it will be far more
+ * than what's set on the FAllocationData.
+ *
+ * @param Original - Pointer to memory we are checking the size of
+ * @param SizeOut - If possible, this value is set to the size of the passed in pointer
+ * @return true if succeeded
+ */
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+};
+
+} // namespace zen
+
+#endif // ZEN_WITH_MALLOC_STOMP
diff --git a/src/zencore/include/zencore/memory/memory.h b/src/zencore/include/zencore/memory/memory.h
new file mode 100644
index 000000000..2fc20def6
--- /dev/null
+++ b/src/zencore/include/zencore/memory/memory.h
@@ -0,0 +1,78 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <stdlib.h>
+#include <zencore/memory/fmalloc.h>
+
+#define UE_ALLOCATION_FUNCTION(...)
+
+namespace zen {
+
+/**
+ * Corresponds to UE-side FMemory implementation
+ */
+
+class Memory
+{
+public:
+ static void Initialize();
+
+ //
+ // C style memory allocation stubs that fall back to C runtime
+ //
+ UE_ALLOCATION_FUNCTION(1) static void* SystemMalloc(size_t Size);
+ static void SystemFree(void* Ptr);
+
+ //
+ // C style memory allocation stubs.
+ //
+
+ static inline void* Alloc(size_t Size, size_t Alignment = sizeof(void*)) { return Malloc(Size, uint32_t(Alignment)); }
+
+ UE_ALLOCATION_FUNCTION(1, 2) static inline void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+ UE_ALLOCATION_FUNCTION(2, 3) static inline void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+ static inline void Free(void* Original);
+ static inline size_t GetAllocSize(void* Original);
+
+ UE_ALLOCATION_FUNCTION(1, 2) static inline void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+private:
+ static void GCreateMalloc();
+};
+
+inline void*
+Memory::Malloc(size_t Count, uint32_t Alignment)
+{
+ return GMalloc->TryMalloc(Count, Alignment);
+}
+
+inline void*
+Memory::Realloc(void* Original, size_t Count, uint32_t Alignment)
+{
+ return GMalloc->TryRealloc(Original, Count, Alignment);
+}
+
+inline void
+Memory::Free(void* Original)
+{
+ if (Original)
+ {
+ GMalloc->Free(Original);
+ }
+}
+
+inline size_t
+Memory::GetAllocSize(void* Original)
+{
+ size_t Size = 0;
+ return GMalloc->GetAllocationSize(Original, Size) ? Size : 0;
+}
+
+inline void*
+Memory::MallocZeroed(size_t Count, uint32_t Alignment)
+{
+ return GMalloc->TryMallocZeroed(Count, Alignment);
+}
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/memory/memorytrace.h b/src/zencore/include/zencore/memory/memorytrace.h
new file mode 100644
index 000000000..d1ab1f914
--- /dev/null
+++ b/src/zencore/include/zencore/memory/memorytrace.h
@@ -0,0 +1,251 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/enumflags.h>
+#include <zencore/trace.h>
+
+#if !defined(UE_MEMORY_TRACE_AVAILABLE)
+# define UE_MEMORY_TRACE_AVAILABLE 0
+#endif
+
+#if !defined(UE_MEMORY_TRACE_LATE_INIT)
+# define UE_MEMORY_TRACE_LATE_INIT 0
+#endif
+
+#if !defined(PLATFORM_USES_FIXED_GMalloc_CLASS)
+# define PLATFORM_USES_FIXED_GMalloc_CLASS 0
+#endif
+
+#if !defined(UE_MEMORY_TRACE_ENABLED) && UE_TRACE_ENABLED
+# if UE_MEMORY_TRACE_AVAILABLE
+# define UE_MEMORY_TRACE_ENABLED ZEN_WITH_MEMTRACK
+# endif
+#endif
+
+#if !defined(UE_MEMORY_TRACE_ENABLED)
+# define UE_MEMORY_TRACE_ENABLED 0
+#endif
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+typedef uint32_t HeapId;
+
+////////////////////////////////////////////////////////////////////////////////
+enum EMemoryTraceRootHeap : uint8_t
+{
+ SystemMemory, // RAM
+ VideoMemory, // VRAM
+ EndHardcoded = VideoMemory,
+ EndReserved = 15
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// These values are traced. Do not modify existing values in order to maintain
+// compatibility.
+enum class EMemoryTraceHeapFlags : uint16_t
+{
+ None = 0,
+ Root = 1 << 0,
+ NeverFrees = 1 << 1, // The heap doesn't free (e.g. linear allocator)
+};
+ENUM_CLASS_FLAGS(EMemoryTraceHeapFlags);
+
+////////////////////////////////////////////////////////////////////////////////
+// These values are traced. Do not modify existing values in order to maintain
+// compatibility.
+enum class EMemoryTraceHeapAllocationFlags : uint8_t
+{
+ None = 0,
+ Heap = 1 << 0, // Is a heap, can be used to unmark alloc as heap.
+ Swap = 2 << 0, // Is a swap page
+};
+ENUM_CLASS_FLAGS(EMemoryTraceHeapAllocationFlags);
+
+////////////////////////////////////////////////////////////////////////////////
+enum class EMemoryTraceSwapOperation : uint8
+{
+ PageOut = 0, // Paged out to swap
+ PageIn = 1, // Read from swap via page fault
+ FreeInSwap = 2, // Freed while being paged out in swap
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Internal options for early initialization of memory tracing systems. Exposed
+// here due to visibility in platform implementations.
+enum class EMemoryTraceInit : uint8
+{
+ Disabled = 0,
+ AllocEvents = 1 << 0,
+ Callstacks = 1 << 1,
+ Tags = 1 << 2,
+ Full = AllocEvents | Callstacks | Tags,
+ Light = AllocEvents | Tags,
+};
+
+ENUM_CLASS_FLAGS(EMemoryTraceInit);
+
+////////////////////////////////////////////////////////////////////////////////
+#if UE_MEMORY_TRACE_ENABLED
+
+# define UE_MEMORY_TRACE(x) x
+
+UE_TRACE_CHANNEL_EXTERN(MemAllocChannel);
+
+////////////////////////////////////////////////////////////////////////////////
+class FMalloc* MemoryTrace_Create(class FMalloc* InMalloc);
+void MemoryTrace_Initialize();
+void MemoryTrace_Shutdown();
+
+/**
+ * Register a new heap specification (name). Use the returned value when marking heaps.
+ * @param ParentId Heap id of parent heap.
+ * @param Name Descriptive name of the heap.
+ * @param Flags Properties of this heap. See \ref EMemoryTraceHeapFlags
+ * @return Heap id to use when allocating memory
+ */
+HeapId MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags = EMemoryTraceHeapFlags::None);
+
+/**
+ * Register a new root heap specification (name). Use the returned value as parent to other heaps.
+ * @param Name Descriptive name of the root heap.
+ * @param Flags Properties of this root heap. See \ref EMemoryTraceHeapFlags
+ * @return Heap id to use when allocating memory
+ */
+HeapId MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags = EMemoryTraceHeapFlags::None);
+
+/**
+ * Mark a traced allocation as being a heap.
+ * @param Address Address of the allocation
+ * @param Heap Heap id, see /ref MemoryTrace_HeapSpec. If no specific heap spec has been created the correct root heap needs to be given.
+ * @param Flags Additional properties of the heap allocation. Note that \ref EMemoryTraceHeapAllocationFlags::Heap is implicit.
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_MarkAllocAsHeap(uint64 Address,
+ HeapId Heap,
+ EMemoryTraceHeapAllocationFlags Flags = EMemoryTraceHeapAllocationFlags::None,
+ uint32 ExternalCallstackId = 0);
+
+/**
+ * Unmark an allocation as a heap. When an allocation that has previously been used as a heap is reused as a regular
+ * allocation.
+ * @param Address Address of the allocation
+ * @param Heap Heap id
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_UnmarkAllocAsHeap(uint64 Address, HeapId Heap, uint32 ExternalCallstackId = 0);
+
+/**
+ * Trace an allocation event.
+ * @param Address Address of allocation
+ * @param Size Size of allocation
+ * @param Alignment Alignment of the allocation
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_Alloc(uint64 Address,
+ uint64 Size,
+ uint32 Alignment,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32 ExternalCallstackId = 0);
+
+/**
+ * Trace a free event.
+ * @param Address Address of the allocation being freed
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_Free(uint64 Address, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32 ExternalCallstackId = 0);
+
+/**
+ * Trace a free related to a reallocation event.
+ * @param Address Address of the allocation being freed
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_ReallocFree(uint64 Address, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32 ExternalCallstackId = 0);
+
+/** Trace an allocation related to a reallocation event.
+ * @param Address Address of allocation
+ * @param NewSize Size of allocation
+ * @param Alignment Alignment of the allocation
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_ReallocAlloc(uint64 Address,
+ uint64 NewSize,
+ uint32 Alignment,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32 ExternalCallstackId = 0);
+
+/** Trace a swap operation. Only available for system memory root heap (EMemoryTraceRootHeap::SystemMemory).
+ * @param PageAddress Page address for operation, in case of PageIn can be address of the page fault (not aligned to page boundary).
+ * @param SwapOperation Which swap operation is happening to the address.
+ * @param CompressedSize Compressed size of the page for page out operation.
+ * @param CallstackId CallstackId to use, if 0 to ignore (will not use current callstack id).
+ */
+void MemoryTrace_SwapOp(uint64 PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32 CompressedSize = 0, uint32 CallstackId = 0);
+
+////////////////////////////////////////////////////////////////////////////////
+#else // UE_MEMORY_TRACE_ENABLED
+
+# define UE_MEMORY_TRACE(x)
+inline HeapId
+MemoryTrace_RootHeapSpec(const char16_t* /*Name*/, EMemoryTraceHeapFlags /* Flags = EMemoryTraceHeapFlags::None */)
+{
+ return ~0u;
+};
+inline HeapId
+MemoryTrace_HeapSpec(HeapId /*ParentId*/, const char16_t* /*Name*/, EMemoryTraceHeapFlags /* Flags = EMemoryTraceHeapFlags::None */)
+{
+ return ~0u;
+}
+inline void
+MemoryTrace_MarkAllocAsHeap(uint64_t /*Address*/, HeapId /*Heap*/)
+{
+}
+inline void
+MemoryTrace_UnmarkAllocAsHeap(uint64_t /*Address*/, HeapId /*Heap*/)
+{
+}
+inline void
+MemoryTrace_Alloc(uint64_t /*Address*/,
+ uint64_t /*Size*/,
+ uint32_t /*Alignment*/,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_Free(uint64_t /*Address*/, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_ReallocFree(uint64_t /*Address*/, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_ReallocAlloc(uint64_t /*Address*/,
+ uint64_t /*NewSize*/,
+ uint32_t /*Alignment*/,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_SwapOp(uint64_t /*PageAddress*/,
+ EMemoryTraceSwapOperation /*SwapOperation*/,
+ uint32_t CompressedSize = 0,
+ uint32_t CallstackId = 0)
+{
+ ZEN_UNUSED(CompressedSize, CallstackId);
+}
+
+#endif // UE_MEMORY_TRACE_ENABLED
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/memory/newdelete.h b/src/zencore/include/zencore/memory/newdelete.h
new file mode 100644
index 000000000..d22c8604f
--- /dev/null
+++ b/src/zencore/include/zencore/memory/newdelete.h
@@ -0,0 +1,155 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <new>
+
+#if defined(_MSC_VER)
+# if (_MSC_VER >= 1900) && !defined(__EDG__)
+# define ZEN_RESTRICT __declspec(allocator) __declspec(restrict)
+# else
+# define ZEN_RESTRICT __declspec(restrict)
+# endif
+#else
+# define ZEN_RESTRICT
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+
+[[nodiscard]] ZEN_RESTRICT void* zen_new(size_t size);
+[[nodiscard]] ZEN_RESTRICT void* zen_new_aligned(size_t size, size_t alignment);
+[[nodiscard]] ZEN_RESTRICT void* zen_new_nothrow(size_t size) noexcept;
+[[nodiscard]] ZEN_RESTRICT void* zen_new_aligned_nothrow(size_t size, size_t alignment) noexcept;
+
+void zen_free(void* p) noexcept;
+void zen_free_size(void* p, size_t size) noexcept;
+void zen_free_size_aligned(void* p, size_t size, size_t alignment) noexcept;
+void zen_free_aligned(void* p, size_t alignment) noexcept;
+
+//////////////////////////////////////////////////////////////////////////
+
+#if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
+# define zen_decl_new(n) [[nodiscard]] _VCRT_ALLOCATOR _Ret_notnull_ _Post_writable_byte_size_(n)
+# define zen_decl_new_nothrow(n) [[nodiscard]] _VCRT_ALLOCATOR _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
+#else
+# define zen_decl_new(n) [[nodiscard]]
+# define zen_decl_new_nothrow(n) [[nodiscard]]
+#endif
+
+void
+operator delete(void* p) noexcept
+{
+ zen_free(p);
+}
+
+void
+operator delete[](void* p) noexcept
+{
+ zen_free(p);
+}
+
+void
+operator delete(void* p, const std::nothrow_t&) noexcept
+{
+ zen_free(p);
+}
+
+void
+operator delete[](void* p, const std::nothrow_t&) noexcept
+{
+ zen_free(p);
+}
+
+zen_decl_new(n) void*
+operator new(std::size_t n) noexcept(false)
+{
+ return zen_new(n);
+}
+
+zen_decl_new(n) void*
+operator new[](std::size_t n) noexcept(false)
+{
+ return zen_new(n);
+}
+
+zen_decl_new_nothrow(n) void*
+operator new(std::size_t n, const std::nothrow_t& tag) noexcept
+{
+ (void)(tag);
+ return zen_new_nothrow(n);
+}
+
+zen_decl_new_nothrow(n) void*
+operator new[](std::size_t n, const std::nothrow_t& tag) noexcept
+{
+ (void)(tag);
+ return zen_new_nothrow(n);
+}
+
+#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
+void
+operator delete(void* p, std::size_t n) noexcept
+{
+ zen_free_size(p, n);
+};
+void
+operator delete[](void* p, std::size_t n) noexcept
+{
+ zen_free_size(p, n);
+};
+#endif
+
+#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
+void
+operator delete(void* p, std::align_val_t al) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+void
+operator delete[](void* p, std::align_val_t al) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+void
+operator delete(void* p, std::size_t n, std::align_val_t al) noexcept
+{
+ zen_free_size_aligned(p, n, static_cast<size_t>(al));
+};
+void
+operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept
+{
+ zen_free_size_aligned(p, n, static_cast<size_t>(al));
+};
+void
+operator delete(void* p, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+void
+operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+
+void*
+operator new(std::size_t n, std::align_val_t al) noexcept(false)
+{
+ return zen_new_aligned(n, static_cast<size_t>(al));
+}
+void*
+operator new[](std::size_t n, std::align_val_t al) noexcept(false)
+{
+ return zen_new_aligned(n, static_cast<size_t>(al));
+}
+void*
+operator new(std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ return zen_new_aligned_nothrow(n, static_cast<size_t>(al));
+}
+void*
+operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ return zen_new_aligned_nothrow(n, static_cast<size_t>(al));
+}
+#endif
diff --git a/src/zencore/include/zencore/memory/tagtrace.h b/src/zencore/include/zencore/memory/tagtrace.h
new file mode 100644
index 000000000..f51b21466
--- /dev/null
+++ b/src/zencore/include/zencore/memory/tagtrace.h
@@ -0,0 +1,93 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/trace.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace zen {
+
+enum class ELLMTag : uint8_t;
+
+int32_t MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display);
+int32_t MemoryTrace_GetActiveTag();
+
+inline constexpr int32_t TRACE_TAG = 257;
+
+} // namespace zen
+
+////////////////////////////////////////////////////////////////////////////////
+#if !defined(UE_MEMORY_TAGS_TRACE_ENABLED)
+# define UE_MEMORY_TAGS_TRACE_ENABLED 1
+#endif
+
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+namespace zen {
+////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Used to associate any allocation within this scope to a given tag.
+ *
+ * We need to be able to convert the three types of inputs to LLM scopes:
+ * - ELLMTag, an uint8 with fixed categories. There are three sub ranges
+ Generic tags, platform and project tags.
+ * - FName, free form string, for example a specific asset.
+ * - TagData, an opaque pointer from LLM.
+ *
+ */
+class FMemScope
+{
+public:
+ FMemScope(); // Used with SetTagAndActivate
+ FMemScope(int32_t InTag, bool bShouldActivate = true);
+ FMemScope(ELLMTag InTag, bool bShouldActivate = true);
+ ~FMemScope();
+
+private:
+ void ActivateScope(int32_t InTag);
+ UE::Trace::Private::FScopedLogScope Inner;
+ int32_t PrevTag;
+};
+
+/**
+ * A scope that activates in case no existing scope is active.
+ */
+template<typename TagType>
+class FDefaultMemScope : public FMemScope
+{
+public:
+ FDefaultMemScope(TagType InTag) : FMemScope(InTag, MemoryTrace_GetActiveTag() == 0) {}
+};
+
+/**
+ * Used order to keep the tag for memory that is being reallocated.
+ */
+class FMemScopePtr
+{
+public:
+ FMemScopePtr(uint64_t InPtr);
+ ~FMemScopePtr();
+
+private:
+ UE::Trace::Private::FScopedLogScope Inner;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+# define UE_MEMSCOPE(InTag) FMemScope PREPROCESSOR_JOIN(MemScope, __LINE__)(InTag);
+# define UE_MEMSCOPE_PTR(InPtr) FMemScopePtr PREPROCESSOR_JOIN(MemPtrScope, __LINE__)((uint64_t)InPtr);
+# define UE_MEMSCOPE_DEFAULT(InTag) FDefaultMemScope PREPROCESSOR_JOIN(MemScope, __LINE__)(InTag);
+# define UE_MEMSCOPE_UNINITIALIZED(Line) FMemScope PREPROCESSOR_JOIN(MemScope, Line);
+
+} // namespace zen
+
+#else // UE_MEMORY_TAGS_TRACE_ENABLED
+
+////////////////////////////////////////////////////////////////////////////////
+# define UE_MEMSCOPE(...)
+# define UE_MEMSCOPE_PTR(...)
+# define UE_MEMSCOPE_DEFAULT(...)
+# define UE_MEMSCOPE_UNINITIALIZED(...)
+# define UE_MEMSCOPE_ACTIVATE(...)
+#endif // UE_MEMORY_TAGS_TRACE_ENABLED
diff --git a/src/zencore/include/zencore/string.h b/src/zencore/include/zencore/string.h
index b10b6a2ba..e2ef1c1a0 100644
--- a/src/zencore/include/zencore/string.h
+++ b/src/zencore/include/zencore/string.h
@@ -51,6 +51,30 @@ StringLength(const wchar_t* str)
return wcslen(str);
}
+inline int32_t
+StringCompare(const char16_t* s1, const char16_t* s2)
+{
+ char16_t c1, c2;
+
+ while ((c1 = *s1) == (c2 = *s2))
+ {
+ if (c1 == 0)
+ {
+ return 0;
+ }
+
+ ++s1;
+ ++s2;
+ }
+ return uint16_t(c1) - uint16_t(c2);
+}
+
+inline bool
+StringEquals(const char16_t* s1, const char16_t* s2)
+{
+ return StringCompare(s1, s2) == 0;
+}
+
inline size_t
StringLength(const char16_t* str)
{
diff --git a/src/zencore/include/zencore/trace.h b/src/zencore/include/zencore/trace.h
index 89e4b76bf..2ca2b7c81 100644
--- a/src/zencore/include/zencore/trace.h
+++ b/src/zencore/include/zencore/trace.h
@@ -19,6 +19,8 @@ ZEN_THIRD_PARTY_INCLUDES_END
#define ZEN_TRACE_CPU(x) TRACE_CPU_SCOPE(x)
#define ZEN_TRACE_CPU_FLUSH(x) TRACE_CPU_SCOPE(x, trace::CpuScopeFlags::CpuFlush)
+namespace zen {
+
enum class TraceType
{
File,
@@ -32,6 +34,8 @@ bool IsTracing();
void TraceStart(std::string_view ProgramName, const char* HostOrPath, TraceType Type);
bool TraceStop();
+}
+
#else
#define ZEN_TRACE_CPU(x)
diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp
index 51f380c34..d6d02eb0b 100644
--- a/src/zencore/iobuffer.cpp
+++ b/src/zencore/iobuffer.cpp
@@ -8,6 +8,8 @@
#include <zencore/iohash.h>
#include <zencore/logging.h>
#include <zencore/memory.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/memory.h>
#include <zencore/testing.h>
#include <zencore/thread.h>
#include <zencore/trace.h>
@@ -15,12 +17,6 @@
#include <memory.h>
#include <system_error>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
#if ZEN_PLATFORM_WINDOWS
# include <zencore/windows.h>
#else
@@ -43,39 +39,39 @@ namespace zen {
void
IoBufferCore::AllocateBuffer(size_t InSize, size_t Alignment) const
{
-#if ZEN_USE_MIMALLOC
- void* Ptr = mi_aligned_alloc(Alignment, RoundUp(InSize, Alignment));
- m_Flags.fetch_or(kIoBufferAlloc, std::memory_order_relaxed);
-#else
+ UE_MEMSCOPE(ELLMTag::IoBufferMemory);
+
void* Ptr = Memory::Alloc(InSize, Alignment);
-#endif
if (!Ptr)
{
ThrowOutOfMemory(fmt::format("failed allocating {:#x} bytes aligned to {:#x}", InSize, Alignment));
}
+
m_DataPtr = Ptr;
}
void
IoBufferCore::FreeBuffer()
{
- if (!m_DataPtr)
+ if (m_DataPtr)
{
- return;
+ Memory::Free(const_cast<void*>(m_DataPtr));
+ m_DataPtr = nullptr;
}
+}
- const uint32_t LocalFlags = m_Flags.load(std::memory_order_relaxed);
-
-#if ZEN_USE_MIMALLOC
- if (LocalFlags & kIoBufferAlloc)
- {
- return mi_free(const_cast<void*>(m_DataPtr));
- }
-#endif
+void*
+IoBufferCore::operator new(size_t Size)
+{
+ UE_MEMSCOPE(ELLMTag::IoBufferCore);
+ return Memory::Malloc(Size);
+}
- ZEN_UNUSED(LocalFlags);
- return Memory::Free(const_cast<void*>(m_DataPtr));
+void
+IoBufferCore::operator delete(void* Ptr)
+{
+ Memory::Free(Ptr);
}
//////////////////////////////////////////////////////////////////////////
@@ -104,10 +100,9 @@ IoBufferCore::IoBufferCore(size_t InSize, size_t Alignment)
IoBufferCore::~IoBufferCore()
{
- if (IsOwnedByThis() && m_DataPtr)
+ if (IsOwnedByThis())
{
FreeBuffer();
- m_DataPtr = nullptr;
}
}
@@ -567,7 +562,7 @@ IoBufferBuilder::ReadFromFileMaybe(const IoBuffer& InBuffer)
Error = zen::GetLastError();
}
#else
- int Fd = int(intptr_t(FileRef.FileHandle));
+ int Fd = int(intptr_t(FileRef.FileHandle));
ssize_t ReadResult = pread(Fd, OutBuffer.MutableData(), size_t(NumberOfBytesToRead), off_t(FileOffset));
if (ReadResult != -1)
{
@@ -635,7 +630,7 @@ IoBufferBuilder::MakeFromFile(const std::filesystem::path& FileName, uint64_t Of
DataFile.GetSize((ULONGLONG&)FileSize);
#else
int Flags = O_RDONLY | O_CLOEXEC;
- int Fd = open(FileName.c_str(), Flags);
+ int Fd = open(FileName.c_str(), Flags);
if (Fd < 0)
{
return {};
@@ -704,7 +699,7 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName)
Handle = DataFile.Detach();
#else
- int Fd = open(FileName.native().c_str(), O_RDONLY);
+ int Fd = open(FileName.native().c_str(), O_RDONLY);
if (Fd < 0)
{
return {};
diff --git a/src/zencore/logging.cpp b/src/zencore/logging.cpp
index 1a0a91b3d..7bd500b3b 100644
--- a/src/zencore/logging.cpp
+++ b/src/zencore/logging.cpp
@@ -6,6 +6,8 @@
#include <zencore/testing.h>
#include <zencore/thread.h>
+#include <zencore/memory/llm.h>
+
ZEN_THIRD_PARTY_INCLUDES_START
#include <spdlog/details/registry.h>
#include <spdlog/sinks/null_sink.h>
@@ -66,6 +68,7 @@ static_assert(offsetof(spdlog::source_loc, funcname) == offsetof(SourceLocation,
void
EmitLogMessage(LoggerRef& Logger, int LogLevel, const std::string_view Message)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel;
Logger.SpdLogger->log(InLevel, Message);
if (IsErrorLevel(LogLevel))
@@ -80,6 +83,7 @@ EmitLogMessage(LoggerRef& Logger, int LogLevel, const std::string_view Message)
void
EmitLogMessage(LoggerRef& Logger, int LogLevel, std::string_view Format, fmt::format_args Args)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
zen::logging::LoggingContext LogCtx;
fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args);
zen::logging::EmitLogMessage(Logger, LogLevel, LogCtx.Message());
@@ -88,6 +92,7 @@ EmitLogMessage(LoggerRef& Logger, int LogLevel, std::string_view Format, fmt::fo
void
EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel, const std::string_view Message)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
const spdlog::source_loc& Location = *reinterpret_cast<const spdlog::source_loc*>(&InLocation);
const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel;
Logger.SpdLogger->log(Location, InLevel, Message);
@@ -103,6 +108,7 @@ EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel
void
EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel, std::string_view Format, fmt::format_args Args)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
zen::logging::LoggingContext LogCtx;
fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args);
zen::logging::EmitLogMessage(Logger, InLocation, LogLevel, LogCtx.Message());
@@ -111,6 +117,7 @@ EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel
void
EmitConsoleLogMessage(int LogLevel, const std::string_view Message)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel;
ConsoleLog().SpdLogger->log(InLevel, Message);
}
@@ -118,6 +125,7 @@ EmitConsoleLogMessage(int LogLevel, const std::string_view Message)
void
EmitConsoleLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
zen::logging::LoggingContext LogCtx;
fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args);
zen::logging::EmitConsoleLogMessage(LogLevel, LogCtx.Message());
@@ -192,6 +200,8 @@ std::string LogLevels[level::LogLevelCount];
void
ConfigureLogLevels(level::LogLevel Level, std::string_view Loggers)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
RwLock::ExclusiveLockScope _(LogLevelsLock);
LogLevels[Level] = Loggers;
}
@@ -199,6 +209,8 @@ ConfigureLogLevels(level::LogLevel Level, std::string_view Loggers)
void
RefreshLogLevels(level::LogLevel* DefaultLevel)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
spdlog::details::registry::log_levels Levels;
{
@@ -275,6 +287,8 @@ Default()
void
SetDefault(std::string_view NewDefaultLoggerId)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
auto NewDefaultLogger = spdlog::get(std::string(NewDefaultLoggerId));
ZEN_ASSERT(NewDefaultLogger);
@@ -293,6 +307,8 @@ ErrorLog()
void
SetErrorLog(std::string_view NewErrorLoggerId)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
if (NewErrorLoggerId.empty())
{
TheErrorLogger = {};
@@ -310,6 +326,8 @@ SetErrorLog(std::string_view NewErrorLoggerId)
LoggerRef
Get(std::string_view Name)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
std::shared_ptr<spdlog::logger> Logger = spdlog::get(std::string(Name));
if (!Logger)
@@ -339,6 +357,8 @@ SuppressConsoleLog()
LoggerRef
ConsoleLog()
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
std::call_once(ConsoleInitFlag, [&] {
if (!ConLogger)
{
@@ -355,6 +375,8 @@ ConsoleLog()
void
InitializeLogging()
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
TheDefaultLogger = *spdlog::default_logger_raw();
}
diff --git a/src/zencore/memory.cpp b/src/zencore/memory.cpp
index a0d911786..a2fe02f3a 100644
--- a/src/zencore/memory.cpp
+++ b/src/zencore/memory.cpp
@@ -4,67 +4,14 @@
#include <zencore/fmtutils.h>
#include <zencore/intmath.h>
#include <zencore/memory.h>
+#include <zencore/memory/memory.h>
#include <zencore/testing.h>
#include <zencore/zencore.h>
#include <cstdlib>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
namespace zen {
-//////////////////////////////////////////////////////////////////////////
-
-static void*
-AlignedAllocImpl(size_t Size, size_t Alignment)
-{
- // aligned_alloc() states that size must be a multiple of alignment. Some
- // platforms return null if this requirement isn't met.
- Size = (Size + Alignment - 1) & ~(Alignment - 1);
-
-#if ZEN_USE_MIMALLOC
- return mi_aligned_alloc(Alignment, Size);
-#elif ZEN_PLATFORM_WINDOWS
- return _aligned_malloc(Size, Alignment);
-#else
- return std::aligned_alloc(Alignment, Size);
-#endif
-}
-
-void
-AlignedFreeImpl(void* ptr)
-{
- if (ptr == nullptr)
- return;
-
-#if ZEN_USE_MIMALLOC
- return mi_free(ptr);
-#elif ZEN_PLATFORM_WINDOWS
- _aligned_free(ptr);
-#else
- std::free(ptr);
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-void*
-Memory::Alloc(size_t Size, size_t Alignment)
-{
- return AlignedAllocImpl(Size, Alignment);
-}
-
-void
-Memory::Free(void* ptr)
-{
- AlignedFreeImpl(ptr);
-}
-
-//////////////////////////////////////////////////////////////////////////
//
// Unit tests
//
diff --git a/src/zencore/memory/fmalloc.cpp b/src/zencore/memory/fmalloc.cpp
new file mode 100644
index 000000000..3e96003f5
--- /dev/null
+++ b/src/zencore/memory/fmalloc.cpp
@@ -0,0 +1,156 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <string.h>
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memory.h>
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
+class FInitialMalloc : public FMalloc
+{
+ virtual void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->Malloc(Count, Alignment);
+ }
+ virtual void* TryMalloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->TryMalloc(Count, Alignment);
+ }
+ virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->Realloc(Original, Count, Alignment);
+ }
+ virtual void* TryRealloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->TryRealloc(Original, Count, Alignment);
+ }
+ virtual void Free(void* Original) override
+ {
+ Memory::Initialize();
+ return GMalloc->Free(Original);
+ }
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->MallocZeroed(Count, Alignment);
+ }
+
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->TryMallocZeroed(Count, Alignment);
+ }
+ virtual size_t QuantizeSize(size_t Count, uint32_t Alignment) override
+ {
+ Memory::Initialize();
+ return GMalloc->QuantizeSize(Count, Alignment);
+ }
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override
+ {
+ Memory::Initialize();
+ return GMalloc->GetAllocationSize(Original, SizeOut);
+ }
+ virtual void OnMallocInitialized() override {}
+ virtual void Trim(bool bTrimThreadCaches) override { ZEN_UNUSED(bTrimThreadCaches); }
+} GInitialMalloc;
+
+FMalloc* GMalloc = &GInitialMalloc; /* Memory allocator */
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+FUseSystemMallocForNew::operator new(size_t Size)
+{
+ return Memory::SystemMalloc(Size);
+}
+
+void
+FUseSystemMallocForNew::operator delete(void* Ptr)
+{
+ Memory::SystemFree(Ptr);
+}
+
+void*
+FUseSystemMallocForNew::operator new[](size_t Size)
+{
+ return Memory::SystemMalloc(Size);
+}
+
+void
+FUseSystemMallocForNew::operator delete[](void* Ptr)
+{
+ Memory::SystemFree(Ptr);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+FMalloc::TryRealloc(void* Original, size_t Count, uint32_t Alignment)
+{
+ return Realloc(Original, Count, Alignment);
+}
+
+void*
+FMalloc::TryMalloc(size_t Count, uint32_t Alignment)
+{
+ return Malloc(Count, Alignment);
+}
+
+void*
+FMalloc::TryMallocZeroed(size_t Count, uint32_t Alignment)
+{
+ return MallocZeroed(Count, Alignment);
+}
+
+void*
+FMalloc::MallocZeroed(size_t Count, uint32_t Alignment)
+{
+ void* const Memory = Malloc(Count, Alignment);
+
+ if (Memory)
+ {
+ ::memset(Memory, 0, Count);
+ }
+
+ return Memory;
+}
+
+void
+FMalloc::OutOfMemory(size_t Size, uint32_t Alignment)
+{
+ ZEN_UNUSED(Size, Alignment);
+ // no-op by default
+}
+
+void
+FMalloc::Trim(bool bTrimThreadCaches)
+{
+ ZEN_UNUSED(bTrimThreadCaches);
+}
+
+void
+FMalloc::OnMallocInitialized()
+{
+}
+
+bool
+FMalloc::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ ZEN_UNUSED(Original, SizeOut);
+ return false; // Generic implementation has no way of determining this
+}
+
+size_t
+FMalloc::QuantizeSize(size_t Count, uint32_t Alignment)
+{
+ ZEN_UNUSED(Alignment);
+ return Count; // Generic implementation has no way of determining this
+}
+
+} // namespace zen
diff --git a/src/zencore/memory/mallocansi.cpp b/src/zencore/memory/mallocansi.cpp
new file mode 100644
index 000000000..9c3936172
--- /dev/null
+++ b/src/zencore/memory/mallocansi.cpp
@@ -0,0 +1,251 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/mallocansi.h>
+
+#include <zencore/intmath.h>
+#include <zencore/memory/align.h>
+#include <zencore/windows.h>
+
+#if ZEN_PLATFORM_LINUX
+# define PLATFORM_USE_ANSI_POSIX_MALLOC 1
+#endif
+
+#if ZEN_PLATFORM_MAC
+# define PLATFORM_USE_CUSTOM_MEMALIGN 1
+#endif
+
+#ifndef PLATFORM_USE_ANSI_MEMALIGN
+# define PLATFORM_USE_ANSI_MEMALIGN 0
+#endif
+
+#ifndef PLATFORM_USE_ANSI_POSIX_MALLOC
+# define PLATFORM_USE_ANSI_POSIX_MALLOC 0
+#endif
+
+#ifndef PLATFORM_USE_CUSTOM_MEMALIGN
+# define PLATFORM_USE_CUSTOM_MEMALIGN 0
+#endif
+
+#if PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN
+# include <malloc.h>
+# include <string.h>
+#endif
+
+#define MALLOC_ANSI_USES__ALIGNED_MALLOC ZEN_PLATFORM_WINDOWS
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+AnsiMalloc(size_t Size, uint32_t Alignment)
+{
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ void* Result = _aligned_malloc(Size, Alignment);
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC
+ void* Result;
+ if (posix_memalign(&Result, Alignment, Size) != 0)
+ {
+ Result = nullptr;
+ }
+#elif PLATFORM_USE_ANSI_MEMALIGN
+ void* Result = memalign(Alignment, Size);
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ void* Ptr = malloc(Size + Alignment + sizeof(void*) + sizeof(size_t));
+ void* Result = nullptr;
+ if (Ptr)
+ {
+ Result = Align((uint8_t*)Ptr + sizeof(void*) + sizeof(size_t), Alignment);
+ *((void**)((uint8_t*)Result - sizeof(void*))) = Ptr;
+ *((size_t*)((uint8_t*)Result - sizeof(void*) - sizeof(size_t))) = Size;
+ }
+#else
+# error Unknown allocation path
+#endif
+
+ return Result;
+}
+
+size_t
+AnsiGetAllocationSize(void* Original)
+{
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ return _aligned_msize(Original, 16, 0); // TODO: incorrectly assumes alignment of 16
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN
+ return malloc_usable_size(Original);
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ return *((size_t*)((uint8_t*)Original - sizeof(void*) - sizeof(size_t)));
+#else
+# error Unknown allocation path
+#endif
+}
+
+void*
+AnsiRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = nullptr;
+
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ if (Ptr && NewSize)
+ {
+ Result = _aligned_realloc(Ptr, NewSize, Alignment);
+ }
+ else if (Ptr == nullptr)
+ {
+ Result = _aligned_malloc(NewSize, Alignment);
+ }
+ else
+ {
+ _aligned_free(Ptr);
+ Result = nullptr;
+ }
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC
+ if (Ptr && NewSize)
+ {
+ size_t UsableSize = malloc_usable_size(Ptr);
+ if (posix_memalign(&Result, Alignment, NewSize) != 0)
+ {
+ Result = nullptr;
+ }
+ else if (UsableSize)
+ {
+ memcpy(Result, Ptr, Min(NewSize, UsableSize));
+ }
+ free(Ptr);
+ }
+ else if (Ptr == nullptr)
+ {
+ if (posix_memalign(&Result, Alignment, NewSize) != 0)
+ {
+ Result = nullptr;
+ }
+ }
+ else
+ {
+ free(Ptr);
+ Result = nullptr;
+ }
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ if (Ptr && NewSize)
+ {
+ // Can't use realloc as it might screw with alignment.
+ Result = AnsiMalloc(NewSize, Alignment);
+ size_t PtrSize = AnsiGetAllocationSize(Ptr);
+ memcpy(Result, Ptr, Min(NewSize, PtrSize));
+ AnsiFree(Ptr);
+ }
+ else if (Ptr == nullptr)
+ {
+ Result = AnsiMalloc(NewSize, Alignment);
+ }
+ else
+ {
+ free(*((void**)((uint8_t*)Ptr - sizeof(void*))));
+ Result = nullptr;
+ }
+#else
+# error Unknown allocation path
+#endif
+
+ return Result;
+}
+
+void
+AnsiFree(void* Ptr)
+{
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ _aligned_free(Ptr);
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN
+ free(Ptr);
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ if (Ptr)
+ {
+ free(*((void**)((uint8_t*)Ptr - sizeof(void*))));
+ }
+#else
+# error Unknown allocation path
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+FMallocAnsi::FMallocAnsi()
+{
+#if ZEN_PLATFORM_WINDOWS
+ // Enable low fragmentation heap - http://msdn2.microsoft.com/en-US/library/aa366750.aspx
+ intptr_t CrtHeapHandle = _get_heap_handle();
+ ULONG EnableLFH = 2;
+ HeapSetInformation((void*)CrtHeapHandle, HeapCompatibilityInformation, &EnableLFH, sizeof(EnableLFH));
+#endif
+}
+
+void*
+FMallocAnsi::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ Alignment = Max(Size >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+
+ void* Result = AnsiMalloc(Size, Alignment);
+
+ return Result;
+}
+
+void*
+FMallocAnsi::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocAnsi::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+
+ void* Result = AnsiRealloc(Ptr, NewSize, Alignment);
+
+ return Result;
+}
+
+void*
+FMallocAnsi::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(Ptr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize != 0)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void
+FMallocAnsi::Free(void* Ptr)
+{
+ AnsiFree(Ptr);
+}
+
+bool
+FMallocAnsi::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ if (!Original)
+ {
+ return false;
+ }
+
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ ZEN_UNUSED(SizeOut);
+ return false;
+#else
+ SizeOut = AnsiGetAllocationSize(Original);
+ return true;
+#endif
+}
+
+} // namespace zen
diff --git a/src/zencore/memory/mallocmimalloc.cpp b/src/zencore/memory/mallocmimalloc.cpp
new file mode 100644
index 000000000..1919af3bf
--- /dev/null
+++ b/src/zencore/memory/mallocmimalloc.cpp
@@ -0,0 +1,197 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/intmath.h>
+#include <zencore/memory/align.h>
+#include <zencore/memory/mallocmimalloc.h>
+
+#if ZEN_MIMALLOC_ENABLED
+
+# include <mimalloc.h>
+
+/** Value we fill a memory block with after it is free, in UE_BUILD_DEBUG **/
+# define DEBUG_FILL_FREED (0xdd)
+
+/** Value we fill a new memory block with, in UE_BUILD_DEBUG **/
+# define DEBUG_FILL_NEW (0xcd)
+
+# define ZEN_ENABLE_DEBUG_FILL 1
+
+namespace zen {
+
+// Dramatically reduce memory zeroing and page faults during alloc intense workloads
+// by keeping freed pages for a little while instead of releasing them
+// right away to the OS, effectively acting like a scratch buffer
+// until pages are both freed and inactive for the delay specified
+// in milliseconds.
+int32_t GMiMallocMemoryResetDelay = 10000;
+
+FMallocMimalloc::FMallocMimalloc()
+{
+ mi_option_set(mi_option_reset_delay, GMiMallocMemoryResetDelay);
+}
+
+void*
+FMallocMimalloc::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = mi_malloc_aligned(Size, Alignment);
+ }
+ else
+ {
+ NewPtr = mi_malloc_aligned(Size, uint32_t(Size >= 16 ? 16 : 8));
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (Size && NewPtr != nullptr)
+ {
+ memset(NewPtr, DEBUG_FILL_NEW, mi_usable_size(NewPtr));
+ }
+# endif
+
+ return NewPtr;
+}
+
+void*
+FMallocMimalloc::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocMimalloc::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ size_t OldSize = 0;
+ if (Ptr)
+ {
+ OldSize = mi_malloc_size(Ptr);
+ if (NewSize < OldSize)
+ {
+ memset((uint8_t*)Ptr + NewSize, DEBUG_FILL_FREED, OldSize - NewSize);
+ }
+ }
+# endif
+ void* NewPtr = nullptr;
+
+ if (NewSize == 0)
+ {
+ mi_free(Ptr);
+
+ return nullptr;
+ }
+
+# if ZEN_PLATFORM_MAC
+ // macOS expects all allocations to be aligned to 16 bytes, so on Mac we always have to use mi_realloc_aligned
+ Alignment = AlignArbitrary(Max((uint32_t)16, Alignment), (uint32_t)16);
+ NewPtr = mi_realloc_aligned(Ptr, NewSize, Alignment);
+# else
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+ NewPtr = mi_realloc_aligned(Ptr, NewSize, Alignment);
+ }
+ else
+ {
+ NewPtr = mi_realloc(Ptr, NewSize);
+ }
+# endif
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (NewPtr && NewSize > OldSize)
+ {
+ memset((uint8_t*)NewPtr + OldSize, DEBUG_FILL_NEW, mi_usable_size(NewPtr) - OldSize);
+ }
+# endif
+
+ return NewPtr;
+}
+
+void*
+FMallocMimalloc::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(Ptr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void
+FMallocMimalloc::Free(void* Ptr)
+{
+ if (!Ptr)
+ {
+ return;
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ memset(Ptr, DEBUG_FILL_FREED, mi_usable_size(Ptr));
+# endif
+
+ mi_free(Ptr);
+}
+
+void*
+FMallocMimalloc::MallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMallocZeroed(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocMimalloc::TryMallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = mi_zalloc_aligned(Size, Alignment);
+ }
+ else
+ {
+ NewPtr = mi_zalloc_aligned(Size, uint32_t(Size >= 16 ? 16 : 8));
+ }
+
+ return NewPtr;
+}
+
+bool
+FMallocMimalloc::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ SizeOut = mi_malloc_size(Original);
+ return true;
+}
+
+void
+FMallocMimalloc::Trim(bool bTrimThreadCaches)
+{
+ mi_collect(bTrimThreadCaches);
+}
+
+# undef DEBUG_FILL_FREED
+# undef DEBUG_FILL_NEW
+
+} // namespace zen
+
+#endif // ZEN_MIMALLOC_ENABLED
diff --git a/src/zencore/memory/mallocrpmalloc.cpp b/src/zencore/memory/mallocrpmalloc.cpp
new file mode 100644
index 000000000..ffced27c9
--- /dev/null
+++ b/src/zencore/memory/mallocrpmalloc.cpp
@@ -0,0 +1,189 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/intmath.h>
+#include <zencore/memory/align.h>
+#include <zencore/memory/mallocrpmalloc.h>
+
+#if ZEN_RPMALLOC_ENABLED
+
+# include "rpmalloc.h"
+
+/** Value we fill a memory block with after it is freed, in ZEN_BUILD_DEBUG builds **/
+# define DEBUG_FILL_FREED (0xdd)
+
+/** Value we fill a new memory block with, in ZEN_BUILD_DEBUG builds **/
+# define DEBUG_FILL_NEW (0xcd)
+
+# define ZEN_ENABLE_DEBUG_FILL 1
+
+namespace zen {
+
+FMallocRpmalloc::FMallocRpmalloc()
+{
+ rpmalloc_initialize(nullptr);
+}
+
+FMallocRpmalloc::~FMallocRpmalloc()
+{
+ rpmalloc_finalize();
+}
+
+void*
+FMallocRpmalloc::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = rpaligned_alloc(Alignment, Size);
+ }
+ else
+ {
+ NewPtr = rpaligned_alloc(uint32_t(Size >= 16 ? 16 : 8), Size);
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (Size && NewPtr != nullptr)
+ {
+ memset(NewPtr, DEBUG_FILL_NEW, rpmalloc_usable_size(NewPtr));
+ }
+# endif
+
+ return NewPtr;
+}
+
+void*
+FMallocRpmalloc::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocRpmalloc::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(Ptr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocRpmalloc::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ size_t OldSize = 0;
+ if (Ptr)
+ {
+ OldSize = rpmalloc_usable_size(Ptr);
+ if (NewSize < OldSize)
+ {
+ memset((uint8_t*)Ptr + NewSize, DEBUG_FILL_FREED, OldSize - NewSize);
+ }
+ }
+# endif
+ void* NewPtr = nullptr;
+
+ if (NewSize == 0)
+ {
+ rpfree(Ptr);
+
+ return nullptr;
+ }
+
+# if ZEN_PLATFORM_MAC
+ // macOS expects all allocations to be aligned to 16 bytes, so on Mac we always have to use rpaligned_realloc
+ Alignment = AlignArbitrary(Max((uint32_t)16, Alignment), (uint32_t)16);
+ NewPtr = rpaligned_realloc(Ptr, Alignment, NewSize, /* OldSize */ 0, /* flags */ 0);
+# else
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+ NewPtr = rpaligned_realloc(Ptr, Alignment, NewSize, /* OldSize */ 0, /* flags */ 0);
+ }
+ else
+ {
+ NewPtr = rprealloc(Ptr, NewSize);
+ }
+# endif
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (NewPtr && NewSize > OldSize)
+ {
+ memset((uint8_t*)NewPtr + OldSize, DEBUG_FILL_NEW, rpmalloc_usable_size(NewPtr) - OldSize);
+ }
+# endif
+
+ return NewPtr;
+}
+
+void
+FMallocRpmalloc::Free(void* Ptr)
+{
+ if (!Ptr)
+ {
+ return;
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ memset(Ptr, DEBUG_FILL_FREED, rpmalloc_usable_size(Ptr));
+# endif
+
+ rpfree(Ptr);
+}
+
+void*
+FMallocRpmalloc::MallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMallocZeroed(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+void*
+FMallocRpmalloc::TryMallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = rpaligned_zalloc(Alignment, Size);
+ }
+ else
+ {
+ NewPtr = rpaligned_zalloc(uint32_t(Size >= 16 ? 16 : 8), Size);
+ }
+
+ return NewPtr;
+}
+
+bool
+FMallocRpmalloc::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ // this is not the same as the allocation size - is that ok?
+ SizeOut = rpmalloc_usable_size(Original);
+ return true;
+}
+void
+FMallocRpmalloc::Trim(bool bTrimThreadCaches)
+{
+ ZEN_UNUSED(bTrimThreadCaches);
+}
+} // namespace zen
+#endif
diff --git a/src/zencore/memory/mallocstomp.cpp b/src/zencore/memory/mallocstomp.cpp
new file mode 100644
index 000000000..db9e1535e
--- /dev/null
+++ b/src/zencore/memory/mallocstomp.cpp
@@ -0,0 +1,283 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/mallocstomp.h>
+
+#if ZEN_WITH_MALLOC_STOMP
+
+# include <zencore/memory/align.h>
+# include <zencore/xxhash.h>
+
+# if ZEN_PLATFORM_LINUX
+# include <sys/mman.h>
+# endif
+
+# if ZEN_PLATFORM_WINDOWS
+# include <zencore/windows.h>
+# endif
+
+# if ZEN_PLATFORM_WINDOWS
+// MallocStomp can keep virtual address range reserved after memory block is freed, while releasing the physical memory.
+// This dramatically increases accuracy of use-after-free detection, but consumes significant amount of memory for the OS page table.
+// Virtual memory limit for a process on Win10 is 128 TB, which means we can afford to keep virtual memory reserved for a very long time.
+// Running Infiltrator demo consumes ~700MB of virtual address space per second.
+# define MALLOC_STOMP_KEEP_VIRTUAL_MEMORY 1
+# else
+# define MALLOC_STOMP_KEEP_VIRTUAL_MEMORY 0
+# endif
+
+// 64-bit ABIs on x86_64 expect a 16-byte alignment
+# define STOMPALIGNMENT 16U
+
+namespace zen {
+
+struct FMallocStomp::FAllocationData
+{
+ /** Pointer to the full allocation. Needed so the OS knows what to free. */
+ void* FullAllocationPointer;
+ /** Full size of the allocation including the extra page. */
+ size_t FullSize;
+ /** Size of the allocation requested. */
+ size_t Size;
+ /** Sentinel used to check for underrun. */
+ size_t Sentinel;
+
+ /** Calculate the expected sentinel value for this allocation data. */
+ size_t CalculateSentinel() const
+ {
+ XXH3_128 Xxh = XXH3_128::HashMemory(this, offsetof(FAllocationData, Sentinel));
+
+ size_t Hash;
+ memcpy(&Hash, Xxh.Hash, sizeof(Hash));
+
+ return Hash;
+ }
+};
+
+FMallocStomp::FMallocStomp(const bool InUseUnderrunMode) : PageSize(4096 /* TODO: make dynamic */), bUseUnderrunMode(InUseUnderrunMode)
+{
+}
+
+void*
+FMallocStomp::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocStomp::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ if (Size == 0U)
+ {
+ Size = 1U;
+ }
+
+ Alignment = Max<uint32_t>(Alignment, STOMPALIGNMENT);
+
+ constexpr static size_t AllocationDataSize = sizeof(FAllocationData);
+
+ const size_t AlignedSize = Alignment ? ((Size + Alignment - 1) & -(int32_t)Alignment) : Size;
+ const size_t AlignmentSize = Alignment > PageSize ? Alignment - PageSize : 0;
+ const size_t AllocFullPageSize = (AlignedSize + AlignmentSize + AllocationDataSize + PageSize - 1) & ~(PageSize - 1);
+ const size_t TotalAllocationSize = AllocFullPageSize + PageSize;
+
+# if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+ void* FullAllocationPointer = mmap(nullptr, TotalAllocationSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+# elif ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Allocate virtual address space from current block using linear allocation strategy.
+ // If there is not enough space, try to allocate new block from OS. Report OOM if block allocation fails.
+ void* FullAllocationPointer = nullptr;
+
+ {
+ RwLock::ExclusiveLockScope _(Lock);
+
+ if (VirtualAddressCursor + TotalAllocationSize <= VirtualAddressMax)
+ {
+ FullAllocationPointer = (void*)(VirtualAddressCursor);
+ }
+ else
+ {
+ const size_t ReserveSize = Max(VirtualAddressBlockSize, TotalAllocationSize);
+
+ // Reserve a new block of virtual address space that will be linearly sub-allocated
+ // We intentionally don't keep track of reserved blocks, as we never need to explicitly release them.
+ FullAllocationPointer = VirtualAlloc(nullptr, ReserveSize, MEM_RESERVE, PAGE_NOACCESS);
+
+ VirtualAddressCursor = uintptr_t(FullAllocationPointer);
+ VirtualAddressMax = VirtualAddressCursor + ReserveSize;
+ }
+
+ VirtualAddressCursor += TotalAllocationSize;
+ }
+# else
+ void* FullAllocationPointer = FPlatformMemory::BinnedAllocFromOS(TotalAllocationSize);
+# endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+
+ if (!FullAllocationPointer)
+ {
+ return nullptr;
+ }
+
+ void* ReturnedPointer = nullptr;
+
+ ZEN_ASSERT_SLOW(IsAligned(FullAllocationPointer, PageSize));
+
+ if (bUseUnderrunMode)
+ {
+ ReturnedPointer = Align((uint8_t*)FullAllocationPointer + PageSize + AllocationDataSize, Alignment);
+ void* AllocDataPointerStart = static_cast<FAllocationData*>(ReturnedPointer) - 1;
+ ZEN_ASSERT_SLOW(AllocDataPointerStart >= FullAllocationPointer);
+
+# if ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Commit physical pages to the used range, leaving the first page unmapped.
+ void* CommittedMemory = VirtualAlloc(AllocDataPointerStart, AllocationDataSize + AlignedSize, MEM_COMMIT, PAGE_READWRITE);
+ if (!CommittedMemory)
+ {
+ // Failed to allocate and commit physical memory pages.
+ return nullptr;
+ }
+ ZEN_ASSERT(CommittedMemory == AlignDown(AllocDataPointerStart, PageSize));
+# else
+ // Page protect the first page, this will cause the exception in case there is an underrun.
+ FPlatformMemory::PageProtect((uint8*)AlignDown(AllocDataPointerStart, PageSize) - PageSize, PageSize, false, false);
+# endif
+ } //-V773
+ else
+ {
+ ReturnedPointer = AlignDown((uint8_t*)FullAllocationPointer + AllocFullPageSize - AlignedSize, Alignment);
+ void* ReturnedPointerEnd = (uint8_t*)ReturnedPointer + AlignedSize;
+ ZEN_ASSERT_SLOW(IsAligned(ReturnedPointerEnd, PageSize));
+
+ void* AllocDataPointerStart = static_cast<FAllocationData*>(ReturnedPointer) - 1;
+ ZEN_ASSERT_SLOW(AllocDataPointerStart >= FullAllocationPointer);
+
+# if ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Commit physical pages to the used range, leaving the last page unmapped.
+ void* CommitPointerStart = AlignDown(AllocDataPointerStart, PageSize);
+ void* CommittedMemory = VirtualAlloc(CommitPointerStart,
+ size_t((uint8_t*)ReturnedPointerEnd - (uint8_t*)CommitPointerStart),
+ MEM_COMMIT,
+ PAGE_READWRITE);
+ if (!CommittedMemory)
+ {
+ // Failed to allocate and commit physical memory pages.
+ return nullptr;
+ }
+ ZEN_ASSERT(CommittedMemory == CommitPointerStart);
+# else
+ // Page protect the last page, this will cause the exception in case there is an overrun.
+ FPlatformMemory::PageProtect(ReturnedPointerEnd, PageSize, false, false);
+# endif
+ } //-V773
+
+ ZEN_ASSERT_SLOW(IsAligned(FullAllocationPointer, PageSize));
+ ZEN_ASSERT_SLOW(IsAligned(TotalAllocationSize, PageSize));
+ ZEN_ASSERT_SLOW(IsAligned(ReturnedPointer, Alignment));
+ ZEN_ASSERT_SLOW((uint8_t*)ReturnedPointer + AlignedSize <= (uint8_t*)FullAllocationPointer + TotalAllocationSize);
+
+ FAllocationData& AllocationData = static_cast<FAllocationData*>(ReturnedPointer)[-1];
+ AllocationData = {FullAllocationPointer, TotalAllocationSize, AlignedSize, 0};
+ AllocationData.Sentinel = AllocationData.CalculateSentinel();
+
+ return ReturnedPointer;
+}
+
+void*
+FMallocStomp::Realloc(void* InPtr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(InPtr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocStomp::TryRealloc(void* InPtr, size_t NewSize, uint32_t Alignment)
+{
+ if (NewSize == 0U)
+ {
+ Free(InPtr);
+ return nullptr;
+ }
+
+ void* ReturnPtr = nullptr;
+
+ if (InPtr != nullptr)
+ {
+ ReturnPtr = TryMalloc(NewSize, Alignment);
+
+ if (ReturnPtr != nullptr)
+ {
+ FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(reinterpret_cast<uint8_t*>(InPtr) - sizeof(FAllocationData));
+ memcpy(ReturnPtr, InPtr, Min(AllocDataPtr->Size, NewSize));
+ Free(InPtr);
+ }
+ }
+ else
+ {
+ ReturnPtr = TryMalloc(NewSize, Alignment);
+ }
+
+ return ReturnPtr;
+}
+
+void
+FMallocStomp::Free(void* InPtr)
+{
+ if (InPtr == nullptr)
+ {
+ return;
+ }
+
+ FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(InPtr);
+ AllocDataPtr--;
+
+ // Check the sentinel to verify that the allocation data is intact.
+ if (AllocDataPtr->Sentinel != AllocDataPtr->CalculateSentinel())
+ {
+ // There was a memory underrun related to this allocation.
+ ZEN_DEBUG_BREAK();
+ }
+
+# if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+ munmap(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize);
+# elif ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Unmap physical memory, but keep virtual address range reserved to catch use-after-free errors.
+
+ VirtualFree(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize, MEM_DECOMMIT);
+
+# else
+ FPlatformMemory::BinnedFreeToOS(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize);
+# endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+}
+
+bool
+FMallocStomp::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ if (Original == nullptr)
+ {
+ SizeOut = 0U;
+ }
+ else
+ {
+ FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(Original);
+ AllocDataPtr--;
+ SizeOut = AllocDataPtr->Size;
+ }
+
+ return true;
+}
+
+} // namespace zen
+
+#endif // ZEN_WITH_MALLOC_STOMP
diff --git a/src/zencore/memory/memory.cpp b/src/zencore/memory/memory.cpp
new file mode 100644
index 000000000..f236796ad
--- /dev/null
+++ b/src/zencore/memory/memory.cpp
@@ -0,0 +1,281 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/commandline.h>
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/mallocansi.h>
+#include <zencore/memory/mallocmimalloc.h>
+#include <zencore/memory/mallocrpmalloc.h>
+#include <zencore/memory/mallocstomp.h>
+#include <zencore/memory/memory.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/string.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <zencore/windows.h>
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <shellapi.h> // For command line parsing
+ZEN_THIRD_PARTY_INCLUDES_END
+#endif
+
+#if ZEN_PLATFORM_LINUX
+# include <stdio.h>
+#endif
+
+namespace zen {
+
+enum class MallocImpl
+{
+ None = 0,
+ Ansi,
+ Stomp,
+ Mimalloc,
+ Rpmalloc
+};
+
+static int
+InitGMalloc()
+{
+ MallocImpl Malloc = MallocImpl::None;
+ FMalloc* InitMalloc = GMalloc;
+
+ // Pick a default base allocator based on availability/platform
+
+#if ZEN_MIMALLOC_ENABLED
+ if (Malloc == MallocImpl::None)
+ {
+ Malloc = MallocImpl::Mimalloc;
+ }
+#endif
+
+#if ZEN_RPMALLOC_ENABLED
+ if (Malloc == MallocImpl::None)
+ {
+ Malloc = MallocImpl::Rpmalloc;
+ }
+#endif
+
+ // Process any command line overrides
+ //
+ // Note that calls can come into this function before we enter the regular main function
+ // and we can therefore not rely on the regular command line parsing for the application
+
+ using namespace std::literals;
+
+ auto ProcessMallocArg = [&](const std::string_view& Arg) {
+#if ZEN_RPMALLOC_ENABLED
+ if (Arg == "rpmalloc"sv)
+ {
+ Malloc = MallocImpl::Rpmalloc;
+ }
+#endif
+
+#if ZEN_MIMALLOC_ENABLED
+ if (Arg == "mimalloc"sv)
+ {
+ Malloc = MallocImpl::Mimalloc;
+ }
+#endif
+
+ if (Arg == "ansi"sv)
+ {
+ Malloc = MallocImpl::Ansi;
+ }
+
+ if (Arg == "stomp"sv)
+ {
+ Malloc = MallocImpl::Stomp;
+ }
+ };
+
+ constexpr std::string_view MallocOption = "--malloc="sv;
+
+ std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+ if (Arg.starts_with(MallocOption))
+ {
+ const std::string_view OptionArgs = Arg.substr(MallocOption.size());
+
+ IterateCommaSeparatedValue(OptionArgs, ProcessMallocArg);
+ }
+ };
+
+ IterateCommandlineArgs(ProcessArg);
+
+ switch (Malloc)
+ {
+#if ZEN_WITH_MALLOC_STOMP
+ case MallocImpl::Stomp:
+ GMalloc = new FMallocStomp();
+ break;
+#endif
+
+#if ZEN_RPMALLOC_ENABLED
+ case MallocImpl::Rpmalloc:
+ GMalloc = new FMallocRpmalloc();
+ break;
+#endif
+
+#if ZEN_MIMALLOC_ENABLED
+ case MallocImpl::Mimalloc:
+ GMalloc = new FMallocMimalloc();
+ break;
+#endif
+ default:
+ break;
+ }
+
+ if (GMalloc == InitMalloc)
+ {
+ GMalloc = new FMallocAnsi();
+ }
+
+ return 1;
+}
+
+void
+Memory::GCreateMalloc()
+{
+ static int InitFlag = InitGMalloc();
+}
+
+void
+Memory::Initialize()
+{
+ GCreateMalloc();
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+Memory::SystemMalloc(size_t Size)
+{
+ void* Ptr = ::malloc(Size);
+ MemoryTrace_Alloc(uint64_t(Ptr), Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ return Ptr;
+}
+
+void
+Memory::SystemFree(void* Ptr)
+{
+ MemoryTrace_Free(uint64_t(Ptr), EMemoryTraceRootHeap::SystemMemory);
+ ::free(Ptr);
+}
+
+} // namespace zen
+
+//////////////////////////////////////////////////////////////////////////
+
+static ZEN_NOINLINE bool
+InvokeNewHandler(bool NoThrow)
+{
+ std::new_handler h = std::get_new_handler();
+
+ if (!h)
+ {
+#if defined(_CPPUNWIND) || defined(__cpp_exceptions)
+ if (NoThrow == false)
+ throw std::bad_alloc();
+#else
+ ZEN_UNUSED(NoThrow);
+#endif
+ return false;
+ }
+ else
+ {
+ h();
+ return true;
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+ZEN_NOINLINE void*
+RetryNew(size_t Size, bool NoThrow)
+{
+ void* Ptr = nullptr;
+ while (!Ptr && InvokeNewHandler(NoThrow))
+ {
+ Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT);
+ }
+ return Ptr;
+}
+
+void*
+zen_new(size_t Size)
+{
+ void* Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT);
+
+ if (!Ptr) [[unlikely]]
+ {
+ const bool NoThrow = false;
+ return RetryNew(Size, NoThrow);
+ }
+
+ return Ptr;
+}
+
+void*
+zen_new_nothrow(size_t Size) noexcept
+{
+ void* Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT);
+
+ if (!Ptr) [[unlikely]]
+ {
+ const bool NoThrow = true;
+ return RetryNew(Size, NoThrow);
+ }
+
+ return Ptr;
+}
+
+void*
+zen_new_aligned(size_t Size, size_t Alignment)
+{
+ void* Ptr;
+
+ do
+ {
+ Ptr = zen::Memory::Malloc(Size, uint32_t(Alignment));
+ } while (!Ptr && InvokeNewHandler(/* NoThrow */ false));
+
+ return Ptr;
+}
+
+void*
+zen_new_aligned_nothrow(size_t Size, size_t Alignment) noexcept
+{
+ void* Ptr;
+
+ do
+ {
+ Ptr = zen::Memory::Malloc(Size, uint32_t(Alignment));
+ } while (!Ptr && InvokeNewHandler(/* NoThrow */ true));
+
+ return Ptr;
+}
+
+void
+zen_free(void* Ptr) noexcept
+{
+ zen::Memory::Free(Ptr);
+}
+
+void
+zen_free_size(void* Ptr, size_t Size) noexcept
+{
+ ZEN_UNUSED(Size);
+ zen::Memory::Free(Ptr);
+}
+
+void
+zen_free_size_aligned(void* Ptr, size_t Size, size_t Alignment) noexcept
+{
+ ZEN_UNUSED(Size, Alignment);
+ zen::Memory::Free(Ptr);
+}
+
+void
+zen_free_aligned(void* Ptr, size_t Alignment) noexcept
+{
+ ZEN_UNUSED(Alignment);
+ zen::Memory::Free(Ptr);
+}
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp
new file mode 100644
index 000000000..d860c05d1
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.cpp
@@ -0,0 +1,1059 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "callstacktrace.h"
+
+#include <zenbase/zenbase.h>
+#include <zencore/string.h>
+
+#if UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+// Platform implementations of back tracing
+////////////////////////////////////////////////////////////////////////////////
+void CallstackTrace_CreateInternal(FMalloc*);
+void CallstackTrace_InitializeInternal();
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(CallstackChannel)
+UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec)
+
+uint32 GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Create(class FMalloc* InMalloc)
+{
+ static auto InitOnce = [&] {
+ CallstackTrace_CreateInternal(InMalloc);
+ return true;
+ }();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Initialize()
+{
+ GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot();
+
+ static auto InitOnce = [&] {
+ CallstackTrace_InitializeInternal();
+ return true;
+ }();
+}
+
+} // namespace zen
+
+#endif
+
+#if ZEN_PLATFORM_WINDOWS
+# include "moduletrace.h"
+
+# include "growonlylockfreehash.h"
+
+# include <zencore/scopeguard.h>
+# include <zencore/thread.h>
+# include <zencore/trace.h>
+
+# include <atomic>
+# include <span>
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winnt.h>
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS
+# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0
+# endif
+
+// 0=off, 1=stats, 2=validation, 3=truth_compare
+# define BACKTRACE_DBGLVL 0
+
+# define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0))
+
+static bool GModulesAreInitialized = false;
+
+// This implementation is using unwind tables which results in very fast
+// stack walking. In some cases this is not suitable, and we then fall back
+// to the standard stack walking implementation.
+# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES)
+# if defined(__clang__)
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0
+# else
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1
+# endif
+# endif
+
+// stacktrace tracking using clang intrinsic __builtin_frame_address(0) doesn't work correctly on all windows platforms
+# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER)
+# if defined(__clang__)
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0
+# else
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1
+# endif
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB)
+// Initial size of the known set of callstacks
+# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE)
+// If disabled the known set will not grow. New callstacks will not be
+// reported if the set is full
+# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1
+# endif
+
+namespace zen {
+
+class FMalloc;
+
+UE_TRACE_CHANNEL_EXTERN(CallstackChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync)
+ UE_TRACE_EVENT_FIELD(uint32, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint64[], Frames)
+UE_TRACE_EVENT_END()
+
+class FCallstackTracer
+{
+public:
+ struct FBacktraceEntry
+ {
+ uint64_t Hash = 0;
+ uint32_t FrameCount = 0;
+ uint64_t* Frames;
+ };
+
+ FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {}
+
+ uint32_t AddCallstack(const FBacktraceEntry& Entry)
+ {
+ bool bAlreadyAdded = false;
+
+ // Our set implementation doesn't allow for zero entries (zero represents an empty element
+ // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation,
+ // treat it as a "1" instead, for purposes of tracking if we've seen that callstack.
+ const uint64_t Hash = FMath::Max(Entry.Hash, 1ull);
+ uint32_t Id;
+ KnownSet.Find(Hash, &Id, &bAlreadyAdded);
+ if (!bAlreadyAdded)
+ {
+ Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed);
+ // On the first callstack reserve memory up front
+ if (Id == 1)
+ {
+ KnownSet.Reserve(InitialReserveCount);
+ }
+# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE
+ // If configured as not growable, start returning unknown id's when full.
+ if (Id >= InitialReserveCount)
+ {
+ return 0;
+ }
+# endif
+ KnownSet.Emplace(Hash, Id);
+ UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel)
+ << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount);
+ }
+
+ return Id;
+ }
+
+private:
+ struct FEncounteredCallstackSetEntry
+ {
+ std::atomic_uint64_t Key;
+ std::atomic_uint32_t Value;
+
+ inline uint64 GetKey() const { return Key.load(std::memory_order_relaxed); }
+ inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); }
+ inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; }
+ inline void SetKeyValue(uint64_t InKey, uint32_t InValue)
+ {
+ Value.store(InValue, std::memory_order_release);
+ Key.store(InKey, std::memory_order_relaxed);
+ }
+ static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); }
+ static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry));
+ }
+ };
+
+ typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet;
+
+ constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024;
+ constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry);
+
+ FEncounteredCallstackSet KnownSet;
+ std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack"
+};
+
+# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+/*
+ * Windows' x64 binaries contain a ".pdata" section that describes the location
+ * and size of its functions and details on how to unwind them. The unwind
+ * information includes descriptions about a function's stack frame size and
+ * the non-volatile registers it pushes onto the stack. From this we can
+ * calculate where a call instruction wrote its return address. This is enough
+ * to walk the callstack and by caching this information it can be done
+ * efficiently.
+ *
+ * Some functions need a variable amount of stack (such as those that use
+ * alloc() for example) will use a frame pointer. Frame pointers involve saving
+ * and restoring the stack pointer in the function's prologue/epilogue. This
+ * frees the function up to modify the stack pointer arbitrarily. This
+ * significantly complicates establishing where a return address is, so this
+ * pdata scheme of walking the stack just doesn't support functions like this.
+ * Walking stops if it encounters such a function. Fortunately there are
+ * usually very few such functions, saving us from having to read and track
+ * non-volatile registers which adds a significant amount of work.
+ *
+ * A further optimisation is to assume we are only interested in methods that
+ * are part of engine or game code. As such we only build lookup tables for
+ * such modules and never accept OS or third party modules. Backtracing stops
+ * if an address is encountered which doesn't map to a known module.
+ */
+
+////////////////////////////////////////////////////////////////////////////////
+static uint32_t
+AddressToId(uintptr_t Address)
+{
+ return uint32_t(Address >> 16);
+}
+
+static uintptr_t
+IdToAddress(uint32_t Id)
+{
+    // Inverse of AddressToId: the id is the address shifted down 16 bits, so shift it back up.
+    // Must not narrow to uint32_t here - that truncates any address at or above 4 GiB,
+    // which is routine for 64-bit module bases (AddressToId encodes up to 48-bit addresses).
+    return uintptr_t(Id) << 16;
+}
+
+struct FIdPredicate
+{
+ template<class T>
+ bool operator()(uint32_t Id, const T& Item) const
+ {
+ return Id < Item.Id;
+ }
+ template<class T>
+ bool operator()(const T& Item, uint32_t Id) const
+ {
+ return Item.Id < Id;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+struct FUnwindInfo
+{
+ uint8_t Version : 3;
+ uint8_t Flags : 5;
+ uint8_t PrologBytes;
+ uint8_t NumUnwindCodes;
+ uint8_t FrameReg : 4;
+ uint8_t FrameRspBias : 4;
+};
+
+# pragma warning(push)
+# pragma warning(disable : 4200)
+struct FUnwindCode
+{
+ uint8_t PrologOffset;
+ uint8_t OpCode : 4;
+ uint8_t OpInfo : 4;
+ uint16_t Params[];
+};
+# pragma warning(pop)
+
+enum
+{
+ UWOP_PUSH_NONVOL = 0, // 1 node
+ UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes
+ UWOP_ALLOC_SMALL = 2, // 1 node
+ UWOP_SET_FPREG = 3, // 1 node
+ UWOP_SAVE_NONVOL = 4, // 2 nodes
+ UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes
+ UWOP_SAVE_XMM128 = 8, // 2 nodes
+ UWOP_SAVE_XMM128_FAR = 9, // 3 nodes
+ UWOP_PUSH_MACHFRAME = 10, // 1 node
+};
+
+////////////////////////////////////////////////////////////////////////////////
+class FBacktracer
+{
+public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ void AddModule(uintptr_t Base, const char16_t* Name);
+ void RemoveModule(uintptr_t Base);
+ uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+
+private:
+ struct FFunction
+ {
+ uint32_t Id;
+ int32_t RspBias;
+# if BACKTRACE_DBGLVL >= 2
+ uint32_t Size;
+ const FUnwindInfo* UnwindInfo;
+# endif
+ };
+
+ struct FModule
+ {
+ uint32_t Id;
+ uint32_t IdSize;
+ uint32_t NumFunctions;
+# if BACKTRACE_DBGLVL >= 1
+ uint16 NumFpTypes;
+ // uint16 *padding*
+# else
+ // uint32_t *padding*
+# endif
+ FFunction* Functions;
+ };
+
+ struct FLookupState
+ {
+ FModule Module;
+ };
+
+ struct FFunctionLookupSetEntry
+ {
+ // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function)
+ std::atomic_uint64_t Data;
+
+ inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; }
+ inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; }
+ inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; }
+ inline void SetKeyValue(uint64_t Key, int32_t Value)
+ {
+ Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed);
+ }
+ static inline uint32_t KeyHash(uint64_t Key)
+ {
+ // 64 bit pointer to 32 bit hash
+ Key = (~Key) + (Key << 21);
+ Key = Key ^ (Key >> 24);
+ Key = Key * 265;
+ Key = Key ^ (Key >> 14);
+ Key = Key * 21;
+ Key = Key ^ (Key >> 28);
+ Key = Key + (Key << 31);
+ return static_cast<uint32_t>(Key);
+ }
+ static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry));
+ }
+ };
+ typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet;
+
+ const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const;
+ static FBacktracer* Instance;
+ mutable zen::RwLock Lock;
+ FModule* Modules;
+ int32_t ModulesNum;
+ int32_t ModulesCapacity;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+# if BACKTRACE_LOCK_FREE
+ mutable FFunctionLookupSet FunctionLookups;
+ mutable bool bReentranceCheck = false;
+# endif
+# if BACKTRACE_DBGLVL >= 1
+ mutable uint32_t NumFpTruncations = 0;
+ mutable uint32_t TotalFunctions = 0;
+# endif
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer* FBacktracer::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::FBacktracer(FMalloc* InMalloc)
+: Malloc(InMalloc)
+, CallstackTracer(InMalloc)
+# if BACKTRACE_LOCK_FREE
+, FunctionLookups(InMalloc)
+# endif
+{
+# if BACKTRACE_LOCK_FREE
+ FunctionLookups.Reserve(512 * 1024); // 4 MB
+# endif
+ ModulesCapacity = 8;
+ ModulesNum = 0;
+ Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity);
+
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::~FBacktracer()
+{
+ std::span<FModule> ModulesView(Modules, ModulesNum);
+ for (FModule& Module : ModulesView)
+ {
+ Malloc->Free(Module.Functions);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer*
+FBacktracer::Get()
+{
+ return Instance;
+}
+
+bool GFullBacktraces = false;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name)
+{
+ if (!GFullBacktraces)
+ {
+ const size_t NameLen = StringLength(Name);
+ if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe")))
+ {
+ return;
+ }
+ }
+
+ const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase;
+ const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew);
+ const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader);
+
+ uint32_t NumSections = FileHeader->NumberOfSections;
+ const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader);
+
+ // Find ".pdata" section
+ uintptr_t PdataBase = 0;
+ uintptr_t PdataEnd = 0;
+ for (uint32_t i = 0; i < NumSections; ++i)
+ {
+ const IMAGE_SECTION_HEADER* Section = Sections + i;
+ if (*(uint64_t*)(Section->Name) ==
+ 0x61'74'61'64'70'2eull) // Sections names are eight bytes and zero padded. This constant is '.pdata'
+ {
+ PdataBase = ModuleBase + Section->VirtualAddress;
+ PdataEnd = PdataBase + Section->SizeOfRawData;
+ break;
+ }
+ }
+
+ if (PdataBase == 0)
+ {
+ return;
+ }
+
+ // Count the number of functions. The assumption here is that if we have got this far then there is at least one function
+ uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION);
+ if (NumFunctions == 0)
+ {
+ return;
+ }
+
+ const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase;
+ do
+ {
+ const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1;
+ if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress))
+ {
+ break;
+ }
+
+ --NumFunctions;
+ } while (NumFunctions != 0);
+
+ // Allocate some space for the module's function-to-frame-size table
+ auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions);
+ FFunction* OutTableCursor = OutTable;
+
+ // Extract frame size for each function from pdata's unwind codes.
+ uint32_t NumFpFuncs = 0;
+ for (uint32_t i = 0; i < NumFunctions; ++i)
+ {
+ const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i;
+
+ uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress;
+ const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr;
+
+ if (UnwindInfo->Version != 1)
+ {
+ /* some v2s have been seen in msvc. Always seem to be assembly
+ * routines (memset, memcpy, etc) */
+ continue;
+ }
+
+ int32_t FpInfo = 0;
+ int32_t RspBias = 0;
+
+# if BACKTRACE_DBGLVL >= 2
+ uint32_t PrologVerify = UnwindInfo->PrologBytes;
+# endif
+
+ const auto* Code = (FUnwindCode*)(UnwindInfo + 1);
+ const auto* EndCode = Code + UnwindInfo->NumUnwindCodes;
+ while (Code < EndCode)
+ {
+# if BACKTRACE_DBGLVL >= 2
+ if (Code->PrologOffset > PrologVerify)
+ {
+ PLATFORM_BREAK();
+ }
+ PrologVerify = Code->PrologOffset;
+# endif
+
+ switch (Code->OpCode)
+ {
+ case UWOP_PUSH_NONVOL:
+ RspBias += 8;
+ Code += 1;
+ break;
+
+ case UWOP_ALLOC_LARGE:
+ if (Code->OpInfo)
+ {
+ RspBias += *(uint32_t*)(Code->Params);
+ Code += 3;
+ }
+ else
+ {
+ RspBias += Code->Params[0] * 8;
+ Code += 2;
+ }
+ break;
+
+ case UWOP_ALLOC_SMALL:
+ RspBias += (Code->OpInfo * 8) + 8;
+ Code += 1;
+ break;
+
+ case UWOP_SET_FPREG:
+ // Function will adjust RSP (e.g. through use of alloca()) so it
+ // uses a frame pointer register. There's instructions like;
+ //
+ // push FRAME_REG
+ // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)]
+ // ...
+ // add rsp, rax
+ // ...
+ // sub rsp, FRAME_RSP_BIAS * 16
+ // pop FRAME_REG
+ // ret
+ //
+ // To recover the stack frame we would need to track non-volatile
+ // registers which adds a lot of overhead for a small subset of
+ // functions. Instead we'll end backtraces at these functions.
+
+ // MSB is set to detect variable sized frames that we can't proceed
+ // past when back-tracing.
+ NumFpFuncs++;
+ FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23);
+ Code += 1;
+ break;
+
+ case UWOP_PUSH_MACHFRAME:
+ RspBias = Code->OpInfo ? 48 : 40;
+ Code += 1;
+ break;
+
+ case UWOP_SAVE_NONVOL:
+ Code += 2;
+ break; /* saves are movs instead of pushes */
+ case UWOP_SAVE_NONVOL_FAR:
+ Code += 3;
+ break;
+ case UWOP_SAVE_XMM128:
+ Code += 2;
+ break;
+ case UWOP_SAVE_XMM128_FAR:
+ Code += 3;
+ break;
+
+ default:
+# if BACKTRACE_DBGLVL >= 2
+ PLATFORM_BREAK();
+# endif
+ break;
+ }
+ }
+
+ // "Chained" simply means that multiple RUNTIME_FUNCTIONs pertains to a
+ // single actual function in the .text segment.
+ bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO);
+
+    RspBias /= sizeof(void*); // stack pushes/pops in units of one machine word
+ RspBias += !bIsChained; // and one extra push for the ret address
+ RspBias |= FpInfo; // pack in details about possible frame pointer
+
+ if (bIsChained)
+ {
+ OutTableCursor[-1].RspBias += RspBias;
+# if BACKTRACE_DBGLVL >= 2
+ OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress);
+# endif
+ }
+ else
+ {
+ *OutTableCursor = {
+ FunctionTable->BeginAddress,
+ RspBias,
+# if BACKTRACE_DBGLVL >= 2
+ FunctionTable->EndAddress - FunctionTable->BeginAddress,
+ UnwindInfo,
+# endif
+ };
+
+ ++OutTableCursor;
+ }
+ }
+
+ uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage;
+ ModuleSize += 0xffff; // to align up to next 64K page. it'll get shifted by AddressToId()
+
+ FModule Module = {
+ AddressToId(ModuleBase),
+ AddressToId(ModuleSize),
+ uint32_t(uintptr_t(OutTableCursor - OutTable)),
+# if BACKTRACE_DBGLVL >= 1
+ uint16(NumFpFuncs),
+# endif
+ OutTable,
+ };
+
+ {
+ zen::RwLock::ExclusiveLockScope _(Lock);
+
+ if (ModulesNum + 1 > ModulesCapacity)
+ {
+ ModulesCapacity += 8;
+ Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity);
+ }
+ Modules[ModulesNum++] = Module;
+
+ std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; });
+ }
+
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations += NumFpFuncs;
+ TotalFunctions += NumFunctions;
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::RemoveModule(uintptr_t ModuleBase)
+{
+    // When Windows' RequestExit() is called it hard-terminates all threads except
+    // the main thread and then proceeds to unload the process' DLLs. This hard
+    // thread termination can result in dangling locked locks. Not an issue as
+    // the rule is "do not do anything multithreaded in DLL load/unload". And here
+    // we are, taking write locks during DLL unload which is, quite unsurprisingly,
+    // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us
+    // anything due to how DLLs and processes' address spaces work. So we will...
+# if defined PLATFORM_WINDOWS
+    ZEN_UNUSED(ModuleBase);
+
+    return;
+# else
+
+    zen::RwLock::ExclusiveLockScope _(Lock);
+
+    // Binary-search the sorted Modules array for this base address.
+    uint32_t ModuleId = AddressToId(ModuleBase);
+    TArrayView<FModule> ModulesView(Modules, ModulesNum);
+    int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate());
+    if (Index >= ModulesNum)
+    {
+        return;
+    }
+
+    const FModule& Module = Modules[Index];
+    if (Module.Id != ModuleId)
+    {
+        // Lower bound landed on a different module; nothing to remove.
+        return;
+    }
+
+# if BACKTRACE_DBGLVL >= 1
+    NumFpTruncations -= Module.NumFpTypes;
+    TotalFunctions -= Module.NumFunctions;
+# endif
+
+    // no code should be executing at this point so we can safely free the
+    // table knowing no one is looking at it.
+    Malloc->Free(Module.Functions);
+
+    // Compact the array over the removed slot. Iterate only while i + 1 is a
+    // valid index: the previous bound (i < ModulesNum) read one element past
+    // the end of the array on the final iteration.
+    for (SIZE_T i = Index; i + 1 < ModulesNum; i++)
+    {
+        Modules[i] = Modules[i + 1];
+    }
+
+    --ModulesNum;
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+const FBacktracer::FFunction*
+FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const
+{
+    // This function caches the previous module look-up in State. The theory
+    // here is that a series of return addresses in a backtrace often clusters
+    // around one module, letting us skip the module binary search.
+
+    FIdPredicate IdPredicate;
+
+    // Look up the module that Address belongs to. The single unsigned
+    // comparison (AddressId - Id) >= IdSize also rejects addresses below the
+    // module base, since the subtraction wraps to a large value.
+    uint32_t AddressId = AddressToId(Address);
+    if ((AddressId - State.Module.Id) >= State.Module.IdSize)
+    {
+        auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate);
+
+        if (FindIt == Modules)
+        {
+            // Address precedes the first registered module.
+            return nullptr;
+        }
+
+        State.Module = *--FindIt;
+    }
+
+    // Check that the address is within the address space of the best-found module
+    const FModule* Module = &(State.Module);
+    if ((AddressId - Module->Id) >= Module->IdSize)
+    {
+        return nullptr;
+    }
+
+    // Now we've a module we have a table of functions and their stack sizes so
+    // we can get the frame size for Address. The table is sorted by
+    // module-relative begin address, so upper_bound minus one is the
+    // candidate function containing Address.
+    uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id));
+    std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions);
+    auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate);
+    if (FindIt == begin(FuncsView))
+    {
+        return nullptr;
+    }
+
+    const FFunction* Function = &(*--FindIt);
+# if BACKTRACE_DBGLVL >= 2
+    // Debug verification: Address must fall inside the candidate function's
+    // recorded byte range.
+    if ((FuncId - Function->Id) >= Function->Size)
+    {
+        PLATFORM_BREAK();
+        return nullptr;
+    }
+# endif
+    return Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+{
+ FLookupState LookupState = {};
+ uint64_t Frames[256];
+
+ uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress;
+
+# if BACKTRACE_DBGLVL >= 3
+ uintptr_t TruthBacktrace[1024];
+ uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr);
+ uintptr_t* TruthCursor = TruthBacktrace;
+ for (; *TruthCursor != *StackPointer; ++TruthCursor)
+ ;
+# endif
+
+# if BACKTRACE_DBGLVL >= 2
+ struct
+ {
+ void* Sp;
+ void* Ip;
+ const FFunction* Function;
+ } Backtrace[1024] = {};
+ uint32_t NumBacktrace = 0;
+# endif
+
+ uint64_t BacktraceHash = 0;
+ uint32_t FrameIdx = 0;
+
+# if BACKTRACE_LOCK_FREE
+ // When running lock free, we defer the lock until a lock free function lookup fails
+ bool Locked = false;
+# else
+ FScopeLock _(&Lock);
+# endif
+ do
+ {
+ uintptr_t RetAddr = *StackPointer;
+
+ Frames[FrameIdx++] = RetAddr;
+
+ // This is a simple order-dependent LCG. Should be sufficient enough
+ BacktraceHash += RetAddr;
+ BacktraceHash *= 0x30be8efa499c249dull;
+
+# if BACKTRACE_LOCK_FREE
+ int32_t RspBias;
+ bool bIsAlreadyInTable;
+ FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable);
+ if (bIsAlreadyInTable)
+ {
+ if (RspBias < 0)
+ {
+ break;
+ }
+ else
+ {
+ StackPointer += RspBias;
+ continue;
+ }
+ }
+ if (!Locked)
+ {
+ Lock.AcquireExclusive();
+ Locked = true;
+
+ // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion
+ // when the allocation reenters the stack trace code. We need to break out of the recursion
+ // in that case, and let the allocation complete, with the assumption that we don't care
+ // about call stacks for internal allocations in the memory reporting system. The "Lock()"
+ // above will only fall through with this flag set if it's a second lock in the same thread.
+ if (bReentranceCheck)
+ {
+ break;
+ }
+ }
+# endif // BACKTRACE_LOCK_FREE
+
+ const FFunction* Function = LookupFunction(RetAddr, LookupState);
+ if (Function == nullptr)
+ {
+# if BACKTRACE_LOCK_FREE
+ // LookupFunction fails when modules are not yet registered. In this case, we do not want the address
+ // to be added to the lookup map, but to retry the lookup later when modules are properly registered.
+ if (GModulesAreInitialized)
+ {
+ bReentranceCheck = true;
+ auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
+ FunctionLookups.Emplace(RetAddr, -1);
+ }
+# endif
+ break;
+ }
+
+# if BACKTRACE_LOCK_FREE
+ {
+ // This conversion improves probing performance for the hash set. Additionally it is critical
+ // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map.
+ int32_t StoreBias = Function->RspBias < 0 ? -1 : Function->RspBias;
+ bReentranceCheck = true;
+ auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
+ FunctionLookups.Emplace(RetAddr, StoreBias);
+ }
+# endif
+
+# if BACKTRACE_DBGLVL >= 2
+ if (NumBacktrace < 1024)
+ {
+ Backtrace[NumBacktrace++] = {
+ StackPointer,
+ (void*)RetAddr,
+ Function,
+ };
+ }
+# endif
+
+ if (Function->RspBias < 0)
+ {
+ // This is a frame with a variable-sized stack pointer. We don't
+ // track enough information to proceed.
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations++;
+# endif
+ break;
+ }
+
+ StackPointer += Function->RspBias;
+ }
+    // Truncate callstacks longer than ZEN_ARRAY_COUNT(Frames)
+ while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames));
+
+ // Build the backtrace entry for submission
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ BacktraceEntry.Hash = BacktraceHash;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = Frames;
+
+# if BACKTRACE_DBGLVL >= 3
+ for (uint32_t i = 0; i < NumBacktrace; ++i)
+ {
+ if ((void*)TruthCursor[i] != Backtrace[i].Ip)
+ {
+ PLATFORM_BREAK();
+ break;
+ }
+ }
+# endif
+
+# if BACKTRACE_LOCK_FREE
+ if (Locked)
+ {
+ Lock.ReleaseExclusive();
+ }
+# endif
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+}
+}
+
+# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+ ////////////////////////////////////////////////////////////////////////////////
+ class FBacktracer
+ {
+ public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ inline uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+ uint32_t GetBacktraceId(uint64_t ReturnAddress);
+ void AddModule(uintptr_t Base, const char16_t* Name) {}
+ void RemoveModule(uintptr_t Base) {}
+
+ private:
+ static FBacktracer* Instance;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Instance = nullptr;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::~FBacktracer() {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Get() { return Instance; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+ {
+ const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress;
+ return GetBacktraceId(ReturnAddress);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress)
+ {
+# if !UE_BUILD_SHIPPING
+ uint64_t StackFrames[256];
+ int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames));
+ if (NumStackFrames > 0)
+ {
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ uint64_t BacktraceId = 0;
+ uint32_t FrameIdx = 0;
+ bool bUseAddress = false;
+ for (int32_t Index = 0; Index < NumStackFrames; Index++)
+ {
+ if (!bUseAddress)
+ {
+ // start using backtrace only after ReturnAddress
+ if (StackFrames[Index] == (uint64_t)ReturnAddress)
+ {
+ bUseAddress = true;
+ }
+ }
+ if (bUseAddress || NumStackFrames == 1)
+ {
+ uint64_t RetAddr = StackFrames[Index];
+ StackFrames[FrameIdx++] = RetAddr;
+
+ // This is a simple order-dependent LCG. Should be sufficient enough
+ BacktraceId += RetAddr;
+ BacktraceId *= 0x30be8efa499c249dull;
+ }
+ }
+
+ // Save the collected id
+ BacktraceEntry.Hash = BacktraceId;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = StackFrames;
+
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+ }
+# endif
+
+ return 0;
+ }
+
+}
+
+# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_CreateInternal(FMalloc* Malloc)
+{
+ if (FBacktracer::Get() != nullptr)
+ {
+ return;
+ }
+
+ // Allocate, construct and intentionally leak backtracer
+ void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer));
+ new (Alloc) FBacktracer(Malloc);
+
+ Modules_Create(Malloc);
+ Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) {
+ bLoad ? FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522
+ : FBacktracer::Get()->RemoveModule(uintptr_t(Module));
+ });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_InitializeInternal()
+{
+ Modules_Initialize();
+ GModulesAreInitialized = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+CallstackTrace_GetCurrentId()
+{
+ if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel))
+ {
+ return 0;
+ }
+
+ void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING();
+ if (FBacktracer* Instance = FBacktracer::Get())
+ {
+# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER
+ return Instance->GetBacktraceId(StackAddress);
+# else
+ return Instance->GetBacktraceId((uint64_t)StackAddress);
+# endif
+ }
+
+ return 0;
+}
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/memtrack/callstacktrace.h b/src/zencore/memtrack/callstacktrace.h
new file mode 100644
index 000000000..3e191490b
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.h
@@ -0,0 +1,151 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/trace.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <intrin.h>
+
+# define PLATFORM_RETURN_ADDRESS() _ReturnAddress()
+# define PLATFORM_RETURN_ADDRESS_POINTER() _AddressOfReturnAddress()
+# define PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING PLATFORM_RETURN_ADDRESS_POINTER
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+#if !defined(UE_CALLSTACK_TRACE_ENABLED)
+# if UE_TRACE_ENABLED
+# if ZEN_PLATFORM_WINDOWS
+# define UE_CALLSTACK_TRACE_ENABLED 1
+# endif
+# endif
+#endif
+
+#if !defined(UE_CALLSTACK_TRACE_ENABLED)
+# define UE_CALLSTACK_TRACE_ENABLED 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+#if UE_CALLSTACK_TRACE_ENABLED
+
+# include "platformtls.h"
+
+namespace zen {
+
+/**
+ * Creates callstack tracing.
+ * @param Malloc Allocator instance to use.
+ */
+void CallstackTrace_Create(class FMalloc* Malloc);
+
+/**
+ * Initializes callstack tracing. On some platforms this has to be delayed due to initialization order.
+ */
+void CallstackTrace_Initialize();
+
+/**
+ * Capture the current callstack, and trace the definition if it has not already been encountered. The returned value
+ * can be used in trace events and be resolved in analysis.
+ * @return Unique id identifying the current callstack.
+ */
+uint32_t CallstackTrace_GetCurrentId();
+
+/**
+ * Callstack Trace Scoped Macro to avoid resolving the full callstack
+ * can be used when some external libraries are not compiled with frame pointers
+ * preventing us to resolve it without crashing. Instead the callstack will be
+ * only the caller address.
+ */
+# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() FCallStackTraceLimitResolveScope PREPROCESSOR_JOIN(FCTLMScope, __LINE__)
+
+extern uint32_t GCallStackTracingTlsSlotIndex;
+
+/**
+ * @return the fallback callstack address
+ */
+inline void*
+CallstackTrace_GetFallbackPlatformReturnAddressData()
+{
+ if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+ return FPlatformTLS::GetTlsValue(GCallStackTracingTlsSlotIndex);
+ else
+ return nullptr;
+}
+
+/**
+ * @return Needs full callstack resolve
+ */
+inline bool
+CallstackTrace_ResolveFullCallStack()
+{
+ return CallstackTrace_GetFallbackPlatformReturnAddressData() == nullptr;
+}
+
+/*
+ * Callstack Trace scope for override CallStack
+ */
+class FCallStackTraceLimitResolveScope
+{
+public:
+ ZEN_FORCENOINLINE FCallStackTraceLimitResolveScope()
+ {
+ if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+ {
+ FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING());
+ }
+ }
+
+ ZEN_FORCENOINLINE ~FCallStackTraceLimitResolveScope()
+ {
+ if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+ {
+ FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, nullptr);
+ }
+ }
+};
+
+} // namespace zen
+
+#else // UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+inline void
+CallstackTrace_Create(class FMalloc* /*Malloc*/)
+{
+}
+
+inline void
+CallstackTrace_Initialize()
+{
+}
+
+inline uint32_t
+CallstackTrace_GetCurrentId()
+{
+ return 0;
+}
+
+inline void*
+CallstackTrace_GetCurrentReturnAddressData()
+{
+ return nullptr;
+}
+
+inline void*
+CallstackTrace_GetFallbackPlatformReturnAddressData()
+{
+ return nullptr;
+}
+
+inline bool
+CallstackTrace_ResolveFullCallStack()
+{
+ return true;
+}
+
+# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE()
+
+} // namespace zen
+
+#endif // UE_CALLSTACK_TRACE_ENABLED
diff --git a/src/zencore/memtrack/growonlylockfreehash.h b/src/zencore/memtrack/growonlylockfreehash.h
new file mode 100644
index 000000000..d6ff4fc32
--- /dev/null
+++ b/src/zencore/memtrack/growonlylockfreehash.h
@@ -0,0 +1,255 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/intmath.h>
+#include <zencore/thread.h>
+
+#include <zencore/memory/fmalloc.h>
+
+#include <atomic>
+
+namespace zen {
+
+// Hash table with fast lock free reads, that only supports insertion of items, and no modification of
+// values. KeyType must be an integer. EntryType should be a POD with an identifiable "empty" state
+// that can't occur in the table, and include the following member functions:
+//
+// KeyType GetKey() const; // Get the key from EntryType
+// ValueType GetValue() const; // Get the value from EntryType
+// bool IsEmpty() const; // Query whether EntryType is empty
+// void SetKeyValue(KeyType Key, ValueType Value); // Write key and value into EntryType (ATOMICALLY! See below)
+// static uint32 KeyHash(KeyType Key); // Convert Key to more well distributed hash
+// static void ClearEntries(EntryType* Entries, int32 EntryCount); // Fill an array of entries with empty values
+//
+// The function "SetKeyValue" must be multi-thread safe when writing new items! This means writing the
+// Key last and atomically, or writing the entire EntryType in a single write (say if the key and value
+// are packed into a single integer word). Inline is recommended, since these functions are called a
+// lot in the inner loop of the algorithm. A simple implementation of "KeyHash" can just return the
+// Key (if it's already reasonable as a hash), or mix the bits if better distribution is required. A
+// simple implementation of "ClearEntries" can just be a memset, if zero represents an empty entry.
+//
+// A set can be approximated by making "GetValue" a nop function, and just paying attention to the bool
+// result from FindEntry, although you do need to either reserve a certain Key as invalid, or add
+// space to store a valid flag as the Value. This class should only be used for small value types, as
+// the values are embedded into the hash table, and not stored separately.
+//
+// Writes are implemented using a lock -- it would be possible to make writes lock free (or lock free
+// when resizing doesn't occur), but it adds complexity. If we were to go that route, it would make
+// sense to create a fully generic lock free set, which would be much more involved to implement and
+// validate than this simple class, and might also offer somewhat worse read perf. Lock free containers
+// that support item removal either need additional synchronization overhead on readers, so writers can
+// tell if a reader is active and spin, or need graveyard markers and a garbage collection pass called
+// periodically, which makes it no longer a simple standalone container.
+//
+// Lock free reads are accomplished by the reader atomically pulling the hash table pointer from the
+// class. The hash table is self contained, with its size stored in the table itself, and hash tables
+// are not freed until the class's destruction. So if the table needs to be reallocated due to a write,
+// active readers will still have valid memory. This does mean that tables leak, but worst case, you
+// end up with half of the memory being wasted. It would be possible to garbage collect the excess
+// tables, but you'd need some kind of global synchronization to make sure no readers are active.
+//
+// Besides cleanup of wasted tables, it might be useful to provide a function to clear a table. This
+// would involve clearing the Key for all the elements in the table (but leaving the memory allocated),
+// and can be done safely with active readers. It's not possible to safely remove individual items due
+// to the need to potentially move other items, which would break an active reader that has already
+// searched past a moved item. But in the case of removing all items, we don't care when a reader fails,
+// it's expected that eventually all readers will fail, regardless of where they are searching. A clear
+// function could be useful if a lot of the data you are caching is no longer used, and you want to
+// reset the cache.
+//
+template<typename EntryType, typename KeyType, typename ValueType>
+class TGrowOnlyLockFreeHash
+{
+public:
+ TGrowOnlyLockFreeHash(FMalloc* InMalloc) : Malloc(InMalloc), HashTable(nullptr) {}
+
+ ~TGrowOnlyLockFreeHash()
+ {
+ FHashHeader* HashTableNext;
+ for (FHashHeader* HashTableCurrent = HashTable; HashTableCurrent; HashTableCurrent = HashTableNext)
+ {
+ HashTableNext = HashTableCurrent->Next;
+
+ Malloc->Free(HashTableCurrent);
+ }
+ }
+
+ /**
+ * Preallocate the hash table to a certain size
+ * @param Count - Number of EntryType elements to allocate
+ * @warning Can only be called once, and only before any items have been added!
+ */
+ void Reserve(uint32_t Count)
+ {
+ zen::RwLock::ExclusiveLockScope _(WriteCriticalSection);
+ ZEN_ASSERT(HashTable.load(std::memory_order_relaxed) == nullptr);
+
+ if (Count <= 0)
+ {
+ Count = DEFAULT_INITIAL_SIZE;
+ }
+ Count = uint32_t(zen::NextPow2(Count));
+ FHashHeader* HashTableLocal = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (Count - 1) * sizeof(EntryType));
+
+ HashTableLocal->Next = nullptr;
+ HashTableLocal->TableSize = Count;
+ HashTableLocal->Used = 0;
+ EntryType::ClearEntries(HashTableLocal->Elements, Count);
+
+ HashTable.store(HashTableLocal, std::memory_order_release);
+ }
+
+ /**
+ * Find an entry in the hash table
+ * @param Key - Key to search for
+ * @param OutValue - Memory location to write result value to. Left unmodified if Key isn't found.
+ * @param bIsAlreadyInTable - Optional result for whether key was found in table.
+ */
+ void Find(KeyType Key, ValueType* OutValue, bool* bIsAlreadyInTable = nullptr) const
+ {
+ FHashHeader* HashTableLocal = HashTable.load(std::memory_order_acquire);
+ if (HashTableLocal)
+ {
+ uint32_t TableMask = HashTableLocal->TableSize - 1;
+
+ // Linear probing
+ for (uint32_t TableIndex = EntryType::KeyHash(Key) & TableMask; !HashTableLocal->Elements[TableIndex].IsEmpty();
+ TableIndex = (TableIndex + 1) & TableMask)
+ {
+ if (HashTableLocal->Elements[TableIndex].GetKey() == Key)
+ {
+ if (OutValue)
+ {
+ *OutValue = HashTableLocal->Elements[TableIndex].GetValue();
+ }
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = true;
+ }
+ return;
+ }
+ }
+ }
+
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = false;
+ }
+ }
+
+ /**
+ * Add an entry with the given Key to the hash table, will do nothing if the item already exists
+ * @param Key - Key to add
+ * @param Value - Value to add for key
+ * @param bIsAlreadyInTable -- Optional result for whether item was already in table
+ */
+ void Emplace(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr)
+ {
+ zen::RwLock::ExclusiveLockScope _(WriteCriticalSection);
+
+ // After locking, check if the item is already in the hash table.
+ ValueType ValueIgnore;
+ bool bFindResult;
+ Find(Key, &ValueIgnore, &bFindResult);
+ if (bFindResult == true)
+ {
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = true;
+ }
+ return;
+ }
+
+ // Check if there is space in the hash table for a new item. We resize when the hash
+ // table gets half full or more. @todo: allow client to specify max load factor?
+ FHashHeader* HashTableLocal = HashTable;
+
+ if (!HashTableLocal || (HashTableLocal->Used >= HashTableLocal->TableSize / 2))
+ {
+ int32_t GrowCount = HashTableLocal ? HashTableLocal->TableSize * 2 : DEFAULT_INITIAL_SIZE;
+ FHashHeader* HashTableGrow = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (GrowCount - 1) * sizeof(EntryType));
+
+ HashTableGrow->Next = HashTableLocal;
+ HashTableGrow->TableSize = GrowCount;
+ HashTableGrow->Used = 0;
+ EntryType::ClearEntries(HashTableGrow->Elements, GrowCount);
+
+ if (HashTableLocal)
+ {
+ // Copy existing elements from the old table to the new table
+ for (int32_t TableIndex = 0; TableIndex < HashTableLocal->TableSize; TableIndex++)
+ {
+ EntryType& Entry = HashTableLocal->Elements[TableIndex];
+ if (!Entry.IsEmpty())
+ {
+ HashInsertInternal(HashTableGrow, Entry.GetKey(), Entry.GetValue());
+ }
+ }
+ }
+
+ HashTableLocal = HashTableGrow;
+ HashTable.store(HashTableGrow, std::memory_order_release);
+ }
+
+ // Then add our new item
+ HashInsertInternal(HashTableLocal, Key, Value);
+
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = false;
+ }
+ }
+
+ void FindOrAdd(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr)
+ {
+ // Attempt to find the item lock free, before calling "Emplace", which locks the container
+ bool bFindResult;
+ ValueType IgnoreResult;
+ Find(Key, &IgnoreResult, &bFindResult);
+ if (bFindResult)
+ {
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = true;
+ }
+ return;
+ }
+
+ Emplace(Key, Value, bIsAlreadyInTable);
+ }
+
+private:
+ struct FHashHeader
+ {
+ FHashHeader* Next; // Old buffers are stored in a linked list for cleanup
+ int32_t TableSize;
+ int32_t Used;
+ EntryType Elements[1]; // Variable sized
+ };
+
+ FMalloc* Malloc;
+ std::atomic<FHashHeader*> HashTable;
+ zen::RwLock WriteCriticalSection;
+
+ static constexpr int32_t DEFAULT_INITIAL_SIZE = 1024;
+
+ static void HashInsertInternal(FHashHeader* HashTableLocal, KeyType Key, ValueType Value)
+ {
+ int32_t TableMask = HashTableLocal->TableSize - 1;
+
+ // Linear probing
+ for (int32_t TableIndex = EntryType::KeyHash(Key) & TableMask;; TableIndex = (TableIndex + 1) & TableMask)
+ {
+ if (HashTableLocal->Elements[TableIndex].IsEmpty())
+ {
+ HashTableLocal->Elements[TableIndex].SetKeyValue(Key, Value);
+ HashTableLocal->Used++;
+ break;
+ }
+ }
+ }
+};
+
+} // namespace zen
diff --git a/src/zencore/memtrack/memorytrace.cpp b/src/zencore/memtrack/memorytrace.cpp
new file mode 100644
index 000000000..b147aee91
--- /dev/null
+++ b/src/zencore/memtrack/memorytrace.cpp
@@ -0,0 +1,829 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "callstacktrace.h"
+#include "tracemalloc.h"
+#include "vatrace.h"
+
+#include <zencore/commandline.h>
+#include <zencore/enumflags.h>
+#include <zencore/guardvalue.h>
+#include <zencore/intmath.h>
+#include <zencore/string.h>
+#include <zencore/trace.h>
+
+#include <string.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <shellapi.h>
+#endif
+
+class FMalloc;
+
+#if UE_TRACE_ENABLED
+namespace zen {
+UE_TRACE_CHANNEL_DEFINE(MemAllocChannel, "Memory allocations", true)
+}
+#endif
+
+#if UE_MEMORY_TRACE_ENABLED
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace zen {
+
+void MemoryTrace_InitTags(FMalloc*);
+void MemoryTrace_EnableTracePump();
+
+} // namespace zen
+
+////////////////////////////////////////////////////////////////////////////////
+namespace {
+// Controls how often time markers are emitted (default: every 4095 allocations).
+constexpr uint32_t MarkerSamplePeriod = (4 << 10) - 1;
+
+// Number of shifted bits to SizeLower
+constexpr uint32_t SizeShift = 3;
+
+// Counter to track when time marker is emitted
+std::atomic<uint32_t> GMarkerCounter(0);
+
+// If enabled also pumps the Trace system itself. Used on process shutdown
+// when worker thread has been killed, but memory events still occur.
+bool GDoPumpTrace;
+
+// Temporarily disables any internal operation that causes allocations. Used to
+// avoid recursive behaviour when memory tracing needs to allocate memory through
+// TraceMalloc.
+thread_local bool GDoNotAllocateInTrace;
+
+// Set on initialization; on some platforms we hook allocator functions very early
+// before Trace has the ability to allocate memory.
+bool GTraceAllowed;
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+namespace UE { namespace Trace {
+ TRACELOG_API void Update();
+}} // namespace UE::Trace
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_EVENT_BEGIN(Memory, Init, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(uint64_t, PageSize) // new in UE 5.5
+ UE_TRACE_EVENT_FIELD(uint32_t, MarkerPeriod)
+ UE_TRACE_EVENT_FIELD(uint8, Version)
+ UE_TRACE_EVENT_FIELD(uint8, MinAlignment)
+ UE_TRACE_EVENT_FIELD(uint8, SizeShift)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Marker)
+ UE_TRACE_EVENT_FIELD(uint64_t, Cycle)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Alloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, AllocSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, AllocVideo)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Free)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, FreeSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, FreeVideo)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocAlloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocAllocSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocFree)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocFreeSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemorySwapOp)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address) // page fault real address
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, CompressedSize)
+ UE_TRACE_EVENT_FIELD(uint8, SwapOp)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapSpec, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(HeapId, Id)
+ UE_TRACE_EVENT_FIELD(HeapId, ParentId)
+ UE_TRACE_EVENT_FIELD(uint16, Flags)
+ UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapMarkAlloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint16, Flags)
+ UE_TRACE_EVENT_FIELD(HeapId, Heap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapUnmarkAlloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(HeapId, Heap)
+UE_TRACE_EVENT_END()
+
+// If the layout of the above events is changed, bump this version number.
+// version 1: Initial version (UE 5.0, UE 5.1)
+// version 2: Added CallstackId for Free events and also for HeapMarkAlloc, HeapUnmarkAlloc events (UE 5.2).
+constexpr uint8 MemoryTraceVersion = 2;
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocator decorator: forwards Malloc/Realloc/Free to an inner FMalloc and
+// emits a memory trace event (MemoryTrace_*) for each operation.
+class FMallocWrapper : public FMalloc
+{
+public:
+ FMallocWrapper(FMalloc* InMalloc);
+
+private:
+ // Packed per-allocation bookkeeping (tag/bias/size in one 64-bit word).
+ // NOTE(review): FCookie is not referenced anywhere in this file - confirm
+ // it is still needed.
+ struct FCookie
+ {
+ uint64_t Tag : 16;
+ uint64_t Bias : 8;
+ uint64_t Size : 40;
+ };
+
+ // Returns the alignment the allocation will effectively have (see definition).
+ static uint32_t GetActualAlignment(SIZE_T Size, uint32_t Alignment);
+
+ virtual void* Malloc(SIZE_T Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Address) override;
+ virtual bool GetAllocationSize(void* Address, SIZE_T& SizeOut) override { return InnerMalloc->GetAllocationSize(Address, SizeOut); }
+ virtual void OnMallocInitialized() override { InnerMalloc->OnMallocInitialized(); }
+
+ FMalloc* InnerMalloc; // Wrapped allocator that performs the real work
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Stores the wrapped allocator; all tracing happens in the overridden methods.
+FMallocWrapper::FMallocWrapper(FMalloc* InMalloc) : InnerMalloc(InMalloc)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Computes the alignment an allocation of Size will effectively get: at least
+// 8 bytes, 16 bytes once Size >= 16, or the caller-requested alignment if larger.
+uint32_t
+FMallocWrapper::GetActualAlignment(SIZE_T Size, uint32_t Alignment)
+{
+ // Defaults; if size is < 16 then alignment is 8 else 16.
+ uint32_t DefaultAlignment = 8 << uint32_t(Size >= 16);
+ return (Alignment < DefaultAlignment) ? DefaultAlignment : Alignment;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocates through the inner allocator, then traces the allocation using the
+// effective alignment (the inner allocator still receives the raw request).
+void*
+FMallocWrapper::Malloc(SIZE_T Size, uint32_t Alignment)
+{
+ uint32_t ActualAlignment = GetActualAlignment(Size, Alignment);
+ void* Address = InnerMalloc->Malloc(Size, Alignment);
+
+ MemoryTrace_Alloc((uint64_t)Address, Size, ActualAlignment);
+
+ return Address;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Reallocates through the inner allocator, tracing the operation as a
+// ReallocFree/ReallocAlloc pair. A null PrevAddress degrades to Malloc so
+// the trace never sees a "realloc" of nothing.
+void*
+FMallocWrapper::Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment)
+{
+ // This simplifies things and means reallocs trace events are true reallocs
+ if (PrevAddress == nullptr)
+ {
+ return Malloc(NewSize, Alignment);
+ }
+
+ MemoryTrace_ReallocFree((uint64_t)PrevAddress);
+
+ void* RetAddress = InnerMalloc->Realloc(PrevAddress, NewSize, Alignment);
+
+ Alignment = GetActualAlignment(NewSize, Alignment);
+ MemoryTrace_ReallocAlloc((uint64_t)RetAddress, NewSize, Alignment);
+
+ return RetAddress;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Traces then frees through the inner allocator. Null frees are ignored
+// entirely (no trace event emitted).
+void
+FMallocWrapper::Free(void* Address)
+{
+ if (Address == nullptr)
+ {
+ return;
+ }
+
+ MemoryTrace_Free((uint64_t)Address);
+
+ void* InnerAddress = Address;
+
+ return InnerMalloc->Free(InnerAddress);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Holds storage for a T whose destructor is intentionally never run - used for
+// allocator singletons that must outlive static deinitialization.
+// NOTE(review): bIsConstructed is only meaningful for instances with static
+// storage duration (zero-initialized); both uses in this file are static.
+template<class T>
+class alignas(alignof(T)) FUndestructed
+{
+public:
+ // Placement-constructs the T in Buffer. Does not guard against double calls.
+ template<typename... ArgTypes>
+ void Construct(ArgTypes... Args)
+ {
+ ::new (Buffer) T(Args...);
+ bIsConstructed = true;
+ }
+
+ bool IsConstructed() const { return bIsConstructed; }
+
+ T* operator&() { return (T*)Buffer; }
+ T* operator->() { return (T*)Buffer; }
+
+protected:
+ uint8 Buffer[sizeof(T)];
+ bool bIsConstructed;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+static FUndestructed<FTraceMalloc> GTraceMalloc;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scans the raw process command line for "--trace=..." options and returns the
+// union of requested memory-trace features. Recognized comma-separated values:
+// memalloc, callstack, memtag, memory (full), memory_light.
+static EMemoryTraceInit
+MemoryTrace_ShouldEnable()
+{
+ EMemoryTraceInit Mode = EMemoryTraceInit::Disabled;
+
+ // Process any command line trace options
+ //
+ // Note that calls can come into this function before we enter the regular main function
+ // and we can therefore not rely on the regular command line parsing for the application
+
+ using namespace std::literals;
+
+ auto ProcessTraceArg = [&](const std::string_view& Arg) {
+ if (Arg == "memalloc"sv)
+ {
+ Mode |= EMemoryTraceInit::AllocEvents;
+ }
+ else if (Arg == "callstack"sv)
+ {
+ Mode |= EMemoryTraceInit::Callstacks;
+ }
+ else if (Arg == "memtag"sv)
+ {
+ Mode |= EMemoryTraceInit::Tags;
+ }
+ else if (Arg == "memory"sv)
+ {
+ Mode |= EMemoryTraceInit::Full;
+ }
+ else if (Arg == "memory_light"sv)
+ {
+ Mode |= EMemoryTraceInit::Light;
+ }
+ };
+
+ constexpr std::string_view TraceOption = "--trace="sv;
+
+ std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+ if (Arg.starts_with(TraceOption))
+ {
+ const std::string_view OptionArgs = Arg.substr(TraceOption.size());
+
+ IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg);
+ }
+ };
+
+ IterateCommandlineArgs(ProcessArg);
+
+ return Mode;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Wraps InMalloc in FMallocWrapper when allocation-event tracing is requested,
+// bringing up tag tracing and callstack tracing as needed. Returns InMalloc
+// unchanged when allocation events are disabled.
+FMalloc*
+MemoryTrace_CreateInternal(FMalloc* InMalloc, EMemoryTraceInit Mode)
+{
+ using namespace zen;
+
+ // If allocation events are not desired we don't need to do anything, even
+ // if user has enabled only callstacks it will be enabled later.
+ if (!EnumHasAnyFlags(Mode, EMemoryTraceInit::AllocEvents))
+ {
+ return InMalloc;
+ }
+
+ // Some OSes (i.e. Windows) will terminate all threads except the main
+ // one as part of static deinit. However we may receive more memory
+ // trace events that would get lost as Trace's worker thread has been
+ // terminated. To flush the last remaining memory events, Trace needs
+ // to be updated, which we do in response to memory events.
+ // We'll use an atexit call to know when Trace is probably no longer
+ // getting ticked.
+ atexit([]() { MemoryTrace_EnableTracePump(); });
+
+ GTraceMalloc.Construct(InMalloc);
+
+ // Both tag and callstack tracing need to use the wrapped trace malloc
+ // so we can break out tracing memory overhead (and not cause recursive behaviour).
+ if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Tags))
+ {
+ MemoryTrace_InitTags(&GTraceMalloc);
+ }
+
+ if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Callstacks))
+ {
+ CallstackTrace_Create(&GTraceMalloc);
+ }
+
+ // Never destructed: allocator must stay usable through static deinit.
+ static FUndestructed<FMallocWrapper> SMallocWrapper;
+ SMallocWrapper.Construct(InMalloc);
+
+ return &SMallocWrapper;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convenience overload: derives the trace mode from the command line.
+FMalloc*
+MemoryTrace_CreateInternal(FMalloc* InMalloc)
+{
+ const EMemoryTraceInit Mode = MemoryTrace_ShouldEnable();
+ return MemoryTrace_CreateInternal(InMalloc, Mode);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Public entry point. When tracing was actually enabled (i.e. a wrapper
+// allocator was returned), also installs the Win32 virtual-memory hooks on
+// platforms that support them.
+FMalloc*
+MemoryTrace_Create(FMalloc* InMalloc)
+{
+ FMalloc* OutMalloc = MemoryTrace_CreateInternal(InMalloc);
+
+ if (OutMalloc != InMalloc)
+ {
+# if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+ FVirtualWinApiHooks::Initialize(false);
+# endif
+ }
+
+ return OutMalloc;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Enables event emission and writes the one-time trace preamble: the Init
+// event describing the stream format (version, marker period, alignment,
+// size packing) plus the two reserved root heap specs (system and video).
+void
+MemoryTrace_Initialize()
+{
+ // At this point we initialized the system to allow tracing.
+ GTraceAllowed = true;
+
+ const int MIN_ALIGNMENT = 8;
+
+ UE_TRACE_LOG(Memory, Init, MemAllocChannel)
+ << Init.PageSize(4096) << Init.MarkerPeriod(MarkerSamplePeriod + 1) << Init.Version(MemoryTraceVersion)
+ << Init.MinAlignment(uint8(MIN_ALIGNMENT)) << Init.SizeShift(uint8(SizeShift));
+
+ // Root heaps must land on their reserved ids; asserted below.
+ const HeapId SystemRootHeap = MemoryTrace_RootHeapSpec(u"System memory");
+ ZEN_ASSERT(SystemRootHeap == EMemoryTraceRootHeap::SystemMemory);
+ const HeapId VideoRootHeap = MemoryTrace_RootHeapSpec(u"Video memory");
+ ZEN_ASSERT(VideoRootHeap == EMemoryTraceRootHeap::VideoMemory);
+
+ static_assert((1 << SizeShift) - 1 <= MIN_ALIGNMENT, "Not enough bits to pack size fields");
+
+# if !UE_MEMORY_TRACE_LATE_INIT
+ // On some platforms callstack initialization cannot happen this early in the process. It is initialized
+ // in other locations when UE_MEMORY_TRACE_LATE_INIT is defined. Until that point allocations cannot have
+ // callstacks.
+ CallstackTrace_Initialize();
+# endif
+}
+
+// Stops any further memory trace event emission.
+void
+MemoryTrace_Shutdown()
+{
+ // Disable any further activity
+ GTraceAllowed = false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns whether memory trace events are currently being emitted.
+bool
+MemoryTrace_IsActive()
+{
+ return GTraceAllowed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// After this call, memory events also pump the Trace system itself (used once
+// the Trace worker thread has been killed during process shutdown).
+void
+MemoryTrace_EnableTracePump()
+{
+ GDoPumpTrace = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Called after every traced memory event: emits a time marker every
+// (MarkerSamplePeriod + 1) events and, during shutdown (GDoPumpTrace),
+// pumps the Trace system in lieu of its dead worker thread.
+void
+MemoryTrace_UpdateInternal()
+{
+ const uint32_t TheCount = GMarkerCounter.fetch_add(1, std::memory_order_relaxed);
+ if ((TheCount & MarkerSamplePeriod) == 0)
+ {
+ UE_TRACE_LOG(Memory, Marker, MemAllocChannel) << Marker.Cycle(UE::Trace::Private::TimeGetTimestamp());
+ }
+
+ if (GDoPumpTrace)
+ {
+ UE::Trace::Update();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits an Alloc event for the given root heap. The low SizeShift bits of
+// Size are packed alongside the alignment exponent so the 32-bit Size field
+// can represent a larger range. ExternalCallstackId == 0 means "capture the
+// current callstack", unless we are inside trace-owned allocation code
+// (GDoNotAllocateInTrace), in which case callstack 0 is recorded.
+void
+MemoryTrace_Alloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ // System and video root heaps use dedicated (smaller) events.
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, AllocSystem, MemAllocChannel)
+ << AllocSystem.Address(uint64_t(Address)) << AllocSystem.CallstackId(CallstackId)
+ << AllocSystem.Size(uint32_t(Size >> SizeShift)) << AllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+ break;
+ }
+
+ case EMemoryTraceRootHeap::VideoMemory:
+ {
+ UE_TRACE_LOG(Memory, AllocVideo, MemAllocChannel)
+ << AllocVideo.Address(uint64_t(Address)) << AllocVideo.CallstackId(CallstackId)
+ << AllocVideo.Size(uint32_t(Size >> SizeShift)) << AllocVideo.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, Alloc, MemAllocChannel)
+ << Alloc.Address(uint64_t(Address)) << Alloc.CallstackId(CallstackId) << Alloc.Size(uint32_t(Size >> SizeShift))
+ << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits a Free event for the given root heap (dedicated event types for the
+// system and video root heaps). Callstack id selection matches MemoryTrace_Alloc.
+void
+MemoryTrace_Free(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, FreeSystem, MemAllocChannel)
+ << FreeSystem.Address(uint64_t(Address)) << FreeSystem.CallstackId(CallstackId);
+ break;
+ }
+ case EMemoryTraceRootHeap::VideoMemory:
+ {
+ UE_TRACE_LOG(Memory, FreeVideo, MemAllocChannel)
+ << FreeVideo.Address(uint64_t(Address)) << FreeVideo.CallstackId(CallstackId);
+ break;
+ }
+ default:
+ {
+ UE_TRACE_LOG(Memory, Free, MemAllocChannel)
+ << Free.Address(uint64_t(Address)) << Free.CallstackId(CallstackId) << Free.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits the allocation half of a realloc. Size/alignment packing matches
+// MemoryTrace_Alloc. Note there is no video-memory variant of this event;
+// only the system root heap has a dedicated form.
+void
+MemoryTrace_ReallocAlloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, ReallocAllocSystem, MemAllocChannel)
+ << ReallocAllocSystem.Address(uint64_t(Address)) << ReallocAllocSystem.CallstackId(CallstackId)
+ << ReallocAllocSystem.Size(uint32_t(Size >> SizeShift))
+ << ReallocAllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
+ << ReallocAlloc.Address(uint64_t(Address)) << ReallocAlloc.CallstackId(CallstackId)
+ << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
+ << ReallocAlloc.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits the free half of a realloc (paired with MemoryTrace_ReallocAlloc).
+void
+MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, ReallocFreeSystem, MemAllocChannel)
+ << ReallocFreeSystem.Address(uint64_t(Address)) << ReallocFreeSystem.CallstackId(CallstackId);
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
+ << ReallocFree.Address(uint64_t(Address)) << ReallocFree.CallstackId(CallstackId)
+ << ReallocFree.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Emits a swap (page-out/page-in) event for the given page address.
+// Unlike the alloc/free paths, the callstack id is taken verbatim from the caller.
+void
+MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize, uint32_t CallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ UE_TRACE_LOG(Memory, MemorySwapOp, MemAllocChannel)
+ << MemorySwapOp.Address(PageAddress) << MemorySwapOp.CallstackId(CallstackId) << MemorySwapOp.CompressedSize(CompressedSize)
+ << MemorySwapOp.SwapOp((uint8)SwapOperation);
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Announces a new child heap under ParentId and returns its id. Ids are
+// handed out after the reserved root-heap range; a parent must have been
+// announced before its children (asserted). Returns 0 when tracing is off.
+HeapId
+MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags)
+{
+ if (!GTraceAllowed)
+ {
+ return 0;
+ }
+
+ static std::atomic<HeapId> HeapIdCount(EMemoryTraceRootHeap::EndReserved + 1); // Reserve indexes for root heaps
+ const HeapId Id = HeapIdCount.fetch_add(1);
+ const uint32_t NameLen = uint32_t(zen::StringLength(Name));
+ const uint32_t DataSize = NameLen * sizeof(char16_t);
+ ZEN_ASSERT(ParentId < Id);
+
+ UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize)
+ << HeapSpec.Id(Id) << HeapSpec.ParentId(ParentId) << HeapSpec.Name(Name, NameLen) << HeapSpec.Flags(uint16(Flags));
+
+ return Id;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Announces a root heap. Ids are assigned sequentially from the reserved
+// range (asserted), and ParentId ~0 marks the spec as having no parent.
+// Returns 0 when tracing is off.
+HeapId
+MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags)
+{
+ if (!GTraceAllowed)
+ {
+ return 0;
+ }
+
+ static std::atomic<HeapId> RootHeapCount(0);
+ const HeapId Id = RootHeapCount.fetch_add(1);
+ ZEN_ASSERT(Id <= EMemoryTraceRootHeap::EndReserved);
+
+ const uint32_t NameLen = uint32_t(zen::StringLength(Name));
+ const uint32_t DataSize = NameLen * sizeof(char16_t);
+
+ UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize)
+ << HeapSpec.Id(Id) << HeapSpec.ParentId(HeapId(~0)) << HeapSpec.Name(Name, NameLen)
+ << HeapSpec.Flags(uint16(EMemoryTraceHeapFlags::Root | Flags));
+
+ return Id;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Marks an existing traced allocation as being (the backing storage of) the
+// given heap. The Heap flag is always set in addition to the caller's flags.
+void
+MemoryTrace_MarkAllocAsHeap(uint64_t Address, HeapId Heap, EMemoryTraceHeapAllocationFlags Flags, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ UE_TRACE_LOG(Memory, HeapMarkAlloc, MemAllocChannel)
+ << HeapMarkAlloc.Address(uint64_t(Address)) << HeapMarkAlloc.CallstackId(CallstackId)
+ << HeapMarkAlloc.Flags(uint16(EMemoryTraceHeapAllocationFlags::Heap | Flags)) << HeapMarkAlloc.Heap(Heap);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Reverts MemoryTrace_MarkAllocAsHeap: the allocation stops being treated as
+// the backing storage for Heap (all heap-allocation flags cleared).
+void
+MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ // Sets all flags to zero
+ UE_TRACE_LOG(Memory, HeapUnmarkAlloc, MemAllocChannel)
+ << HeapUnmarkAlloc.Address(uint64_t(Address)) << HeapUnmarkAlloc.CallstackId(CallstackId) << HeapUnmarkAlloc.Heap(Heap);
+}
+
+} // namespace zen
+
+#else // UE_MEMORY_TRACE_ENABLED
+
+/////////////////////////////////////////////////////////////////////////////
+bool
+MemoryTrace_IsActive()
+{
+ return false;
+}
+
+#endif // UE_MEMORY_TRACE_ENABLED
+
+namespace zen {
+
+/////////////////////////////////////////////////////////////////////////////
+// Wraps the allocator that serves the memory tracer's own allocations.
+FTraceMalloc::FTraceMalloc(FMalloc* InMalloc)
+{
+ WrappedMalloc = InMalloc;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+FTraceMalloc::~FTraceMalloc()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Allocates memory for the tracer itself. The inner call runs with
+// GDoNotAllocateInTrace set to break recursion, the allocation is tagged
+// with TRACE_TAG so tracing overhead is attributed separately, and the
+// event is logged with callstack id 0.
+void*
+FTraceMalloc::Malloc(SIZE_T Count, uint32_t Alignment)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ // UE_TRACE_METADATA_CLEAR_SCOPE();
+ UE_MEMSCOPE(TRACE_TAG);
+
+ void* NewPtr;
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ NewPtr = WrappedMalloc->Malloc(Count, Alignment);
+ }
+
+ const uint64_t Size = Count;
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+
+ UE_TRACE_LOG(Memory, Alloc, MemAllocChannel)
+ << Alloc.Address(uint64_t(NewPtr)) << Alloc.CallstackId(0) << Alloc.Size(uint32_t(Size >> SizeShift))
+ << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ return NewPtr;
+#else
+ return WrappedMalloc->Malloc(Count, Alignment);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Reallocates tracer-owned memory, logging a ReallocFree/ReallocAlloc pair
+// against the system root heap with callstack id 0. Recursion is prevented
+// via GDoNotAllocateInTrace around the inner call.
+void*
+FTraceMalloc::Realloc(void* Original, SIZE_T Count, uint32_t Alignment)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ // UE_TRACE_METADATA_CLEAR_SCOPE();
+ UE_MEMSCOPE(TRACE_TAG);
+
+ UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
+ << ReallocFree.Address(uint64_t(Original)) << ReallocFree.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ void* NewPtr;
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ NewPtr = WrappedMalloc->Realloc(Original, Count, Alignment);
+ }
+
+ const uint64_t Size = Count;
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+
+ UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
+ << ReallocAlloc.Address(uint64_t(NewPtr)) << ReallocAlloc.CallstackId(0) << ReallocAlloc.Size(uint32_t(Size >> SizeShift))
+ << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
+ << ReallocAlloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ return NewPtr;
+#else
+ return WrappedMalloc->Realloc(Original, Count, Alignment);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Frees tracer-owned memory, logging the Free event before releasing and
+// guarding the inner call against recursive tracing.
+void
+FTraceMalloc::Free(void* Original)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ UE_TRACE_LOG(Memory, Free, MemAllocChannel)
+ << Free.Address(uint64_t(Original)) << Free.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ WrappedMalloc->Free(Original);
+ }
+#else
+ WrappedMalloc->Free(Original);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace.cpp b/src/zencore/memtrack/moduletrace.cpp
new file mode 100644
index 000000000..51280ff3a
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.cpp
@@ -0,0 +1,296 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenbase/zenbase.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 1
+#else
+# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 0
+#endif
+
+#include "moduletrace_events.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# include <zencore/trace.h>
+
+# include <array>
+
+namespace zen {
+
+class FMalloc;
+
+typedef uint32_t HeapId;
+
+////////////////////////////////////////////////////////////////////////////////
+// Resolves a function exported by ntdll.dll by name at construction and lets
+// callers invoke it through operator() with arbitrary arguments.
+struct FNtDllFunction
+{
+ FARPROC Addr;
+
+ FNtDllFunction(const char* Name)
+ {
+ HMODULE NtDll = LoadLibraryW(L"ntdll.dll");
+ ZEN_ASSERT(NtDll);
+ Addr = GetProcAddress(NtDll, Name);
+ }
+
+ // Invokes the resolved function. The caller's argument types must match the
+ // target's actual NTAPI signature - there is no checking here.
+ template<typename... ArgTypes>
+ unsigned int operator()(ArgTypes... Args)
+ {
+ typedef unsigned int(NTAPI * Prototype)(ArgTypes...);
+ return (Prototype((void*)Addr))(Args...);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Tracks DLL load/unload in the process: emits module trace events, mirrors
+// module address ranges into the memory trace, and forwards notifications to
+// registered subscribers. Singleton accessed via Get().
+class FModuleTrace
+{
+public:
+ // Callback: (bLoaded, module base address, module name or nullptr on unload)
+ typedef void (*SubscribeFunc)(bool, void*, const char16_t*);
+
+ FModuleTrace(FMalloc* InMalloc);
+ ~FModuleTrace();
+ static FModuleTrace* Get();
+ void Initialize();
+ void Subscribe(SubscribeFunc Function);
+
+private:
+ void OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base);
+ void OnDllUnloaded(uintptr_t Base);
+ void OnDllNotification(unsigned int Reason, const void* DataPtr);
+ static FModuleTrace* Instance;
+ SubscribeFunc Subscribers[64]; // Fixed-capacity subscriber list (asserted in Subscribe)
+ int SubscriberCount = 0;
+ void* CallbackCookie = nullptr; // Cookie from LdrRegisterDllNotification, used to unregister
+ HeapId ProgramHeapId = 0; // Heap under which module images are accounted
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace* FModuleTrace::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+// Registers this object as the singleton; the allocator is currently unused.
+FModuleTrace::FModuleTrace(FMalloc* InMalloc)
+{
+ ZEN_UNUSED(InMalloc);
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Unregisters the Ldr DLL notification callback if one was installed.
+FModuleTrace::~FModuleTrace()
+{
+ if (CallbackCookie)
+ {
+ FNtDllFunction UnregisterFunc("LdrUnregisterDllNotification");
+ UnregisterFunc(CallbackCookie);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the singleton, or nullptr before Modules_Create has run.
+FModuleTrace*
+FModuleTrace::Get()
+{
+ return Instance;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Announces the module heap and symbol format, registers for future DLL
+// load/unload notifications, then walks the PEB loader list to report
+// modules that were already loaded before we hooked in.
+void
+FModuleTrace::Initialize()
+{
+ using namespace UE::Trace;
+
+ ProgramHeapId = MemoryTrace_HeapSpec(SystemMemory, u"Module", EMemoryTraceHeapFlags::None);
+
+ UE_TRACE_LOG(Diagnostics, ModuleInit, ModuleChannel, sizeof(char) * 3)
+ << ModuleInit.SymbolFormat("pdb", 3) << ModuleInit.ModuleBaseShift(uint8(0));
+
+ // Register for DLL load/unload notifications.
+ // Captureless lambda so it can decay to the C callback pointer type.
+ auto Thunk = [](ULONG Reason, const void* Data, void* Context) {
+ auto* Self = (FModuleTrace*)Context;
+ Self->OnDllNotification(Reason, Data);
+ };
+
+ typedef void(CALLBACK * ThunkType)(ULONG, const void*, void*);
+ auto ThunkImpl = ThunkType(Thunk);
+
+ FNtDllFunction RegisterFunc("LdrRegisterDllNotification");
+ RegisterFunc(0, ThunkImpl, this, &CallbackCookie);
+
+ // Enumerate already loaded modules.
+ // NOTE(review): ModuleIter - 1 relies on InMemoryOrderLinks being the second
+ // member of LDR_DATA_TABLE_ENTRY - confirm against the winternl.h layout.
+ const TEB* ThreadEnvBlock = NtCurrentTeb();
+ const PEB* ProcessEnvBlock = ThreadEnvBlock->ProcessEnvironmentBlock;
+ const LIST_ENTRY* ModuleIter = ProcessEnvBlock->Ldr->InMemoryOrderModuleList.Flink;
+ const LIST_ENTRY* ModuleIterEnd = ModuleIter->Blink;
+ do
+ {
+ const auto& ModuleData = *(LDR_DATA_TABLE_ENTRY*)(ModuleIter - 1);
+ if (ModuleData.DllBase == 0)
+ {
+ break;
+ }
+
+ OnDllLoaded(ModuleData.FullDllName, UPTRINT(ModuleData.DllBase));
+ ModuleIter = ModuleIter->Flink;
+ } while (ModuleIter != ModuleIterEnd);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Adds a load/unload callback; capacity is fixed (asserted, not grown).
+void
+FModuleTrace::Subscribe(SubscribeFunc Function)
+{
+ ZEN_ASSERT(SubscriberCount < ZEN_ARRAY_COUNT(Subscribers));
+ Subscribers[SubscriberCount++] = Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Dispatches an Ldr DLL notification to OnDllLoaded/OnDllUnloaded.
+// FNotificationData mirrors the layout of LDR_DLL_NOTIFICATION_DATA as
+// delivered by LdrRegisterDllNotification.
+void
+FModuleTrace::OnDllNotification(unsigned int Reason, const void* DataPtr)
+{
+ enum
+ {
+ LDR_DLL_NOTIFICATION_REASON_LOADED = 1,
+ LDR_DLL_NOTIFICATION_REASON_UNLOADED = 2,
+ };
+
+ struct FNotificationData
+ {
+ uint32_t Flags;
+ const UNICODE_STRING& FullPath;
+ const UNICODE_STRING& BaseName;
+ uintptr_t Base;
+ };
+ const auto& Data = *(FNotificationData*)DataPtr;
+
+ switch (Reason)
+ {
+ case LDR_DLL_NOTIFICATION_REASON_LOADED:
+ OnDllLoaded(Data.FullPath, Data.Base);
+ break;
+ case LDR_DLL_NOTIFICATION_REASON_UNLOADED:
+ OnDllUnloaded(Data.Base);
+ break;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Reports a newly loaded module: extracts the PDB guid+age from the CodeView
+// debug directory (for symbol matching), emits the ModuleLoad event, mirrors
+// the image range into the memory trace, and notifies subscribers.
+void
+FModuleTrace::OnDllLoaded(const UNICODE_STRING& Name, UPTRINT Base)
+{
+ const auto* DosHeader = (IMAGE_DOS_HEADER*)Base;
+ const auto* NtHeaders = (IMAGE_NT_HEADERS*)(Base + DosHeader->e_lfanew);
+ const IMAGE_OPTIONAL_HEADER& OptionalHeader = NtHeaders->OptionalHeader;
+ uint8_t ImageId[20];
+
+ // Find the guid and age of the binary, used to match debug files
+ const IMAGE_DATA_DIRECTORY& DebugInfoEntry = OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
+ const auto* DebugEntries = (IMAGE_DEBUG_DIRECTORY*)(Base + DebugInfoEntry.VirtualAddress);
+ for (uint32_t i = 0, n = DebugInfoEntry.Size / sizeof(DebugEntries[0]); i < n; ++i)
+ {
+ const IMAGE_DEBUG_DIRECTORY& Entry = DebugEntries[i];
+ if (Entry.Type == IMAGE_DEBUG_TYPE_CODEVIEW)
+ {
+ // PDB 7.0 ("RSDS") CodeView record layout.
+ struct FCodeView7
+ {
+ uint32_t Signature;
+ uint32_t Guid[4];
+ uint32_t Age;
+ };
+
+ if (Entry.SizeOfData < sizeof(FCodeView7))
+ {
+ continue;
+ }
+
+ const auto* CodeView7 = (FCodeView7*)(Base + Entry.AddressOfRawData);
+ // 'SDSR' is the little-endian uint32 reading of the "RSDS" signature.
+ if (CodeView7->Signature != 'SDSR')
+ {
+ continue;
+ }
+
+ memcpy(ImageId, (uint8_t*)&CodeView7->Guid, sizeof(uint32_t) * 4);
+ memcpy(&ImageId[16], (uint8_t*)&CodeView7->Age, sizeof(uint32_t));
+ break;
+ }
+ }
+
+ // Note: UNICODE_STRING.Length is the size in bytes of the string buffer.
+ UE_TRACE_LOG(Diagnostics, ModuleLoad, ModuleChannel, uint32_t(Name.Length + sizeof(ImageId)))
+ << ModuleLoad.Name((const char16_t*)Name.Buffer, Name.Length / 2) << ModuleLoad.Base(uint64_t(Base))
+ << ModuleLoad.Size(OptionalHeader.SizeOfImage) << ModuleLoad.ImageId(ImageId, uint32_t(sizeof(ImageId)));
+
+# if UE_MEMORY_TRACE_ENABLED
+ {
+ // Alloc the image range in the system heap, mark it as the module heap,
+ // then alloc again inside that heap. NOTE(review): the repeated Alloc is
+ // assumed to be the intended heap-marking protocol (mirrored in
+ // OnDllUnloaded) - confirm against the memory trace analyzer.
+ UE_MEMSCOPE(ELLMTag::ProgramSize);
+ MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap(Base, ProgramHeapId);
+ MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+ }
+# endif // UE_MEMORY_TRACE_ENABLED
+
+ for (int i = 0; i < SubscriberCount; ++i)
+ {
+ Subscribers[i](true, (void*)Base, (const char16_t*)Name.Buffer);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Reports an unloaded module: reverses the free/unmark/free sequence from
+// OnDllLoaded, emits the ModuleUnload event and notifies subscribers
+// (name is nullptr on unload).
+void
+FModuleTrace::OnDllUnloaded(UPTRINT Base)
+{
+# if UE_MEMORY_TRACE_ENABLED
+ MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_UnmarkAllocAsHeap(Base, ProgramHeapId);
+ MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+# endif // UE_MEMORY_TRACE_ENABLED
+
+ UE_TRACE_LOG(Diagnostics, ModuleUnload, ModuleChannel) << ModuleUnload.Base(uint64(Base));
+
+ for (int i = 0; i < SubscriberCount; ++i)
+ {
+ Subscribers[i](false, (void*)Base, nullptr);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Creates the module-trace singleton (idempotent; the function-local static
+// keeps it alive for the remainder of the process).
+void
+Modules_Create(FMalloc* Malloc)
+{
+ if (FModuleTrace::Get() != nullptr)
+ {
+ return;
+ }
+
+ static FModuleTrace Instance(Malloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Starts module tracking; no-op if Modules_Create has not been called.
+void
+Modules_Initialize()
+{
+ if (FModuleTrace* Instance = FModuleTrace::Get())
+ {
+ Instance->Initialize();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Registers a module load/unload callback; no-op when tracing is not created.
+void
+Modules_Subscribe(void (*Function)(bool, void*, const char16_t*))
+{
+ if (FModuleTrace* Instance = FModuleTrace::Get())
+ {
+ Instance->Subscribe(Function);
+ }
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
diff --git a/src/zencore/memtrack/moduletrace.h b/src/zencore/memtrack/moduletrace.h
new file mode 100644
index 000000000..5e7374faa
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.h
@@ -0,0 +1,11 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+namespace zen {
+
+void Modules_Create(class FMalloc*);
+void Modules_Subscribe(void (*)(bool, void*, const char16_t*));
+void Modules_Initialize();
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.cpp b/src/zencore/memtrack/moduletrace_events.cpp
new file mode 100644
index 000000000..9c6a9b648
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.cpp
@@ -0,0 +1,16 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/trace.h>
+
+#include "moduletrace_events.h"
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(ModuleChannel, "Module information needed for symbols resolution", true)
+
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleInit)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleLoad)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleUnload)
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.h b/src/zencore/memtrack/moduletrace_events.h
new file mode 100644
index 000000000..1bda42fe8
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.h
@@ -0,0 +1,27 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/trace.h>
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_EXTERN(ModuleChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleInit, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, SymbolFormat)
+ UE_TRACE_EVENT_FIELD(uint8, ModuleBaseShift)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleLoad, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+ UE_TRACE_EVENT_FIELD(uint64, Base)
+ UE_TRACE_EVENT_FIELD(uint32, Size)
+	UE_TRACE_EVENT_FIELD(uint8[], ImageId)	   // Platform specific id for this image, used to match debug files where available
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleUnload, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(uint64, Base)
+UE_TRACE_EVENT_END()
+
+} // namespace zen
diff --git a/src/zencore/memtrack/platformtls.h b/src/zencore/memtrack/platformtls.h
new file mode 100644
index 000000000..f134e68a8
--- /dev/null
+++ b/src/zencore/memtrack/platformtls.h
@@ -0,0 +1,107 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+/**
+ * It should be possible to provide a generic implementation as long as a threadID is provided. We don't do that yet.
+ */
+struct FGenericPlatformTLS
+{
+ static const uint32_t InvalidTlsSlot = 0xFFFFFFFF;
+
+ /**
+ * Return false if this is an invalid TLS slot
+ * @param SlotIndex the TLS index to check
+ * @return true if this looks like a valid slot
+ */
+ static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; }
+};
+
+#if ZEN_PLATFORM_WINDOWS
+
+# include <zencore/windows.h>
+
+class FWindowsPlatformTLS : public FGenericPlatformTLS
+{
+public:
+ static uint32_t AllocTlsSlot() { return ::TlsAlloc(); }
+
+ static void FreeTlsSlot(uint32_t SlotIndex) { ::TlsFree(SlotIndex); }
+
+ static void SetTlsValue(uint32_t SlotIndex, void* Value) { ::TlsSetValue(SlotIndex, Value); }
+
+ /**
+ * Reads the value stored at the specified TLS slot
+ *
+ * @return the value stored in the slot
+ */
+ static void* GetTlsValue(uint32_t SlotIndex) { return ::TlsGetValue(SlotIndex); }
+
+ /**
+ * Return false if this is an invalid TLS slot
+ * @param SlotIndex the TLS index to check
+ * @return true if this looks like a valid slot
+ */
+ static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; }
+};
+
+typedef FWindowsPlatformTLS FPlatformTLS;
+
+#elif ZEN_PLATFORM_MAC
+
+# include <pthread.h>
+
+/**
+ * Apple implementation of the TLS OS functions
+ **/
+struct FApplePlatformTLS : public FGenericPlatformTLS
+{
+ /**
+ * Returns the currently executing thread's id
+ */
+ static uint32_t GetCurrentThreadId(void) { return (uint32_t)pthread_mach_thread_np(pthread_self()); }
+
+ /**
+ * Allocates a thread local store slot
+ */
+ static uint32_t AllocTlsSlot(void)
+ {
+ // allocate a per-thread mem slot
+ pthread_key_t SlotKey = 0;
+ if (pthread_key_create(&SlotKey, NULL) != 0)
+ {
+ SlotKey = InvalidTlsSlot; // matches the Windows TlsAlloc() retval.
+ }
+ return SlotKey;
+ }
+
+ /**
+ * Sets a value in the specified TLS slot
+ *
+ * @param SlotIndex the TLS index to store it in
+ * @param Value the value to store in the slot
+ */
+ static void SetTlsValue(uint32_t SlotIndex, void* Value) { pthread_setspecific((pthread_key_t)SlotIndex, Value); }
+
+ /**
+ * Reads the value stored at the specified TLS slot
+ *
+ * @return the value stored in the slot
+ */
+ static void* GetTlsValue(uint32_t SlotIndex) { return pthread_getspecific((pthread_key_t)SlotIndex); }
+
+ /**
+ * Frees a previously allocated TLS slot
+ *
+ * @param SlotIndex the TLS index to store it in
+ */
+ static void FreeTlsSlot(uint32_t SlotIndex) { pthread_key_delete((pthread_key_t)SlotIndex); }
+};
+
+typedef FApplePlatformTLS FPlatformTLS;
+
+#else
+# error Platform not yet supported
+#endif
diff --git a/src/zencore/memtrack/tagtrace.cpp b/src/zencore/memtrack/tagtrace.cpp
new file mode 100644
index 000000000..15ba78ae4
--- /dev/null
+++ b/src/zencore/memtrack/tagtrace.cpp
@@ -0,0 +1,237 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "growonlylockfreehash.h"
+
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+# include <zencore/string.h>
+
+namespace zen {
+////////////////////////////////////////////////////////////////////////////////
+
+UE_TRACE_CHANNEL_EXTERN(MemAllocChannel);
+
+UE_TRACE_EVENT_BEGIN(Memory, TagSpec, Important | NoSync)
+ UE_TRACE_EVENT_FIELD(int32, Tag)
+ UE_TRACE_EVENT_FIELD(int32, Parent)
+ UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, Display)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScope, NoSync)
+ UE_TRACE_EVENT_FIELD(int32, Tag)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScopePtr, NoSync)
+ UE_TRACE_EVENT_FIELD(uint64, Ptr)
+UE_TRACE_EVENT_END()
+
+////////////////////////////////////////////////////////////////////////////////
+// Per thread active tag, i.e. the top level FMemScope
+thread_local int32 GActiveTag;
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope()
+{
+}
+
+FMemScope::FMemScope(int32_t InTag, bool bShouldActivate /*= true*/)
+{
+ if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+ {
+ ActivateScope(InTag);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope(ELLMTag InTag, bool bShouldActivate /*= true*/)
+{
+ if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+ {
+ ActivateScope(static_cast<int32>(InTag));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FMemScope::ActivateScope(int32_t InTag)
+{
+ if (auto LogScope = FMemoryMemoryScopeFields::LogScopeType::ScopedEnter<FMemoryMemoryScopeFields>())
+ {
+ if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopeFields*)(&LogScope))
+ {
+ Inner.SetActive();
+ LogScope += LogScope << MemoryScope.Tag(InTag);
+ PrevTag = GActiveTag;
+ GActiveTag = InTag;
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::~FMemScope()
+{
+ if (Inner.bActive)
+ {
+ GActiveTag = PrevTag;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::FMemScopePtr(uint64_t InPtr)
+{
+ if (InPtr != 0 && TRACE_PRIVATE_CHANNELEXPR_IS_ENABLED(MemAllocChannel))
+ {
+ if (auto LogScope = FMemoryMemoryScopePtrFields::LogScopeType::ScopedEnter<FMemoryMemoryScopePtrFields>())
+ {
+ if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopePtrFields*)(&LogScope))
+ {
+ Inner.SetActive(), LogScope += LogScope << MemoryScope.Ptr(InPtr);
+ }
+ }
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::~FMemScopePtr()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Utility class that manages tracing the specification of unique LLM tags
+ * and custom name based tags.
+ */
+class FTagTrace
+{
+public:
+ FTagTrace(FMalloc* InMalloc);
+ void AnnounceGenericTags() const;
+ void AnnounceSpecialTags() const;
+ int32 AnnounceCustomTag(int32 Tag, int32 ParentTag, const ANSICHAR* Display) const;
+
+private:
+ struct FTagNameSetEntry
+ {
+ std::atomic_int32_t Data;
+
+ int32_t GetKey() const { return Data.load(std::memory_order_relaxed); }
+ bool GetValue() const { return true; }
+ bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } // NAME_None is treated as empty
+ void SetKeyValue(int32_t Key, bool Value)
+ {
+ ZEN_UNUSED(Value);
+ Data.store(Key, std::memory_order_relaxed);
+ }
+ static uint32_t KeyHash(int32_t Key) { return static_cast<uint32>(Key); }
+ static void ClearEntries(FTagNameSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FTagNameSetEntry));
+ }
+ };
+ typedef TGrowOnlyLockFreeHash<FTagNameSetEntry, int32_t, bool> FTagNameSet;
+
+ FTagNameSet AnnouncedNames;
+ static FMalloc* Malloc;
+};
+
+FMalloc* FTagTrace::Malloc = nullptr;
+static FTagTrace* GTagTrace = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FTagTrace::FTagTrace(FMalloc* InMalloc) : AnnouncedNames(InMalloc)
+{
+ Malloc = InMalloc;
+ AnnouncedNames.Reserve(1024);
+ AnnounceGenericTags();
+ AnnounceSpecialTags();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FTagTrace::AnnounceGenericTags() const
+{
+# define TRACE_TAG_SPEC(Enum, Str, ParentTag) \
+ { \
+ const uint32_t DisplayLen = (uint32_t)StringLength(Str); \
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) \
+ << TagSpec.Tag((int32_t)ELLMTag::Enum) << TagSpec.Parent((int32_t)ParentTag) << TagSpec.Display(Str, DisplayLen); \
+ }
+ LLM_ENUM_GENERIC_TAGS(TRACE_TAG_SPEC);
+# undef TRACE_TAG_SPEC
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void
+FTagTrace::AnnounceSpecialTags() const
+{
+ auto EmitTag = [](const char16_t* DisplayString, int32_t Tag, int32_t ParentTag) {
+ const uint32_t DisplayLen = (uint32_t)StringLength(DisplayString);
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+ << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(DisplayString, DisplayLen);
+ };
+
+ EmitTag(u"Trace", TRACE_TAG, -1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+FTagTrace::AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const ANSICHAR* Display) const
+{
+ const uint32_t DisplayLen = (uint32_t)StringLength(Display);
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+ << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(Display, DisplayLen);
+ return Tag;
+}
+
+} // namespace zen
+
+#endif // UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_InitTags(FMalloc* InMalloc)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ GTagTrace = (FTagTrace*)InMalloc->Malloc(sizeof(FTagTrace), alignof(FTagTrace));
+ new (GTagTrace) FTagTrace(InMalloc);
+#else
+ ZEN_UNUSED(InMalloc);
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ // todo: How do we check if tag trace is active?
+ if (GTagTrace)
+ {
+ return GTagTrace->AnnounceCustomTag(Tag, ParentTag, Display);
+ }
+#else
+ ZEN_UNUSED(Tag, ParentTag, Display);
+#endif
+ return -1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+MemoryTrace_GetActiveTag()
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ return GActiveTag;
+#else
+ return -1;
+#endif
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/tracemalloc.h b/src/zencore/memtrack/tracemalloc.h
new file mode 100644
index 000000000..54606ac45
--- /dev/null
+++ b/src/zencore/memtrack/tracemalloc.h
@@ -0,0 +1,24 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memorytrace.h>
+
+namespace zen {
+
+class FTraceMalloc : public FMalloc
+{
+public:
+ FTraceMalloc(FMalloc* InMalloc);
+ virtual ~FTraceMalloc();
+
+ virtual void* Malloc(SIZE_T Count, uint32 Alignment) override;
+ virtual void* Realloc(void* Original, SIZE_T Count, uint32 Alignment) override;
+ virtual void Free(void* Original) override;
+
+ virtual void OnMallocInitialized() override { WrappedMalloc->OnMallocInitialized(); }
+
+ FMalloc* WrappedMalloc;
+};
+
+} // namespace zen
diff --git a/src/zencore/memtrack/vatrace.cpp b/src/zencore/memtrack/vatrace.cpp
new file mode 100644
index 000000000..4dea27f1b
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.cpp
@@ -0,0 +1,361 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "vatrace.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+
+# include <zencore/memory/memorytrace.h>
+
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+# pragma comment(lib, "mincore.lib") // VirtualAlloc2
+# endif
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+class FTextSectionEditor
+{
+public:
+ ~FTextSectionEditor();
+ template<typename T>
+ T* Hook(T* Target, T* HookFunction);
+
+private:
+ struct FTrampolineBlock
+ {
+ FTrampolineBlock* Next;
+ uint32_t Size;
+ uint32_t Used;
+ };
+
+ static void* GetActualAddress(void* Function);
+ FTrampolineBlock* AllocateTrampolineBlock(void* Reference);
+ uint8_t* AllocateTrampoline(void* Reference, unsigned int Size);
+ void* HookImpl(void* Target, void* HookFunction);
+ FTrampolineBlock* HeadBlock = nullptr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FTextSectionEditor::~FTextSectionEditor()
+{
+ for (FTrampolineBlock* Block = HeadBlock; Block != nullptr; Block = Block->Next)
+ {
+ DWORD Unused;
+ VirtualProtect(Block, Block->Size, PAGE_EXECUTE_READ, &Unused);
+ }
+
+ FlushInstructionCache(GetCurrentProcess(), nullptr, 0);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void*
+FTextSectionEditor::GetActualAddress(void* Function)
+{
+	// Follows a jmp instruction (0xff/4 only for now) at Function and returns
+	// the address it would jmp to.
+
+ uint8_t* Addr = (uint8_t*)Function;
+ int Offset = unsigned(Addr[0] & 0xf0) == 0x40; // REX prefix
+ if (Addr[Offset + 0] == 0xff && Addr[Offset + 1] == 0x25)
+ {
+ Addr += Offset;
+ Addr = *(uint8_t**)(Addr + 6 + *(uint32_t*)(Addr + 2));
+ }
+ return Addr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FTextSectionEditor::FTrampolineBlock*
+FTextSectionEditor::AllocateTrampolineBlock(void* Reference)
+{
+ static const size_t BlockSize = 0x10000; // 64KB is Windows' canonical granularity
+
+ // Find the start of the main allocation that mapped Reference
+ MEMORY_BASIC_INFORMATION MemInfo;
+ VirtualQuery(Reference, &MemInfo, sizeof(MemInfo));
+ auto* Ptr = (uint8_t*)(MemInfo.AllocationBase);
+
+ // Step backwards one block at a time and try and allocate that address
+ while (true)
+ {
+ Ptr -= BlockSize;
+ if (VirtualAlloc(Ptr, BlockSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) != nullptr)
+ {
+ break;
+ }
+
+ uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Ptr);
+ if (Distance >= 1ull << 31)
+ {
+ ZEN_ASSERT(!"Failed to allocate trampoline blocks for memory tracing hooks");
+ }
+ }
+
+ auto* Block = (FTrampolineBlock*)Ptr;
+ Block->Next = HeadBlock;
+ Block->Size = BlockSize;
+ Block->Used = sizeof(FTrampolineBlock);
+ HeadBlock = Block;
+
+ return Block;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint8_t*
+FTextSectionEditor::AllocateTrampoline(void* Reference, unsigned int Size)
+{
+ // Try and find a block that's within 2^31 bytes before Reference
+ FTrampolineBlock* Block;
+ for (Block = HeadBlock; Block != nullptr; Block = Block->Next)
+ {
+ uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Block);
+ if (Distance < 1ull << 31)
+ {
+ break;
+ }
+ }
+
+ // If we didn't find a block then we need to allocate a new one.
+ if (Block == nullptr)
+ {
+ Block = AllocateTrampolineBlock(Reference);
+ }
+
+ // Allocate space for the trampoline.
+ uint32_t NextUsed = Block->Used + Size;
+ if (NextUsed > Block->Size)
+ {
+ // Block is full. We could allocate a new block here but as it is not
+ // expected that so many hooks will be made this path shouldn't happen
+ ZEN_ASSERT(!"Unable to allocate memory for memory tracing's hooks");
+ }
+
+ uint8_t* Out = (uint8_t*)Block + Block->Used;
+ Block->Used = NextUsed;
+
+ return Out;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+template<typename T>
+T*
+FTextSectionEditor::Hook(T* Target, T* HookFunction)
+{
+ return (T*)HookImpl((void*)Target, (void*)HookFunction);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void*
+FTextSectionEditor::HookImpl(void* Target, void* HookFunction)
+{
+ Target = GetActualAddress(Target);
+
+ // Very rudimentary x86_64 instruction length decoding that only supports op
+ // code ranges (0x80,0x8b) and (0x50,0x5f). Enough for simple prologues
+ uint8_t* __restrict Start = (uint8_t*)Target;
+ const uint8_t* Read = Start;
+ do
+ {
+ Read += (Read[0] & 0xf0) == 0x40; // REX prefix
+ uint8_t Inst = *Read++;
+ if (unsigned(Inst - 0x80) < 0x0cu)
+ {
+ uint8_t ModRm = *Read++;
+ Read += ((ModRm & 0300) < 0300) & ((ModRm & 0007) == 0004); // SIB
+ switch (ModRm & 0300) // Disp[8|32]
+ {
+ case 0100:
+ Read += 1;
+ break;
+ case 0200:
+ Read += 5;
+ break;
+ }
+ Read += (Inst == 0x83);
+ }
+ else if (unsigned(Inst - 0x50) >= 0x10u)
+ {
+ ZEN_ASSERT(!"Unknown instruction");
+ }
+ } while (Read - Start < 6);
+
+ static const int TrampolineSize = 24;
+ int PatchSize = int(Read - Start);
+ uint8_t* TrampolinePtr = AllocateTrampoline(Start, PatchSize + TrampolineSize);
+
+ // Write the trampoline
+ *(void**)TrampolinePtr = HookFunction;
+
+ uint8_t* PatchJmp = TrampolinePtr + sizeof(void*);
+ memcpy(PatchJmp, Start, PatchSize);
+
+ PatchJmp += PatchSize;
+ *PatchJmp = 0xe9;
+ *(int32_t*)(PatchJmp + 1) = int32_t(intptr_t(Start + PatchSize) - intptr_t(PatchJmp)) - 5;
+
+ // Need to make the text section writeable
+ DWORD ProtPrev;
+ uintptr_t ProtBase = uintptr_t(Target) & ~0x0fff; // 0x0fff is mask of VM page size
+ size_t ProtSize = ((ProtBase + 16 + 0x1000) & ~0x0fff) - ProtBase; // 16 is enough for one x86 instruction
+ VirtualProtect((void*)ProtBase, ProtSize, PAGE_EXECUTE_READWRITE, &ProtPrev);
+
+ // Patch function to jmp to the hook
+ uint16_t* HookJmp = (uint16_t*)Target;
+ HookJmp[0] = 0x25ff;
+ *(int32_t*)(HookJmp + 1) = int32_t(intptr_t(TrampolinePtr) - intptr_t(HookJmp + 3));
+
+ // Put the protection back the way it was
+ VirtualProtect((void*)ProtBase, ProtSize, ProtPrev, &ProtPrev);
+
+ return PatchJmp - PatchSize;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+bool FVirtualWinApiHooks::bLight;
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+PVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+# else
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+# endif
+BOOL(WINAPI* FVirtualWinApiHooks::VmFreeOrig)(LPVOID, SIZE_T, DWORD);
+BOOL(WINAPI* FVirtualWinApiHooks::VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
+
+void
+FVirtualWinApiHooks::Initialize(bool bInLight)
+{
+ bLight = bInLight;
+
+ FTextSectionEditor Editor;
+
+ // Note that hooking alloc functions is done last as applying the hook can
+ // allocate some memory pages.
+
+ VmFreeOrig = Editor.Hook(VirtualFree, &FVirtualWinApiHooks::VmFree);
+ VmFreeExOrig = Editor.Hook(VirtualFreeEx, &FVirtualWinApiHooks::VmFreeEx);
+
+# if ZEN_PLATFORM_WINDOWS
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+ {
+ VmAlloc2Orig = Editor.Hook(VirtualAlloc2, &FVirtualWinApiHooks::VmAlloc2);
+ }
+# else // NTDDI_VERSION
+ {
+ VmAlloc2Orig = nullptr;
+ HINSTANCE DllInstance;
+ DllInstance = LoadLibrary(TEXT("kernelbase.dll"));
+ if (DllInstance != NULL)
+ {
+# pragma warning(push)
+# pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'FVirtualWinApiHooks::FnVirtualAlloc2'
+ VmAlloc2Orig = (FnVirtualAlloc2)GetProcAddress(DllInstance, "VirtualAlloc2");
+# pragma warning(pop)
+ FreeLibrary(DllInstance);
+ }
+ if (VmAlloc2Orig)
+ {
+ VmAlloc2Orig = Editor.Hook(VmAlloc2Orig, &FVirtualWinApiHooks::VmAlloc2);
+ }
+ }
+# endif // NTDDI_VERSION
+#	 endif	 // ZEN_PLATFORM_WINDOWS
+
+ VmAllocExOrig = Editor.Hook(VirtualAllocEx, &FVirtualWinApiHooks::VmAllocEx);
+ VmAllocOrig = Editor.Hook(VirtualAlloc, &FVirtualWinApiHooks::VmAlloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
+{
+ LPVOID Ret = VmAllocOrig(Address, Size, Type, Protect);
+
+ // Track any reserve for now. Going forward we need events to differentiate reserves/commits and
+ // corresponding information on frees.
+ if (Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+BOOL WINAPI
+FVirtualWinApiHooks::VmFree(LPVOID Address, SIZE_T Size, DWORD Type)
+{
+ if (Type & MEM_RELEASE)
+ {
+ MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return VmFreeOrig(Address, Size, Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
+{
+ LPVOID Ret = VmAllocExOrig(Process, Address, Size, Type, Protect);
+
+ if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+BOOL WINAPI
+FVirtualWinApiHooks::VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type)
+{
+ if (Process == GetCurrentProcess() && (Type & MEM_RELEASE))
+ {
+ MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return VmFreeExOrig(Process, Address, Size, Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+PVOID WINAPI
+FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
+ PVOID BaseAddress,
+ SIZE_T Size,
+ ULONG Type,
+ ULONG PageProtection,
+ MEM_EXTENDED_PARAMETER* ExtendedParameters,
+ ULONG ParameterCount)
+# else
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
+ LPVOID BaseAddress,
+ SIZE_T Size,
+ ULONG Type,
+ ULONG PageProtection,
+ /*MEM_EXTENDED_PARAMETER* */ void* ExtendedParameters,
+ ULONG ParameterCount)
+# endif
+{
+ LPVOID Ret = VmAlloc2Orig(Process, BaseAddress, Size, Type, PageProtection, ExtendedParameters, ParameterCount);
+
+ if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && BaseAddress == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
diff --git a/src/zencore/memtrack/vatrace.h b/src/zencore/memtrack/vatrace.h
new file mode 100644
index 000000000..59cc7fe97
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.h
@@ -0,0 +1,61 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+#if ZEN_PLATFORM_WINDOWS && !defined(PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS)
+# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 1
+#endif
+
+#ifndef PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 0
+#endif
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+# include <zencore/windows.h>
+
+namespace zen {
+
+class FVirtualWinApiHooks
+{
+public:
+ static void Initialize(bool bInLight);
+
+private:
+ FVirtualWinApiHooks();
+ static bool bLight;
+ static LPVOID WINAPI VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
+ static LPVOID WINAPI VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+ static PVOID WINAPI VmAlloc2(HANDLE Process,
+ PVOID BaseAddress,
+ SIZE_T Size,
+ ULONG AllocationType,
+ ULONG PageProtection,
+ MEM_EXTENDED_PARAMETER* ExtendedParameters,
+ ULONG ParameterCount);
+ static PVOID(WINAPI* VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+ typedef PVOID(__stdcall* FnVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+# else
+ static LPVOID WINAPI VmAlloc2(HANDLE Process,
+ LPVOID BaseAddress,
+ SIZE_T Size,
+ ULONG AllocationType,
+ ULONG PageProtection,
+ void* ExtendedParameters,
+ ULONG ParameterCount);
+ static LPVOID(WINAPI* VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+ typedef LPVOID(__stdcall* FnVirtualAlloc2)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+# endif
+ static BOOL WINAPI VmFree(LPVOID Address, SIZE_T Size, DWORD Type);
+ static BOOL WINAPI VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type);
+ static LPVOID(WINAPI* VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
+ static LPVOID(WINAPI* VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
+ static BOOL(WINAPI* VmFreeOrig)(LPVOID, SIZE_T, DWORD);
+ static BOOL(WINAPI* VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
+};
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/sharedbuffer.cpp b/src/zencore/sharedbuffer.cpp
index 993ca40e6..78efb9d42 100644
--- a/src/zencore/sharedbuffer.cpp
+++ b/src/zencore/sharedbuffer.cpp
@@ -2,6 +2,7 @@
#include <zencore/except.h>
#include <zencore/fmtutils.h>
+#include <zencore/memory/memory.h>
#include <zencore/sharedbuffer.h>
#include <zencore/testing.h>
diff --git a/src/zencore/stats.cpp b/src/zencore/stats.cpp
index 7c1a9e086..6be16688b 100644
--- a/src/zencore/stats.cpp
+++ b/src/zencore/stats.cpp
@@ -3,9 +3,11 @@
#include "zencore/stats.h"
#include <zencore/compactbinarybuilder.h>
-#include "zencore/intmath.h"
-#include "zencore/thread.h"
-#include "zencore/timer.h"
+#include <zencore/intmath.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/tagtrace.h>
+#include <zencore/thread.h>
+#include <zencore/timer.h>
#include <cmath>
#include <gsl/gsl-lite.hpp>
@@ -222,8 +224,10 @@ thread_local xoshiro256 ThreadLocalRng;
//////////////////////////////////////////////////////////////////////////
-UniformSample::UniformSample(uint32_t ReservoirSize) : m_Values(ReservoirSize)
+UniformSample::UniformSample(uint32_t ReservoirSize)
{
+ UE_MEMSCOPE(ELLMTag::Metrics);
+ m_Values = std::vector<std::atomic<int64_t>>(ReservoirSize);
}
UniformSample::~UniformSample()
@@ -273,6 +277,8 @@ UniformSample::Update(int64_t Value)
SampleSnapshot
UniformSample::Snapshot() const
{
+ UE_MEMSCOPE(ELLMTag::Metrics);
+
uint64_t ValuesSize = Size();
std::vector<double> Values(ValuesSize);
diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp
index ad6ee78fc..263c49f7e 100644
--- a/src/zencore/string.cpp
+++ b/src/zencore/string.cpp
@@ -1,6 +1,7 @@
// Copyright Epic Games, Inc. All Rights Reserved.
#include <zencore/memory.h>
+#include <zencore/memory/memory.h>
#include <zencore/string.h>
#include <zencore/testing.h>
diff --git a/src/zencore/system.cpp b/src/zencore/system.cpp
index f51273e0d..f37bdf423 100644
--- a/src/zencore/system.cpp
+++ b/src/zencore/system.cpp
@@ -4,6 +4,7 @@
#include <zencore/compactbinarybuilder.h>
#include <zencore/except.h>
+#include <zencore/memory/memory.h>
#include <zencore/string.h>
#if ZEN_PLATFORM_WINDOWS
diff --git a/src/zencore/trace.cpp b/src/zencore/trace.cpp
index f7e4c4b68..ef7cbf596 100644
--- a/src/zencore/trace.cpp
+++ b/src/zencore/trace.cpp
@@ -4,10 +4,86 @@
# include <zencore/config.h>
# include <zencore/zencore.h>
+# include <zencore/commandline.h>
+# include <zencore/string.h>
+# include <zencore/logging.h>
# define TRACE_IMPLEMENT 1
# include <zencore/trace.h>
+# include <zencore/memory/memorytrace.h>
+
+namespace zen {
+
+void
+TraceConfigure()
+{
+ // Configure channels based on command line options
+
+ using namespace std::literals;
+
+ constexpr std::string_view TraceOption = "--trace="sv;
+
+ std::function<void(const std::string_view&)> ProcessChannelList;
+
+ auto ProcessTraceArg = [&](const std::string_view& Arg) {
+ if (Arg == "default"sv)
+ {
+ ProcessChannelList("cpu,log"sv);
+ }
+ else if (Arg == "memory"sv)
+ {
+ ProcessChannelList("memtag,memalloc,callstack,module"sv);
+ }
+ else if (Arg == "memory_light"sv)
+ {
+ ProcessChannelList("memtag,memalloc"sv);
+ }
+ else if (Arg == "memtag"sv)
+ {
+ // memtag actually traces to the memalloc channel
+ ProcessChannelList("memalloc"sv);
+ }
+ else
+ {
+ // Presume that the argument is a trace channel name
+
+ StringBuilder<128> AnsiChannel;
+ AnsiChannel << Arg;
+
+ const bool IsEnabled = trace::ToggleChannel(AnsiChannel.c_str(), true);
+
+ if (IsEnabled == false)
+ {
+ // Logging here could be iffy, but we might want some other feedback mechanism here
+ // to indicate to users that they're not getting what they might be expecting
+ }
+ }
+ };
+
+ ProcessChannelList = [&](const std::string_view& OptionArgs) { IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg); };
+
+ bool TraceOptionPresent = false;
+
+ std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+ if (Arg.starts_with(TraceOption))
+ {
+ const std::string_view OptionArgs = Arg.substr(TraceOption.size());
+
+ TraceOptionPresent = true;
+
+ ProcessChannelList(OptionArgs);
+ }
+ };
+
+ IterateCommandlineArgs(ProcessArg);
+
+ if (!TraceOptionPresent)
+ {
+ ProcessTraceArg("default"sv);
+ }
+}
+
void
TraceInit(std::string_view ProgramName)
{
@@ -38,6 +114,16 @@ TraceInit(std::string_view ProgramName)
# endif
CommandLineString,
ZEN_CFG_VERSION_BUILD_STRING);
+
+ atexit([] {
+# if ZEN_WITH_MEMTRACK
+ zen::MemoryTrace_Shutdown();
+# endif
+ trace::Update();
+ TraceShutdown();
+ });
+
+ TraceConfigure();
}
void
@@ -70,13 +156,11 @@ TraceStart(std::string_view ProgramName, const char* HostOrPath, TraceType Type)
case TraceType::None:
break;
}
- trace::ToggleChannel("cpu", true);
}
bool
TraceStop()
{
- trace::ToggleChannel("cpu", false);
if (trace::Stop())
{
return true;
@@ -84,4 +168,6 @@ TraceStop()
return false;
}
+} // namespace zen
+
#endif // ZEN_WITH_TRACE
diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua
index 5f2d95e16..21b47b484 100644
--- a/src/zencore/xmake.lua
+++ b/src/zencore/xmake.lua
@@ -3,6 +3,7 @@
target('zencore')
set_kind("static")
set_group("libs")
+ add_options("zentrace", "zenmimalloc", "zenrpmalloc")
add_headerfiles("**.h")
add_configfiles("include/zencore/config.h.in")
on_load(function (target)
@@ -12,10 +13,25 @@ target('zencore')
end)
set_configdir("include/zencore")
add_files("**.cpp")
+
+ if has_config("zenrpmalloc") then
+ set_languages("c17", "cxx20")
+ if is_os("windows") then
+ add_cflags("/experimental:c11atomics")
+ end
+ add_defines("RPMALLOC_FIRST_CLASS_HEAPS=1", "ENABLE_STATISTICS=1", "ENABLE_OVERRIDE=0")
+ add_files("$(projectdir)/thirdparty/rpmalloc/rpmalloc.c")
+ end
+
+ if has_config("zenmimalloc") then
+ add_packages("vcpkg::mimalloc")
+ end
+
add_includedirs("include", {public=true})
add_includedirs("$(projectdir)/thirdparty/utfcpp/source")
add_includedirs("$(projectdir)/thirdparty/Oodle/include")
add_includedirs("$(projectdir)/thirdparty/trace", {public=true})
+ add_includedirs("$(projectdir)/thirdparty/rpmalloc")
if is_os("windows") then
add_linkdirs("$(projectdir)/thirdparty/Oodle/lib/Win64")
add_links("oo2core_win64")
@@ -27,14 +43,12 @@ target('zencore')
add_linkdirs("$(projectdir)/thirdparty/Oodle/lib/Mac_x64")
add_links("oo2coremac64")
end
- add_options("zentrace")
add_deps("zenbase")
add_packages(
"vcpkg::blake3",
"vcpkg::json11",
"vcpkg::ryml",
"vcpkg::c4core",
- "vcpkg::mimalloc",
"vcpkg::openssl", -- required for crypto
"vcpkg::spdlog")
diff --git a/src/zenhttp-test/zenhttp-test.cpp b/src/zenhttp-test/zenhttp-test.cpp
index 440e85a9f..49db1ba54 100644
--- a/src/zenhttp-test/zenhttp-test.cpp
+++ b/src/zenhttp-test/zenhttp-test.cpp
@@ -2,14 +2,9 @@
#include <zencore/filesystem.h>
#include <zencore/logging.h>
+#include <zencore/memory/newdelete.h>
#include <zenhttp/zenhttp.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
# include <zencore/testing.h>
@@ -18,9 +13,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zenhttp_forcelinktests();
diff --git a/src/zennet-test/zennet-test.cpp b/src/zennet-test/zennet-test.cpp
index f7f54e6ad..482d3c617 100644
--- a/src/zennet-test/zennet-test.cpp
+++ b/src/zennet-test/zennet-test.cpp
@@ -4,11 +4,7 @@
#include <zencore/logging.h>
#include <zennet/zennet.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char** argv)
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zennet_forcelinktests();
diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp
index ca2257361..e3f701be1 100644
--- a/src/zenserver-test/zenserver-test.cpp
+++ b/src/zenserver-test/zenserver-test.cpp
@@ -54,11 +54,7 @@ ZEN_THIRD_PARTY_INCLUDES_END
# include <process.h>
#endif
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
//////////////////////////////////////////////////////////////////////////
@@ -101,9 +97,6 @@ zen::ZenServerEnvironment TestEnv;
int
main(int argc, char** argv)
{
-# if ZEN_USE_MIMALLOC
- mi_version();
-# endif
using namespace std::literals;
using namespace zen;
diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp
index bedab7049..0108e8b9f 100644
--- a/src/zenserver/config.cpp
+++ b/src/zenserver/config.cpp
@@ -593,6 +593,9 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions)
options.add_options()("detach",
"Indicate whether zenserver should detach from parent process group",
cxxopts::value<bool>(ServerOptions.Detach)->default_value("true"));
+ options.add_options()("malloc",
+ "Configure memory allocator subsystem",
+ cxxopts::value(ServerOptions.MemoryOptions)->default_value("mimalloc"));
// clang-format off
options.add_options("logging")
@@ -713,6 +716,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions)
#if ZEN_WITH_TRACE
options.add_option("ue-trace",
"",
+ "trace",
+ "Specify which trace channels should be enabled",
+ cxxopts::value<std::string>(ServerOptions.TraceChannels)->default_value(""),
+ "");
+
+ options.add_option("ue-trace",
+ "",
"tracehost",
"Hostname to send the trace to",
cxxopts::value<std::string>(ServerOptions.TraceHost)->default_value(""),
diff --git a/src/zenserver/config.h b/src/zenserver/config.h
index 5c56695f3..c7781aada 100644
--- a/src/zenserver/config.h
+++ b/src/zenserver/config.h
@@ -176,9 +176,11 @@ struct ZenServerOptions
std::string Loggers[zen::logging::level::LogLevelCount];
std::string ScrubOptions;
#if ZEN_WITH_TRACE
- std::string TraceHost; // Host name or IP address to send trace data to
- std::string TraceFile; // Path of a file to write a trace
+ std::string TraceChannels; // Trace channels to enable
+ std::string TraceHost; // Host name or IP address to send trace data to
+ std::string TraceFile; // Path of a file to write a trace
#endif
+ std::string MemoryOptions; // Memory allocation options
std::string CommandLine;
};
diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp
index 595be70cb..0d96cd8d6 100644
--- a/src/zenserver/diag/logging.cpp
+++ b/src/zenserver/diag/logging.cpp
@@ -6,6 +6,7 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
+#include <zencore/memory/llm.h>
#include <zencore/session.h>
#include <zencore/string.h>
#include <zenutil/logging.h>
@@ -20,6 +21,8 @@ namespace zen {
void
InitializeServerLogging(const ZenServerOptions& InOptions)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
const LoggingOptions LogOptions = {.IsDebug = InOptions.IsDebug,
.IsVerbose = false,
.IsTest = InOptions.IsTest,
@@ -79,6 +82,8 @@ InitializeServerLogging(const ZenServerOptions& InOptions)
void
ShutdownServerLogging()
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
zen::ShutdownLogging();
}
diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp
index 2fb01ebf1..4444241cc 100644
--- a/src/zenserver/main.cpp
+++ b/src/zenserver/main.cpp
@@ -17,16 +17,15 @@
#include <zencore/trace.h>
#include <zenhttp/httpserver.h>
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memory.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/newdelete.h>
+
#include "config.h"
#include "diag/logging.h"
#include "sentryintegration.h"
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
#if ZEN_PLATFORM_WINDOWS
# include <zencore/windows.h>
# include "windows/service.h"
@@ -354,9 +353,6 @@ test_main(int argc, char** argv)
int
main(int argc, char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
using namespace zen;
if (argc >= 2)
@@ -433,9 +429,17 @@ main(int argc, char* argv[])
{
TraceInit("zenserver");
}
- atexit(TraceShutdown);
#endif // ZEN_WITH_TRACE
+#if ZEN_WITH_MEMTRACK
+ FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc);
+ if (TraceMalloc != GMalloc)
+ {
+ GMalloc = TraceMalloc;
+ MemoryTrace_Initialize();
+ }
+#endif
+
#if ZEN_PLATFORM_WINDOWS
if (ServerOptions.InstallService)
{
diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp
index 3ad9e620b..e5b312984 100644
--- a/src/zenstore-test/zenstore-test.cpp
+++ b/src/zenstore-test/zenstore-test.cpp
@@ -4,11 +4,7 @@
#include <zencore/logging.h>
#include <zenstore/zenstore.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zenstore_forcelinktests();
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 2031804c9..62ed44bbb 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -7,7 +7,7 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
-#include <zencore/memory.h>
+#include <zencore/memory/memory.h>
#include <zencore/scopeguard.h>
#include <zencore/string.h>
#include <zencore/testing.h>
diff --git a/src/zenutil-test/zenutil-test.cpp b/src/zenutil-test/zenutil-test.cpp
index f95b7e888..fadaf0995 100644
--- a/src/zenutil-test/zenutil-test.cpp
+++ b/src/zenutil-test/zenutil-test.cpp
@@ -4,11 +4,7 @@
#include <zencore/logging.h>
#include <zenutil/zenutil.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zenutil_forcelinktests();
diff --git a/src/zenutil/basicfile.cpp b/src/zenutil/basicfile.cpp
index 73f27b587..391c150c6 100644
--- a/src/zenutil/basicfile.cpp
+++ b/src/zenutil/basicfile.cpp
@@ -6,6 +6,7 @@
#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
+#include <zencore/memory/memory.h>
#include <zencore/testing.h>
#include <zencore/testutils.h>
diff --git a/thirdparty/rpmalloc/malloc.c b/thirdparty/rpmalloc/malloc.c
new file mode 100644
index 000000000..835eff18e
--- /dev/null
+++ b/thirdparty/rpmalloc/malloc.c
@@ -0,0 +1,367 @@
+/* malloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc implementation in C11.
+ * The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
+ *
+ */
+
+/* clang-format off */
+
+//
+// This file provides overrides for the standard library malloc entry points for C and new/delete operators for C++
+// It also provides automatic initialization/finalization of process and threads
+//
+
+#if defined(__TINYC__)
+#include <sys/types.h>
+#endif
+
+#if (defined(__GNUC__) || defined(__clang__))
+#pragma GCC visibility push(default)
+#endif
+
+#define USE_IMPLEMENT 1
+#define USE_INTERPOSE 0
+#define USE_ALIAS 0
+
+#if defined(__APPLE__)
+#undef USE_INTERPOSE
+#define USE_INTERPOSE 1
+
+typedef struct interpose_t {
+ void* new_func;
+ void* orig_func;
+} interpose_t;
+
+#define MAC_INTERPOSE_PAIR(newf, oldf) { (void*)newf, (void*)oldf }
+#define MAC_INTERPOSE_SINGLE(newf, oldf) \
+__attribute__((used)) static const interpose_t macinterpose##newf##oldf \
+__attribute__ ((section("__DATA, __interpose"))) = MAC_INTERPOSE_PAIR(newf, oldf)
+
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#undef USE_IMPLEMENT
+#undef USE_ALIAS
+#define USE_IMPLEMENT 0
+#define USE_ALIAS 1
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable : 4100)
+#undef malloc
+#undef free
+#undef calloc
+#endif
+
+#if ENABLE_OVERRIDE
+
+typedef struct rp_nothrow_t { int __dummy; } rp_nothrow_t;
+
+#if USE_INTERPOSE || USE_ALIAS
+
+static void* rpmalloc_nothrow(size_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+static void* rpaligned_alloc_reverse(size_t size, size_t align) { return rpaligned_alloc(align, size); }
+static void* rpaligned_alloc_reverse_nothrow(size_t size, size_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+static void rpfree_size(void* p, size_t size) { (void)sizeof(size); rpfree(p); }
+static void rpfree_aligned(void* p, size_t align) { (void)sizeof(align); rpfree(p); }
+static void rpfree_size_aligned(void* p, size_t size, size_t align) { (void)sizeof(size); (void)sizeof(align); rpfree(p); }
+
+#endif
+
+extern inline void* RPMALLOC_CDECL rpvalloc(size_t size) {
+ return rpaligned_alloc(os_page_size, size);
+}
+
+extern inline void* RPMALLOC_CDECL
+rppvalloc(size_t size) {
+ const size_t page_size = os_page_size;
+ const size_t aligned_size = ((size + page_size - 1) / page_size) * page_size;
+#if ENABLE_VALIDATE_ARGS
+ if (aligned_size < size) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return rpaligned_alloc(os_page_size, aligned_size);
+}
+
+extern inline void* RPMALLOC_CDECL
+rpreallocarray(void* ptr, size_t count, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#ifdef _MSC_VER
+ int err = SizeTMult(count, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(count, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = count * size;
+#endif
+ return rprealloc(ptr, total);
+}
+
+#if USE_IMPLEMENT
+
+extern inline void* RPMALLOC_CDECL malloc(size_t size) { return rpmalloc(size); }
+extern inline void* RPMALLOC_CDECL calloc(size_t count, size_t size) { return rpcalloc(count, size); }
+extern inline void* RPMALLOC_CDECL realloc(void* ptr, size_t size) { return rprealloc(ptr, size); }
+extern inline void* RPMALLOC_CDECL reallocf(void* ptr, size_t size) { return rprealloc(ptr, size); }
+extern inline void* RPMALLOC_CDECL aligned_alloc(size_t alignment, size_t size) { return rpaligned_alloc(alignment, size); }
+extern inline void* RPMALLOC_CDECL memalign(size_t alignment, size_t size) { return rpmemalign(alignment, size); }
+extern inline int RPMALLOC_CDECL posix_memalign(void** memptr, size_t alignment, size_t size) { return rpposix_memalign(memptr, alignment, size); }
+extern inline void RPMALLOC_CDECL free(void* ptr) { rpfree(ptr); }
+extern inline void RPMALLOC_CDECL cfree(void* ptr) { rpfree(ptr); }
+extern inline size_t RPMALLOC_CDECL malloc_usable_size(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline size_t RPMALLOC_CDECL malloc_size(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline void* RPMALLOC_CDECL valloc(size_t size) { return rpvalloc(size); }
+extern inline void* RPMALLOC_CDECL pvalloc(size_t size) { return rppvalloc(size); }
+extern inline void* RPMALLOC_CDECL reallocarray(void* ptr, size_t count, size_t size) { return rpreallocarray(ptr, count, size); }
+
+#ifdef _WIN32
+extern inline void* RPMALLOC_CDECL _malloc_base(size_t size) { return rpmalloc(size); }
+extern inline void RPMALLOC_CDECL _free_base(void* ptr) { rpfree(ptr); }
+extern inline void* RPMALLOC_CDECL _calloc_base(size_t count, size_t size) { return rpcalloc(count, size); }
+extern inline size_t RPMALLOC_CDECL _msize(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline size_t RPMALLOC_CDECL _msize_base(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline void* RPMALLOC_CDECL _realloc_base(void* ptr, size_t size) { return rprealloc(ptr, size); }
+#endif
+
+#ifdef _WIN32
+// For Windows, #include <rpnew.h> in one source file to get the C++ operator overrides implemented in your module
+#else
+// Overload the C++ operators using the mangled names (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
+// operators delete and delete[]
+#define RPDEFVIS __attribute__((visibility("default")))
+extern void _ZdlPv(void* p); void RPDEFVIS _ZdlPv(void* p) { rpfree(p); }
+extern void _ZdaPv(void* p); void RPDEFVIS _ZdaPv(void* p) { rpfree(p); }
+#if ARCH_64BIT
+// 64-bit operators new and new[], normal and aligned
+extern void* _Znwm(uint64_t size); void* RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); }
+extern void* _Znam(uint64_t size); void* RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); }
+extern void* _Znwmm(uint64_t size, uint64_t align); void* RPDEFVIS _Znwmm(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _Znamm(uint64_t size, uint64_t align); void* RPDEFVIS _Znamm(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwmSt11align_val_t(uint64_t size, uint64_t align); void* RPDEFVIS _ZnwmSt11align_val_t(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnamSt11align_val_t(uint64_t size, uint64_t align); void* RPDEFVIS _ZnamSt11align_val_t(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); void* RPDEFVIS _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); void* RPDEFVIS _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t); void* RPDEFVIS _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+extern void* _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t); void* RPDEFVIS _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+// 64-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvm(void* p, uint64_t size); void RPDEFVIS _ZdlPvm(void* p, uint64_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdaPvm(void* p, uint64_t size); void RPDEFVIS _ZdaPvm(void* p, uint64_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdlPvSt11align_val_t(void* p, uint64_t align); void RPDEFVIS _ZdlPvSt11align_val_t(void* p, uint64_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdaPvSt11align_val_t(void* p, uint64_t align); void RPDEFVIS _ZdaPvSt11align_val_t(void* p, uint64_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdlPvmSt11align_val_t(void* p, uint64_t size, uint64_t align); void RPDEFVIS _ZdlPvmSt11align_val_t(void* p, uint64_t size, uint64_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+extern void _ZdaPvmSt11align_val_t(void* p, uint64_t size, uint64_t align); void RPDEFVIS _ZdaPvmSt11align_val_t(void* p, uint64_t size, uint64_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+#else
+// 32-bit operators new and new[], normal and aligned
+extern void* _Znwj(uint32_t size); void* RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); }
+extern void* _Znaj(uint32_t size); void* RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); }
+extern void* _Znwjj(uint32_t size, uint32_t align); void* RPDEFVIS _Znwjj(uint32_t size, uint32_t align) { return rpaligned_alloc(align, size); }
+extern void* _Znajj(uint32_t size, uint32_t align); void* RPDEFVIS _Znajj(uint32_t size, uint32_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwjSt11align_val_t(size_t size, size_t align); void* RPDEFVIS _ZnwjSt11align_val_t(size_t size, size_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnajSt11align_val_t(size_t size, size_t align); void* RPDEFVIS _ZnajSt11align_val_t(size_t size, size_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t); void* RPDEFVIS _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t); void* RPDEFVIS _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t); void* RPDEFVIS _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+extern void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t); void* RPDEFVIS _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+// 32-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvj(void* p, uint32_t size); void RPDEFVIS _ZdlPvj(void* p, uint32_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdaPvj(void* p, uint32_t size); void RPDEFVIS _ZdaPvj(void* p, uint32_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdlPvSt11align_val_t(void* p, uint32_t align); void RPDEFVIS _ZdlPvSt11align_val_t(void* p, uint32_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdaPvSt11align_val_t(void* p, uint32_t align); void RPDEFVIS _ZdaPvSt11align_val_t(void* p, uint32_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdlPvjSt11align_val_t(void* p, uint32_t size, uint32_t align); void RPDEFVIS _ZdlPvjSt11align_val_t(void* p, uint32_t size, uint32_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+extern void _ZdaPvjSt11align_val_t(void* p, uint32_t size, uint32_t align); void RPDEFVIS _ZdaPvjSt11align_val_t(void* p, uint32_t size, uint32_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+#endif
+#endif
+#endif
+
+#if USE_INTERPOSE
+
+__attribute__((used)) static const interpose_t macinterpose_malloc[]
+__attribute__ ((section("__DATA, __interpose"))) = {
+ //new and new[]
+ MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
+ MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znwmm),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znamm),
+ MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t),
+ MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnwmSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnamSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, _ZnwmSt11align_val_tRKSt9nothrow_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, _ZnamSt11align_val_tRKSt9nothrow_t),
+ //delete and delete[]
+ MAC_INTERPOSE_PAIR(rpfree, _ZdlPv),
+ MAC_INTERPOSE_PAIR(rpfree, _ZdaPv),
+ MAC_INTERPOSE_PAIR(rpfree_size, _ZdlPvm),
+ MAC_INTERPOSE_PAIR(rpfree_size, _ZdaPvm),
+ MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdlPvSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdaPvSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdlPvmSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdaPvmSt11align_val_t),
+ //libc entry points
+ MAC_INTERPOSE_PAIR(rpmalloc, malloc),
+ MAC_INTERPOSE_PAIR(rpmalloc, calloc),
+ MAC_INTERPOSE_PAIR(rprealloc, realloc),
+ MAC_INTERPOSE_PAIR(rprealloc, reallocf),
+ MAC_INTERPOSE_PAIR(rpvalloc, valloc),
+#if defined(__MAC_10_15) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_15
+ MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc),
+#endif
+ MAC_INTERPOSE_PAIR(rpmemalign, memalign),
+ MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign),
+ MAC_INTERPOSE_PAIR(rpfree, free),
+ MAC_INTERPOSE_PAIR(rpfree, cfree),
+ MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
+ MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)
+};
+
+#endif
+
+#if USE_ALIAS
+
+#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default")));
+
+// Alias the C++ operators using the mangled names (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
+
+// operators delete and delete[]
+void _ZdlPv(void* p) RPALIAS(rpfree)
+void _ZdaPv(void* p) RPALIAS(rpfree)
+
+#if ARCH_64BIT
+// 64-bit operators new and new[], normal and aligned
+void* _Znwm(uint64_t size) RPALIAS(rpmalloc)
+void* _Znam(uint64_t size) RPALIAS(rpmalloc)
+void* _Znwmm(uint64_t size, uint64_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _Znamm(uint64_t size, uint64_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwmSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnamSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwmRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnamRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+// 64-bit operators delete and delete[], sized and aligned
+void _ZdlPvm(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdaPvm(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdlPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdaPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+#else
+// 32-bit operators new and new[], normal and aligned
+void* _Znwj(uint32_t size) RPALIAS(rpmalloc)
+void* _Znaj(uint32_t size) RPALIAS(rpmalloc)
+void* _Znwjj(uint32_t size, uint32_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _Znajj(uint32_t size, uint32_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwjSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnajSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+// 32-bit operators delete and delete[], sized and aligned
+void _ZdlPvj(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdaPvj(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdlPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdaPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdlPvjSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+void _ZdaPvjSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+#endif
+
+void* malloc(size_t size) RPALIAS(rpmalloc)
+void* calloc(size_t count, size_t size) RPALIAS(rpcalloc)
+void* realloc(void* ptr, size_t size) RPALIAS(rprealloc)
+void* reallocf(void* ptr, size_t size) RPALIAS(rprealloc)
+void* aligned_alloc(size_t alignment, size_t size) RPALIAS(rpaligned_alloc)
+void* memalign(size_t alignment, size_t size) RPALIAS(rpmemalign)
+int posix_memalign(void** memptr, size_t alignment, size_t size) RPALIAS(rpposix_memalign)
+void free(void* ptr) RPALIAS(rpfree)
+void cfree(void* ptr) RPALIAS(rpfree)
+void* reallocarray(void* ptr, size_t count, size_t size) RPALIAS(rpreallocarray)
+void* valloc(size_t size) RPALIAS(rpvalloc)
+void* pvalloc(size_t size) RPALIAS(rppvalloc)
+#if defined(__ANDROID__) || defined(__FreeBSD__)
+size_t malloc_usable_size(const void* ptr) RPALIAS(rpmalloc_usable_size)
+#else
+size_t malloc_usable_size(void* ptr) RPALIAS(rpmalloc_usable_size)
+#endif
+size_t malloc_size(void* ptr) RPALIAS(rpmalloc_usable_size)
+
+// end USE_ALIAS
+#endif
+
+#if defined(__GLIBC__) && defined(__linux__) && 0
+
+void* __libc_malloc(size_t size) RPALIAS(rpmalloc)
+void* __libc_calloc(size_t count, size_t size) RPALIAS(rpcalloc)
+void* __libc_realloc(void* p, size_t size) RPALIAS(rprealloc)
+void __libc_free(void* p) RPALIAS(rpfree)
+void __libc_cfree(void* p) RPALIAS(rpfree)
+
+extern void* __libc_valloc(size_t size);
+extern void* __libc_pvalloc(size_t size);
+
+void*
+__libc_valloc(size_t size) {
+ return valloc(size);
+}
+
+void*
+__libc_pvalloc(size_t size) {
+ return pvalloc(size);
+}
+void* __libc_memalign(size_t align, size_t size) RPALIAS(rpmemalign)
+
+int __posix_memalign(void** p, size_t align, size_t size) RPALIAS(rpposix_memalign)
+
+#endif
+
+// end ENABLE_OVERRIDE
+#endif
+
+#if ENABLE_DYNAMIC_LINK
+
+#ifdef _WIN32
+
+extern __declspec(dllexport) BOOL WINAPI
+DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved);
+
+extern __declspec(dllexport) BOOL WINAPI
+DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved) {
+ (void)sizeof(reserved);
+ (void)sizeof(instance);
+ if (reason == DLL_PROCESS_ATTACH)
+ rpmalloc_initialize(0);
+ else if (reason == DLL_PROCESS_DETACH)
+ rpmalloc_finalize();
+ else if (reason == DLL_THREAD_ATTACH)
+ rpmalloc_thread_initialize();
+ else if (reason == DLL_THREAD_DETACH)
+ rpmalloc_thread_finalize();
+ return TRUE;
+}
+
+#endif
+
+// end ENABLE_DYNAMIC_LINK
+#endif
+
+#if (defined(__GNUC__) || defined(__clang__))
+#pragma GCC visibility pop
+#endif
+
+/* clang-format on */
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c
new file mode 100644
index 000000000..7aecfb0f4
--- /dev/null
+++ b/thirdparty/rpmalloc/rpmalloc.c
@@ -0,0 +1,2341 @@
+/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias
+ * Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc
+ * implementation in C11. The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or
+ * modify it without any restrictions.
+ *
+ */
+
+#include "rpmalloc.h"
+
+#include <errno.h>
+#include <string.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdatomic.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wunused-macros"
+#pragma clang diagnostic ignored "-Wunused-function"
+#if __has_warning("-Wreserved-identifier")
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
+#if __has_warning("-Wstatic-in-inline")
+#pragma clang diagnostic ignored "-Wstatic-in-inline"
+#endif
+#if __has_warning("-Wunsafe-buffer-usage")
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
+#define PLATFORM_WINDOWS 1
+#define PLATFORM_POSIX 0
+#else
+#define PLATFORM_WINDOWS 0
+#define PLATFORM_POSIX 1
+#endif
+
+#if defined(_MSC_VER)
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE __attribute__((noinline))
+#endif
+
+#if PLATFORM_WINDOWS
+#include <windows.h>
+#include <fibersapi.h>
+static DWORD fls_key;
+#endif
+#if PLATFORM_POSIX
+#include <sys/mman.h>
+#include <sched.h>
+#include <unistd.h>
+#include <pthread.h>
+static pthread_key_t pthread_key;
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#define MAP_HUGETLB MAP_ALIGNED_SUPER
+#ifndef PROT_MAX
+#define PROT_MAX(f) 0
+#endif
+#else
+#define PROT_MAX(f) 0
+#endif
+#ifdef __sun
+extern int
+madvise(caddr_t, size_t, int);
+#endif
+#ifndef MAP_UNINITIALIZED
+#define MAP_UNINITIALIZED 0
+#endif
+#endif
+
+#if defined(__linux__) || defined(__ANDROID__)
+#include <sys/prctl.h>
+#if !defined(PR_SET_VMA)
+#define PR_SET_VMA 0x53564d41
+#define PR_SET_VMA_ANON_NAME 0
+#endif
+#endif
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+#include <mach/mach_vm.h>
+#include <mach/vm_statistics.h>
+#endif
+#include <pthread.h>
+#endif
+#if defined(__HAIKU__) || defined(__TINYC__)
+#include <pthread.h>
+#endif
+
+#include <limits.h>
+#if (INTPTR_MAX > INT32_MAX)
+#define ARCH_64BIT 1
+#define ARCH_32BIT 0
+#else
+#define ARCH_64BIT 0
+#define ARCH_32BIT 1
+#endif
+
+#if !defined(__has_builtin)
+#define __has_builtin(b) 0
+#endif
+
+#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
+#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
+
+////////////
+///
+/// Build time configurable limits
+///
+//////
+
+#ifndef ENABLE_VALIDATE_ARGS
+//! Enable validation of args to public entry points
+#define ENABLE_VALIDATE_ARGS 0
+#endif
+#ifndef ENABLE_ASSERTS
+//! Enable asserts
+#define ENABLE_ASSERTS 0
+#endif
+#ifndef ENABLE_UNMAP
+//! Enable unmapping memory pages
+#define ENABLE_UNMAP 1
+#endif
+#ifndef ENABLE_DECOMMIT
+//! Enable decommitting memory pages
+#define ENABLE_DECOMMIT 1
+#endif
+#ifndef ENABLE_DYNAMIC_LINK
+//! Enable building as dynamic library
+#define ENABLE_DYNAMIC_LINK 0
+#endif
+#ifndef ENABLE_OVERRIDE
+//! Enable standard library malloc/free/new/delete overrides
+#define ENABLE_OVERRIDE 1
+#endif
+#ifndef ENABLE_STATISTICS
+//! Enable statistics
+#define ENABLE_STATISTICS 0
+#endif
+
+////////////
+///
+/// Built in size configurations
+///
+//////
+
+#define PAGE_HEADER_SIZE 128
+#define SPAN_HEADER_SIZE PAGE_HEADER_SIZE
+
+#define SMALL_GRANULARITY 16
+
+#define SMALL_BLOCK_SIZE_LIMIT (4 * 1024)
+#define MEDIUM_BLOCK_SIZE_LIMIT (256 * 1024)
+#define LARGE_BLOCK_SIZE_LIMIT (8 * 1024 * 1024)
+
+#define SMALL_SIZE_CLASS_COUNT 73
+#define MEDIUM_SIZE_CLASS_COUNT 24
+#define LARGE_SIZE_CLASS_COUNT 20
+#define SIZE_CLASS_COUNT (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT + LARGE_SIZE_CLASS_COUNT)
+
+#define SMALL_PAGE_SIZE_SHIFT 16
+#define SMALL_PAGE_SIZE (1 << SMALL_PAGE_SIZE_SHIFT)
+#define SMALL_PAGE_MASK (~((uintptr_t)SMALL_PAGE_SIZE - 1))
+#define MEDIUM_PAGE_SIZE_SHIFT 22
+#define MEDIUM_PAGE_SIZE (1 << MEDIUM_PAGE_SIZE_SHIFT)
+#define MEDIUM_PAGE_MASK (~((uintptr_t)MEDIUM_PAGE_SIZE - 1))
+#define LARGE_PAGE_SIZE_SHIFT 26
+#define LARGE_PAGE_SIZE (1 << LARGE_PAGE_SIZE_SHIFT)
+#define LARGE_PAGE_MASK (~((uintptr_t)LARGE_PAGE_SIZE - 1))
+
+#define SPAN_SIZE (256 * 1024 * 1024)
+#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1)))
+
+////////////
+///
+/// Utility macros
+///
+//////
+
+#if ENABLE_ASSERTS
+#undef NDEBUG
+#if defined(_MSC_VER) && !defined(_DEBUG)
+#define _DEBUG
+#endif
+#include <assert.h>
+#define RPMALLOC_TOSTRING_M(x) #x
+#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
+#define rpmalloc_assert(truth, message) \
+ do { \
+ if (!(truth)) { \
+ assert((truth) && message); \
+ } \
+ } while (0)
+#else
+#define rpmalloc_assert(truth, message) \
+ do { \
+ } while (0)
+#endif
+
+#if __has_builtin(__builtin_assume)
+#define rpmalloc_assume(cond) __builtin_assume(cond)
+#elif defined(__GNUC__)
+#define rpmalloc_assume(cond) \
+ do { \
+ if (!__builtin_expect(cond, 0)) \
+ __builtin_unreachable(); \
+ } while (0)
+#elif defined(_MSC_VER)
+#define rpmalloc_assume(cond) __assume(cond)
+#else
+#define rpmalloc_assume(cond) 0
+#endif
+
+////////////
+///
+/// Statistics
+///
+//////
+
+#if ENABLE_STATISTICS
+
+typedef struct rpmalloc_statistics_t {
+ atomic_size_t page_mapped;
+ atomic_size_t page_mapped_peak;
+ atomic_size_t page_commit;
+ atomic_size_t page_decommit;
+ atomic_size_t page_active;
+ atomic_size_t page_active_peak;
+ atomic_size_t heap_count;
+} rpmalloc_statistics_t;
+
+static rpmalloc_statistics_t global_statistics;
+
+#else
+
+#endif
+
+////////////
+///
+/// Low level abstractions
+///
+//////
+
+static inline size_t
+rpmalloc_clz(uintptr_t x) {
+#if ARCH_64BIT
+#if defined(_MSC_VER) && !defined(__clang__)
+ return (size_t)_lzcnt_u64(x);
+#else
+ return (size_t)__builtin_clzll(x);
+#endif
+#else
+#if defined(_MSC_VER) && !defined(__clang__)
+ return (size_t)_lzcnt_u32(x);
+#else
+ return (size_t)__builtin_clzl(x);
+#endif
+#endif
+}
+
+static inline void
+wait_spin(void) {
+#if defined(_MSC_VER)
+#if defined(_M_ARM64)
+ __yield();
+#else
+ _mm_pause();
+#endif
+#elif defined(__x86_64__) || defined(__i386__)
+ __asm__ volatile("pause" ::: "memory");
+#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
+ __asm__ volatile("yield" ::: "memory");
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ // Unverified on these architectures, but included as a precaution
+ __asm__ volatile("or 27,27,27");
+#elif defined(__sparc__)
+ __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
+#else
+ struct timespec ts = {0};
+ nanosleep(&ts, 0);
+#endif
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+
+#define EXPECTED(x) __builtin_expect((x), 1)
+#define UNEXPECTED(x) __builtin_expect((x), 0)
+
+#else
+
+#define EXPECTED(x) x
+#define UNEXPECTED(x) x
+
+#endif
+#if defined(__GNUC__) || defined(__clang__)
+
+#if __has_builtin(__builtin_memcpy_inline)
+#define memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
+#else
+#define memcpy_const(x, y, s) \
+ do { \
+ _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \
+ memcpy(x, y, s); \
+ } while (0)
+#endif
+
+#if __has_builtin(__builtin_memset_inline)
+#define memset_const(x, y, s) __builtin_memset_inline(x, y, s)
+#else
+#define memset_const(x, y, s) \
+ do { \
+ _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \
+ memset(x, y, s); \
+ } while (0)
+#endif
+#else
+#define memcpy_const(x, y, s) memcpy(x, y, s)
+#define memset_const(x, y, s) memset(x, y, s)
+#endif
+
+////////////
+///
+/// Data types
+///
+//////
+
+//! A memory heap, per thread
+typedef struct heap_t heap_t;
+//! Span of memory pages
+typedef struct span_t span_t;
+//! Memory page
+typedef struct page_t page_t;
+//! Memory block
+typedef struct block_t block_t;
+//! Size class for a memory block
+typedef struct size_class_t size_class_t;
+
+//! Memory page type
+typedef enum page_type_t {
+ PAGE_SMALL, // 64KiB
+ PAGE_MEDIUM, // 4MiB
+ PAGE_LARGE, // 64MiB
+ PAGE_HUGE
+} page_type_t;
+
+//! Block size class
+struct size_class_t {
+ //! Size of blocks in this class
+ uint32_t block_size;
+ //! Number of blocks in each chunk
+ uint32_t block_count;
+};
+
+//! A memory block
+struct block_t {
+ //! Next block in list
+ block_t* next;
+};
+
+//! A page contains blocks of a given size
+struct page_t {
+ //! Size class of blocks
+ uint32_t size_class;
+ //! Block size
+ uint32_t block_size;
+ //! Block count
+ uint32_t block_count;
+ //! Block initialized count
+ uint32_t block_initialized;
+ //! Block used count
+ uint32_t block_used;
+ //! Page type
+ page_type_t page_type;
+ //! Flag set if part of heap full list
+ uint32_t is_full : 1;
+ //! Flag set if part of heap free list
+ uint32_t is_free : 1;
+ //! Flag set if blocks are zero initialized
+ uint32_t is_zero : 1;
+ //! Flag set if memory pages have been decommitted
+ uint32_t is_decommitted : 1;
+ //! Flag set if containing aligned blocks
+ uint32_t has_aligned_block : 1;
+ //! Combined fast-path flag: set if the page is huge, fully allocated, or has aligned blocks
+ uint32_t generic_free : 1;
+ //! Local free list count
+ uint32_t local_free_count;
+ //! Local free list
+ block_t* local_free;
+ //! Owning heap
+ heap_t* heap;
+ //! Next page in list
+ page_t* next;
+ //! Previous page in list
+ page_t* prev;
+ //! Multithreaded free list; block index is in the low 32 bits, list count in the high 32 bits
+ atomic_ullong thread_free;
+};
+
+//! A span contains pages of a given type
+struct span_t {
+ //! Page header
+ page_t page;
+ //! Owning heap
+ heap_t* heap;
+ //! Page address mask
+ uintptr_t page_address_mask;
+ //! Number of pages initialized
+ uint32_t page_initialized;
+ //! Number of pages in use
+ uint32_t page_count;
+ //! Number of bytes per page
+ uint32_t page_size;
+ //! Page type
+ page_type_t page_type;
+ //! Offset to start of mapped memory region
+ uint32_t offset;
+ //! Mapped size
+ uint64_t mapped_size;
+ //! Next span in list
+ span_t* next;
+};
+
+// Control structure for a heap, either a thread heap or a first class heap if enabled
+struct heap_t {
+ //! Owning thread ID
+ uintptr_t owner_thread;
+ //! Heap local free list for small size classes
+ block_t* local_free[SIZE_CLASS_COUNT];
+ //! Available non-full pages for each size class
+ page_t* page_available[SIZE_CLASS_COUNT];
+ //! Free pages for each page type
+ page_t* page_free[3];
+ //! Free but still committed page count for each page type
+ uint32_t page_free_commit_count[3];
+ //! Multithreaded free list
+ atomic_uintptr_t thread_free[3];
+ //! Available partially initialized spans for each page type
+ span_t* span_partial[3];
+ //! Spans in full use for each page type
+ span_t* span_used[4];
+ //! Next heap in queue
+ heap_t* next;
+ //! Previous heap in queue
+ heap_t* prev;
+ //! Heap ID
+ uint32_t id;
+ //! Finalization state flag
+ uint32_t finalize;
+ //! Memory map region offset
+ uint32_t offset;
+ //! Memory map size
+ size_t mapped_size;
+};
+
+_Static_assert(sizeof(page_t) <= PAGE_HEADER_SIZE, "Invalid page header size");
+_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "Invalid span header size");
+_Static_assert(sizeof(heap_t) <= 4096, "Invalid heap size");
+
+////////////
+///
+/// Global data
+///
+//////
+
+//! Fallback heap
+static RPMALLOC_CACHE_ALIGNED heap_t global_heap_fallback;
+//! Default heap
+static heap_t* global_heap_default = &global_heap_fallback;
+//! Available heaps
+static heap_t* global_heap_queue;
+//! In use heaps
+static heap_t* global_heap_used;
+//! Lock for heap queue
+static atomic_uintptr_t global_heap_lock;
+//! Heap ID counter
+static atomic_uint global_heap_id = 1;
+//! Initialized flag
+static int global_rpmalloc_initialized;
+//! Memory interface
+static rpmalloc_interface_t* global_memory_interface;
+//! Default memory interface
+static rpmalloc_interface_t global_memory_interface_default;
+//! Current configuration
+static rpmalloc_config_t global_config = {0};
+//! Main thread ID
+static uintptr_t global_main_thread_id;
+
+//! Size classes
+#define SCLASS(n) \
+ { (n * SMALL_GRANULARITY), (SMALL_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+#define MCLASS(n) \
+ { (n * SMALL_GRANULARITY), (MEDIUM_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+#define LCLASS(n) \
+ { (n * SMALL_GRANULARITY), (LARGE_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+static const size_class_t global_size_class[SIZE_CLASS_COUNT] = {
+ SCLASS(1), SCLASS(1), SCLASS(2), SCLASS(3), SCLASS(4), SCLASS(5), SCLASS(6),
+ SCLASS(7), SCLASS(8), SCLASS(9), SCLASS(10), SCLASS(11), SCLASS(12), SCLASS(13),
+ SCLASS(14), SCLASS(15), SCLASS(16), SCLASS(17), SCLASS(18), SCLASS(19), SCLASS(20),
+ SCLASS(21), SCLASS(22), SCLASS(23), SCLASS(24), SCLASS(25), SCLASS(26), SCLASS(27),
+ SCLASS(28), SCLASS(29), SCLASS(30), SCLASS(31), SCLASS(32), SCLASS(33), SCLASS(34),
+ SCLASS(35), SCLASS(36), SCLASS(37), SCLASS(38), SCLASS(39), SCLASS(40), SCLASS(41),
+ SCLASS(42), SCLASS(43), SCLASS(44), SCLASS(45), SCLASS(46), SCLASS(47), SCLASS(48),
+ SCLASS(49), SCLASS(50), SCLASS(51), SCLASS(52), SCLASS(53), SCLASS(54), SCLASS(55),
+ SCLASS(56), SCLASS(57), SCLASS(58), SCLASS(59), SCLASS(60), SCLASS(61), SCLASS(62),
+ SCLASS(63), SCLASS(64), SCLASS(80), SCLASS(96), SCLASS(112), SCLASS(128), SCLASS(160),
+ SCLASS(192), SCLASS(224), SCLASS(256), MCLASS(320), MCLASS(384), MCLASS(448), MCLASS(512),
+ MCLASS(640), MCLASS(768), MCLASS(896), MCLASS(1024), MCLASS(1280), MCLASS(1536), MCLASS(1792),
+ MCLASS(2048), MCLASS(2560), MCLASS(3072), MCLASS(3584), MCLASS(4096), MCLASS(5120), MCLASS(6144),
+ MCLASS(7168), MCLASS(8192), MCLASS(10240), MCLASS(12288), MCLASS(14336), MCLASS(16384), LCLASS(20480),
+ LCLASS(24576), LCLASS(28672), LCLASS(32768), LCLASS(40960), LCLASS(49152), LCLASS(57344), LCLASS(65536),
+ LCLASS(81920), LCLASS(98304), LCLASS(114688), LCLASS(131072), LCLASS(163840), LCLASS(196608), LCLASS(229376),
+ LCLASS(262144), LCLASS(327680), LCLASS(393216), LCLASS(458752), LCLASS(524288)};
+
+//! Threshold number of pages for when free pages are decommitted
+static uint32_t global_page_free_overflow[4] = {16, 8, 2, 0};
+
+//! Number of pages to retain when free page threshold overflows
+static uint32_t global_page_free_retain[4] = {4, 2, 1, 0};
+
+//! OS huge page support
+static int os_huge_pages;
+//! OS memory map granularity
+static size_t os_map_granularity;
+//! OS memory page size
+static size_t os_page_size;
+
+////////////
+///
+/// Thread local heap and ID
+///
+//////
+
+//! Current thread heap
+#if defined(_MSC_VER) && !defined(__clang__)
+#define TLS_MODEL
+#define _Thread_local __declspec(thread)
+#else
+// #define TLS_MODEL __attribute__((tls_model("initial-exec")))
+#define TLS_MODEL
+#endif
+static _Thread_local heap_t* global_thread_heap TLS_MODEL = &global_heap_fallback;
+
+static heap_t*
+heap_allocate(int first_class);
+
+static void
+heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count);
+
+//! Fast thread ID
+static inline uintptr_t
+get_thread_id(void) {
+#if defined(_WIN32)
+ return (uintptr_t)((void*)NtCurrentTeb());
+#else
+ void* thp = __builtin_thread_pointer();
+ return (uintptr_t)thp;
+#endif
+ /*
+ #elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
+ uintptr_t tid;
+ #if defined(__i386__)
+ __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
+ #elif defined(__x86_64__)
+ #if defined(__MACH__)
+ __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
+ #else
+ __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
+ #endif
+ #elif defined(__arm__)
+ __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
+ #elif defined(__aarch64__)
+ #if defined(__MACH__)
+ // tpidr_el0 likely unused, always return 0 on iOS
+ __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
+ #else
+ __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
+ #endif
+ #else
+ #error This platform needs implementation of get_thread_id()
+ #endif
+ return tid;
+ #else
+ #error This platform needs implementation of get_thread_id()
+ #endif
+ */
+}
+
+//! Set the current thread heap
+static void
+set_thread_heap(heap_t* heap) {
+ global_thread_heap = heap;
+ if (heap && (heap->id != 0)) {
+ rpmalloc_assert(heap->id != 0, "Default heap being used");
+ heap->owner_thread = get_thread_id();
+ }
+#if PLATFORM_WINDOWS
+ FlsSetValue(fls_key, heap);
+#else
+ pthread_setspecific(pthread_key, heap);
+#endif
+}
+
+static heap_t*
+get_thread_heap_allocate(void) {
+ heap_t* heap = heap_allocate(0);
+ set_thread_heap(heap);
+ return heap;
+}
+
+//! Get the current thread heap
+static inline heap_t*
+get_thread_heap(void) {
+ return global_thread_heap;
+}
+
+//! Get the size class from given size in bytes for tiny blocks (below 16 times the minimum granularity)
+static inline uint32_t
+get_size_class_tiny(size_t size) {
+ return (((uint32_t)size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY);
+}
+
+//! Get the size class from given size in bytes
+static inline uint32_t
+get_size_class(size_t size) {
+ uintptr_t minblock_count = (size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY;
+ // For sizes up to 64 times the minimum granularity (i.e. 1024 bytes) the size class is equal to the number of
+ // such blocks
+ if (size <= (SMALL_GRANULARITY * 64)) {
+ rpmalloc_assert(global_size_class[minblock_count].block_size >= size, "Size class misconfiguration");
+ return (uint32_t)(minblock_count ? minblock_count : 1);
+ }
+ --minblock_count;
+ // Calculate position of most significant bit, since minblock_count now guaranteed to be > 64 this position is
+ // guaranteed to be >= 6
+#if ARCH_64BIT
+ const uint32_t most_significant_bit = (uint32_t)(63 - (int)rpmalloc_clz(minblock_count));
+#else
+ const uint32_t most_significant_bit = (uint32_t)(31 - (int)rpmalloc_clz(minblock_count));
+#endif
+ // Class sizes are of the bit format [..]000xxx000[..] where we already have the position of the most significant
+ // bit, now calculate the subclass from the remaining two bits
+ const uint32_t subclass_bits = (minblock_count >> (most_significant_bit - 2)) & 0x03;
+ const uint32_t class_idx = (uint32_t)((most_significant_bit << 2) + subclass_bits) + 41;
+ rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx].block_size >= size),
+ "Size class misconfiguration");
+ rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx - 1].block_size < size),
+ "Size class misconfiguration");
+ return class_idx;
+}
+
+static inline page_type_t
+get_page_type(uint32_t size_class) {
+ if (size_class < SMALL_SIZE_CLASS_COUNT)
+ return PAGE_SMALL;
+ else if (size_class < (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT))
+ return PAGE_MEDIUM;
+ else if (size_class < SIZE_CLASS_COUNT)
+ return PAGE_LARGE;
+ return PAGE_HUGE;
+}
+
+static inline size_t
+get_page_aligned_size(size_t size) {
+ size_t unalign = size % global_config.page_size;
+ if (unalign)
+ size += global_config.page_size - unalign;
+ return size;
+}
+
+////////////
+///
+/// OS entry points
+///
+//////
+
+static void
+os_set_page_name(void* address, size_t size) {
+#if defined(__linux__) || defined(__ANDROID__)
+ const char* name = os_huge_pages ? global_config.huge_page_name : global_config.page_name;
+ if ((address == MAP_FAILED) || !name)
+ return;
+ // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails
+ // (e.g. invalid name) it is a no-op basically.
+ (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name);
+#else
+ (void)sizeof(size);
+ (void)sizeof(address);
+#endif
+}
+
+static void*
+os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
+ size_t map_size = size + alignment;
+#if PLATFORM_WINDOWS
+ // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses
+ // are actually accessed". But if we enable decommit it's better to not immediately commit and instead commit per
+ // page to avoid saturating the OS commit limit
+#if ENABLE_DECOMMIT
+ DWORD do_commit = 0;
+#else
+ DWORD do_commit = MEM_COMMIT;
+#endif
+ void* ptr =
+ VirtualAlloc(0, map_size, (os_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | do_commit, PAGE_READWRITE);
+#else
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
+#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+ int fd = (int)VM_MAKE_TAG(240U);
+ if (os_huge_pages)
+ fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, fd, 0);
+#elif defined(MAP_HUGETLB)
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
+ (os_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
+#if defined(MADV_HUGEPAGE)
+ // In some configurations, huge page allocations might fail, so we fall back to a normal
+ // allocation and promote the region to a transparent huge page
+ if ((ptr == MAP_FAILED || !ptr) && os_huge_pages) {
+ ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (ptr && ptr != MAP_FAILED) {
+ int prm = madvise(ptr, size, MADV_HUGEPAGE);
+ (void)prm;
+ rpmalloc_assert((prm == 0), "Failed to promote the page to transparent huge page");
+ }
+ }
+#endif
+ os_set_page_name(ptr, map_size);
+#elif defined(MAP_ALIGNED)
+ const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
+#elif defined(MAP_ALIGN)
+ caddr_t base = (os_huge_pages ? (caddr_t)(4 << 20) : 0);
+ void* ptr = mmap(base, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
+#else
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+#endif
+ if (ptr == MAP_FAILED)
+ ptr = 0;
+#endif
+ if (!ptr) {
+ if (global_memory_interface->map_fail_callback) {
+ if (global_memory_interface->map_fail_callback(map_size))
+ return os_mmap(size, alignment, offset, mapped_size);
+ } else {
+ rpmalloc_assert(ptr != 0, "Failed to map more virtual memory");
+ }
+ return 0;
+ }
+ if (alignment) {
+ size_t padding = ((uintptr_t)ptr & (uintptr_t)(alignment - 1));
+ if (padding)
+ padding = alignment - padding;
+ rpmalloc_assert(padding <= alignment, "Internal failure in padding");
+ rpmalloc_assert(!(padding % 8), "Internal failure in padding");
+ ptr = pointer_offset(ptr, padding);
+ *offset = padding;
+ }
+ *mapped_size = map_size;
+#if ENABLE_STATISTICS
+ size_t page_count = map_size / global_config.page_size;
+ size_t page_mapped_current =
+ atomic_fetch_add_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed) + page_count;
+ size_t page_mapped_peak = atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed);
+ while (page_mapped_current > page_mapped_peak) {
+ if (atomic_compare_exchange_weak_explicit(&global_statistics.page_mapped_peak, &page_mapped_peak,
+ page_mapped_current, memory_order_relaxed, memory_order_relaxed))
+ break;
+ }
+#if ENABLE_DECOMMIT
+ size_t page_active_current =
+ atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
+ size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
+ while (page_active_current > page_active_peak) {
+ if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
+ page_active_current, memory_order_relaxed, memory_order_relaxed))
+ break;
+ }
+#endif
+#endif
+ return ptr;
+}
+
+static void
+os_mcommit(void* address, size_t size) {
+#if ENABLE_DECOMMIT
+ if (global_config.disable_decommit)
+ return;
+#if PLATFORM_WINDOWS
+ if (!VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE)) {
+ rpmalloc_assert(0, "Failed to commit virtual memory block");
+ }
+#else
+ /*
+ if (mprotect(address, size, PROT_READ | PROT_WRITE)) {
+ rpmalloc_assert(0, "Failed to commit virtual memory block");
+ }
+ */
+#endif
+#if ENABLE_STATISTICS
+ size_t page_count = size / global_config.page_size;
+ atomic_fetch_add_explicit(&global_statistics.page_commit, page_count, memory_order_relaxed);
+ size_t page_active_current =
+ atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
+ size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
+ while (page_active_current > page_active_peak) {
+ if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
+ page_active_current, memory_order_relaxed, memory_order_relaxed))
+ break;
+ }
+#endif
+#endif
+ (void)sizeof(address);
+ (void)sizeof(size);
+}
+
+static void
+os_mdecommit(void* address, size_t size) {
+#if ENABLE_DECOMMIT
+ if (global_config.disable_decommit)
+ return;
+#if PLATFORM_WINDOWS
+ if (!VirtualFree(address, size, MEM_DECOMMIT)) {
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+#else
+ /*
+ if (mprotect(address, size, PROT_NONE)) {
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+ */
+#if defined(MADV_DONTNEED)
+ if (madvise(address, size, MADV_DONTNEED)) {
+#elif defined(MADV_FREE_REUSABLE)
+ int ret;
+ while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN))
+ errno = 0;
+ if ((ret == -1) && (errno != 0)) {
+#elif defined(MADV_PAGEOUT)
+ if (madvise(address, size, MADV_PAGEOUT)) {
+#elif defined(MADV_FREE)
+ if (madvise(address, size, MADV_FREE)) {
+#else
+ if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
+#endif
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+#endif
+#if ENABLE_STATISTICS
+ size_t page_count = size / global_config.page_size;
+ atomic_fetch_add_explicit(&global_statistics.page_decommit, page_count, memory_order_relaxed);
+ size_t page_active_current =
+ atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed);
+ rpmalloc_assert(page_active_current >= page_count, "Decommit counter out of sync");
+ (void)sizeof(page_active_current);
+#endif
+#else
+ (void)sizeof(address);
+ (void)sizeof(size);
+#endif
+}
+
+static void
+os_munmap(void* address, size_t offset, size_t mapped_size) {
+ (void)sizeof(mapped_size);
+ address = pointer_offset(address, -(int32_t)offset);
+#if ENABLE_UNMAP
+#if PLATFORM_WINDOWS
+ if (!VirtualFree(address, 0, MEM_RELEASE)) {
+ rpmalloc_assert(0, "Failed to unmap virtual memory block");
+ }
+#else
+ if (munmap(address, mapped_size))
+ rpmalloc_assert(0, "Failed to unmap virtual memory block");
+#endif
+#if ENABLE_STATISTICS
+ size_t page_count = mapped_size / global_config.page_size;
+ atomic_fetch_sub_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed);
+ atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed);
+#endif
+#endif
+}
+
+////////////
+///
+/// Page interface
+///
+//////
+
+static inline span_t*
+page_get_span(page_t* page) {
+ return (span_t*)((uintptr_t)page & SPAN_MASK);
+}
+
+static inline size_t
+page_get_size(page_t* page) {
+ if (page->page_type == PAGE_SMALL)
+ return SMALL_PAGE_SIZE;
+ else if (page->page_type == PAGE_MEDIUM)
+ return MEDIUM_PAGE_SIZE;
+ else if (page->page_type == PAGE_LARGE)
+ return LARGE_PAGE_SIZE;
+ else
+ return page_get_span(page)->page_size;
+}
+
+static inline int
+page_is_thread_heap(page_t* page) {
+#if RPMALLOC_FIRST_CLASS_HEAPS
+ return (!page->heap->owner_thread || (page->heap->owner_thread == get_thread_id()));
+#else
+ return (page->heap->owner_thread == get_thread_id());
+#endif
+}
+
+static inline block_t*
+page_block_start(page_t* page) {
+ return pointer_offset(page, PAGE_HEADER_SIZE);
+}
+
+static inline block_t*
+page_block(page_t* page, uint32_t block_index) {
+ return pointer_offset(page, PAGE_HEADER_SIZE + (page->block_size * block_index));
+}
+
+static inline uint32_t
+page_block_index(page_t* page, block_t* block) {
+ block_t* block_first = page_block_start(page);
+ return (uint32_t)pointer_diff(block, block_first) / page->block_size;
+}
+
+static inline uint32_t
+page_block_from_thread_free_list(page_t* page, uint64_t token, block_t** block) {
+ uint32_t block_index = (uint32_t)(token & 0xFFFFFFFFULL);
+ uint32_t list_count = (uint32_t)((token >> 32ULL) & 0xFFFFFFFFULL);
+ *block = list_count ? page_block(page, block_index) : 0;
+ return list_count;
+}
+
+static inline uint64_t
+page_block_to_thread_free_list(page_t* page, uint32_t block_index, uint32_t list_count) {
+ (void)sizeof(page);
+ return ((uint64_t)list_count << 32ULL) | (uint64_t)block_index;
+}
+
+static inline block_t*
+page_block_realign(page_t* page, block_t* block) {
+ void* blocks_start = page_block_start(page);
+ uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start);
+ return pointer_offset(block, -(int32_t)(block_offset % page->block_size));
+}
+
+static block_t*
+page_get_local_free_block(page_t* page) {
+ block_t* block = page->local_free;
+ page->local_free = block->next;
+ --page->local_free_count;
+ ++page->block_used;
+ return block;
+}
+
+static inline void
+page_decommit_memory_pages(page_t* page) {
+ if (page->is_decommitted)
+ return;
+ void* extra_page = pointer_offset(page, global_config.page_size);
+ size_t extra_page_size = page_get_size(page) - global_config.page_size;
+ global_memory_interface->memory_decommit(extra_page, extra_page_size);
+ page->is_decommitted = 1;
+}
+
+static inline void
+page_commit_memory_pages(page_t* page) {
+ if (!page->is_decommitted)
+ return;
+ void* extra_page = pointer_offset(page, global_config.page_size);
+ size_t extra_page_size = page_get_size(page) - global_config.page_size;
+ global_memory_interface->memory_commit(extra_page, extra_page_size);
+ page->is_decommitted = 0;
+#if ENABLE_DECOMMIT
+#if !defined(__APPLE__)
+ // When page is recommitted, the blocks in the second memory page and forward
+ // will be zeroed out by OS - take advantage in zalloc/calloc calls and make sure
+ // blocks in first page is zeroed out
+ void* first_page = pointer_offset(page, PAGE_HEADER_SIZE);
+ memset(first_page, 0, global_config.page_size - PAGE_HEADER_SIZE);
+ page->is_zero = 1;
+#endif
+#endif
+}
+
+static void
+page_available_to_free(page_t* page) {
+ rpmalloc_assert(page->is_full == 0, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ heap_t* heap = page->heap;
+ if (heap->page_available[page->size_class] == page) {
+ heap->page_available[page->size_class] = page->next;
+ } else {
+ page->prev->next = page->next;
+ if (page->next)
+ page->next->prev = page->prev;
+ }
+ page->is_free = 1;
+ page->is_zero = 0;
+ page->next = heap->page_free[page->page_type];
+ heap->page_free[page->page_type] = page;
+ if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
+ heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]);
+}
+
+static void
+page_full_to_available(page_t* page) {
+ rpmalloc_assert(page->is_full == 1, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ heap_t* heap = page->heap;
+ page->next = heap->page_available[page->size_class];
+ if (page->next)
+ page->next->prev = page;
+ heap->page_available[page->size_class] = page;
+ page->is_full = 0;
+ if (page->has_aligned_block == 0)
+ page->generic_free = 0;
+}
+
+//! Adopt a full page that has been completely freed by other threads into
+//! the given (non-default) heap's free-page list, resetting its cross-thread
+//! free list and decommitting surplus free pages on overflow.
+static void
+page_full_to_free_on_new_heap(page_t* page, heap_t* heap) {
+ rpmalloc_assert(heap->id, "Page full to free on default heap");
+ rpmalloc_assert(page->is_full == 1, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ page->is_full = 0;
+ page->is_free = 1;
+ // Reassign ownership to the adopting heap and clear the deferred free list
+ page->heap = heap;
+ atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ page->next = heap->page_free[page->page_type];
+ heap->page_free[page->page_type] = page;
+ if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
+ heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]);
+}
+
+//! Transition a page from the heap's available list to the full state
+//! (all blocks in the page are in use).
+static void
+page_available_to_full(page_t* page) {
+ heap_t* heap = page->heap;
+ // Unlink from the doubly linked available list (head or interior node)
+ if (heap->page_available[page->size_class] == page) {
+ heap->page_available[page->size_class] = page->next;
+ } else {
+ page->prev->next = page->next;
+ if (page->next)
+ page->next->prev = page->prev;
+ }
+ page->is_full = 1;
+ page->is_zero = 0;
+ // Force frees through the generic path so the full->available
+ // transition is performed when a block is released
+ page->generic_free = 1;
+}
+
+//! Push a block freed by the owning thread onto the page's local free list,
+//! performing the page state transition if the page becomes empty or was full.
+static inline void
+page_put_local_free_block(page_t* page, block_t* block) {
+ block->next = page->local_free;
+ page->local_free = block;
+ ++page->local_free_count;
+ if (UNEXPECTED(--page->block_used == 0)) {
+ page_available_to_free(page);
+ } else if (UNEXPECTED(page->is_full != 0)) {
+ page_full_to_available(page);
+ }
+}
+
+//! Atomically take over the page's cross-thread deferred free list and make
+//! it the page's local free list. No-op if a local free list already exists.
+static NOINLINE void
+page_adopt_thread_free_block_list(page_t* page) {
+ if (page->local_free)
+ return;
+ unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
+ if (thread_free != 0) {
+ // Other threads can only replace with another valid list head, this will never change to 0 in other threads
+ while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_relaxed,
+ memory_order_relaxed))
+ wait_spin();
+ // Decode the packed list head/count into an actual block list
+ page->local_free_count = page_block_from_thread_free_list(page, thread_free, &page->local_free);
+ rpmalloc_assert(page->local_free_count <= page->block_used, "Page thread free list count internal failure");
+ page->block_used -= page->local_free_count;
+ }
+}
+
+//! Push a block freed by a non-owning thread onto the page's (or, when the
+//! page is full, the owning heap's) cross-thread deferred free list.
+static NOINLINE void
+page_put_thread_free_block(page_t* page, block_t* block) {
+ atomic_thread_fence(memory_order_acquire);
+ if (page->is_full) {
+ // Page is full, put the block in the heap thread free list instead, otherwise
+ // the heap will not pick up the free blocks until a thread local free happens
+ heap_t* heap = page->heap;
+ uintptr_t prev_head = atomic_load_explicit(&heap->thread_free[page->page_type], memory_order_relaxed);
+ block->next = (void*)prev_head;
+ while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page->page_type], &prev_head, (uintptr_t)block,
+ memory_order_relaxed, memory_order_relaxed)) {
+ block->next = (void*)prev_head;
+ wait_spin();
+ }
+ } else {
+ // Encode the block index and new list size into the packed 64-bit
+ // thread_free word and publish it with a CAS loop
+ unsigned long long prev_thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
+ uint32_t block_index = page_block_index(page, block);
+ rpmalloc_assert(page_block(page, block_index) == block, "Block pointer is not aligned to start of block");
+ uint32_t list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
+ uint64_t thread_free = page_block_to_thread_free_list(page, block_index, list_size);
+ while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &prev_thread_free, thread_free,
+ memory_order_relaxed, memory_order_relaxed)) {
+ // CAS failed - relink on top of the new head and re-encode
+ list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
+ thread_free = page_block_to_thread_free_list(page, block_index, list_size);
+ wait_spin();
+ }
+ }
+}
+
+//! Hand the page's local free list to the owning heap as the fast-track free
+//! list for the size class. The blocks are counted as used on the page while
+//! they live on the heap list.
+static void
+page_push_local_free_to_heap(page_t* page) {
+ // Push the page free list as the fast track list of free blocks for heap
+ page->heap->local_free[page->size_class] = page->local_free;
+ page->block_used += page->local_free_count;
+ page->local_free = 0;
+ page->local_free_count = 0;
+}
+
+//! Carve the next uninitialized block out of the page and return it. For
+//! small blocks, also pre-link the remaining blocks up to the next memory
+//! page boundary into the page's local free list.
+static NOINLINE void*
+page_initialize_blocks(page_t* page) {
+ rpmalloc_assert(page->block_initialized < page->block_count, "Block initialization internal failure");
+ block_t* block = page_block(page, page->block_initialized);
+ ++page->block_initialized;
+ ++page->block_used;
+
+ if ((page->page_type == PAGE_SMALL) && (page->block_size < (global_config.page_size >> 1))) {
+ // Link up until next memory page in free list
+ void* memory_page_start = (void*)((uintptr_t)block & ~(uintptr_t)(global_config.page_size - 1));
+ void* memory_page_next = pointer_offset(memory_page_start, global_config.page_size);
+ block_t* free_block = pointer_offset(block, page->block_size);
+ block_t* first_block = free_block;
+ block_t* last_block = free_block;
+ uint32_t list_count = 0;
+ uint32_t max_list_count = page->block_count - page->block_initialized;
+ // Stop at the memory page boundary or when all blocks are linked
+ while (((void*)free_block < memory_page_next) && (list_count < max_list_count)) {
+ last_block = free_block;
+ free_block->next = pointer_offset(free_block, page->block_size);
+ free_block = free_block->next;
+ ++list_count;
+ }
+ if (list_count) {
+ last_block->next = 0;
+ page->local_free = first_block;
+ page->block_initialized += list_count;
+ page->local_free_count = list_count;
+ }
+ }
+
+ return block;
+}
+
+//! Allocate one block from the page: local free list first, then adopted
+//! cross-thread frees, then newly initialized blocks. Optionally zeroes the
+//! block. Performs the available->full transition when the page fills up.
+static inline RPMALLOC_ALLOCATOR void*
+page_allocate_block(page_t* page, unsigned int zero) {
+ unsigned int is_zero = 0;
+ block_t* block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
+ if (UNEXPECTED(block == 0)) {
+ if (atomic_load_explicit(&page->thread_free, memory_order_relaxed) != 0) {
+ page_adopt_thread_free_block_list(page);
+ block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
+ }
+ if (block == 0) {
+ // Freshly initialized blocks are zero if the page memory is zero
+ block = page_initialize_blocks(page);
+ is_zero = page->is_zero;
+ }
+ }
+
+ rpmalloc_assert(page->block_used <= page->block_count, "Page block use counter out of sync");
+ // Donate remaining local free blocks to the heap fast-track list if empty
+ if (page->local_free && !page->heap->local_free[page->size_class])
+ page_push_local_free_to_heap(page);
+
+ // The page might be full when free list has been pushed to heap local free list,
+ // check if there is a thread free list to adopt
+ if (page->block_used == page->block_count)
+ page_adopt_thread_free_block_list(page);
+
+ if (page->block_used == page->block_count) {
+ // Page is now fully utilized
+ rpmalloc_assert(!page->is_full, "Page block use counter out of sync with full flag");
+ page_available_to_full(page);
+ }
+
+ if (zero) {
+ if (!is_zero)
+ memset(block, 0, page->block_size);
+ else
+ // Block memory is already zero - presumably only the first word
+ // needs clearing (free-list next pointer slot); cheap safeguard
+ *(uintptr_t*)block = 0;
+ }
+
+ return block;
+}
+
+////////////
+///
+/// Span interface
+///
+//////
+
+//! Check if the span's owning heap belongs to the calling thread (first class
+//! heaps with no owner thread are treated as thread local when enabled).
+static inline int
+span_is_thread_heap(span_t* span) {
+#if RPMALLOC_FIRST_CLASS_HEAPS
+ return (!span->heap->owner_thread || (span->heap->owner_thread == get_thread_id()));
+#else
+ return (span->heap->owner_thread == get_thread_id());
+#endif
+}
+
+//! Get the page containing the given block by masking the block address with
+//! the span's page address mask (pages are power-of-two sized and aligned).
+static inline page_t*
+span_get_page_from_block(span_t* span, void* block) {
+ return (page_t*)((uintptr_t)block & span->page_address_mask);
+}
+
+//! Find or allocate a page from the given span
+//! Carves the next uninitialized page out of the span, committing its memory
+//! if needed, and moves the span to the used list once fully consumed.
+static inline page_t*
+span_allocate_page(span_t* span) {
+ // Allocate path, initialize a new chunk of memory for a page in the given span
+ rpmalloc_assert(span->page_initialized < span->page_count, "Page initialization internal failure");
+ heap_t* heap = span->heap;
+ page_t* page = pointer_offset(span, span->page_size * span->page_initialized);
+
+#if ENABLE_DECOMMIT
+ // The first page is always committed on initial span map of memory
+ if (span->page_initialized)
+ global_memory_interface->memory_commit(page, span->page_size);
+#endif
+ ++span->page_initialized;
+
+ page->page_type = span->page_type;
+ page->is_zero = 1;
+ page->heap = heap;
+ rpmalloc_assert(page_is_thread_heap(page), "Page owner thread mismatch");
+
+ if (span->page_initialized == span->page_count) {
+ // Span fully utilized
+ rpmalloc_assert(span == heap->span_partial[span->page_type], "Span partial tracking out of sync");
+ heap->span_partial[span->page_type] = 0;
+
+ span->next = heap->span_used[span->page_type];
+ heap->span_used[span->page_type] = span;
+ }
+
+ return page;
+}
+
+//! Generic (slow path) block deallocation: unmaps huge spans directly,
+//! realigns aligned-allocation pointers, then frees locally or defers to the
+//! cross-thread free list depending on the calling thread.
+static NOINLINE void
+span_deallocate_block(span_t* span, page_t* page, void* block) {
+ if (UNEXPECTED(page->page_type == PAGE_HUGE)) {
+ // Huge blocks own their entire span mapping - release it back to the OS
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ return;
+ }
+
+ if (page->has_aligned_block) {
+ // Realign pointer to block start
+ block = page_block_realign(page, block);
+ }
+
+ int is_thread_local = page_is_thread_heap(page);
+ if (EXPECTED(is_thread_local != 0)) {
+ page_put_local_free_block(page, block);
+ } else {
+ // Multithreaded deallocation, push to deferred deallocation list.
+ page_put_thread_free_block(page, block);
+ }
+}
+
+////////////
+///
+/// Block interface
+///
+//////
+
+//! Get the span containing the given block by masking the block address
+//! (spans are mapped with guaranteed SPAN_SIZE alignment).
+static inline span_t*
+block_get_span(block_t* block) {
+ return (span_t*)((uintptr_t)block & SPAN_MASK);
+}
+
+//! Deallocate a block. Takes the inlined fast path for thread-local frees on
+//! pages without aligned blocks that are neither huge nor full, and falls
+//! back to the generic span_deallocate_block path otherwise.
+static inline void
+block_deallocate(block_t* block) {
+ span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK);
+ page_t* page = span_get_page_from_block(span, block);
+ const int is_thread_local = page_is_thread_heap(page);
+
+ // Optimized path for thread local free with non-huge block in page
+ // that has no aligned blocks
+ if (EXPECTED(is_thread_local != 0)) {
+ if (EXPECTED(page->generic_free == 0)) {
+ // Page is not huge, not full and has no aligned block - fast path
+ block->next = page->local_free;
+ page->local_free = block;
+ ++page->local_free_count;
+ if (UNEXPECTED(--page->block_used == 0))
+ page_available_to_free(page);
+ } else {
+ span_deallocate_block(span, page, block);
+ }
+ } else {
+ span_deallocate_block(span, page, block);
+ }
+}
+
+//! Compute the usable size of the given block: the remainder of its block
+//! slot for normal pages (accounts for aligned-allocation offset), or the
+//! remainder of the span mapping for huge blocks.
+static inline size_t
+block_usable_size(block_t* block) {
+ span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK);
+ if (EXPECTED(span->page_type <= PAGE_LARGE)) {
+ page_t* page = span_get_page_from_block(span, block);
+ void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE);
+ return page->block_size - ((size_t)pointer_diff(block, blocks_start) % page->block_size);
+ } else {
+ return ((size_t)span->page_size * (size_t)span->page_count) - (size_t)pointer_diff(block, span);
+ }
+}
+
+////////////
+///
+/// Heap interface
+///
+//////
+
+//! Acquire the global heap spin lock, storing the calling thread id as the
+//! lock owner for validation on release.
+static inline void
+heap_lock_acquire(void) {
+ uintptr_t lock = 0;
+ uintptr_t this_lock = get_thread_id();
+ while (!atomic_compare_exchange_strong(&global_heap_lock, &lock, this_lock)) {
+ // CAS updates expected value on failure - reset to unlocked state
+ lock = 0;
+ wait_spin();
+ }
+}
+
+//! Release the global heap spin lock (must be held by the calling thread).
+static inline void
+heap_lock_release(void) {
+ rpmalloc_assert((uintptr_t)atomic_load_explicit(&global_heap_lock, memory_order_relaxed) == get_thread_id(),
+ "Bad heap lock");
+ atomic_store_explicit(&global_heap_lock, 0, memory_order_release);
+}
+
+//! Initialize a heap control structure in the given memory block, zeroing it
+//! and assigning a unique non-zero heap id.
+static inline heap_t*
+heap_initialize(void* block) {
+ heap_t* heap = block;
+ memset_const(heap, 0, sizeof(heap_t));
+ heap->id = 1 + atomic_fetch_add_explicit(&global_heap_id, 1, memory_order_relaxed);
+ return heap;
+}
+
+//! Map and initialize a brand new heap control structure, initializing the
+//! allocator first if needed.
+static heap_t*
+heap_allocate_new(void) {
+ if (!global_config.page_size)
+ rpmalloc_initialize(0);
+ size_t heap_size = get_page_aligned_size(sizeof(heap_t));
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ block_t* block = global_memory_interface->memory_map(heap_size, 0, &offset, &mapped_size);
+#if ENABLE_DECOMMIT
+ global_memory_interface->memory_commit(block, heap_size);
+#endif
+ heap_t* heap = heap_initialize((void*)block);
+ // Remember mapping parameters so the heap can be unmapped later
+ heap->offset = (uint32_t)offset;
+ heap->mapped_size = mapped_size;
+#if ENABLE_STATISTICS
+ atomic_fetch_add_explicit(&global_statistics.heap_count, 1, memory_order_relaxed);
+#endif
+ return heap;
+}
+
+//! Unmap the memory backing the heap control structure.
+static void
+heap_unmap(heap_t* heap) {
+ global_memory_interface->memory_unmap(heap, heap->offset, heap->mapped_size);
+}
+
+//! Acquire a heap: reuse one from the global orphan queue (unless a first
+//! class heap is requested), else map a new one. The heap is linked into the
+//! global used list and bound to the calling thread.
+static heap_t*
+heap_allocate(int first_class) {
+ heap_t* heap = 0;
+ if (!first_class) {
+ heap_lock_acquire();
+ heap = global_heap_queue;
+ global_heap_queue = heap ? heap->next : 0;
+ heap_lock_release();
+ }
+ if (!heap)
+ heap = heap_allocate_new();
+ if (heap) {
+ uintptr_t current_thread_id = get_thread_id();
+ heap_lock_acquire();
+ heap->next = global_heap_used;
+ heap->prev = 0;
+ if (global_heap_used)
+ global_heap_used->prev = heap;
+ global_heap_used = heap;
+ heap_lock_release();
+ // Owner thread is set outside the lock - only this thread uses it
+ heap->owner_thread = current_thread_id;
+ }
+ return heap;
+}
+
+//! Release a heap: unlink it from the global used list and push it on the
+//! orphan queue for reuse by other threads.
+static inline void
+heap_release(heap_t* heap) {
+ heap_lock_acquire();
+ if (heap->prev)
+ heap->prev->next = heap->next;
+ if (heap->next)
+ heap->next->prev = heap->prev;
+ if (global_heap_used == heap)
+ global_heap_used = heap->next;
+ heap->next = global_heap_queue;
+ global_heap_queue = heap;
+ heap_lock_release();
+}
+
+//! Decommit free pages of the given type beyond the first page_retain_count
+//! entries of the heap's free-page list. Stops at the first already
+//! decommitted page (committed pages sit at the front of the list).
+static void
+heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count) {
+ page_t* page = heap->page_free[page_type];
+ // Skip the pages to keep committed
+ while (page && page_retain_count) {
+ page = page->next;
+ --page_retain_count;
+ }
+ while (page && (page->is_decommitted == 0)) {
+ page_decommit_memory_pages(page);
+ --heap->page_free_commit_count[page_type];
+ page = page->next;
+ }
+}
+
+//! Initialize a free page for the given size class and push it at the head
+//! of the heap's available list, recommitting its memory if decommitted.
+static inline void
+heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) {
+ page->size_class = size_class;
+ page->block_size = global_size_class[size_class].block_size;
+ page->block_count = global_size_class[size_class].block_count;
+ page->block_used = 0;
+ page->block_initialized = 0;
+ page->local_free = 0;
+ page->local_free_count = 0;
+ page->is_full = 0;
+ page->is_free = 0;
+ page->has_aligned_block = 0;
+ page->generic_free = 0;
+ page->heap = heap;
+ page_t* head = heap->page_available[size_class];
+ page->next = head;
+ page->prev = 0;
+ atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ if (head)
+ head->prev = page;
+ heap->page_available[size_class] = page;
+ if (page->is_decommitted)
+ page_commit_memory_pages(page);
+}
+
+//! Find or allocate a span for the given page type with the given size class
+//! Returns the partially used span if one exists, otherwise maps a new
+//! SPAN_SIZE aligned span and initializes its page geometry per page type.
+static inline span_t*
+heap_get_span(heap_t* heap, page_type_t page_type) {
+ // Fast path, available span for given page type
+ if (EXPECTED(heap->span_partial[page_type] != 0))
+ return heap->span_partial[page_type];
+
+ // Fallback path, map more memory
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ span_t* span = global_memory_interface->memory_map(SPAN_SIZE, SPAN_SIZE, &offset, &mapped_size);
+ if (EXPECTED(span != 0)) {
+ // Page geometry depends on the page type of the size class
+ uint32_t page_count = 0;
+ uint32_t page_size = 0;
+ uintptr_t page_address_mask = 0;
+ if (page_type == PAGE_SMALL) {
+ page_count = SPAN_SIZE / SMALL_PAGE_SIZE;
+ page_size = SMALL_PAGE_SIZE;
+ page_address_mask = SMALL_PAGE_MASK;
+ } else if (page_type == PAGE_MEDIUM) {
+ page_count = SPAN_SIZE / MEDIUM_PAGE_SIZE;
+ page_size = MEDIUM_PAGE_SIZE;
+ page_address_mask = MEDIUM_PAGE_MASK;
+ } else {
+ page_count = SPAN_SIZE / LARGE_PAGE_SIZE;
+ page_size = LARGE_PAGE_SIZE;
+ page_address_mask = LARGE_PAGE_MASK;
+ }
+#if ENABLE_DECOMMIT
+ // Only the first page is committed up front, see span_allocate_page
+ global_memory_interface->memory_commit(span, page_size);
+#endif
+ span->heap = heap;
+ span->page_type = page_type;
+ span->page_count = page_count;
+ span->page_size = page_size;
+ span->page_address_mask = page_address_mask;
+ span->offset = (uint32_t)offset;
+ span->mapped_size = mapped_size;
+
+ heap->span_partial[page_type] = span;
+ }
+
+ return span;
+}
+
+static page_t*
+heap_get_page(heap_t* heap, uint32_t size_class);
+
+static void
+block_deallocate(block_t* block);
+
+//! Slow path of heap_get_page: first drains deferred cross-thread frees and
+//! retries, then reuses a free page, then lazily initializes the thread, and
+//! finally allocates a fresh page from a span.
+static page_t*
+heap_get_page_generic(heap_t* heap, uint32_t size_class) {
+ page_type_t page_type = get_page_type(size_class);
+
+ // Check if there is a free page from multithreaded deallocations
+ uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed);
+ if (UNEXPECTED(block_mt != 0)) {
+ // Atomically steal the whole deferred list
+ while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_relaxed,
+ memory_order_relaxed)) {
+ wait_spin();
+ }
+ block_t* block = (void*)block_mt;
+ while (block) {
+ block_t* next_block = block->next;
+ block_deallocate(block);
+ block = next_block;
+ }
+ // Retry after processing deferred thread frees
+ return heap_get_page(heap, size_class);
+ }
+
+ // Check if there is a free page
+ page_t* page = heap->page_free[page_type];
+ if (EXPECTED(page != 0)) {
+ heap->page_free[page_type] = page->next;
+ if (page->is_decommitted == 0) {
+ rpmalloc_assert(heap->page_free_commit_count[page_type] > 0, "Free committed page count out of sync");
+ --heap->page_free_commit_count[page_type];
+ }
+ heap_make_free_page_available(heap, size_class, page);
+ return page;
+ }
+ rpmalloc_assert(heap->page_free_commit_count[page_type] == 0, "Free committed page count out of sync");
+
+ if (heap->id == 0) {
+ // Thread has not yet initialized, assign heap and try again
+ rpmalloc_initialize(0);
+ return heap_get_page(get_thread_heap(), size_class);
+ }
+
+ // Fallback path, find or allocate span for given size class
+ // If thread was not initialized, the heap for the new span
+ // will be different from the local heap variable in this scope
+ // (which is the default heap) - so use span page heap instead
+ span_t* span = heap_get_span(heap, page_type);
+ if (EXPECTED(span != 0)) {
+ page = span_allocate_page(span);
+ heap_make_free_page_available(page->heap, size_class, page);
+ }
+
+ return page;
+}
+
+//! Find or allocate a page for the given size class
+//! Fast path returns the head of the available list; otherwise falls back to
+//! the generic free-page/span allocation path.
+static page_t*
+heap_get_page(heap_t* heap, uint32_t size_class) {
+ // Fast path, available page for given size class
+ page_t* page = heap->page_available[size_class];
+ if (EXPECTED(page != 0))
+ return page;
+ return heap_get_page_generic(heap, size_class);
+}
+
+//! Pop a block from the heap local free list
+//! Returns null if the heap's fast-track free list for the size class is empty.
+static inline RPMALLOC_ALLOCATOR void*
+heap_pop_local_free(heap_t* heap, uint32_t size_class) {
+ block_t** free_list = heap->local_free + size_class;
+ block_t* block = *free_list;
+ if (EXPECTED(block != 0))
+ *free_list = block->next;
+ return block;
+}
+
+//! Generic allocation path from heap pages, spans or new mapping
+//! Allocates a block of the given size class, returning null on mapping failure.
+static NOINLINE RPMALLOC_ALLOCATOR void*
+heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned int zero) {
+ page_t* page = heap_get_page(heap, size_class);
+ if (EXPECTED(page != 0))
+ return page_allocate_block(page, zero);
+ return 0;
+}
+
+//! Generic allocation path from heap pages, spans or new mapping
+//! Allocates a huge block in its own dedicated span mapping; the span header
+//! precedes the returned pointer and the whole mapping is unmapped on free.
+static NOINLINE RPMALLOC_ALLOCATOR void*
+heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
+ (void)sizeof(heap);
+ size_t alloc_size = get_page_aligned_size(size + SPAN_HEADER_SIZE);
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ void* block = global_memory_interface->memory_map(alloc_size, SPAN_SIZE, &offset, &mapped_size);
+ if (block) {
+ span_t* span = block;
+#if ENABLE_DECOMMIT
+ global_memory_interface->memory_commit(span, alloc_size);
+#endif
+ span->heap = heap;
+ span->page_type = PAGE_HUGE;
+ span->page_size = (uint32_t)global_config.page_size;
+ span->page_count = (uint32_t)(alloc_size / global_config.page_size);
+ span->page_address_mask = LARGE_PAGE_MASK;
+ span->offset = (uint32_t)offset;
+ span->mapped_size = mapped_size;
+ // Embedded page header marks the block as huge/full so frees take
+ // the generic path that unmaps the span
+ span->page.heap = heap;
+ span->page.is_full = 1;
+ span->page.generic_free = 1;
+ span->page.page_type = PAGE_HUGE;
+ // Keep track of span if first class heap
+ if (!heap->owner_thread) {
+ span->next = heap->span_used[PAGE_HUGE];
+ heap->span_used[PAGE_HUGE] = span;
+ }
+ void* ptr = pointer_offset(block, SPAN_HEADER_SIZE);
+ if (zero)
+ memset(ptr, 0, size);
+ return ptr;
+ }
+ return 0;
+}
+
+//! Generic (non-tiny) allocation: maps the size to a size class and takes
+//! the small-to-large path, or the huge path when the size exceeds all
+//! size classes.
+static RPMALLOC_ALLOCATOR NOINLINE void*
+heap_allocate_block_generic(heap_t* heap, size_t size, unsigned int zero) {
+ uint32_t size_class = get_size_class(size);
+ if (EXPECTED(size_class < SIZE_CLASS_COUNT)) {
+ block_t* block = heap_pop_local_free(heap, size_class);
+ if (EXPECTED(block != 0)) {
+ // Fast track with small block available in heap level local free list
+ if (zero)
+ memset(block, 0, global_size_class[size_class].block_size);
+ return block;
+ }
+
+ return heap_allocate_block_small_to_large(heap, size_class, zero);
+ }
+
+ return heap_allocate_block_huge(heap, size, zero);
+}
+
+//! Find or allocate a block of the given size
+//! Inlined fast path for tiny sizes served from the heap-level free list;
+//! everything else goes through heap_allocate_block_generic.
+static inline RPMALLOC_ALLOCATOR void*
+heap_allocate_block(heap_t* heap, size_t size, unsigned int zero) {
+ if (size <= (SMALL_GRANULARITY * 64)) {
+ uint32_t size_class = get_size_class_tiny(size);
+ block_t* block = heap_pop_local_free(heap, size_class);
+ if (EXPECTED(block != 0)) {
+ // Fast track with small block available in heap level local free list
+ if (zero)
+ memset(block, 0, global_size_class[size_class].block_size);
+ return block;
+ }
+ }
+ return heap_allocate_block_generic(heap, size, zero);
+}
+
+//! Allocate a block with the given alignment by overallocating by the
+//! alignment and rounding the pointer up, marking the page as containing
+//! aligned blocks so frees realign through the generic path.
+static RPMALLOC_ALLOCATOR void*
+heap_allocate_block_aligned(heap_t* heap, size_t alignment, size_t size, unsigned int zero) {
+ // Natural allocation granularity already satisfies small alignments
+ if (alignment <= SMALL_GRANULARITY)
+ return heap_allocate_block(heap, size, zero);
+
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment) < size) {
+ errno = EINVAL;
+ return 0;
+ }
+ if (alignment & (alignment - 1)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ if (alignment >= RPMALLOC_MAX_ALIGNMENT) {
+ errno = EINVAL;
+ return 0;
+ }
+
+ size_t align_mask = alignment - 1;
+ block_t* block = heap_allocate_block(heap, size + alignment, zero);
+ if ((uintptr_t)block & align_mask) {
+ block = (void*)(((uintptr_t)block & ~(uintptr_t)align_mask) + alignment);
+ // Mark as having aligned blocks
+ span_t* span = block_get_span(block);
+ page_t* page = span_get_page_from_block(span, block);
+ page->has_aligned_block = 1;
+ page->generic_free = 1;
+ }
+ return block;
+}
+
+//! Reallocate a block to the given size: reuse in place when the existing
+//! block slot still fits, otherwise allocate a new block (with anti-hysteresis
+//! overallocation for small growths), copy and free the old one.
+static void*
+heap_reallocate_block(heap_t* heap, void* block, size_t size, size_t old_size, unsigned int flags) {
+ if (block) {
+ // Grab the span using guaranteed span alignment
+ span_t* span = block_get_span(block);
+ if (EXPECTED(span->page_type <= PAGE_LARGE)) {
+ // Normal sized block
+ page_t* page = span_get_page_from_block(span, block);
+ void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE);
+ uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start);
+ uint32_t block_idx = block_offset / page->block_size;
+ // block may point inside the slot due to aligned allocation
+ void* block_origin = pointer_offset(blocks_start, (size_t)block_idx * page->block_size);
+ if (!old_size)
+ old_size = (size_t)((ptrdiff_t)page->block_size - pointer_diff(block, block_origin));
+ if ((size_t)page->block_size >= size) {
+ // Still fits in block, never mind trying to save memory, but preserve data if alignment changed
+ if ((block != block_origin) && !(flags & RPMALLOC_NO_PRESERVE))
+ memmove(block_origin, block, old_size);
+ return block_origin;
+ }
+ } else {
+ // Huge block
+ void* block_start = pointer_offset(span, SPAN_HEADER_SIZE);
+ if (!old_size)
+ old_size = ((size_t)span->page_size * (size_t)span->page_count) - SPAN_HEADER_SIZE;
+ if ((size < old_size) && (size > LARGE_BLOCK_SIZE_LIMIT)) {
+ // Still fits in block and still huge, never mind trying to save memory,
+ // but preserve data if alignment changed
+ if ((block_start != block) && !(flags & RPMALLOC_NO_PRESERVE))
+ memmove(block_start, block, old_size);
+ return block_start;
+ }
+ }
+ } else {
+ old_size = 0;
+ }
+
+ if (!!(flags & RPMALLOC_GROW_OR_FAIL))
+ return 0;
+
+ // Size is greater than block size or saves enough memory to resize, need to allocate a new block
+ // and deallocate the old. Avoid hysteresis by overallocating if increase is small (below 37%)
+ size_t lower_bound = old_size + (old_size >> 2) + (old_size >> 3);
+ size_t new_size = (size > lower_bound) ? size : ((size > old_size) ? lower_bound : size);
+ void* old_block = block;
+ block = heap_allocate_block(heap, new_size, 0);
+ if (block && old_block) {
+ if (!(flags & RPMALLOC_NO_PRESERVE))
+ memcpy(block, old_block, old_size < new_size ? old_size : new_size);
+ block_deallocate(old_block);
+ }
+
+ return block;
+}
+
+//! Reallocate a block preserving the given alignment. Reuses the block when
+//! it is still large enough, correctly aligned and not wasting more than
+//! half its usable size; otherwise allocates a new aligned block and copies.
+static void*
+heap_reallocate_block_aligned(heap_t* heap, void* block, size_t alignment, size_t size, size_t old_size,
+ unsigned int flags) {
+ if (alignment <= SMALL_GRANULARITY)
+ return heap_reallocate_block(heap, block, size, old_size, flags);
+
+ int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
+ size_t usable_size = (block ? block_usable_size(block) : 0);
+ if ((usable_size >= size) && !((uintptr_t)block & (alignment - 1))) {
+ if (no_alloc || (size >= (usable_size / 2)))
+ return block;
+ }
+ // Aligned alloc marks span as having aligned blocks
+ void* old_block = block;
+ block = (!no_alloc ? heap_allocate_block_aligned(heap, alignment, size, 0) : 0);
+ if (EXPECTED(block != 0)) {
+ if (!(flags & RPMALLOC_NO_PRESERVE) && old_block) {
+ if (!old_size)
+ old_size = usable_size;
+ memcpy(block, old_block, old_size < size ? old_size : size);
+ }
+ if (EXPECTED(old_block != 0))
+ block_deallocate(old_block);
+ }
+ return block;
+}
+
+//! Unmap all spans owned by the heap and reset all of its page, span and
+//! free list tracking state. Used during finalization.
+static void
+heap_free_all(heap_t* heap) {
+ for (int itype = 0; itype < 3; ++itype) {
+ // NOTE(review): span_partial holds a single span elsewhere in this
+ // file; walking ->next here assumes it is null/valid - confirm
+ span_t* span = heap->span_partial[itype];
+ while (span) {
+ span_t* span_next = span->next;
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ span = span_next;
+ }
+ heap->span_partial[itype] = 0;
+ heap->page_free[itype] = 0;
+ heap->page_free_commit_count[itype] = 0;
+ atomic_store_explicit(&heap->thread_free[itype], 0, memory_order_relaxed);
+ }
+ // Four page types here: small, medium, large and huge span lists
+ for (int itype = 0; itype < 4; ++itype) {
+ span_t* span = heap->span_used[itype];
+ while (span) {
+ span_t* span_next = span->next;
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ span = span_next;
+ }
+ heap->span_used[itype] = 0;
+ }
+ memset(heap->local_free, 0, sizeof(heap->local_free));
+ memset(heap->page_available, 0, sizeof(heap->page_available));
+
+#if ENABLE_STATISTICS
+ // TODO: Fix
+#endif
+}
+
+////////////
+///
+/// Extern interface
+///
+//////
+
+//! Query whether the calling thread has been assigned its own heap
+//! (i.e. is not still using the default heap).
+int
+rpmalloc_is_thread_initialized(void) {
+ return (get_thread_heap() != global_heap_default) ? 1 : 0;
+}
+
+//! Allocate a block of at least the given size (malloc equivalent).
+extern inline RPMALLOC_ALLOCATOR void*
+rpmalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, size, 0);
+}
+
+//! Allocate a zero-initialized block of at least the given size.
+extern inline RPMALLOC_ALLOCATOR void*
+rpzalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, size, 1);
+}
+
+//! Free a block previously allocated by this allocator (null is a no-op).
+extern inline void
+rpfree(void* ptr) {
+ if (UNEXPECTED(ptr == 0))
+ return;
+ block_deallocate(ptr);
+}
+
+//! Allocate a zero-initialized array of num elements of the given size
+//! (calloc equivalent), with overflow checking when validation is enabled.
+extern inline RPMALLOC_ALLOCATOR void*
+rpcalloc(size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, total, 1);
+}
+
+//! Reallocate a block to the given size (realloc equivalent).
+extern inline RPMALLOC_ALLOCATOR void*
+rprealloc(void* ptr, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return ptr;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_reallocate_block(heap, ptr, size, 0, 0);
+}
+
+//! Reallocate a block to the given size and alignment. oldsize may be zero
+//! (queried from the block), flags accept RPMALLOC_NO_PRESERVE and
+//! RPMALLOC_GROW_OR_FAIL.
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ // Reject size+alignment overflow and over-large alignment. Uses
+ // global_config.page_size like the rest of this implementation; the
+ // legacy _memory_page_size global referenced here previously does not
+ // exist and broke the build with validation enabled.
+ if ((size + alignment < size) || (alignment > global_config.page_size)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_reallocate_block_aligned(heap, ptr, alignment, size, oldsize, flags);
+}
+
+//! Allocate a block of at least the given size with the given alignment.
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_alloc(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+//! Allocate a zero-initialized block with the given alignment.
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_zalloc(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 1);
+}
+
+//! Allocate a zero-initialized array with the given alignment, with overflow
+//! checking when validation is enabled.
+extern inline RPMALLOC_ALLOCATOR void*
+rpaligned_calloc(size_t alignment, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, total, 1);
+}
+
+//! memalign equivalent: allocate a block with the given alignment.
+extern inline RPMALLOC_ALLOCATOR void*
+rpmemalign(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+//! posix_memalign equivalent: store an aligned allocation in *memptr and
+//! return 0 on success, EINVAL if memptr is null, ENOMEM on failure.
+extern inline int
+rpposix_memalign(void** memptr, size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ if (memptr)
+ *memptr = heap_allocate_block_aligned(heap, alignment, size, 0);
+ else
+ return EINVAL;
+ return *memptr ? 0 : ENOMEM;
+}
+
+//! Query the usable size of the given block (0 for null).
+extern inline size_t
+rpmalloc_usable_size(void* ptr) {
+ return (ptr ? block_usable_size(ptr) : 0);
+}
+
+////////////
+///
+/// Initialization and finalization
+///
+//////
+
+//! TLS destructor invoked on thread exit; finalizes the thread's allocator
+//! state except on the main thread (see comment below).
+static void
+rpmalloc_thread_destructor(void* value) {
+ // If this is called on main thread assume it means rpmalloc_finalize
+ // has not been called and shutdown is forced (through _exit) or unclean
+ if (get_thread_id() == global_main_thread_id)
+ return;
+ if (value)
+ rpmalloc_thread_finalize();
+}
+
+//! Initialize the allocator with an optional memory interface and config.
+//! If already initialized, only initializes the calling thread and reports
+//! the active config back through the config pointer.
+extern int
+rpmalloc_initialize_config(rpmalloc_interface_t* memory_interface, rpmalloc_config_t* config) {
+ if (global_rpmalloc_initialized) {
+ rpmalloc_thread_initialize();
+ if (config)
+ *config = global_config;
+ return 0;
+ }
+
+ if (config)
+ global_config = *config;
+
+ int result = rpmalloc_initialize(memory_interface);
+
+ // Report back the effective configuration after initialization
+ if (config)
+ *config = global_config;
+
+ return result;
+}
+
+//! Initialize the allocator: install the memory interface, detect OS page
+//! size and mapping granularity, optionally enable huge pages per platform,
+//! set up the thread-exit key and initialize the calling thread. Idempotent.
+extern int
+rpmalloc_initialize(rpmalloc_interface_t* memory_interface) {
+ if (global_rpmalloc_initialized) {
+ rpmalloc_thread_initialize();
+ return 0;
+ }
+
+ global_rpmalloc_initialized = 1;
+
+ // Fall back to the OS mapping primitives if the given interface is
+ // incomplete
+ global_memory_interface = memory_interface ? memory_interface : &global_memory_interface_default;
+ if (!global_memory_interface->memory_map || !global_memory_interface->memory_unmap) {
+ global_memory_interface->memory_map = os_mmap;
+ global_memory_interface->memory_commit = os_mcommit;
+ global_memory_interface->memory_decommit = os_mdecommit;
+ global_memory_interface->memory_unmap = os_munmap;
+ }
+
+#if PLATFORM_WINDOWS
+ SYSTEM_INFO system_info;
+ memset(&system_info, 0, sizeof(system_info));
+ GetSystemInfo(&system_info);
+ os_map_granularity = system_info.dwAllocationGranularity;
+#else
+ os_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
+#endif
+
+#if PLATFORM_WINDOWS
+ os_page_size = system_info.dwPageSize;
+#else
+ os_page_size = os_map_granularity;
+#endif
+ if (global_config.enable_huge_pages) {
+#if PLATFORM_WINDOWS
+ // Huge pages on Windows require the SeLockMemoryPrivilege
+ HANDLE token = 0;
+ size_t large_page_minimum = GetLargePageMinimum();
+ if (large_page_minimum)
+ OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
+ if (token) {
+ LUID luid;
+ if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
+ TOKEN_PRIVILEGES token_privileges;
+ memset(&token_privileges, 0, sizeof(token_privileges));
+ token_privileges.PrivilegeCount = 1;
+ token_privileges.Privileges[0].Luid = luid;
+ token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+ if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
+ if (GetLastError() == ERROR_SUCCESS)
+ os_huge_pages = 1;
+ }
+ }
+ CloseHandle(token);
+ }
+ if (os_huge_pages) {
+ if (large_page_minimum > os_page_size)
+ os_page_size = large_page_minimum;
+ if (large_page_minimum > os_map_granularity)
+ os_map_granularity = large_page_minimum;
+ }
+#elif defined(__linux__)
+ // Read the configured huge page size from /proc/meminfo
+ size_t huge_page_size = 0;
+ FILE* meminfo = fopen("/proc/meminfo", "r");
+ if (meminfo) {
+ char line[128];
+ while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
+ line[sizeof(line) - 1] = 0;
+ if (strstr(line, "Hugepagesize:"))
+ huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
+ }
+ fclose(meminfo);
+ }
+ if (huge_page_size) {
+ os_huge_pages = 1;
+ os_page_size = huge_page_size;
+ os_map_granularity = huge_page_size;
+ }
+#elif defined(__FreeBSD__)
+ int rc;
+ size_t sz = sizeof(rc);
+
+ // Check if superpages are enabled in the kernel
+ if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
+ os_huge_pages = 1;
+ os_page_size = 2 * 1024 * 1024;
+ os_map_granularity = os_page_size;
+ }
+#elif defined(__APPLE__) || defined(__NetBSD__)
+ os_huge_pages = 1;
+ os_page_size = 2 * 1024 * 1024;
+ os_map_granularity = os_page_size;
+#endif
+ } else {
+ os_huge_pages = 0;
+ }
+
+ global_config.enable_huge_pages = os_huge_pages;
+
+ // Never run with a configured page size smaller than the OS page size
+ if (!memory_interface || (global_config.page_size < os_page_size))
+ global_config.page_size = os_page_size;
+
+ // Decommit is pointless with huge pages or very large page sizes
+ if (global_config.enable_huge_pages || global_config.page_size > (256 * 1024))
+ global_config.disable_decommit = 1;
+
+#if defined(__linux__) || defined(__ANDROID__)
+ if (global_config.disable_thp)
+ (void)prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
+#endif
+
+ // Register the per-thread destructor used to finalize thread heaps
+#ifdef _WIN32
+ fls_key = FlsAlloc(&rpmalloc_thread_destructor);
+#else
+ pthread_key_create(&pthread_key, rpmalloc_thread_destructor);
+#endif
+
+ global_main_thread_id = get_thread_id();
+
+ rpmalloc_thread_initialize();
+
+ return 0;
+}
+
+extern const rpmalloc_config_t*
+rpmalloc_config(void) {
+ return &global_config;
+}
+
+extern void
+rpmalloc_finalize(void) {
+ rpmalloc_thread_finalize();
+
+ if (global_config.unmap_on_finalize) {
+ heap_t* heap = global_heap_queue;
+ global_heap_queue = 0;
+ while (heap) {
+ heap_t* heap_next = heap->next;
+ heap_free_all(heap);
+ heap_unmap(heap);
+ heap = heap_next;
+ }
+ heap = global_heap_used;
+ global_heap_used = 0;
+ while (heap) {
+ heap_t* heap_next = heap->next;
+ heap_free_all(heap);
+ heap_unmap(heap);
+ heap = heap_next;
+ }
+#if ENABLE_STATISTICS
+ memset(&global_statistics, 0, sizeof(global_statistics));
+#endif
+ }
+
+#ifdef _WIN32
+ FlsFree(fls_key);
+ fls_key = 0;
+#else
+ pthread_key_delete(pthread_key);
+ pthread_key = 0;
+#endif
+
+ global_main_thread_id = 0;
+ global_rpmalloc_initialized = 0;
+}
+
+extern void
+rpmalloc_thread_initialize(void) {
+ if (get_thread_heap() == global_heap_default)
+ get_thread_heap_allocate();
+}
+
+extern void
+rpmalloc_thread_finalize(void) {
+ heap_t* heap = get_thread_heap();
+ if (heap != global_heap_default) {
+ heap_release(heap);
+ set_thread_heap(global_heap_default);
+ }
+}
+
+extern void
+rpmalloc_thread_collect(void) {
+}
+
+void
+rpmalloc_dump_statistics(void* file) {
+#if ENABLE_STATISTICS
+ fprintf(file, "Mapped pages: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_mapped, memory_order_relaxed));
+ fprintf(file, "Mapped pages (peak): %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed));
+ fprintf(file, "Active pages: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_active, memory_order_relaxed));
+ fprintf(file, "Active pages (peak): %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed));
+ fprintf(file, "Pages committed: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_commit, memory_order_relaxed));
+ fprintf(file, "Pages decommitted: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_decommit, memory_order_relaxed));
+ fprintf(file, "Heaps created: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.heap_count, memory_order_relaxed));
+#else
+ (void)sizeof(file);
+#endif
+}
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+rpmalloc_heap_t*
+rpmalloc_heap_acquire(void) {
+ // Must be a pristine heap from newly mapped memory pages, or else memory blocks
+ // could already be allocated from the heap which would (wrongly) be released when
+ // heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be
+ // pristine from the dedicated orphan list can be used.
+ heap_t* heap = heap_allocate(1);
+ rpmalloc_assume(heap != 0);
+ heap->owner_thread = 0;
+ return heap;
+}
+
+void
+rpmalloc_heap_release(rpmalloc_heap_t* heap) {
+ if (heap)
+ heap_release(heap);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block(heap, size, 0);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ return heap_allocate_block(heap, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ return heap_allocate_block_aligned(heap, alignment, total, 1);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return ptr;
+ }
+#endif
+ return heap_reallocate_block(heap, ptr, size, 0, flags);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment < size) || (alignment > _memory_page_size)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_reallocate_block_aligned(heap, ptr, alignment, size, 0, flags);
+}
+
+void
+rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
+ (void)sizeof(heap);
+ block_deallocate(ptr);
+}
+
+//! Free all memory allocated by the heap
+void
+rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
+ heap_free_all(heap);
+}
+
+extern inline void
+rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
+ heap_t* prev_heap = get_thread_heap();
+ if (prev_heap != heap) {
+ set_thread_heap(heap);
+ if (prev_heap)
+ heap_release(prev_heap);
+ }
+}
+
+rpmalloc_heap_t*
+rpmalloc_get_heap_for_ptr(void* ptr) {
+ // Grab the span, and then the heap from the span
+ span_t* span = (span_t*)((uintptr_t)ptr & SPAN_MASK);
+ if (span)
+ return span_get_page_from_block(span, ptr)->heap;
+ return 0;
+}
+
+#endif
+
+#include "malloc.c"
diff --git a/thirdparty/rpmalloc/rpmalloc.h b/thirdparty/rpmalloc/rpmalloc.h
new file mode 100644
index 000000000..2e67280f9
--- /dev/null
+++ b/thirdparty/rpmalloc/rpmalloc.h
@@ -0,0 +1,396 @@
+/* rpmalloc.h - Memory allocator - Public Domain - 2016-2024 Mattias Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc
+ * implementation in C11. The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or
+ * modify it without any restrictions.
+ *
+ */
+
+#pragma once
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RPMALLOC_CACHE_LINE_SIZE 64
+#if defined(__clang__) || defined(__GNUC__)
+#define RPMALLOC_EXPORT __attribute__((visibility("default")))
+#define RPMALLOC_RESTRICT __restrict
+#define RPMALLOC_ALLOCATOR
+#define RPMALLOC_CACHE_ALIGNED __attribute__((aligned(RPMALLOC_CACHE_LINE_SIZE)))
+#if (defined(__clang_major__) && (__clang_major__ < 4)) || (!defined(__clang_major__) && defined(__GNUC__))
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#else
+#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
+#endif
+#define RPMALLOC_CDECL
+#elif defined(_MSC_VER)
+#define RPMALLOC_EXPORT
+#define RPMALLOC_RESTRICT __declspec(restrict)
+#define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
+#define RPMALLOC_CACHE_ALIGNED __declspec(align(RPMALLOC_CACHE_LINE_SIZE))
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#define RPMALLOC_CDECL __cdecl
+#else
+#define RPMALLOC_EXPORT
+#define RPMALLOC_ALLOCATOR
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#define RPMALLOC_CDECL
+#endif
+
+#define RPMALLOC_MAX_ALIGNMENT (256 * 1024)
+
+//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* functions).
+#ifndef RPMALLOC_FIRST_CLASS_HEAPS
+#define RPMALLOC_FIRST_CLASS_HEAPS 0
+#endif
+
+//! Flag to rpaligned_realloc to not preserve content in reallocation
+#define RPMALLOC_NO_PRESERVE 1
+//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place,
+// in which case the original pointer is still valid (just like a call to realloc which fails to allocate
+// a new block).
+#define RPMALLOC_GROW_OR_FAIL 2
+
+typedef struct rpmalloc_global_statistics_t {
+ //! Current amount of virtual memory mapped, all of which might not have been committed (only if
+ //! ENABLE_STATISTICS=1)
+ size_t mapped;
+ //! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
+ size_t mapped_peak;
+ //! Current amount of memory in global caches for small and medium sizes (<32KiB)
+ size_t cached;
+	//! Current amount of memory allocated in huge allocations, i.e. larger than LARGE_SIZE_LIMIT which is 2MiB by
+ //! default (only if ENABLE_STATISTICS=1)
+ size_t huge_alloc;
+	//! Peak amount of memory allocated in huge allocations, i.e. larger than LARGE_SIZE_LIMIT which is 2MiB by default
+ //! (only if ENABLE_STATISTICS=1)
+ size_t huge_alloc_peak;
+ //! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
+ size_t mapped_total;
+ //! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
+ size_t unmapped_total;
+} rpmalloc_global_statistics_t;
+
+typedef struct rpmalloc_thread_statistics_t {
+ //! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
+ size_t sizecache;
+ //! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
+ size_t spancache;
+ //! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
+ size_t thread_to_global;
+ //! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
+ size_t global_to_thread;
+ //! Per span count statistics (only if ENABLE_STATISTICS=1)
+ struct {
+ //! Currently used number of spans
+ size_t current;
+ //! High water mark of spans used
+ size_t peak;
+ //! Number of spans transitioned to global cache
+ size_t to_global;
+ //! Number of spans transitioned from global cache
+ size_t from_global;
+ //! Number of spans transitioned to thread cache
+ size_t to_cache;
+ //! Number of spans transitioned from thread cache
+ size_t from_cache;
+ //! Number of spans transitioned to reserved state
+ size_t to_reserved;
+ //! Number of spans transitioned from reserved state
+ size_t from_reserved;
+ //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
+ size_t map_calls;
+ } span_use[64];
+ //! Per size class statistics (only if ENABLE_STATISTICS=1)
+ struct {
+ //! Current number of allocations
+ size_t alloc_current;
+ //! Peak number of allocations
+ size_t alloc_peak;
+ //! Total number of allocations
+ size_t alloc_total;
+ //! Total number of frees
+ size_t free_total;
+ //! Number of spans transitioned to cache
+ size_t spans_to_cache;
+ //! Number of spans transitioned from cache
+ size_t spans_from_cache;
+ //! Number of spans transitioned from reserved state
+ size_t spans_from_reserved;
+ //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
+ size_t map_calls;
+ } size_use[128];
+} rpmalloc_thread_statistics_t;
+
+typedef struct rpmalloc_interface_t {
+ //! Map memory pages for the given number of bytes. The returned address MUST be aligned to the given alignment,
+ //! which will always be either 0 or the span size. The function can store an alignment offset in the offset
+ //! variable in case it performs alignment and the returned pointer is offset from the actual start of the memory
+ //! region due to this alignment. This alignment offset will be passed to the memory unmap function. The mapped size
+ //! can be stored in the mapped_size variable, which will also be passed to the memory unmap function as the release
+ //! parameter once the entire mapped region is ready to be released. If you set a memory_map function, you must also
+ //! set a memory_unmap function or else the default implementation will be used for both. This function must be
+ //! thread safe, it can be called by multiple threads simultaneously.
+ void* (*memory_map)(size_t size, size_t alignment, size_t* offset, size_t* mapped_size);
+ //! Commit a range of memory pages
+ void (*memory_commit)(void* address, size_t size);
+ //! Decommit a range of memory pages
+ void (*memory_decommit)(void* address, size_t size);
+ //! Unmap the memory pages starting at address and spanning the given number of bytes. If you set a memory_unmap
+ //! function, you must also set a memory_map function or else the default implementation will be used for both. This
+ //! function must be thread safe, it can be called by multiple threads simultaneously.
+ void (*memory_unmap)(void* address, size_t offset, size_t mapped_size);
+ //! Called when a call to map memory pages fails (out of memory). If this callback is not set or returns zero the
+ //! library will return a null pointer in the allocation call. If this callback returns non-zero the map call will
+ //! be retried. The argument passed is the number of bytes that was requested in the map call. Only used if the
+ //! default system memory map function is used (memory_map callback is not set).
+ int (*map_fail_callback)(size_t size);
+ //! Called when an assert fails, if asserts are enabled. Will use the standard assert() if this is not set.
+ void (*error_callback)(const char* message);
+} rpmalloc_interface_t;
+
+typedef struct rpmalloc_config_t {
+ //! Size of memory pages. The page size MUST be a power of two. All memory mapping
+ // requests to memory_map will be made with size set to a multiple of the page size.
+ // Set to 0 to use the OS default page size.
+ size_t page_size;
+ //! Enable use of large/huge pages. If this flag is set to non-zero and page size is
+ // zero, the allocator will try to enable huge pages and auto detect the configuration.
+ // If this is set to non-zero and page_size is also non-zero, the allocator will
+ // assume huge pages have been configured and enabled prior to initializing the
+ // allocator.
+ // For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
+ // For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
+ int enable_huge_pages;
+ //! Disable decommitting unused pages when allocator determines the memory pressure
+ // is low and there is enough active pages cached. If set to 1, keep all pages committed.
+ int disable_decommit;
+	//! Allocated page names for systems supporting it to be able to distinguish among anonymous regions.
+ const char* page_name;
+	//! Allocated huge page names for systems supporting it to be able to distinguish among anonymous regions.
+ const char* huge_page_name;
+ //! Unmap all memory on finalize if set to 1. Normally you can let the OS unmap all pages
+ // when process exits, but if using rpmalloc in a dynamic library you might want to unmap
+ // all pages when the dynamic library unloads to avoid process memory leaks and bloat.
+ int unmap_on_finalize;
+#if defined(__linux__) || defined(__ANDROID__)
+	///! Allows disabling the Transparent Huge Page feature on Linux on a per-process basis,
+	/// rather than enabling/disabling it system-wide (done via /sys/kernel/mm/transparent_hugepage/enabled).
+	/// It can possibly improve performance and reduce allocation overhead in some contexts, albeit
+ /// THP is usually enabled by default.
+ int disable_thp;
+#endif
+} rpmalloc_config_t;
+
+//! Initialize allocator
+RPMALLOC_EXPORT int
+rpmalloc_initialize(rpmalloc_interface_t* memory_interface);
+
+//! Initialize allocator
+RPMALLOC_EXPORT int
+rpmalloc_initialize_config(rpmalloc_interface_t* memory_interface, rpmalloc_config_t* config);
+
+//! Get allocator configuration
+RPMALLOC_EXPORT const rpmalloc_config_t*
+rpmalloc_config(void);
+
+//! Finalize allocator
+RPMALLOC_EXPORT void
+rpmalloc_finalize(void);
+
+//! Initialize allocator for calling thread
+RPMALLOC_EXPORT void
+rpmalloc_thread_initialize(void);
+
+//! Finalize allocator for calling thread
+RPMALLOC_EXPORT void
+rpmalloc_thread_finalize(void);
+
+//! Perform deferred deallocations pending for the calling thread heap
+RPMALLOC_EXPORT void
+rpmalloc_thread_collect(void);
+
+//! Query if allocator is initialized for calling thread
+RPMALLOC_EXPORT int
+rpmalloc_is_thread_initialized(void);
+
+//! Get per-thread statistics
+RPMALLOC_EXPORT void
+rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
+
+//! Get global statistics
+RPMALLOC_EXPORT void
+rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
+
+//! Dump all statistics in human readable format to file (should be a FILE*)
+RPMALLOC_EXPORT void
+rpmalloc_dump_statistics(void* file);
+
+//! Allocate a memory block of at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
+
+//! Allocate a zero initialized memory block of at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpzalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
+
+//! Allocate a memory block of at least the given size and zero initialize it
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
+
+//! Reallocate the given block to at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Reallocate the given block to at least the given size and alignment,
+// with optional control flags (see RPMALLOC_NO_PRESERVE).
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_zalloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment, and zero initialize it.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_calloc(size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT int
+rpposix_memalign(void** memptr, size_t alignment, size_t size);
+
+//! Free the given memory block
+RPMALLOC_EXPORT void
+rpfree(void* ptr);
+
+//! Query the usable size of the given memory block (from given pointer to the end of block)
+RPMALLOC_EXPORT size_t
+rpmalloc_usable_size(void* ptr);
+
+//! Dummy empty function for forcing linker symbol inclusion
+RPMALLOC_EXPORT void
+rpmalloc_linker_reference(void);
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+//! Heap type
+typedef struct heap_t rpmalloc_heap_t;
+
+//! Acquire a new heap. Will reuse existing released heaps or allocate memory for a new heap
+// if none available. Heap API is implemented with the strict assumption that only one single
+// thread will call heap functions for a given heap at any given time, no functions are thread safe.
+RPMALLOC_EXPORT rpmalloc_heap_t*
+rpmalloc_heap_acquire(void);
+
+//! Release a heap (does NOT free the memory allocated by the heap, use rpmalloc_heap_free_all before destroying the
+//! heap).
+// Releasing a heap will enable it to be reused by other threads. Safe to pass a null pointer.
+RPMALLOC_EXPORT void
+rpmalloc_heap_release(rpmalloc_heap_t* heap);
+
+//! Allocate a memory block of at least the given size using the given heap.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size using the given heap. The returned
+// block will have the requested alignment. Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Allocate a memory block of at least the given size using the given heap and zero initialize it.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Allocate a memory block of at least the given size using the given heap and zero initialize it. The returned
+// block will have the requested alignment. Alignment must either be zero, or a power of two and a multiple of
+// sizeof(void*), and should ideally be less than memory page size. A caveat of rpmalloc internals is that this must
+// also be strictly less than the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Reallocate the given block to at least the given size. The memory block MUST be allocated
+// by the same heap given to this function.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Reallocate the given block to at least the given size. The memory block MUST be allocated
+// by the same heap given to this function. The returned block will have the requested alignment.
+// Alignment must be either zero, or a power of two and a multiple of sizeof(void*), and should ideally be
+// less than memory page size. A caveat of rpmalloc internals is that this must also be strictly less than
+// the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size,
+ unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4);
+
+//! Free the given memory block from the given heap. The memory block MUST be allocated
+// by the same heap given to this function.
+RPMALLOC_EXPORT void
+rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr);
+
+//! Free all memory allocated by the heap
+RPMALLOC_EXPORT void
+rpmalloc_heap_free_all(rpmalloc_heap_t* heap);
+
+//! Set the given heap as the current heap for the calling thread. A heap MUST only be current heap
+// for a single thread, a heap can never be shared between multiple threads. The previous
+// current heap for the calling thread is released to be reused by other threads.
+RPMALLOC_EXPORT void
+rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap);
+
+//! Returns which heap the given pointer is allocated on
+RPMALLOC_EXPORT rpmalloc_heap_t*
+rpmalloc_get_heap_for_ptr(void* ptr);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/rpmalloc/rpnew.h b/thirdparty/rpmalloc/rpnew.h
new file mode 100644
index 000000000..75d381a3f
--- /dev/null
+++ b/thirdparty/rpmalloc/rpnew.h
@@ -0,0 +1,111 @@
+
+#ifdef __cplusplus
+
+#include <new>
+#include <rpmalloc.h>
+
+#ifndef __CRTDECL
+#define __CRTDECL
+#endif
+
+extern void __CRTDECL
+operator delete(void* p) noexcept {
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p) noexcept {
+ rpfree(p);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size) noexcept(false) {
+ return rpmalloc(size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size) noexcept(false) {
+ return rpmalloc(size);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpmalloc(size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpmalloc(size);
+}
+
+#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
+
+extern void __CRTDECL
+operator delete(void* p, std::size_t size) noexcept {
+ (void)sizeof(size);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p, std::size_t size) noexcept {
+ (void)sizeof(size);
+ rpfree(p);
+}
+
+#endif
+
+#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
+
+extern void __CRTDECL
+operator delete(void* p, std::align_val_t align) noexcept {
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p, std::align_val_t align) noexcept {
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete(void* p, std::size_t size, std::align_val_t align) noexcept {
+ (void)sizeof(size);
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p, std::size_t size, std::align_val_t align) noexcept {
+ (void)sizeof(size);
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size, std::align_val_t align) noexcept(false) {
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size, std::align_val_t align) noexcept(false) {
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size, std::align_val_t align, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+#endif
+
+#endif
diff --git a/xmake.lua b/xmake.lua
index a8256dbd5..b1e0f809d 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -136,6 +136,13 @@ option("zenmimalloc")
option_end()
add_define_by_config("ZEN_USE_MIMALLOC", "zenmimalloc")
+option("zenrpmalloc")
+ set_default(is_os("windows"))
+ set_showmenu(true)
+ set_description("Use rpmalloc for faster memory management")
+option_end()
+add_define_by_config("ZEN_USE_RPMALLOC", "zenrpmalloc")
+
if is_os("windows") then
option("httpsys")
set_default(true)
@@ -147,6 +154,18 @@ else
add_defines("ZEN_WITH_HTTPSYS=0")
end
+if is_os("windows") then
+ add_defines("UE_MEMORY_TRACE_AVAILABLE=1")
+ option("zenmemtrack")
+ set_default(true)
+ set_showmenu(true)
+ set_description("Enable UE's Memory Trace support")
+ option_end()
+ add_define_by_config("ZEN_WITH_MEMTRACK", "zenmemtrack")
+else
+ add_defines("ZEN_WITH_MEMTRACK=0")
+end
+
option("zentrace")
set_default(true)
set_showmenu(true)