author	Stefan Boberg <[email protected]>	2024-11-25 09:56:23 +0100
committer	GitHub Enterprise <[email protected]>	2024-11-25 09:56:23 +0100
commit	8b8de92e51db4cc4c1727712c736dcba5f79d369 (patch)
tree	1f58edaaad389837a7652daebab246125762240e
parent	5.5.13 (diff)
Insights-compatible memory tracking (#214)
This change introduces support for tracing of memory allocation activity. The code is ported from UE5, and Unreal Insights can be used to analyze the output. This is currently only fully supported on Windows, but will be extended to Mac/Linux in the near future.

To activate full memory tracking, pass `--trace=memory` on the command line alongside `--tracehost=<ip>` or `--tracefile=<path>`. For more control over how much detail is traced, you can instead pass some combination of `callstack`, `memtag` and `memalloc`. In practice, `--trace=memory` is an alias for `--trace=callstack,memtag,memalloc`. For convenience we also support `--trace=memory_light`, which omits call stacks.

This change also introduces multiple memory allocators, which may be selected via the command-line option `--malloc=<allocator>`:

* `mimalloc` - mimalloc (default, same as before)
* `rpmalloc` - rpmalloc is another high-performance allocator for multithreaded applications which may be a better option than mimalloc (to be evaluated). Due to toolchain limitations this is currently only supported on Windows.
* `stomp` - an allocator intended for use during development/debugging to help track down memory issues such as use-after-free or out-of-bounds access. Currently only supported on Windows.
* `ansi` - fallback to the default system allocator
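As a rough sketch of how the selection maps onto the new allocator headers below — `SelectAllocator` is a hypothetical helper for illustration only; the actual wiring lives in the memory.cpp and zenserver config changes in this patch:

    // Hedged sketch only -- not code from this patch.
    #include <string_view>
    #include <zencore/memory/fmalloc.h>
    #include <zencore/memory/mallocansi.h>
    #include <zencore/memory/mallocmimalloc.h>
    #include <zencore/memory/mallocrpmalloc.h>
    #include <zencore/memory/mallocstomp.h>

    namespace zen {

    // SelectAllocator (hypothetical): map a --malloc=<allocator> value to an FMalloc.
    inline FMalloc* SelectAllocator(std::string_view Name)
    {
    #if ZEN_MIMALLOC_ENABLED
        if (Name == "mimalloc") return new FMallocMimalloc;   // default
    #endif
    #if ZEN_RPMALLOC_ENABLED
        if (Name == "rpmalloc") return new FMallocRpmalloc;   // Windows-only for now
    #endif
    #if ZEN_WITH_MALLOC_STOMP
        if (Name == "stomp") return new FMallocStomp;         // debugging aid, Windows-only
    #endif
        return new FMallocAnsi;                               // "ansi" and fallback
    }

    } // namespace zen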
-rw-r--r--  .clang-format | 3
-rw-r--r--  .pre-commit-config.yaml | 1
-rw-r--r--  CHANGELOG.md | 4
-rw-r--r--  src/zen/zen.cpp | 8
-rw-r--r--  src/zencore-test/zencore-test.cpp | 9
-rw-r--r--  src/zencore/crypto.cpp | 1
-rw-r--r--  src/zencore/filesystem.cpp | 1
-rw-r--r--  src/zencore/include/zencore/guardvalue.h | 40
-rw-r--r--  src/zencore/include/zencore/iobuffer.h | 6
-rw-r--r--  src/zencore/include/zencore/memory.h | 11
-rw-r--r--  src/zencore/include/zencore/memory/fmalloc.h | 103
-rw-r--r--  src/zencore/include/zencore/memory/llm.h | 31
-rw-r--r--  src/zencore/include/zencore/memory/mallocansi.h | 31
-rw-r--r--  src/zencore/include/zencore/memory/mallocmimalloc.h | 36
-rw-r--r--  src/zencore/include/zencore/memory/mallocrpmalloc.h | 37
-rw-r--r--  src/zencore/include/zencore/memory/mallocstomp.h | 100
-rw-r--r--  src/zencore/include/zencore/memory/memory.h | 78
-rw-r--r--  src/zencore/include/zencore/memory/memorytrace.h | 251
-rw-r--r--  src/zencore/include/zencore/memory/newdelete.h | 155
-rw-r--r--  src/zencore/include/zencore/memory/tagtrace.h | 93
-rw-r--r--  src/zencore/include/zencore/string.h | 24
-rw-r--r--  src/zencore/include/zencore/trace.h | 4
-rw-r--r--  src/zencore/iobuffer.cpp | 51
-rw-r--r--  src/zencore/logging.cpp | 22
-rw-r--r--  src/zencore/memory.cpp | 55
-rw-r--r--  src/zencore/memory/fmalloc.cpp | 156
-rw-r--r--  src/zencore/memory/mallocansi.cpp | 251
-rw-r--r--  src/zencore/memory/mallocmimalloc.cpp | 197
-rw-r--r--  src/zencore/memory/mallocrpmalloc.cpp | 189
-rw-r--r--  src/zencore/memory/mallocstomp.cpp | 283
-rw-r--r--  src/zencore/memory/memory.cpp | 281
-rw-r--r--  src/zencore/memtrack/callstacktrace.cpp | 1059
-rw-r--r--  src/zencore/memtrack/callstacktrace.h | 151
-rw-r--r--  src/zencore/memtrack/growonlylockfreehash.h | 255
-rw-r--r--  src/zencore/memtrack/memorytrace.cpp | 829
-rw-r--r--  src/zencore/memtrack/moduletrace.cpp | 296
-rw-r--r--  src/zencore/memtrack/moduletrace.h | 11
-rw-r--r--  src/zencore/memtrack/moduletrace_events.cpp | 16
-rw-r--r--  src/zencore/memtrack/moduletrace_events.h | 27
-rw-r--r--  src/zencore/memtrack/platformtls.h | 107
-rw-r--r--  src/zencore/memtrack/tagtrace.cpp | 237
-rw-r--r--  src/zencore/memtrack/tracemalloc.h | 24
-rw-r--r--  src/zencore/memtrack/vatrace.cpp | 361
-rw-r--r--  src/zencore/memtrack/vatrace.h | 61
-rw-r--r--  src/zencore/sharedbuffer.cpp | 1
-rw-r--r--  src/zencore/stats.cpp | 14
-rw-r--r--  src/zencore/string.cpp | 1
-rw-r--r--  src/zencore/system.cpp | 1
-rw-r--r--  src/zencore/trace.cpp | 90
-rw-r--r--  src/zencore/xmake.lua | 18
-rw-r--r--  src/zenhttp-test/zenhttp-test.cpp | 10
-rw-r--r--  src/zennet-test/zennet-test.cpp | 9
-rw-r--r--  src/zenserver-test/zenserver-test.cpp | 9
-rw-r--r--  src/zenserver/config.cpp | 10
-rw-r--r--  src/zenserver/config.h | 6
-rw-r--r--  src/zenserver/diag/logging.cpp | 5
-rw-r--r--  src/zenserver/main.cpp | 24
-rw-r--r--  src/zenstore-test/zenstore-test.cpp | 9
-rw-r--r--  src/zenstore/filecas.cpp | 2
-rw-r--r--  src/zenutil-test/zenutil-test.cpp | 9
-rw-r--r--  src/zenutil/basicfile.cpp | 1
-rw-r--r--  thirdparty/rpmalloc/malloc.c | 367
-rw-r--r--  thirdparty/rpmalloc/rpmalloc.c | 2341
-rw-r--r--  thirdparty/rpmalloc/rpmalloc.h | 396
-rw-r--r--  thirdparty/rpmalloc/rpnew.h | 111
-rw-r--r--  xmake.lua | 19
66 files changed, 9230 insertions, 169 deletions
diff --git a/.clang-format b/.clang-format
index 4688f60ae..46f378928 100644
--- a/.clang-format
+++ b/.clang-format
@@ -147,3 +147,6 @@ UseTab: Always
WhitespaceSensitiveMacros:
- STRINGIZE
...
+---
+Language: JavaScript
+DisableFormat: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b624a6733..c71fc6877 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,6 +9,7 @@ repos:
#- id: check-added-large-files
- id: mixed-line-ending
- id: check-yaml
+ args: [--allow-multiple-documents]
- id: check-case-conflict
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.3.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b9cae3e6b..36b91f6cf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
##
+- Feature: Added `--malloc` option for selecting which memory allocator should be used. The available options are `--malloc=mimalloc`, `--malloc=rpmalloc`, `--malloc=stomp` or `--malloc=ansi`. Some of these options are not yet available on all targets, but all are available on Windows; `rpmalloc` is currently not supported on Linux or Mac due to toolchain limitations.
+- Feature: Added support for generating Unreal Insights-compatible traces for memory usage analysis. Currently only supported on Windows. Activate memory tracing by passing `--trace=memory` on the command line, alongside one of `--tracehost=<ip>` or `--tracefile=<path>` to enable tracing over network or to a file.
+
+## 5.5.13
- Bugfix: Fix inconsistencies in filecas due to failing to remove payload file during GC causing "Missing Chunk" errors
- Bugfix: Fixed crash on corrupt attachment block when doing oplog import
- Bugfix: Fixed off-by-one in GetPidStatus (Linux) which could cause spurious errors
diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp
index 16f5799e0..fd58b024a 100644
--- a/src/zen/zen.cpp
+++ b/src/zen/zen.cpp
@@ -44,9 +44,7 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <gsl/gsl-lite.hpp>
ZEN_THIRD_PARTY_INCLUDES_END
-#if ZEN_USE_MIMALLOC
-# include <mimalloc-new-delete.h>
-#endif
+#include <zencore/memory/newdelete.h>
//////////////////////////////////////////////////////////////////////////
@@ -365,10 +363,6 @@ main(int argc, char** argv)
using namespace zen;
using namespace std::literals;
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
-
zen::logging::InitializeLogging();
// Set output mode to handle virtual terminal sequences
diff --git a/src/zencore-test/zencore-test.cpp b/src/zencore-test/zencore-test.cpp
index 64df746e4..40cb51156 100644
--- a/src/zencore-test/zencore-test.cpp
+++ b/src/zencore-test/zencore-test.cpp
@@ -7,11 +7,7 @@
#include <zencore/logging.h>
#include <zencore/zencore.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -21,9 +17,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zencore_forcelinktests();
diff --git a/src/zencore/crypto.cpp b/src/zencore/crypto.cpp
index 8403a35f4..78bea0c17 100644
--- a/src/zencore/crypto.cpp
+++ b/src/zencore/crypto.cpp
@@ -2,6 +2,7 @@
#include <zencore/crypto.h>
#include <zencore/intmath.h>
+#include <zencore/memory/memory.h>
#include <zencore/scopeguard.h>
#include <zencore/testing.h>
diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp
index 9ca5f1131..36147c5a9 100644
--- a/src/zencore/filesystem.cpp
+++ b/src/zencore/filesystem.cpp
@@ -7,6 +7,7 @@
#include <zencore/fmtutils.h>
#include <zencore/iobuffer.h>
#include <zencore/logging.h>
+#include <zencore/memory/memory.h>
#include <zencore/process.h>
#include <zencore/stream.h>
#include <zencore/string.h>
diff --git a/src/zencore/include/zencore/guardvalue.h b/src/zencore/include/zencore/guardvalue.h
new file mode 100644
index 000000000..5419e882a
--- /dev/null
+++ b/src/zencore/include/zencore/guardvalue.h
@@ -0,0 +1,40 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+namespace zen {
+
+/**
+ * Exception-safe guard around saving/restoring a value.
+ * Commonly used to make sure a value is restored
+ * even if the code early-outs.
+ * Usage:
+ * TGuardValue<bool> GuardSomeBool(bSomeBool, false); // Sets bSomeBool to false, and restores it in dtor.
+ */
+template<typename RefType, typename AssignedType = RefType>
+struct TGuardValue
+{
+ [[nodiscard]] TGuardValue(RefType& ReferenceValue, const AssignedType& NewValue)
+ : RefValue(ReferenceValue)
+ , OriginalValue(ReferenceValue)
+ {
+ RefValue = NewValue;
+ }
+ ~TGuardValue() { RefValue = OriginalValue; }
+
+ /**
+ * Provides read-only access to the original value of the data being tracked by this struct
+ *
+ * @return a const reference to the original data value
+ */
+ const AssignedType& GetOriginalValue() const { return OriginalValue; }
+
+ TGuardValue& operator=(const TGuardValue&) = delete;
+ TGuardValue(const TGuardValue&) = delete;
+
+private:
+ RefType& RefValue;
+ AssignedType OriginalValue;
+};
+
+} // namespace zen
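A usage sketch mirroring the comment above (illustrative only):

    #include <zencore/guardvalue.h>

    static bool bSomeBool = true;

    void Example()
    {
        zen::TGuardValue<bool> GuardSomeBool(bSomeBool, false); // bSomeBool is false in this scope
        // ... early returns or exceptions here still restore the value ...
    } // dtor runs: bSomeBool is true again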
diff --git a/src/zencore/include/zencore/iobuffer.h b/src/zencore/include/zencore/iobuffer.h
index 493b7375e..93a27ea58 100644
--- a/src/zencore/include/zencore/iobuffer.h
+++ b/src/zencore/include/zencore/iobuffer.h
@@ -99,6 +99,11 @@ public:
ZENCORE_API IoBufferCore(size_t SizeBytes, size_t Alignment);
ZENCORE_API ~IoBufferCore();
+ void* operator new(size_t Size);
+ void operator delete(void* Ptr);
+ void* operator new[](size_t Size) = delete;
+ void operator delete[](void* Ptr) = delete;
+
// Reference counting
inline uint32_t AddRef() const { return AtomicIncrement(const_cast<IoBufferCore*>(this)->m_RefCount); }
@@ -244,7 +249,6 @@ protected:
kIsExtended = 1 << 2, // Is actually a SharedBufferExtendedCore
kIsMaterialized = 1 << 3, // Data pointers are valid
kIsWholeFile = 1 << 5, // References an entire file
- kIoBufferAlloc = 1 << 6, // Using IoBuffer allocator
kIsOwnedByThis = 1 << 7,
// Note that we have some extended flags defined below
diff --git a/src/zencore/include/zencore/memory.h b/src/zencore/include/zencore/memory.h
index fdea1a5f1..8361ab9d8 100644
--- a/src/zencore/include/zencore/memory.h
+++ b/src/zencore/include/zencore/memory.h
@@ -22,17 +22,10 @@ template<typename T>
concept ContiguousRange = true;
#endif
-struct MemoryView;
-
-class Memory
-{
-public:
- ZENCORE_API static void* Alloc(size_t Size, size_t Alignment = sizeof(void*));
- ZENCORE_API static void Free(void* Ptr);
-};
-
//////////////////////////////////////////////////////////////////////////
+struct MemoryView;
+
struct MutableMemoryView
{
MutableMemoryView() = default;
diff --git a/src/zencore/include/zencore/memory/fmalloc.h b/src/zencore/include/zencore/memory/fmalloc.h
new file mode 100644
index 000000000..aeb05b651
--- /dev/null
+++ b/src/zencore/include/zencore/memory/fmalloc.h
@@ -0,0 +1,103 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+namespace zen {
+
+enum
+{
+ DEFAULT_ALIGNMENT = 0
+};
+
+/**
+ * Inherit from FUseSystemMallocForNew if you want your objects to be placed in memory
+ * alloced by the system malloc routines, bypassing GMalloc. This is e.g. used by FMalloc
+ * itself.
+ */
+class FUseSystemMallocForNew
+{
+public:
+ void* operator new(size_t Size);
+ void operator delete(void* Ptr);
+ void* operator new[](size_t Size);
+ void operator delete[](void* Ptr);
+};
+
+/** Memory allocator abstraction
+ */
+
+class FMalloc : public FUseSystemMallocForNew
+{
+public:
+ /**
+ * Malloc
+ */
+ virtual void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) = 0;
+
+ /**
+ * TryMalloc - like Malloc(), but may return a nullptr result if the allocation
+ * request cannot be satisfied.
+ */
+ virtual void* TryMalloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * Realloc
+ */
+ virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) = 0;
+
+ /**
+ * TryRealloc - like Realloc(), but may return a nullptr if the allocation
+ * request cannot be satisfied. Note that in this case the memory
+ * pointed to by Original will still be valid
+ */
+ virtual void* TryRealloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * Free
+ */
+ virtual void Free(void* Original) = 0;
+
+ /**
+ * Malloc zeroed memory
+ */
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * TryMallocZeroed - like MallocZeroed(), but may return a nullptr result if the allocation
+ * request cannot be satisfied.
+ */
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+ /**
+ * For some allocators this will return the actual size that should be requested to eliminate
+ * internal fragmentation. The return value will always be >= Count. This can be used to grow
+ * and shrink containers to optimal sizes.
+ * This call is always fast and threadsafe with no locking.
+ */
+ virtual size_t QuantizeSize(size_t Count, uint32_t Alignment);
+
+ /**
+ * If possible determine the size of the memory allocated at the given address
+ *
+ * @param Original - Pointer to memory we are checking the size of
+ * @param SizeOut - If possible, this value is set to the size of the passed in pointer
+ * @return true if succeeded
+ */
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut);
+
+ /**
+ * Notifies the malloc implementation that initialization of all allocators in GMalloc is complete, so it's safe to initialize any extra
+ * features that require "regular" allocations
+ */
+ virtual void OnMallocInitialized();
+
+ virtual void Trim(bool bTrimThreadCaches);
+
+ virtual void OutOfMemory(size_t Size, uint32_t Alignment);
+};
+
+extern FMalloc* GMalloc; /* Memory allocator */
+
+} // namespace zen
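Only Malloc, Realloc and Free are pure virtual; everything else has a forwarding or no-op default, so a minimal decorator over another FMalloc stays small. An illustrative sketch (FMallocCounting is hypothetical, not part of this patch):

    #include <atomic>
    #include <zencore/memory/fmalloc.h>

    namespace zen {

    // Hypothetical pass-through allocator that counts Malloc calls.
    class FMallocCounting final : public FMalloc
    {
    public:
        explicit FMallocCounting(FMalloc* InInner) : Inner(InInner) {}

        virtual void* Malloc(size_t Count, uint32_t Alignment) override
        {
            NumAllocs.fetch_add(1, std::memory_order_relaxed);
            return Inner->Malloc(Count, Alignment);
        }
        virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment) override
        {
            return Inner->Realloc(Original, Count, Alignment);
        }
        virtual void Free(void* Original) override { Inner->Free(Original); }

    private:
        FMalloc*            Inner;
        std::atomic<size_t> NumAllocs{0};
    };

    } // namespace zen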
diff --git a/src/zencore/include/zencore/memory/llm.h b/src/zencore/include/zencore/memory/llm.h
new file mode 100644
index 000000000..4f1c9de77
--- /dev/null
+++ b/src/zencore/include/zencore/memory/llm.h
@@ -0,0 +1,31 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/memory/tagtrace.h>
+
+namespace zen {
+
+// clang-format off
+#define LLM_ENUM_GENERIC_TAGS(macro) \
+ macro(Untagged, "Untagged", -1) \
+ macro(ProgramSize, "ProgramSize", -1) \
+ macro(Metrics, "Metrics", -1) \
+ macro(Logging, "Logging", -1) \
+ macro(IoBuffer, "IoBuffer", -1) \
+ macro(IoBufferMemory, "IoMemory", ELLMTag::IoBuffer) \
+ macro(IoBufferCore, "IoCore", ELLMTag::IoBuffer)
+
+// clang-format on
+
+enum class ELLMTag : uint8_t
+{
+#define LLM_ENUM(Enum, Str, Parent) Enum,
+ LLM_ENUM_GENERIC_TAGS(LLM_ENUM)
+#undef LLM_ENUM
+
+ GenericTagCount
+};
+
+} // namespace zen
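The tag list is an X-macro, so parallel tables stay in sync with the enum automatically; for example, a display-name lookup could be generated the same way (sketch, not in this patch):

    #include <zencore/memory/llm.h>

    namespace zen {

    inline const char* LLMTagName(ELLMTag Tag)
    {
        switch (Tag)
        {
    #define LLM_ENUM(Enum, Str, Parent) \
        case ELLMTag::Enum:             \
            return Str;
            LLM_ENUM_GENERIC_TAGS(LLM_ENUM)
    #undef LLM_ENUM
            default:
                return "Unknown";
        }
    }

    } // namespace zen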
diff --git a/src/zencore/include/zencore/memory/mallocansi.h b/src/zencore/include/zencore/memory/mallocansi.h
new file mode 100644
index 000000000..510695c8c
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocansi.h
@@ -0,0 +1,31 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include "fmalloc.h"
+#include "memory.h"
+
+namespace zen {
+
+void* AnsiMalloc(size_t Size, uint32_t Alignment);
+void* AnsiRealloc(void* Ptr, size_t NewSize, uint32_t Alignment);
+void AnsiFree(void* Ptr);
+
+//
+// ANSI C memory allocator.
+//
+
+class FMallocAnsi final : public FMalloc
+{
+public:
+ FMallocAnsi();
+
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Ptr) override;
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+};
+
+} // namespace zen
diff --git a/src/zencore/include/zencore/memory/mallocmimalloc.h b/src/zencore/include/zencore/memory/mallocmimalloc.h
new file mode 100644
index 000000000..759eeb4a6
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocmimalloc.h
@@ -0,0 +1,36 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+
+#if ZEN_USE_MIMALLOC
+# define ZEN_MIMALLOC_ENABLED 1
+#endif
+
+#if !defined(ZEN_MIMALLOC_ENABLED)
+# define ZEN_MIMALLOC_ENABLED 0
+#endif
+
+#if ZEN_MIMALLOC_ENABLED
+
+namespace zen {
+
+class FMallocMimalloc final : public FMalloc
+{
+public:
+ FMallocMimalloc();
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Ptr) override;
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+ virtual void Trim(bool bTrimThreadCaches) override;
+};
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/include/zencore/memory/mallocrpmalloc.h b/src/zencore/include/zencore/memory/mallocrpmalloc.h
new file mode 100644
index 000000000..be2627b2d
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocrpmalloc.h
@@ -0,0 +1,37 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+
+#if ZEN_USE_RPMALLOC
+# define ZEN_RPMALLOC_ENABLED 1
+#endif
+
+#if !defined(ZEN_RPMALLOC_ENABLED)
+# define ZEN_RPMALLOC_ENABLED 0
+#endif
+
+#if ZEN_RPMALLOC_ENABLED
+
+namespace zen {
+
+class FMallocRpmalloc final : public FMalloc
+{
+public:
+ FMallocRpmalloc();
+ ~FMallocRpmalloc();
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void* TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Ptr) override;
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment) override;
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+ virtual void Trim(bool bTrimThreadCaches) override;
+};
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/include/zencore/memory/mallocstomp.h b/src/zencore/include/zencore/memory/mallocstomp.h
new file mode 100644
index 000000000..5d83868bb
--- /dev/null
+++ b/src/zencore/include/zencore/memory/mallocstomp.h
@@ -0,0 +1,100 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# define ZEN_WITH_MALLOC_STOMP 1
+#endif
+
+#ifndef ZEN_WITH_MALLOC_STOMP
+# define ZEN_WITH_MALLOC_STOMP 0
+#endif
+
+/**
+ * Stomp memory allocator. In this codebase it is selected at run time
+ * via the `--malloc=stomp` command-line option.
+ */
+
+#if ZEN_WITH_MALLOC_STOMP
+
+# include <zencore/memory/fmalloc.h>
+# include <zencore/thread.h>
+
+namespace zen {
+
+/**
+ * Stomp memory allocator. It helps find the following errors:
+ * - Read or writes off the end of an allocation.
+ * - Read or writes off the beginning of an allocation.
+ * - Read or writes after freeing an allocation.
+ */
+class FMallocStomp final : public FMalloc
+{
+ struct FAllocationData;
+
+ const size_t PageSize;
+
+ /** If it is set to true, instead of focusing on overruns the allocator will focus on underruns. */
+ const bool bUseUnderrunMode;
+ RwLock Lock;
+
+ uintptr_t VirtualAddressCursor = 0;
+ size_t VirtualAddressMax = 0;
+ static constexpr size_t VirtualAddressBlockSize = 1 * 1024 * 1024 * 1024; // 1 GB blocks
+
+public:
+ // FMalloc interface.
+ explicit FMallocStomp(const bool InUseUnderrunMode = false);
+
+ /**
+ * Allocates a block of a given number of bytes of memory with the required alignment.
+ * In the process it allocates as many pages as necessary, plus one that is protected,
+ * making it inaccessible so that touching it raises an exception. The actual allocation
+ * is pushed to the end of the last valid unprotected page. To deal with underrun errors,
+ * a sentinel is added right before the allocation in the page and checked on free.
+ *
+ * @param Size Size in bytes of the memory block to allocate.
+ * @param Alignment Alignment in bytes of the memory block to allocate.
+ * @return A pointer to the beginning of the memory block.
+ */
+ virtual void* Malloc(size_t Size, uint32_t Alignment) override;
+
+ virtual void* TryMalloc(size_t Size, uint32_t Alignment) override;
+
+ /**
+ * Changes the size of the memory block pointed to by OldPtr.
+ * The function may move the memory block to a new location.
+ *
+ * @param InPtr Pointer to a memory block previously allocated with Malloc.
+ * @param NewSize New size in bytes for the memory block.
+ * @param Alignment Alignment in bytes for the reallocation.
+ * @return A pointer to the reallocated memory block, which may be either the same as InPtr or a new location.
+ */
+ virtual void* Realloc(void* InPtr, size_t NewSize, uint32_t Alignment) override;
+
+ virtual void* TryRealloc(void* InPtr, size_t NewSize, uint32_t Alignment) override;
+
+ /**
+ * Frees a memory allocation and verifies the sentinel in the process.
+ *
+ * @param InPtr Pointer of the data to free.
+ */
+ virtual void Free(void* InPtr) override;
+
+ /**
+ * If possible determine the size of the memory allocated at the given address.
+ * This will include all the pages that were allocated, so it will be far more
+ * than what is recorded in the FAllocationData.
+ *
+ * @param Original - Pointer to memory we are checking the size of
+ * @param SizeOut - If possible, this value is set to the size of the passed in pointer
+ * @return true if succeeded
+ */
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override;
+};
+
+} // namespace zen
+
+#endif // ZEN_WITH_MALLOC_STOMP
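For illustration, the class of bug this catches at the faulting instruction rather than as delayed corruption (a sketch, assuming the stomp allocator has been selected with `--malloc=stomp`):

    #include <zencore/memory/fmalloc.h>

    void StompExample()
    {
        char* P = static_cast<char*>(zen::GMalloc->Malloc(16, 16));
        P[16] = 0;             // one-past-the-end write lands on the protected page -> immediate access violation
        zen::GMalloc->Free(P); // Free also verifies the underrun sentinel
    }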
diff --git a/src/zencore/include/zencore/memory/memory.h b/src/zencore/include/zencore/memory/memory.h
new file mode 100644
index 000000000..2fc20def6
--- /dev/null
+++ b/src/zencore/include/zencore/memory/memory.h
@@ -0,0 +1,78 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <stdlib.h>
+#include <zencore/memory/fmalloc.h>
+
+#define UE_ALLOCATION_FUNCTION(...)
+
+namespace zen {
+
+/**
+ * Corresponds to UE-side FMemory implementation
+ */
+
+class Memory
+{
+public:
+ static void Initialize();
+
+ //
+ // C style memory allocation stubs that fall back to C runtime
+ //
+ UE_ALLOCATION_FUNCTION(1) static void* SystemMalloc(size_t Size);
+ static void SystemFree(void* Ptr);
+
+ //
+ // C style memory allocation stubs.
+ //
+
+ static inline void* Alloc(size_t Size, size_t Alignment = sizeof(void*)) { return Malloc(Size, uint32_t(Alignment)); }
+
+ UE_ALLOCATION_FUNCTION(1, 2) static inline void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+ UE_ALLOCATION_FUNCTION(2, 3) static inline void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+ static inline void Free(void* Original);
+ static inline size_t GetAllocSize(void* Original);
+
+ UE_ALLOCATION_FUNCTION(1, 2) static inline void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT);
+
+private:
+ static void GCreateMalloc();
+};
+
+inline void*
+Memory::Malloc(size_t Count, uint32_t Alignment)
+{
+ return GMalloc->TryMalloc(Count, Alignment);
+}
+
+inline void*
+Memory::Realloc(void* Original, size_t Count, uint32_t Alignment)
+{
+ return GMalloc->TryRealloc(Original, Count, Alignment);
+}
+
+inline void
+Memory::Free(void* Original)
+{
+ if (Original)
+ {
+ GMalloc->Free(Original);
+ }
+}
+
+inline size_t
+Memory::GetAllocSize(void* Original)
+{
+ size_t Size = 0;
+ return GMalloc->GetAllocationSize(Original, Size) ? Size : 0;
+}
+
+inline void*
+Memory::MallocZeroed(size_t Count, uint32_t Alignment)
+{
+ return GMalloc->TryMallocZeroed(Count, Alignment);
+}
+
+} // namespace zen
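Call sites go through the Memory facade rather than touching GMalloc directly; a usage sketch:

    #include <zencore/memory/memory.h>

    void Example()
    {
        void* Ptr = zen::Memory::Alloc(1024, 64);        // 1 KiB, 64-byte aligned
        size_t Usable = zen::Memory::GetAllocSize(Ptr);  // >= 1024, or 0 if the allocator can't tell
        Ptr = zen::Memory::Realloc(Ptr, 2048);
        zen::Memory::Free(Ptr);                          // Free(nullptr) is a no-op
        (void)Usable;
    }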
diff --git a/src/zencore/include/zencore/memory/memorytrace.h b/src/zencore/include/zencore/memory/memorytrace.h
new file mode 100644
index 000000000..d1ab1f914
--- /dev/null
+++ b/src/zencore/include/zencore/memory/memorytrace.h
@@ -0,0 +1,251 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/enumflags.h>
+#include <zencore/trace.h>
+
+#if !defined(UE_MEMORY_TRACE_AVAILABLE)
+# define UE_MEMORY_TRACE_AVAILABLE 0
+#endif
+
+#if !defined(UE_MEMORY_TRACE_LATE_INIT)
+# define UE_MEMORY_TRACE_LATE_INIT 0
+#endif
+
+#if !defined(PLATFORM_USES_FIXED_GMalloc_CLASS)
+# define PLATFORM_USES_FIXED_GMalloc_CLASS 0
+#endif
+
+#if !defined(UE_MEMORY_TRACE_ENABLED) && UE_TRACE_ENABLED
+# if UE_MEMORY_TRACE_AVAILABLE
+# define UE_MEMORY_TRACE_ENABLED ZEN_WITH_MEMTRACK
+# endif
+#endif
+
+#if !defined(UE_MEMORY_TRACE_ENABLED)
+# define UE_MEMORY_TRACE_ENABLED 0
+#endif
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+typedef uint32_t HeapId;
+
+////////////////////////////////////////////////////////////////////////////////
+enum EMemoryTraceRootHeap : uint8_t
+{
+ SystemMemory, // RAM
+ VideoMemory, // VRAM
+ EndHardcoded = VideoMemory,
+ EndReserved = 15
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// These values are traced. Do not modify existing values in order to maintain
+// compatibility.
+enum class EMemoryTraceHeapFlags : uint16_t
+{
+ None = 0,
+ Root = 1 << 0,
+ NeverFrees = 1 << 1, // The heap doesn't free (e.g. linear allocator)
+};
+ENUM_CLASS_FLAGS(EMemoryTraceHeapFlags);
+
+////////////////////////////////////////////////////////////////////////////////
+// These values are traced. Do not modify existing values in order to maintain
+// compatibility.
+enum class EMemoryTraceHeapAllocationFlags : uint8_t
+{
+ None = 0,
+ Heap = 1 << 0, // Is a heap, can be used to unmark alloc as heap.
+ Swap = 1 << 1, // Is a swap page
+};
+ENUM_CLASS_FLAGS(EMemoryTraceHeapAllocationFlags);
+
+////////////////////////////////////////////////////////////////////////////////
+enum class EMemoryTraceSwapOperation : uint8_t
+{
+ PageOut = 0, // Paged out to swap
+ PageIn = 1, // Read from swap via page fault
+ FreeInSwap = 2, // Freed while being paged out in swap
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Internal options for early initialization of memory tracing systems. Exposed
+// here due to visibility in platform implementations.
+enum class EMemoryTraceInit : uint8_t
+{
+ Disabled = 0,
+ AllocEvents = 1 << 0,
+ Callstacks = 1 << 1,
+ Tags = 1 << 2,
+ Full = AllocEvents | Callstacks | Tags,
+ Light = AllocEvents | Tags,
+};
+
+ENUM_CLASS_FLAGS(EMemoryTraceInit);
+
+////////////////////////////////////////////////////////////////////////////////
+#if UE_MEMORY_TRACE_ENABLED
+
+# define UE_MEMORY_TRACE(x) x
+
+UE_TRACE_CHANNEL_EXTERN(MemAllocChannel);
+
+////////////////////////////////////////////////////////////////////////////////
+class FMalloc* MemoryTrace_Create(class FMalloc* InMalloc);
+void MemoryTrace_Initialize();
+void MemoryTrace_Shutdown();
+
+/**
+ * Register a new heap specification (name). Use the returned value when marking heaps.
+ * @param ParentId Heap id of parent heap.
+ * @param Name Descriptive name of the heap.
+ * @param Flags Properties of this heap. See \ref EMemoryTraceHeapFlags
+ * @return Heap id to use when allocating memory
+ */
+HeapId MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags = EMemoryTraceHeapFlags::None);
+
+/**
+ * Register a new root heap specification (name). Use the returned value as parent to other heaps.
+ * @param Name Descriptive name of the root heap.
+ * @param Flags Properties of this root heap. See \ref EMemoryTraceHeapFlags
+ * @return Heap id to use when allocating memory
+ */
+HeapId MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags = EMemoryTraceHeapFlags::None);
+
+/**
+ * Mark a traced allocation as being a heap.
+ * @param Address Address of the allocation
+ * @param Heap Heap id, see \ref MemoryTrace_HeapSpec. If no specific heap spec has been created, the correct root heap needs to be given.
+ * @param Flags Additional properties of the heap allocation. Note that \ref EMemoryTraceHeapAllocationFlags::Heap is implicit.
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_MarkAllocAsHeap(uint64_t Address,
+ HeapId Heap,
+ EMemoryTraceHeapAllocationFlags Flags = EMemoryTraceHeapAllocationFlags::None,
+ uint32_t ExternalCallstackId = 0);
+
+/**
+ * Unmark an allocation as a heap. When an allocation that has previously been used as a heap is reused as a regular
+ * allocation.
+ * @param Address Address of the allocation
+ * @param Heap Heap id
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId = 0);
+
+/**
+ * Trace an allocation event.
+ * @param Address Address of allocation
+ * @param Size Size of allocation
+ * @param Alignment Alignment of the allocation
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_Alloc(uint64_t Address,
+ uint64_t Size,
+ uint32_t Alignment,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32_t ExternalCallstackId = 0);
+
+/**
+ * Trace a free event.
+ * @param Address Address of the allocation being freed
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_Free(uint64_t Address, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0);
+
+/**
+ * Trace a free related to a reallocation event.
+ * @param Address Address of the allocation being freed
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0);
+
+/** Trace an allocation related to a reallocation event.
+ * @param Address Address of allocation
+ * @param NewSize Size of allocation
+ * @param Alignment Alignment of the allocation
+ * @param RootHeap Which root heap this belongs to (system memory, video memory etc)
+ * @param ExternalCallstackId CallstackId to use, if 0 will use current callstack id.
+ */
+void MemoryTrace_ReallocAlloc(uint64_t Address,
+ uint64_t NewSize,
+ uint32_t Alignment,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32_t ExternalCallstackId = 0);
+
+/** Trace a swap operation. Only available for system memory root heap (EMemoryTraceRootHeap::SystemMemory).
+ * @param PageAddress Page address for operation, in case of PageIn can be address of the page fault (not aligned to page boundary).
+ * @param SwapOperation Which swap operation is happening to the address.
+ * @param CompressedSize Compressed size of the page for page out operation.
+ * @param CallstackId CallstackId to use, if 0 to ignore (will not use current callstack id).
+ */
+void MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize = 0, uint32_t CallstackId = 0);
+
+////////////////////////////////////////////////////////////////////////////////
+#else // UE_MEMORY_TRACE_ENABLED
+
+# define UE_MEMORY_TRACE(x)
+inline HeapId
+MemoryTrace_RootHeapSpec(const char16_t* /*Name*/, EMemoryTraceHeapFlags /* Flags = EMemoryTraceHeapFlags::None */)
+{
+ return ~0u;
+}
+inline HeapId
+MemoryTrace_HeapSpec(HeapId /*ParentId*/, const char16_t* /*Name*/, EMemoryTraceHeapFlags /* Flags = EMemoryTraceHeapFlags::None */)
+{
+ return ~0u;
+}
+inline void
+MemoryTrace_MarkAllocAsHeap(uint64_t /*Address*/,
+ HeapId /*Heap*/,
+ EMemoryTraceHeapAllocationFlags /*Flags*/ = EMemoryTraceHeapAllocationFlags::None,
+ uint32_t /*ExternalCallstackId*/ = 0)
+{
+}
+inline void
+MemoryTrace_UnmarkAllocAsHeap(uint64_t /*Address*/, HeapId /*Heap*/, uint32_t /*ExternalCallstackId*/ = 0)
+{
+}
+inline void
+MemoryTrace_Alloc(uint64_t /*Address*/,
+ uint64_t /*Size*/,
+ uint32_t /*Alignment*/,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_Free(uint64_t /*Address*/, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_ReallocFree(uint64_t /*Address*/, HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory, uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_ReallocAlloc(uint64_t /*Address*/,
+ uint64_t /*NewSize*/,
+ uint32_t /*Alignment*/,
+ HeapId RootHeap = EMemoryTraceRootHeap::SystemMemory,
+ uint32_t ExternalCallstackId = 0)
+{
+ ZEN_UNUSED(RootHeap, ExternalCallstackId);
+}
+inline void
+MemoryTrace_SwapOp(uint64_t /*PageAddress*/,
+ EMemoryTraceSwapOperation /*SwapOperation*/,
+ uint32_t CompressedSize = 0,
+ uint32_t CallstackId = 0)
+{
+ ZEN_UNUSED(CompressedSize, CallstackId);
+}
+
+#endif // UE_MEMORY_TRACE_ENABLED
+
+} // namespace zen
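A sub-allocator can report itself through these hooks so Insights shows its pool as a heap; an illustrative sketch (the pool itself is hypothetical):

    #include <cstdint>
    #include <zencore/memory/memorytrace.h>

    void TracePoolExample(uint64_t PoolBase, uint64_t PoolSize)
    {
        using namespace zen;

        // Describe the pool once; the u"..." literal matches the char16_t* parameter.
        static HeapId PoolHeap = MemoryTrace_HeapSpec(EMemoryTraceRootHeap::SystemMemory, u"ExamplePool");

        MemoryTrace_Alloc(PoolBase, PoolSize, /*Alignment*/ 4096); // the backing block...
        MemoryTrace_MarkAllocAsHeap(PoolBase, PoolHeap);           // ...is itself a heap

        MemoryTrace_Alloc(PoolBase + 0x100, 256, 16);              // an allocation carved from it
        MemoryTrace_Free(PoolBase + 0x100);
    }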
diff --git a/src/zencore/include/zencore/memory/newdelete.h b/src/zencore/include/zencore/memory/newdelete.h
new file mode 100644
index 000000000..d22c8604f
--- /dev/null
+++ b/src/zencore/include/zencore/memory/newdelete.h
@@ -0,0 +1,155 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <new>
+
+#if defined(_MSC_VER)
+# if (_MSC_VER >= 1900) && !defined(__EDG__)
+# define ZEN_RESTRICT __declspec(allocator) __declspec(restrict)
+# else
+# define ZEN_RESTRICT __declspec(restrict)
+# endif
+#else
+# define ZEN_RESTRICT
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+
+[[nodiscard]] ZEN_RESTRICT void* zen_new(size_t size);
+[[nodiscard]] ZEN_RESTRICT void* zen_new_aligned(size_t size, size_t alignment);
+[[nodiscard]] ZEN_RESTRICT void* zen_new_nothrow(size_t size) noexcept;
+[[nodiscard]] ZEN_RESTRICT void* zen_new_aligned_nothrow(size_t size, size_t alignment) noexcept;
+
+void zen_free(void* p) noexcept;
+void zen_free_size(void* p, size_t size) noexcept;
+void zen_free_size_aligned(void* p, size_t size, size_t alignment) noexcept;
+void zen_free_aligned(void* p, size_t alignment) noexcept;
+
+//////////////////////////////////////////////////////////////////////////
+
+#if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
+# define zen_decl_new(n) [[nodiscard]] _VCRT_ALLOCATOR _Ret_notnull_ _Post_writable_byte_size_(n)
+# define zen_decl_new_nothrow(n) [[nodiscard]] _VCRT_ALLOCATOR _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
+#else
+# define zen_decl_new(n) [[nodiscard]]
+# define zen_decl_new_nothrow(n) [[nodiscard]]
+#endif
+
+void
+operator delete(void* p) noexcept
+{
+ zen_free(p);
+}
+
+void
+operator delete[](void* p) noexcept
+{
+ zen_free(p);
+}
+
+void
+operator delete(void* p, const std::nothrow_t&) noexcept
+{
+ zen_free(p);
+}
+
+void
+operator delete[](void* p, const std::nothrow_t&) noexcept
+{
+ zen_free(p);
+}
+
+zen_decl_new(n) void*
+operator new(std::size_t n) noexcept(false)
+{
+ return zen_new(n);
+}
+
+zen_decl_new(n) void*
+operator new[](std::size_t n) noexcept(false)
+{
+ return zen_new(n);
+}
+
+zen_decl_new_nothrow(n) void*
+operator new(std::size_t n, const std::nothrow_t& tag) noexcept
+{
+ (void)(tag);
+ return zen_new_nothrow(n);
+}
+
+zen_decl_new_nothrow(n) void*
+operator new[](std::size_t n, const std::nothrow_t& tag) noexcept
+{
+ (void)(tag);
+ return zen_new_nothrow(n);
+}
+
+#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
+void
+operator delete(void* p, std::size_t n) noexcept
+{
+ zen_free_size(p, n);
+}
+void
+operator delete[](void* p, std::size_t n) noexcept
+{
+ zen_free_size(p, n);
+}
+#endif
+
+#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
+void
+operator delete(void* p, std::align_val_t al) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+void
+operator delete[](void* p, std::align_val_t al) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+void
+operator delete(void* p, std::size_t n, std::align_val_t al) noexcept
+{
+ zen_free_size_aligned(p, n, static_cast<size_t>(al));
+}
+void
+operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept
+{
+ zen_free_size_aligned(p, n, static_cast<size_t>(al));
+}
+void
+operator delete(void* p, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+void
+operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ zen_free_aligned(p, static_cast<size_t>(al));
+}
+
+void*
+operator new(std::size_t n, std::align_val_t al) noexcept(false)
+{
+ return zen_new_aligned(n, static_cast<size_t>(al));
+}
+void*
+operator new[](std::size_t n, std::align_val_t al) noexcept(false)
+{
+ return zen_new_aligned(n, static_cast<size_t>(al));
+}
+void*
+operator new(std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ return zen_new_aligned_nothrow(n, static_cast<size_t>(al));
+}
+void*
+operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept
+{
+ return zen_new_aligned_nothrow(n, static_cast<size_t>(al));
+}
+#endif
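Unlike the header-only mimalloc-new-delete.h it replaces, these operator definitions are non-inline, so the header is meant to be included in exactly one translation unit per binary, as the zen.cpp and *-test.cpp changes in this patch do. A minimal sketch of a consumer:

    // Include in exactly one .cpp of the binary to route global new/delete
    // through zen_new/zen_free (and thus GMalloc).
    #include <zencore/memory/newdelete.h>

    int main()
    {
        int* P = new int[32]; // dispatches to zen_new
        delete[] P;           // dispatches to zen_free
        return 0;
    }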
diff --git a/src/zencore/include/zencore/memory/tagtrace.h b/src/zencore/include/zencore/memory/tagtrace.h
new file mode 100644
index 000000000..f51b21466
--- /dev/null
+++ b/src/zencore/include/zencore/memory/tagtrace.h
@@ -0,0 +1,93 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/trace.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace zen {
+
+enum class ELLMTag : uint8_t;
+
+int32_t MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display);
+int32_t MemoryTrace_GetActiveTag();
+
+inline constexpr int32_t TRACE_TAG = 257;
+
+} // namespace zen
+
+////////////////////////////////////////////////////////////////////////////////
+#if !defined(UE_MEMORY_TAGS_TRACE_ENABLED)
+# define UE_MEMORY_TAGS_TRACE_ENABLED 1
+#endif
+
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+namespace zen {
+////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Used to associate any allocation within this scope to a given tag.
+ *
+ * We need to be able to convert the three types of inputs to LLM scopes:
+ * - ELLMTag, a uint8 with fixed categories. There are three sub-ranges:
+ * generic tags, platform tags and project tags.
+ * - FName, free form string, for example a specific asset.
+ * - TagData, an opaque pointer from LLM.
+ *
+ */
+class FMemScope
+{
+public:
+ FMemScope(); // Used with SetTagAndActivate
+ FMemScope(int32_t InTag, bool bShouldActivate = true);
+ FMemScope(ELLMTag InTag, bool bShouldActivate = true);
+ ~FMemScope();
+
+private:
+ void ActivateScope(int32_t InTag);
+ UE::Trace::Private::FScopedLogScope Inner;
+ int32_t PrevTag;
+};
+
+/**
+ * A scope that activates in case no existing scope is active.
+ */
+template<typename TagType>
+class FDefaultMemScope : public FMemScope
+{
+public:
+ FDefaultMemScope(TagType InTag) : FMemScope(InTag, MemoryTrace_GetActiveTag() == 0) {}
+};
+
+/**
+ * Used in order to keep the tag for memory that is being reallocated.
+ */
+class FMemScopePtr
+{
+public:
+ FMemScopePtr(uint64_t InPtr);
+ ~FMemScopePtr();
+
+private:
+ UE::Trace::Private::FScopedLogScope Inner;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+# define UE_MEMSCOPE(InTag) FMemScope PREPROCESSOR_JOIN(MemScope, __LINE__)(InTag);
+# define UE_MEMSCOPE_PTR(InPtr) FMemScopePtr PREPROCESSOR_JOIN(MemPtrScope, __LINE__)((uint64_t)InPtr);
+# define UE_MEMSCOPE_DEFAULT(InTag) FDefaultMemScope PREPROCESSOR_JOIN(MemScope, __LINE__)(InTag);
+# define UE_MEMSCOPE_UNINITIALIZED(Line) FMemScope PREPROCESSOR_JOIN(MemScope, Line);
+
+#else // UE_MEMORY_TAGS_TRACE_ENABLED
+
+////////////////////////////////////////////////////////////////////////////////
+# define UE_MEMSCOPE(...)
+# define UE_MEMSCOPE_PTR(...)
+# define UE_MEMSCOPE_DEFAULT(...)
+# define UE_MEMSCOPE_UNINITIALIZED(...)
+# define UE_MEMSCOPE_ACTIVATE(...)
+
+#endif // UE_MEMORY_TAGS_TRACE_ENABLED
+}
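Scopes nest and restore the previous tag on destruction; the Logging and IoBuffer call sites later in this patch follow this pattern. A usage sketch:

    #include <vector>
    #include <zencore/memory/llm.h>

    namespace zen {

    void Example()
    {
        UE_MEMSCOPE(ELLMTag::Metrics);  // allocations below are attributed to Metrics
        std::vector<int> Samples(1024); // the vector's buffer is tagged
    }                                   // scope ends; previous tag is restored

    } // namespace zen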
diff --git a/src/zencore/include/zencore/string.h b/src/zencore/include/zencore/string.h
index b10b6a2ba..e2ef1c1a0 100644
--- a/src/zencore/include/zencore/string.h
+++ b/src/zencore/include/zencore/string.h
@@ -51,6 +51,30 @@ StringLength(const wchar_t* str)
return wcslen(str);
}
+inline int
+StringCompare(const char16_t* s1, const char16_t* s2)
+{
+ char16_t c1, c2;
+
+ while ((c1 = *s1) == (c2 = *s2))
+ {
+ if (c1 == 0)
+ {
+ return 0;
+ }
+
+ ++s1;
+ ++s2;
+ }
+ return uint16_t(c1) - uint16_t(c2);
+}
+
+inline bool
+StringEquals(const char16_t* s1, const char16_t* s2)
+{
+ return StringCompare(s1, s2) == 0;
+}
+
inline size_t
StringLength(const char16_t* str)
{
diff --git a/src/zencore/include/zencore/trace.h b/src/zencore/include/zencore/trace.h
index 89e4b76bf..2ca2b7c81 100644
--- a/src/zencore/include/zencore/trace.h
+++ b/src/zencore/include/zencore/trace.h
@@ -19,6 +19,8 @@ ZEN_THIRD_PARTY_INCLUDES_END
#define ZEN_TRACE_CPU(x) TRACE_CPU_SCOPE(x)
#define ZEN_TRACE_CPU_FLUSH(x) TRACE_CPU_SCOPE(x, trace::CpuScopeFlags::CpuFlush)
+namespace zen {
+
enum class TraceType
{
File,
@@ -32,6 +34,8 @@ bool IsTracing();
void TraceStart(std::string_view ProgramName, const char* HostOrPath, TraceType Type);
bool TraceStop();
+}
+
#else
#define ZEN_TRACE_CPU(x)
diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp
index 51f380c34..d6d02eb0b 100644
--- a/src/zencore/iobuffer.cpp
+++ b/src/zencore/iobuffer.cpp
@@ -8,6 +8,8 @@
#include <zencore/iohash.h>
#include <zencore/logging.h>
#include <zencore/memory.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/memory.h>
#include <zencore/testing.h>
#include <zencore/thread.h>
#include <zencore/trace.h>
@@ -15,12 +17,6 @@
#include <memory.h>
#include <system_error>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
#if ZEN_PLATFORM_WINDOWS
# include <zencore/windows.h>
#else
@@ -43,39 +39,39 @@ namespace zen {
void
IoBufferCore::AllocateBuffer(size_t InSize, size_t Alignment) const
{
-#if ZEN_USE_MIMALLOC
- void* Ptr = mi_aligned_alloc(Alignment, RoundUp(InSize, Alignment));
- m_Flags.fetch_or(kIoBufferAlloc, std::memory_order_relaxed);
-#else
+ UE_MEMSCOPE(ELLMTag::IoBufferMemory);
+
void* Ptr = Memory::Alloc(InSize, Alignment);
-#endif
if (!Ptr)
{
ThrowOutOfMemory(fmt::format("failed allocating {:#x} bytes aligned to {:#x}", InSize, Alignment));
}
+
m_DataPtr = Ptr;
}
void
IoBufferCore::FreeBuffer()
{
- if (!m_DataPtr)
+ if (m_DataPtr)
{
- return;
+ Memory::Free(const_cast<void*>(m_DataPtr));
+ m_DataPtr = nullptr;
}
+}
- const uint32_t LocalFlags = m_Flags.load(std::memory_order_relaxed);
-
-#if ZEN_USE_MIMALLOC
- if (LocalFlags & kIoBufferAlloc)
- {
- return mi_free(const_cast<void*>(m_DataPtr));
- }
-#endif
+void*
+IoBufferCore::operator new(size_t Size)
+{
+ UE_MEMSCOPE(ELLMTag::IoBufferCore);
+ return Memory::Malloc(Size);
+}
- ZEN_UNUSED(LocalFlags);
- return Memory::Free(const_cast<void*>(m_DataPtr));
+void
+IoBufferCore::operator delete(void* Ptr)
+{
+ Memory::Free(Ptr);
}
//////////////////////////////////////////////////////////////////////////
@@ -104,10 +100,9 @@ IoBufferCore::IoBufferCore(size_t InSize, size_t Alignment)
IoBufferCore::~IoBufferCore()
{
- if (IsOwnedByThis() && m_DataPtr)
+ if (IsOwnedByThis())
{
FreeBuffer();
- m_DataPtr = nullptr;
}
}
@@ -567,7 +562,7 @@ IoBufferBuilder::ReadFromFileMaybe(const IoBuffer& InBuffer)
Error = zen::GetLastError();
}
#else
- int Fd = int(intptr_t(FileRef.FileHandle));
+ int Fd = int(intptr_t(FileRef.FileHandle));
ssize_t ReadResult = pread(Fd, OutBuffer.MutableData(), size_t(NumberOfBytesToRead), off_t(FileOffset));
if (ReadResult != -1)
{
@@ -635,7 +630,7 @@ IoBufferBuilder::MakeFromFile(const std::filesystem::path& FileName, uint64_t Of
DataFile.GetSize((ULONGLONG&)FileSize);
#else
int Flags = O_RDONLY | O_CLOEXEC;
- int Fd = open(FileName.c_str(), Flags);
+ int Fd = open(FileName.c_str(), Flags);
if (Fd < 0)
{
return {};
@@ -704,7 +699,7 @@ IoBufferBuilder::MakeFromTemporaryFile(const std::filesystem::path& FileName)
Handle = DataFile.Detach();
#else
- int Fd = open(FileName.native().c_str(), O_RDONLY);
+ int Fd = open(FileName.native().c_str(), O_RDONLY);
if (Fd < 0)
{
return {};
diff --git a/src/zencore/logging.cpp b/src/zencore/logging.cpp
index 1a0a91b3d..7bd500b3b 100644
--- a/src/zencore/logging.cpp
+++ b/src/zencore/logging.cpp
@@ -6,6 +6,8 @@
#include <zencore/testing.h>
#include <zencore/thread.h>
+#include <zencore/memory/llm.h>
+
ZEN_THIRD_PARTY_INCLUDES_START
#include <spdlog/details/registry.h>
#include <spdlog/sinks/null_sink.h>
@@ -66,6 +68,7 @@ static_assert(offsetof(spdlog::source_loc, funcname) == offsetof(SourceLocation,
void
EmitLogMessage(LoggerRef& Logger, int LogLevel, const std::string_view Message)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel;
Logger.SpdLogger->log(InLevel, Message);
if (IsErrorLevel(LogLevel))
@@ -80,6 +83,7 @@ EmitLogMessage(LoggerRef& Logger, int LogLevel, const std::string_view Message)
void
EmitLogMessage(LoggerRef& Logger, int LogLevel, std::string_view Format, fmt::format_args Args)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
zen::logging::LoggingContext LogCtx;
fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args);
zen::logging::EmitLogMessage(Logger, LogLevel, LogCtx.Message());
@@ -88,6 +92,7 @@ EmitLogMessage(LoggerRef& Logger, int LogLevel, std::string_view Format, fmt::fo
void
EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel, const std::string_view Message)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
const spdlog::source_loc& Location = *reinterpret_cast<const spdlog::source_loc*>(&InLocation);
const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel;
Logger.SpdLogger->log(Location, InLevel, Message);
@@ -103,6 +108,7 @@ EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel
void
EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel, std::string_view Format, fmt::format_args Args)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
zen::logging::LoggingContext LogCtx;
fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args);
zen::logging::EmitLogMessage(Logger, InLocation, LogLevel, LogCtx.Message());
@@ -111,6 +117,7 @@ EmitLogMessage(LoggerRef& Logger, const SourceLocation& InLocation, int LogLevel
void
EmitConsoleLogMessage(int LogLevel, const std::string_view Message)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
const spdlog::level::level_enum InLevel = (spdlog::level::level_enum)LogLevel;
ConsoleLog().SpdLogger->log(InLevel, Message);
}
@@ -118,6 +125,7 @@ EmitConsoleLogMessage(int LogLevel, const std::string_view Message)
void
EmitConsoleLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
zen::logging::LoggingContext LogCtx;
fmt::vformat_to(fmt::appender(LogCtx.MessageBuffer), Format, Args);
zen::logging::EmitConsoleLogMessage(LogLevel, LogCtx.Message());
@@ -192,6 +200,8 @@ std::string LogLevels[level::LogLevelCount];
void
ConfigureLogLevels(level::LogLevel Level, std::string_view Loggers)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
RwLock::ExclusiveLockScope _(LogLevelsLock);
LogLevels[Level] = Loggers;
}
@@ -199,6 +209,8 @@ ConfigureLogLevels(level::LogLevel Level, std::string_view Loggers)
void
RefreshLogLevels(level::LogLevel* DefaultLevel)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
spdlog::details::registry::log_levels Levels;
{
@@ -275,6 +287,8 @@ Default()
void
SetDefault(std::string_view NewDefaultLoggerId)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
auto NewDefaultLogger = spdlog::get(std::string(NewDefaultLoggerId));
ZEN_ASSERT(NewDefaultLogger);
@@ -293,6 +307,8 @@ ErrorLog()
void
SetErrorLog(std::string_view NewErrorLoggerId)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
if (NewErrorLoggerId.empty())
{
TheErrorLogger = {};
@@ -310,6 +326,8 @@ SetErrorLog(std::string_view NewErrorLoggerId)
LoggerRef
Get(std::string_view Name)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
std::shared_ptr<spdlog::logger> Logger = spdlog::get(std::string(Name));
if (!Logger)
@@ -339,6 +357,8 @@ SuppressConsoleLog()
LoggerRef
ConsoleLog()
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
std::call_once(ConsoleInitFlag, [&] {
if (!ConLogger)
{
@@ -355,6 +375,8 @@ ConsoleLog()
void
InitializeLogging()
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
TheDefaultLogger = *spdlog::default_logger_raw();
}
diff --git a/src/zencore/memory.cpp b/src/zencore/memory.cpp
index a0d911786..a2fe02f3a 100644
--- a/src/zencore/memory.cpp
+++ b/src/zencore/memory.cpp
@@ -4,67 +4,14 @@
#include <zencore/fmtutils.h>
#include <zencore/intmath.h>
#include <zencore/memory.h>
+#include <zencore/memory/memory.h>
#include <zencore/testing.h>
#include <zencore/zencore.h>
#include <cstdlib>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
namespace zen {
-//////////////////////////////////////////////////////////////////////////
-
-static void*
-AlignedAllocImpl(size_t Size, size_t Alignment)
-{
- // aligned_alloc() states that size must be a multiple of alignment. Some
- // platforms return null if this requirement isn't met.
- Size = (Size + Alignment - 1) & ~(Alignment - 1);
-
-#if ZEN_USE_MIMALLOC
- return mi_aligned_alloc(Alignment, Size);
-#elif ZEN_PLATFORM_WINDOWS
- return _aligned_malloc(Size, Alignment);
-#else
- return std::aligned_alloc(Alignment, Size);
-#endif
-}
-
-void
-AlignedFreeImpl(void* ptr)
-{
- if (ptr == nullptr)
- return;
-
-#if ZEN_USE_MIMALLOC
- return mi_free(ptr);
-#elif ZEN_PLATFORM_WINDOWS
- _aligned_free(ptr);
-#else
- std::free(ptr);
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-void*
-Memory::Alloc(size_t Size, size_t Alignment)
-{
- return AlignedAllocImpl(Size, Alignment);
-}
-
-void
-Memory::Free(void* ptr)
-{
- AlignedFreeImpl(ptr);
-}
-
-//////////////////////////////////////////////////////////////////////////
//
// Unit tests
//
diff --git a/src/zencore/memory/fmalloc.cpp b/src/zencore/memory/fmalloc.cpp
new file mode 100644
index 000000000..3e96003f5
--- /dev/null
+++ b/src/zencore/memory/fmalloc.cpp
@@ -0,0 +1,156 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <string.h>
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memory.h>
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
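+// Bootstrap allocator: GMalloc points at this object until Memory::Initialize()
+// has run, so the very first allocation transparently initializes the real
+// allocator and then forwards to it.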
+class FInitialMalloc : public FMalloc
+{
+ virtual void* Malloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->Malloc(Count, Alignment);
+ }
+ virtual void* TryMalloc(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->TryMalloc(Count, Alignment);
+ }
+ virtual void* Realloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->Realloc(Original, Count, Alignment);
+ }
+ virtual void* TryRealloc(void* Original, size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->TryRealloc(Original, Count, Alignment);
+ }
+ virtual void Free(void* Original) override
+ {
+ Memory::Initialize();
+ return GMalloc->Free(Original);
+ }
+ virtual void* MallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->MallocZeroed(Count, Alignment);
+ }
+
+ virtual void* TryMallocZeroed(size_t Count, uint32_t Alignment = DEFAULT_ALIGNMENT) override
+ {
+ Memory::Initialize();
+ return GMalloc->TryMallocZeroed(Count, Alignment);
+ }
+ virtual size_t QuantizeSize(size_t Count, uint32_t Alignment) override
+ {
+ Memory::Initialize();
+ return GMalloc->QuantizeSize(Count, Alignment);
+ }
+ virtual bool GetAllocationSize(void* Original, size_t& SizeOut) override
+ {
+ Memory::Initialize();
+ return GMalloc->GetAllocationSize(Original, SizeOut);
+ }
+ virtual void OnMallocInitialized() override {}
+ virtual void Trim(bool bTrimThreadCaches) override { ZEN_UNUSED(bTrimThreadCaches); }
+} GInitialMalloc;
+
+FMalloc* GMalloc = &GInitialMalloc; /* Memory allocator */
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+FUseSystemMallocForNew::operator new(size_t Size)
+{
+ return Memory::SystemMalloc(Size);
+}
+
+void
+FUseSystemMallocForNew::operator delete(void* Ptr)
+{
+ Memory::SystemFree(Ptr);
+}
+
+void*
+FUseSystemMallocForNew::operator new[](size_t Size)
+{
+ return Memory::SystemMalloc(Size);
+}
+
+void
+FUseSystemMallocForNew::operator delete[](void* Ptr)
+{
+ Memory::SystemFree(Ptr);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+FMalloc::TryRealloc(void* Original, size_t Count, uint32_t Alignment)
+{
+ return Realloc(Original, Count, Alignment);
+}
+
+void*
+FMalloc::TryMalloc(size_t Count, uint32_t Alignment)
+{
+ return Malloc(Count, Alignment);
+}
+
+void*
+FMalloc::TryMallocZeroed(size_t Count, uint32_t Alignment)
+{
+ return MallocZeroed(Count, Alignment);
+}
+
+void*
+FMalloc::MallocZeroed(size_t Count, uint32_t Alignment)
+{
+ void* const Memory = Malloc(Count, Alignment);
+
+ if (Memory)
+ {
+ ::memset(Memory, 0, Count);
+ }
+
+ return Memory;
+}
+
+void
+FMalloc::OutOfMemory(size_t Size, uint32_t Alignment)
+{
+ ZEN_UNUSED(Size, Alignment);
+ // no-op by default
+}
+
+void
+FMalloc::Trim(bool bTrimThreadCaches)
+{
+ ZEN_UNUSED(bTrimThreadCaches);
+}
+
+void
+FMalloc::OnMallocInitialized()
+{
+}
+
+bool
+FMalloc::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ ZEN_UNUSED(Original, SizeOut);
+ return false; // Generic implementation has no way of determining this
+}
+
+size_t
+FMalloc::QuantizeSize(size_t Count, uint32_t Alignment)
+{
+ ZEN_UNUSED(Alignment);
+ return Count; // Generic implementation has no way of determining this
+}
+
+} // namespace zen
diff --git a/src/zencore/memory/mallocansi.cpp b/src/zencore/memory/mallocansi.cpp
new file mode 100644
index 000000000..9c3936172
--- /dev/null
+++ b/src/zencore/memory/mallocansi.cpp
@@ -0,0 +1,251 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/mallocansi.h>
+
+#include <zencore/intmath.h>
+#include <zencore/memory/align.h>
+#include <zencore/windows.h>
+
+#if ZEN_PLATFORM_LINUX
+# define PLATFORM_USE_ANSI_POSIX_MALLOC 1
+#endif
+
+#if ZEN_PLATFORM_MAC
+# define PLATFORM_USE_CUSTOM_MEMALIGN 1
+#endif
+
+#ifndef PLATFORM_USE_ANSI_MEMALIGN
+# define PLATFORM_USE_ANSI_MEMALIGN 0
+#endif
+
+#ifndef PLATFORM_USE_ANSI_POSIX_MALLOC
+# define PLATFORM_USE_ANSI_POSIX_MALLOC 0
+#endif
+
+#ifndef PLATFORM_USE_CUSTOM_MEMALIGN
+# define PLATFORM_USE_CUSTOM_MEMALIGN 0
+#endif
+
+#if PLATFORM_USE_ANSI_POSIX_MALLOC
+# include <malloc.h>
+# include <string.h>
+#endif
+
+#define MALLOC_ANSI_USES__ALIGNED_MALLOC ZEN_PLATFORM_WINDOWS
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+AnsiMalloc(size_t Size, uint32_t Alignment)
+{
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ void* Result = _aligned_malloc(Size, Alignment);
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC
+ void* Result;
+ if (posix_memalign(&Result, Alignment, Size) != 0)
+ {
+ Result = nullptr;
+ }
+#elif PLATFORM_USE_ANSI_MEMALIGN
+	void* Result = memalign(Alignment, Size);
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ void* Ptr = malloc(Size + Alignment + sizeof(void*) + sizeof(size_t));
+ void* Result = nullptr;
+ if (Ptr)
+ {
+ Result = Align((uint8_t*)Ptr + sizeof(void*) + sizeof(size_t), Alignment);
+ *((void**)((uint8_t*)Result - sizeof(void*))) = Ptr;
+ *((size_t*)((uint8_t*)Result - sizeof(void*) - sizeof(size_t))) = Size;
+ }
+#else
+# error Unknown allocation path
+#endif
+
+ return Result;
+}
+
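+// Memory layout used by the PLATFORM_USE_CUSTOM_MEMALIGN path above
+// (illustrative diagram, not additional code):
+//
+//   Ptr (raw malloc block)
+//   |  ...padding...  [size_t Size][void* Ptr][user data, Alignment-aligned]
+//                                             ^Result
+//
+// AnsiFree and AnsiGetAllocationSize recover the header fields at
+// Result - sizeof(void*) and Result - sizeof(void*) - sizeof(size_t).
+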
+size_t
+AnsiGetAllocationSize(void* Original)
+{
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ return _aligned_msize(Original, 16, 0); // TODO: incorrectly assumes alignment of 16
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN
+ return malloc_usable_size(Original);
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ return *((size_t*)((uint8_t*)Original - sizeof(void*) - sizeof(size_t)));
+#else
+# error Unknown allocation path
+#endif
+}
+
+void*
+AnsiRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = nullptr;
+
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ if (Ptr && NewSize)
+ {
+ Result = _aligned_realloc(Ptr, NewSize, Alignment);
+ }
+ else if (Ptr == nullptr)
+ {
+ Result = _aligned_malloc(NewSize, Alignment);
+ }
+ else
+ {
+ _aligned_free(Ptr);
+ Result = nullptr;
+ }
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC
+ if (Ptr && NewSize)
+ {
+ size_t UsableSize = malloc_usable_size(Ptr);
+ if (posix_memalign(&Result, Alignment, NewSize) != 0)
+ {
+ Result = nullptr;
+ }
+ else if (UsableSize)
+ {
+ memcpy(Result, Ptr, Min(NewSize, UsableSize));
+ }
+ free(Ptr);
+ }
+ else if (Ptr == nullptr)
+ {
+ if (posix_memalign(&Result, Alignment, NewSize) != 0)
+ {
+ Result = nullptr;
+ }
+ }
+ else
+ {
+ free(Ptr);
+ Result = nullptr;
+ }
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ if (Ptr && NewSize)
+ {
+ // Can't use realloc as it might screw with alignment.
+ Result = AnsiMalloc(NewSize, Alignment);
+ size_t PtrSize = AnsiGetAllocationSize(Ptr);
+ memcpy(Result, Ptr, Min(NewSize, PtrSize));
+ AnsiFree(Ptr);
+ }
+ else if (Ptr == nullptr)
+ {
+ Result = AnsiMalloc(NewSize, Alignment);
+ }
+ else
+ {
+ free(*((void**)((uint8_t*)Ptr - sizeof(void*))));
+ Result = nullptr;
+ }
+#else
+# error Unknown allocation path
+#endif
+
+ return Result;
+}
+
+void
+AnsiFree(void* Ptr)
+{
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ _aligned_free(Ptr);
+#elif PLATFORM_USE_ANSI_POSIX_MALLOC || PLATFORM_USE_ANSI_MEMALIGN
+ free(Ptr);
+#elif PLATFORM_USE_CUSTOM_MEMALIGN
+ if (Ptr)
+ {
+ free(*((void**)((uint8_t*)Ptr - sizeof(void*))));
+ }
+#else
+# error Unknown allocation path
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+FMallocAnsi::FMallocAnsi()
+{
+#if ZEN_PLATFORM_WINDOWS
+ // Enable low fragmentation heap - http://msdn2.microsoft.com/en-US/library/aa366750.aspx
+ intptr_t CrtHeapHandle = _get_heap_handle();
+ ULONG EnableLFH = 2;
+ HeapSetInformation((void*)CrtHeapHandle, HeapCompatibilityInformation, &EnableLFH, sizeof(EnableLFH));
+#endif
+}
+
+void*
+FMallocAnsi::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ Alignment = Max(Size >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+
+ void* Result = AnsiMalloc(Size, Alignment);
+
+ return Result;
+}
+
+void*
+FMallocAnsi::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocAnsi::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+
+ void* Result = AnsiRealloc(Ptr, NewSize, Alignment);
+
+ return Result;
+}
+
+void*
+FMallocAnsi::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(Ptr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize != 0)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void
+FMallocAnsi::Free(void* Ptr)
+{
+ AnsiFree(Ptr);
+}
+
+bool
+FMallocAnsi::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ if (!Original)
+ {
+ return false;
+ }
+
+#if MALLOC_ANSI_USES__ALIGNED_MALLOC
+ ZEN_UNUSED(SizeOut);
+ return false;
+#else
+ SizeOut = AnsiGetAllocationSize(Original);
+ return true;
+#endif
+}
+
+} // namespace zen
diff --git a/src/zencore/memory/mallocmimalloc.cpp b/src/zencore/memory/mallocmimalloc.cpp
new file mode 100644
index 000000000..1919af3bf
--- /dev/null
+++ b/src/zencore/memory/mallocmimalloc.cpp
@@ -0,0 +1,197 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/intmath.h>
+#include <zencore/memory/align.h>
+#include <zencore/memory/mallocmimalloc.h>
+
+#if ZEN_MIMALLOC_ENABLED
+
+# include <mimalloc.h>
+
+/** Value we fill a memory block with after it is freed, in debug builds (ZEN_BUILD_DEBUG) **/
+# define DEBUG_FILL_FREED (0xdd)
+
+/** Value we fill a new memory block with, in debug builds (ZEN_BUILD_DEBUG) **/
+# define DEBUG_FILL_NEW (0xcd)
+
+# define ZEN_ENABLE_DEBUG_FILL 1
+
+namespace zen {
+
+// Dramatically reduce memory zeroing and page faults during alloc intense workloads
+// by keeping freed pages for a little while instead of releasing them
+// right away to the OS, effectively acting like a scratch buffer
+// until pages are both freed and inactive for the delay specified
+// in milliseconds.
+int32_t GMiMallocMemoryResetDelay = 10000;
+
+FMallocMimalloc::FMallocMimalloc()
+{
+ mi_option_set(mi_option_reset_delay, GMiMallocMemoryResetDelay);
+}
+
+void*
+FMallocMimalloc::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = mi_malloc_aligned(Size, Alignment);
+ }
+ else
+ {
+ NewPtr = mi_malloc_aligned(Size, uint32_t(Size >= 16 ? 16 : 8));
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (Size && NewPtr != nullptr)
+ {
+ memset(NewPtr, DEBUG_FILL_NEW, mi_usable_size(NewPtr));
+ }
+# endif
+
+ return NewPtr;
+}
+
+void*
+FMallocMimalloc::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocMimalloc::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ size_t OldSize = 0;
+ if (Ptr)
+ {
+ OldSize = mi_malloc_size(Ptr);
+ if (NewSize < OldSize)
+ {
+ memset((uint8_t*)Ptr + NewSize, DEBUG_FILL_FREED, OldSize - NewSize);
+ }
+ }
+# endif
+ void* NewPtr = nullptr;
+
+ if (NewSize == 0)
+ {
+ mi_free(Ptr);
+
+ return nullptr;
+ }
+
+# if ZEN_PLATFORM_MAC
+ // macOS expects all allocations to be aligned to 16 bytes, so on Mac we always have to use mi_realloc_aligned
+ Alignment = AlignArbitrary(Max((uint32_t)16, Alignment), (uint32_t)16);
+ NewPtr = mi_realloc_aligned(Ptr, NewSize, Alignment);
+# else
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+ NewPtr = mi_realloc_aligned(Ptr, NewSize, Alignment);
+ }
+ else
+ {
+ NewPtr = mi_realloc(Ptr, NewSize);
+ }
+# endif
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (NewPtr && NewSize > OldSize)
+ {
+ memset((uint8_t*)NewPtr + OldSize, DEBUG_FILL_NEW, mi_usable_size(NewPtr) - OldSize);
+ }
+# endif
+
+ return NewPtr;
+}
+
+void*
+FMallocMimalloc::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(Ptr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void
+FMallocMimalloc::Free(void* Ptr)
+{
+ if (!Ptr)
+ {
+ return;
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ memset(Ptr, DEBUG_FILL_FREED, mi_usable_size(Ptr));
+# endif
+
+ mi_free(Ptr);
+}
+
+void*
+FMallocMimalloc::MallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMallocZeroed(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocMimalloc::TryMallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = mi_zalloc_aligned(Size, Alignment);
+ }
+ else
+ {
+ NewPtr = mi_zalloc_aligned(Size, uint32_t(Size >= 16 ? 16 : 8));
+ }
+
+ return NewPtr;
+}
+
+bool
+FMallocMimalloc::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ SizeOut = mi_malloc_size(Original);
+ return true;
+}
+
+void
+FMallocMimalloc::Trim(bool bTrimThreadCaches)
+{
+ mi_collect(bTrimThreadCaches);
+}
+
+# undef DEBUG_FILL_FREED
+# undef DEBUG_FILL_NEW
+
+} // namespace zen
+
+#endif // ZEN_MIMALLOC_ENABLED
diff --git a/src/zencore/memory/mallocrpmalloc.cpp b/src/zencore/memory/mallocrpmalloc.cpp
new file mode 100644
index 000000000..ffced27c9
--- /dev/null
+++ b/src/zencore/memory/mallocrpmalloc.cpp
@@ -0,0 +1,189 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/intmath.h>
+#include <zencore/memory/align.h>
+#include <zencore/memory/mallocrpmalloc.h>
+
+#if ZEN_RPMALLOC_ENABLED
+
+# include "rpmalloc.h"
+
+/** Value we fill a memory block with after it is freed, in debug builds (ZEN_BUILD_DEBUG) **/
+# define DEBUG_FILL_FREED (0xdd)
+
+/** Value we fill a new memory block with, in debug builds (ZEN_BUILD_DEBUG) **/
+# define DEBUG_FILL_NEW (0xcd)
+
+# define ZEN_ENABLE_DEBUG_FILL 1
+
+namespace zen {
+
+FMallocRpmalloc::FMallocRpmalloc()
+{
+ rpmalloc_initialize(nullptr);
+}
+
+FMallocRpmalloc::~FMallocRpmalloc()
+{
+ rpmalloc_finalize();
+}
+
+void*
+FMallocRpmalloc::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = rpaligned_alloc(Alignment, Size);
+ }
+ else
+ {
+ NewPtr = rpaligned_alloc(uint32_t(Size >= 16 ? 16 : 8), Size);
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (Size && NewPtr != nullptr)
+ {
+ memset(NewPtr, DEBUG_FILL_NEW, rpmalloc_usable_size(NewPtr));
+ }
+# endif
+
+ return NewPtr;
+}
+
+void*
+FMallocRpmalloc::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocRpmalloc::Realloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(Ptr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocRpmalloc::TryRealloc(void* Ptr, size_t NewSize, uint32_t Alignment)
+{
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ size_t OldSize = 0;
+ if (Ptr)
+ {
+ OldSize = rpmalloc_usable_size(Ptr);
+ if (NewSize < OldSize)
+ {
+ memset((uint8_t*)Ptr + NewSize, DEBUG_FILL_FREED, OldSize - NewSize);
+ }
+ }
+# endif
+ void* NewPtr = nullptr;
+
+ if (NewSize == 0)
+ {
+ rpfree(Ptr);
+
+ return nullptr;
+ }
+
+# if ZEN_PLATFORM_MAC
+	// macOS expects all allocations to be aligned to 16 bytes, so on Mac we always have to use rpaligned_realloc
+ Alignment = AlignArbitrary(Max((uint32_t)16, Alignment), (uint32_t)16);
+ NewPtr = rpaligned_realloc(Ptr, Alignment, NewSize, /* OldSize */ 0, /* flags */ 0);
+# else
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(NewSize >= 16 ? (uint32_t)16 : (uint32_t)8, Alignment);
+ NewPtr = rpaligned_realloc(Ptr, Alignment, NewSize, /* OldSize */ 0, /* flags */ 0);
+ }
+ else
+ {
+ NewPtr = rprealloc(Ptr, NewSize);
+ }
+# endif
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ if (NewPtr && NewSize > OldSize)
+ {
+ memset((uint8_t*)NewPtr + OldSize, DEBUG_FILL_NEW, rpmalloc_usable_size(NewPtr) - OldSize);
+ }
+# endif
+
+ return NewPtr;
+}
+
+void
+FMallocRpmalloc::Free(void* Ptr)
+{
+ if (!Ptr)
+ {
+ return;
+ }
+
+# if ZEN_BUILD_DEBUG && ZEN_ENABLE_DEBUG_FILL
+ memset(Ptr, DEBUG_FILL_FREED, rpmalloc_usable_size(Ptr));
+# endif
+
+ rpfree(Ptr);
+}
+
+void*
+FMallocRpmalloc::MallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMallocZeroed(Size, Alignment);
+
+ if (Result == nullptr && Size)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocRpmalloc::TryMallocZeroed(size_t Size, uint32_t Alignment)
+{
+ void* NewPtr = nullptr;
+
+ if (Alignment != DEFAULT_ALIGNMENT)
+ {
+ Alignment = Max(uint32_t(Size >= 16 ? 16 : 8), Alignment);
+ NewPtr = rpaligned_zalloc(Alignment, Size);
+ }
+ else
+ {
+ NewPtr = rpaligned_zalloc(uint32_t(Size >= 16 ? 16 : 8), Size);
+ }
+
+ return NewPtr;
+}
+
+bool
+FMallocRpmalloc::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+	// Note: rpmalloc_usable_size returns the usable size, which may be larger than the requested allocation size.
+ SizeOut = rpmalloc_usable_size(Original);
+ return true;
+}
+
+void
+FMallocRpmalloc::Trim(bool bTrimThreadCaches)
+{
+	ZEN_UNUSED(bTrimThreadCaches);
+}
+
+} // namespace zen
+
+#endif // ZEN_RPMALLOC_ENABLED
diff --git a/src/zencore/memory/mallocstomp.cpp b/src/zencore/memory/mallocstomp.cpp
new file mode 100644
index 000000000..db9e1535e
--- /dev/null
+++ b/src/zencore/memory/mallocstomp.cpp
@@ -0,0 +1,283 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/mallocstomp.h>
+
+#if ZEN_WITH_MALLOC_STOMP
+
+# include <zencore/memory/align.h>
+# include <zencore/xxhash.h>
+
+# if ZEN_PLATFORM_LINUX
+# include <sys/mman.h>
+# endif
+
+# if ZEN_PLATFORM_WINDOWS
+# include <zencore/windows.h>
+# endif
+
+# if ZEN_PLATFORM_WINDOWS
+// MallocStomp can keep the virtual address range reserved after a memory block is freed, while releasing the physical memory.
+// This dramatically increases the accuracy of use-after-free detection, but consumes a significant amount of memory for the OS page table.
+// The virtual memory limit for a process on Win10 is 128 TB, which means we can afford to keep virtual memory reserved for a very long time.
+// Running the Infiltrator demo consumes ~700MB of virtual address space per second.
+# define MALLOC_STOMP_KEEP_VIRTUAL_MEMORY 1
+# else
+# define MALLOC_STOMP_KEEP_VIRTUAL_MEMORY 0
+# endif
+
+// 64-bit ABIs on x86_64 expect a 16-byte alignment
+# define STOMPALIGNMENT 16U
+
+namespace zen {
+
+struct FMallocStomp::FAllocationData
+{
+ /** Pointer to the full allocation. Needed so the OS knows what to free. */
+ void* FullAllocationPointer;
+ /** Full size of the allocation including the extra page. */
+ size_t FullSize;
+ /** Size of the allocation requested. */
+ size_t Size;
+ /** Sentinel used to check for underrun. */
+ size_t Sentinel;
+
+ /** Calculate the expected sentinel value for this allocation data. */
+ size_t CalculateSentinel() const
+ {
+ XXH3_128 Xxh = XXH3_128::HashMemory(this, offsetof(FAllocationData, Sentinel));
+
+ size_t Hash;
+ memcpy(&Hash, Xxh.Hash, sizeof(Hash));
+
+ return Hash;
+ }
+};
+
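+// Allocation layout in the default (overrun-detecting) mode, drawn for
+// illustration:
+//
+//   FullAllocationPointer                                           FullSize
+//   |--------------- committed read/write --------------|- no-access page -|
+//              ... [FAllocationData][user data, AlignedSize]
+//                                   ^ReturnedPointer         ^page boundary
+//
+// The user block ends exactly at the protected page, so the first write past
+// the end faults immediately. Underrun mode mirrors this with the no-access
+// page placed in front of the allocation.
+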
+FMallocStomp::FMallocStomp(const bool InUseUnderrunMode) : PageSize(4096 /* TODO: make dynamic */), bUseUnderrunMode(InUseUnderrunMode)
+{
+}
+
+void*
+FMallocStomp::Malloc(size_t Size, uint32_t Alignment)
+{
+ void* Result = TryMalloc(Size, Alignment);
+
+ if (Result == nullptr)
+ {
+ OutOfMemory(Size, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocStomp::TryMalloc(size_t Size, uint32_t Alignment)
+{
+ if (Size == 0U)
+ {
+ Size = 1U;
+ }
+
+ Alignment = Max<uint32_t>(Alignment, STOMPALIGNMENT);
+
+ constexpr static size_t AllocationDataSize = sizeof(FAllocationData);
+
+ const size_t AlignedSize = Alignment ? ((Size + Alignment - 1) & -(int32_t)Alignment) : Size;
+ const size_t AlignmentSize = Alignment > PageSize ? Alignment - PageSize : 0;
+ const size_t AllocFullPageSize = (AlignedSize + AlignmentSize + AllocationDataSize + PageSize - 1) & ~(PageSize - 1);
+ const size_t TotalAllocationSize = AllocFullPageSize + PageSize;
+
+# if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+ void* FullAllocationPointer = mmap(nullptr, TotalAllocationSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+# elif ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Allocate virtual address space from current block using linear allocation strategy.
+ // If there is not enough space, try to allocate new block from OS. Report OOM if block allocation fails.
+ void* FullAllocationPointer = nullptr;
+
+ {
+ RwLock::ExclusiveLockScope _(Lock);
+
+ if (VirtualAddressCursor + TotalAllocationSize <= VirtualAddressMax)
+ {
+ FullAllocationPointer = (void*)(VirtualAddressCursor);
+ }
+ else
+ {
+ const size_t ReserveSize = Max(VirtualAddressBlockSize, TotalAllocationSize);
+
+ // Reserve a new block of virtual address space that will be linearly sub-allocated
+ // We intentionally don't keep track of reserved blocks, as we never need to explicitly release them.
+			FullAllocationPointer = VirtualAlloc(nullptr, ReserveSize, MEM_RESERVE, PAGE_NOACCESS);
+
+			if (FullAllocationPointer == nullptr)
+			{
+				// Reservation failed; leave the cursor untouched and report OOM.
+				return nullptr;
+			}
+
+			VirtualAddressCursor = uintptr_t(FullAllocationPointer);
+			VirtualAddressMax = VirtualAddressCursor + ReserveSize;
+ }
+
+ VirtualAddressCursor += TotalAllocationSize;
+ }
+# else
+ void* FullAllocationPointer = FPlatformMemory::BinnedAllocFromOS(TotalAllocationSize);
+# endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+
+ if (!FullAllocationPointer)
+ {
+ return nullptr;
+ }
+
+ void* ReturnedPointer = nullptr;
+
+ ZEN_ASSERT_SLOW(IsAligned(FullAllocationPointer, PageSize));
+
+ if (bUseUnderrunMode)
+ {
+ ReturnedPointer = Align((uint8_t*)FullAllocationPointer + PageSize + AllocationDataSize, Alignment);
+ void* AllocDataPointerStart = static_cast<FAllocationData*>(ReturnedPointer) - 1;
+ ZEN_ASSERT_SLOW(AllocDataPointerStart >= FullAllocationPointer);
+
+# if ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Commit physical pages to the used range, leaving the first page unmapped.
+ void* CommittedMemory = VirtualAlloc(AllocDataPointerStart, AllocationDataSize + AlignedSize, MEM_COMMIT, PAGE_READWRITE);
+ if (!CommittedMemory)
+ {
+ // Failed to allocate and commit physical memory pages.
+ return nullptr;
+ }
+ ZEN_ASSERT(CommittedMemory == AlignDown(AllocDataPointerStart, PageSize));
+# else
+ // Page protect the first page, this will cause the exception in case there is an underrun.
+ FPlatformMemory::PageProtect((uint8*)AlignDown(AllocDataPointerStart, PageSize) - PageSize, PageSize, false, false);
+# endif
+ } //-V773
+ else
+ {
+ ReturnedPointer = AlignDown((uint8_t*)FullAllocationPointer + AllocFullPageSize - AlignedSize, Alignment);
+ void* ReturnedPointerEnd = (uint8_t*)ReturnedPointer + AlignedSize;
+ ZEN_ASSERT_SLOW(IsAligned(ReturnedPointerEnd, PageSize));
+
+ void* AllocDataPointerStart = static_cast<FAllocationData*>(ReturnedPointer) - 1;
+ ZEN_ASSERT_SLOW(AllocDataPointerStart >= FullAllocationPointer);
+
+# if ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Commit physical pages to the used range, leaving the last page unmapped.
+ void* CommitPointerStart = AlignDown(AllocDataPointerStart, PageSize);
+ void* CommittedMemory = VirtualAlloc(CommitPointerStart,
+ size_t((uint8_t*)ReturnedPointerEnd - (uint8_t*)CommitPointerStart),
+ MEM_COMMIT,
+ PAGE_READWRITE);
+ if (!CommittedMemory)
+ {
+ // Failed to allocate and commit physical memory pages.
+ return nullptr;
+ }
+ ZEN_ASSERT(CommittedMemory == CommitPointerStart);
+# else
+ // Page protect the last page, this will cause the exception in case there is an overrun.
+ FPlatformMemory::PageProtect(ReturnedPointerEnd, PageSize, false, false);
+# endif
+ } //-V773
+
+ ZEN_ASSERT_SLOW(IsAligned(FullAllocationPointer, PageSize));
+ ZEN_ASSERT_SLOW(IsAligned(TotalAllocationSize, PageSize));
+ ZEN_ASSERT_SLOW(IsAligned(ReturnedPointer, Alignment));
+ ZEN_ASSERT_SLOW((uint8_t*)ReturnedPointer + AlignedSize <= (uint8_t*)FullAllocationPointer + TotalAllocationSize);
+
+ FAllocationData& AllocationData = static_cast<FAllocationData*>(ReturnedPointer)[-1];
+ AllocationData = {FullAllocationPointer, TotalAllocationSize, AlignedSize, 0};
+ AllocationData.Sentinel = AllocationData.CalculateSentinel();
+
+ return ReturnedPointer;
+}
+
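+// What this buys us (illustrative): both bugs below fault at the offending
+// instruction instead of silently corrupting neighbouring memory:
+//
+//   uint8_t* P = (uint8_t*)GMalloc->Malloc(16, 16);
+//   P[16] = 1;        // overrun: touches the protected trailing page
+//   GMalloc->Free(P);
+//   P[0] = 1;         // use-after-free: pages were decommitted by Free
+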
+void*
+FMallocStomp::Realloc(void* InPtr, size_t NewSize, uint32_t Alignment)
+{
+ void* Result = TryRealloc(InPtr, NewSize, Alignment);
+
+ if (Result == nullptr && NewSize)
+ {
+ OutOfMemory(NewSize, Alignment);
+ }
+
+ return Result;
+}
+
+void*
+FMallocStomp::TryRealloc(void* InPtr, size_t NewSize, uint32_t Alignment)
+{
+ if (NewSize == 0U)
+ {
+ Free(InPtr);
+ return nullptr;
+ }
+
+ void* ReturnPtr = nullptr;
+
+ if (InPtr != nullptr)
+ {
+ ReturnPtr = TryMalloc(NewSize, Alignment);
+
+ if (ReturnPtr != nullptr)
+ {
+ FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(reinterpret_cast<uint8_t*>(InPtr) - sizeof(FAllocationData));
+ memcpy(ReturnPtr, InPtr, Min(AllocDataPtr->Size, NewSize));
+ Free(InPtr);
+ }
+ }
+ else
+ {
+ ReturnPtr = TryMalloc(NewSize, Alignment);
+ }
+
+ return ReturnPtr;
+}
+
+void
+FMallocStomp::Free(void* InPtr)
+{
+ if (InPtr == nullptr)
+ {
+ return;
+ }
+
+ FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(InPtr);
+ AllocDataPtr--;
+
+ // Check the sentinel to verify that the allocation data is intact.
+ if (AllocDataPtr->Sentinel != AllocDataPtr->CalculateSentinel())
+ {
+ // There was a memory underrun related to this allocation.
+ ZEN_DEBUG_BREAK();
+ }
+
+# if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+ munmap(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize);
+# elif ZEN_PLATFORM_WINDOWS && MALLOC_STOMP_KEEP_VIRTUAL_MEMORY
+ // Unmap physical memory, but keep virtual address range reserved to catch use-after-free errors.
+
+ VirtualFree(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize, MEM_DECOMMIT);
+
+# else
+ FPlatformMemory::BinnedFreeToOS(AllocDataPtr->FullAllocationPointer, AllocDataPtr->FullSize);
+# endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+}
+
+bool
+FMallocStomp::GetAllocationSize(void* Original, size_t& SizeOut)
+{
+ if (Original == nullptr)
+ {
+ SizeOut = 0U;
+ }
+ else
+ {
+ FAllocationData* AllocDataPtr = reinterpret_cast<FAllocationData*>(Original);
+ AllocDataPtr--;
+ SizeOut = AllocDataPtr->Size;
+ }
+
+ return true;
+}
+
+} // namespace zen
+
+#endif // ZEN_WITH_MALLOC_STOMP
diff --git a/src/zencore/memory/memory.cpp b/src/zencore/memory/memory.cpp
new file mode 100644
index 000000000..f236796ad
--- /dev/null
+++ b/src/zencore/memory/memory.cpp
@@ -0,0 +1,281 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/commandline.h>
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/mallocansi.h>
+#include <zencore/memory/mallocmimalloc.h>
+#include <zencore/memory/mallocrpmalloc.h>
+#include <zencore/memory/mallocstomp.h>
+#include <zencore/memory/memory.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/string.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <zencore/windows.h>
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <shellapi.h> // For command line parsing
+ZEN_THIRD_PARTY_INCLUDES_END
+#endif
+
+#if ZEN_PLATFORM_LINUX
+# include <stdio.h>
+#endif
+
+namespace zen {
+
+enum class MallocImpl
+{
+ None = 0,
+ Ansi,
+ Stomp,
+ Mimalloc,
+ Rpmalloc
+};
+
+static int
+InitGMalloc()
+{
+ MallocImpl Malloc = MallocImpl::None;
+ FMalloc* InitMalloc = GMalloc;
+
+ // Pick a default base allocator based on availability/platform
+
+#if ZEN_MIMALLOC_ENABLED
+ if (Malloc == MallocImpl::None)
+ {
+ Malloc = MallocImpl::Mimalloc;
+ }
+#endif
+
+#if ZEN_RPMALLOC_ENABLED
+ if (Malloc == MallocImpl::None)
+ {
+ Malloc = MallocImpl::Rpmalloc;
+ }
+#endif
+
+ // Process any command line overrides
+ //
+	// Note that calls can come into this function before the regular main function
+	// is entered, so we cannot rely on the application's regular command line parsing.
+
+ using namespace std::literals;
+
+ auto ProcessMallocArg = [&](const std::string_view& Arg) {
+#if ZEN_RPMALLOC_ENABLED
+ if (Arg == "rpmalloc"sv)
+ {
+ Malloc = MallocImpl::Rpmalloc;
+ }
+#endif
+
+#if ZEN_MIMALLOC_ENABLED
+ if (Arg == "mimalloc"sv)
+ {
+ Malloc = MallocImpl::Mimalloc;
+ }
+#endif
+
+ if (Arg == "ansi"sv)
+ {
+ Malloc = MallocImpl::Ansi;
+ }
+
+ if (Arg == "stomp"sv)
+ {
+ Malloc = MallocImpl::Stomp;
+ }
+ };
+
+ constexpr std::string_view MallocOption = "--malloc="sv;
+
+ std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+ if (Arg.starts_with(MallocOption))
+ {
+ const std::string_view OptionArgs = Arg.substr(MallocOption.size());
+
+ IterateCommaSeparatedValue(OptionArgs, ProcessMallocArg);
+ }
+ };
+
+ IterateCommandlineArgs(ProcessArg);
+
+ switch (Malloc)
+ {
+#if ZEN_WITH_MALLOC_STOMP
+ case MallocImpl::Stomp:
+ GMalloc = new FMallocStomp();
+ break;
+#endif
+
+#if ZEN_RPMALLOC_ENABLED
+ case MallocImpl::Rpmalloc:
+ GMalloc = new FMallocRpmalloc();
+ break;
+#endif
+
+#if ZEN_MIMALLOC_ENABLED
+ case MallocImpl::Mimalloc:
+ GMalloc = new FMallocMimalloc();
+ break;
+#endif
+ default:
+ break;
+ }
+
+ if (GMalloc == InitMalloc)
+ {
+ GMalloc = new FMallocAnsi();
+ }
+
+ return 1;
+}
+
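+// Example invocations (binary name illustrative):
+//
+//   zenserver --malloc=rpmalloc
+//   zenserver --malloc=stomp --trace=memory --tracehost=<ip>
+//
+// Requesting an allocator that is compiled out of this build falls back to
+// FMallocAnsi via the GMalloc == InitMalloc check above.
+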
+void
+Memory::GCreateMalloc()
+{
+ static int InitFlag = InitGMalloc();
+}
+
+void
+Memory::Initialize()
+{
+ GCreateMalloc();
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+void*
+Memory::SystemMalloc(size_t Size)
+{
+ void* Ptr = ::malloc(Size);
+ MemoryTrace_Alloc(uint64_t(Ptr), Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ return Ptr;
+}
+
+void
+Memory::SystemFree(void* Ptr)
+{
+ MemoryTrace_Free(uint64_t(Ptr), EMemoryTraceRootHeap::SystemMemory);
+ ::free(Ptr);
+}
+
+} // namespace zen
+
+//////////////////////////////////////////////////////////////////////////
+
+static ZEN_NOINLINE bool
+InvokeNewHandler(bool NoThrow)
+{
+ std::new_handler h = std::get_new_handler();
+
+ if (!h)
+ {
+#if defined(_CPPUNWIND) || defined(__cpp_exceptions)
+ if (NoThrow == false)
+ throw std::bad_alloc();
+#else
+ ZEN_UNUSED(NoThrow);
+#endif
+ return false;
+ }
+ else
+ {
+ h();
+ return true;
+ }
+}
+
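+// The retry loops below honour std::set_new_handler. A hedged example of a
+// handler that releases allocator caches before the allocation is retried:
+//
+//   std::set_new_handler([] {
+//       zen::GMalloc->Trim(/* bTrimThreadCaches */ true);
+//   });
+//
+// A handler that cannot release any memory should throw std::bad_alloc (or
+// abort) to break out of the retry loop.
+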
+//////////////////////////////////////////////////////////////////////////
+
+ZEN_NOINLINE void*
+RetryNew(size_t Size, bool NoThrow)
+{
+ void* Ptr = nullptr;
+ while (!Ptr && InvokeNewHandler(NoThrow))
+ {
+ Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT);
+ }
+ return Ptr;
+}
+
+void*
+zen_new(size_t Size)
+{
+ void* Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT);
+
+ if (!Ptr) [[unlikely]]
+ {
+ const bool NoThrow = false;
+ return RetryNew(Size, NoThrow);
+ }
+
+ return Ptr;
+}
+
+void*
+zen_new_nothrow(size_t Size) noexcept
+{
+ void* Ptr = zen::Memory::Malloc(Size, zen::DEFAULT_ALIGNMENT);
+
+ if (!Ptr) [[unlikely]]
+ {
+ const bool NoThrow = true;
+ return RetryNew(Size, NoThrow);
+ }
+
+ return Ptr;
+}
+
+void*
+zen_new_aligned(size_t Size, size_t Alignment)
+{
+ void* Ptr;
+
+ do
+ {
+ Ptr = zen::Memory::Malloc(Size, uint32_t(Alignment));
+ } while (!Ptr && InvokeNewHandler(/* NoThrow */ false));
+
+ return Ptr;
+}
+
+void*
+zen_new_aligned_nothrow(size_t Size, size_t Alignment) noexcept
+{
+ void* Ptr;
+
+ do
+ {
+ Ptr = zen::Memory::Malloc(Size, uint32_t(Alignment));
+ } while (!Ptr && InvokeNewHandler(/* NoThrow */ true));
+
+ return Ptr;
+}
+
+void
+zen_free(void* Ptr) noexcept
+{
+ zen::Memory::Free(Ptr);
+}
+
+void
+zen_free_size(void* Ptr, size_t Size) noexcept
+{
+ ZEN_UNUSED(Size);
+ zen::Memory::Free(Ptr);
+}
+
+void
+zen_free_size_aligned(void* Ptr, size_t Size, size_t Alignment) noexcept
+{
+ ZEN_UNUSED(Size, Alignment);
+ zen::Memory::Free(Ptr);
+}
+
+void
+zen_free_aligned(void* Ptr, size_t Alignment) noexcept
+{
+ ZEN_UNUSED(Alignment);
+ zen::Memory::Free(Ptr);
+}
diff --git a/src/zencore/memtrack/callstacktrace.cpp b/src/zencore/memtrack/callstacktrace.cpp
new file mode 100644
index 000000000..d860c05d1
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.cpp
@@ -0,0 +1,1059 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "callstacktrace.h"
+
+#include <zenbase/zenbase.h>
+#include <zencore/string.h>
+
+#if UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+// Platform implementations of back tracing
+////////////////////////////////////////////////////////////////////////////////
+void CallstackTrace_CreateInternal(FMalloc*);
+void CallstackTrace_InitializeInternal();
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(CallstackChannel)
+UE_TRACE_EVENT_DEFINE(Memory, CallstackSpec)
+
+uint32_t GCallStackTracingTlsSlotIndex = FPlatformTLS::InvalidTlsSlot;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Create(class FMalloc* InMalloc)
+{
+ static auto InitOnce = [&] {
+ CallstackTrace_CreateInternal(InMalloc);
+ return true;
+ }();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_Initialize()
+{
+	static auto InitOnce = [&] {
+		// Allocate the TLS slot inside the once-guard so repeated calls don't leak slots.
+		GCallStackTracingTlsSlotIndex = FPlatformTLS::AllocTlsSlot();
+		CallstackTrace_InitializeInternal();
+		return true;
+	}();
+}
+
+} // namespace zen
+
+#endif
+
+#if ZEN_PLATFORM_WINDOWS
+# include "moduletrace.h"
+
+# include "growonlylockfreehash.h"
+
+# include <zencore/scopeguard.h>
+# include <zencore/thread.h>
+# include <zencore/trace.h>
+
+# include <atomic>
+# include <span>
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winnt.h>
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# ifndef UE_CALLSTACK_TRACE_FULL_CALLSTACKS
+# define UE_CALLSTACK_TRACE_FULL_CALLSTACKS 0
+# endif
+
+// 0=off, 1=stats, 2=validation, 3=truth_compare
+# define BACKTRACE_DBGLVL 0
+
+# define BACKTRACE_LOCK_FREE (1 && (BACKTRACE_DBGLVL == 0))
+
+static bool GModulesAreInitialized = false;
+
+// This implementation uses unwind tables, which results in very fast
+// stack walking. In some cases this is not suitable, and we then fall back
+// to the standard stack walking implementation.
+# if !defined(UE_CALLSTACK_TRACE_USE_UNWIND_TABLES)
+# if defined(__clang__)
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 0
+# else
+# define UE_CALLSTACK_TRACE_USE_UNWIND_TABLES 1
+# endif
+# endif
+
+// Stack trace tracking using the clang intrinsic __builtin_frame_address(0) doesn't work correctly on all Windows platforms
+# if !defined(PLATFORM_USE_CALLSTACK_ADDRESS_POINTER)
+# if defined(__clang__)
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 0
+# else
+# define PLATFORM_USE_CALLSTACK_ADDRESS_POINTER 1
+# endif
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_MB)
+// Initial size of the known set of callstacks
+# define UE_CALLSTACK_TRACE_RESERVE_MB 8 // ~500k callstacks
+# endif
+
+# if !defined(UE_CALLSTACK_TRACE_RESERVE_GROWABLE)
+// If disabled the known set will not grow. New callstacks will not be
+// reported if the set is full
+# define UE_CALLSTACK_TRACE_RESERVE_GROWABLE 1
+# endif
+
+namespace zen {
+
+class FMalloc;
+
+UE_TRACE_CHANNEL_EXTERN(CallstackChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Memory, CallstackSpec, NoSync)
+ UE_TRACE_EVENT_FIELD(uint32, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint64[], Frames)
+UE_TRACE_EVENT_END()
+
+class FCallstackTracer
+{
+public:
+ struct FBacktraceEntry
+ {
+ uint64_t Hash = 0;
+ uint32_t FrameCount = 0;
+ uint64_t* Frames;
+ };
+
+ FCallstackTracer(FMalloc* InMalloc) : KnownSet(InMalloc) {}
+
+ uint32_t AddCallstack(const FBacktraceEntry& Entry)
+ {
+ bool bAlreadyAdded = false;
+
+ // Our set implementation doesn't allow for zero entries (zero represents an empty element
+ // in the hash table), so if we get one due to really bad luck in our 64-bit Id calculation,
+ // treat it as a "1" instead, for purposes of tracking if we've seen that callstack.
+		const uint64_t Hash = Entry.Hash != 0 ? Entry.Hash : 1ull;
+ uint32_t Id;
+ KnownSet.Find(Hash, &Id, &bAlreadyAdded);
+ if (!bAlreadyAdded)
+ {
+ Id = CallstackIdCounter.fetch_add(1, std::memory_order_relaxed);
+ // On the first callstack reserve memory up front
+ if (Id == 1)
+ {
+ KnownSet.Reserve(InitialReserveCount);
+ }
+# if !UE_CALLSTACK_TRACE_RESERVE_GROWABLE
+ // If configured as not growable, start returning unknown id's when full.
+ if (Id >= InitialReserveCount)
+ {
+ return 0;
+ }
+# endif
+ KnownSet.Emplace(Hash, Id);
+ UE_TRACE_LOG(Memory, CallstackSpec, CallstackChannel)
+ << CallstackSpec.CallstackId(Id) << CallstackSpec.Frames(Entry.Frames, Entry.FrameCount);
+ }
+
+ return Id;
+ }
+
+private:
+ struct FEncounteredCallstackSetEntry
+ {
+ std::atomic_uint64_t Key;
+ std::atomic_uint32_t Value;
+
+		inline uint64_t GetKey() const { return Key.load(std::memory_order_relaxed); }
+ inline uint32_t GetValue() const { return Value.load(std::memory_order_relaxed); }
+ inline bool IsEmpty() const { return Key.load(std::memory_order_relaxed) == 0; }
+ inline void SetKeyValue(uint64_t InKey, uint32_t InValue)
+ {
+ Value.store(InValue, std::memory_order_release);
+ Key.store(InKey, std::memory_order_relaxed);
+ }
+ static inline uint32_t KeyHash(uint64_t Key) { return static_cast<uint32_t>(Key); }
+ static inline void ClearEntries(FEncounteredCallstackSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FEncounteredCallstackSetEntry));
+ }
+ };
+
+ typedef TGrowOnlyLockFreeHash<FEncounteredCallstackSetEntry, uint64_t, uint32_t> FEncounteredCallstackSet;
+
+ constexpr static uint32_t InitialReserveBytes = UE_CALLSTACK_TRACE_RESERVE_MB * 1024 * 1024;
+ constexpr static uint32_t InitialReserveCount = InitialReserveBytes / sizeof(FEncounteredCallstackSetEntry);
+
+ FEncounteredCallstackSet KnownSet;
+ std::atomic_uint32_t CallstackIdCounter{1}; // 0 is reserved for "unknown callstack"
+};
+
+# if UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+/*
+ * Windows' x64 binaries contain a ".pdata" section that describes the location
+ * and size of its functions and details on how to unwind them. The unwind
+ * information includes descriptions about a function's stack frame size and
+ * the non-volatile registers it pushes onto the stack. From this we can
+ * calculate where a call instruction wrote its return address. This is enough
+ * to walk the callstack and by caching this information it can be done
+ * efficiently.
+ *
+ * Some functions that need a variable amount of stack (such as those that use
+ * alloca(), for example) will use a frame pointer. Frame pointers involve saving
+ * and restoring the stack pointer in the function's prologue/epilogue. This
+ * frees the function up to modify the stack pointer arbitrarily. This
+ * significantly complicates establishing where a return address is, so this
+ * pdata scheme of walking the stack just doesn't support functions like this.
+ * Walking stops if it encounters such a function. Fortunately there are
+ * usually very few such functions, saving us from having to read and track
+ * non-volatile registers which adds a significant amount of work.
+ *
+ * A further optimisation is to assume we are only interested in methods that
+ * are part of engine or game code. As such we only build lookup tables for
+ * such modules and never accept OS or third party modules. Backtracing stops
+ * if an address is encountered which doesn't map to a known module.
+ */
+
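+// The resulting walk is a tight pointer-hopping loop, sketched here without the
+// caching, locking and re-entrancy guards of the real implementation below
+// (Record is a stand-in for hashing/storing the frame):
+//
+//   uintptr_t* Sp = (uintptr_t*)AddressOfReturnAddress;
+//   while (const FFunction* Fn = LookupFunction(*Sp, State))
+//   {
+//       Record(*Sp);          // *Sp is this frame's return address
+//       if (Fn->RspBias < 0)  // frame-pointer function: cannot walk further
+//           break;
+//       Sp += Fn->RspBias;    // bias is in machine words, incl. the ret address
+//   }
+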
+////////////////////////////////////////////////////////////////////////////////
+static uint32_t
+AddressToId(uintptr_t Address)
+{
+ return uint32_t(Address >> 16);
+}
+
+static uintptr_t
+IdToAddress(uint32_t Id)
+{
+	return uintptr_t(Id) << 16;
+}
+
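+// Ids drop the low 16 bits of an address: Windows maps modules on 64 KB
+// allocation-granularity boundaries, so a module base survives the round trip
+// IdToAddress(AddressToId(Base)) == Base and (base, size) pairs pack into 32 bits.
+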
+struct FIdPredicate
+{
+ template<class T>
+ bool operator()(uint32_t Id, const T& Item) const
+ {
+ return Id < Item.Id;
+ }
+ template<class T>
+ bool operator()(const T& Item, uint32_t Id) const
+ {
+ return Item.Id < Id;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+struct FUnwindInfo
+{
+ uint8_t Version : 3;
+ uint8_t Flags : 5;
+ uint8_t PrologBytes;
+ uint8_t NumUnwindCodes;
+ uint8_t FrameReg : 4;
+ uint8_t FrameRspBias : 4;
+};
+
+# pragma warning(push)
+# pragma warning(disable : 4200)
+struct FUnwindCode
+{
+ uint8_t PrologOffset;
+ uint8_t OpCode : 4;
+ uint8_t OpInfo : 4;
+ uint16_t Params[];
+};
+# pragma warning(pop)
+
+enum
+{
+ UWOP_PUSH_NONVOL = 0, // 1 node
+ UWOP_ALLOC_LARGE = 1, // 2 or 3 nodes
+ UWOP_ALLOC_SMALL = 2, // 1 node
+ UWOP_SET_FPREG = 3, // 1 node
+ UWOP_SAVE_NONVOL = 4, // 2 nodes
+ UWOP_SAVE_NONVOL_FAR = 5, // 3 nodes
+ UWOP_SAVE_XMM128 = 8, // 2 nodes
+ UWOP_SAVE_XMM128_FAR = 9, // 3 nodes
+ UWOP_PUSH_MACHFRAME = 10, // 1 node
+};
+
+////////////////////////////////////////////////////////////////////////////////
+class FBacktracer
+{
+public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ void AddModule(uintptr_t Base, const char16_t* Name);
+ void RemoveModule(uintptr_t Base);
+ uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+
+private:
+ struct FFunction
+ {
+ uint32_t Id;
+ int32_t RspBias;
+# if BACKTRACE_DBGLVL >= 2
+ uint32_t Size;
+ const FUnwindInfo* UnwindInfo;
+# endif
+ };
+
+ struct FModule
+ {
+ uint32_t Id;
+ uint32_t IdSize;
+ uint32_t NumFunctions;
+# if BACKTRACE_DBGLVL >= 1
+		uint16_t NumFpTypes;
+		// uint16_t *padding*
+# else
+ // uint32_t *padding*
+# endif
+ FFunction* Functions;
+ };
+
+ struct FLookupState
+ {
+ FModule Module;
+ };
+
+ struct FFunctionLookupSetEntry
+ {
+ // Bottom 48 bits are key (pointer), top 16 bits are data (RSP bias for function)
+ std::atomic_uint64_t Data;
+
+ inline uint64_t GetKey() const { return Data.load(std::memory_order_relaxed) & 0xffffffffffffull; }
+ inline int32_t GetValue() const { return static_cast<int64_t>(Data.load(std::memory_order_relaxed)) >> 48; }
+ inline bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; }
+ inline void SetKeyValue(uint64_t Key, int32_t Value)
+ {
+ Data.store(Key | (static_cast<int64_t>(Value) << 48), std::memory_order_relaxed);
+ }
+ static inline uint32_t KeyHash(uint64_t Key)
+ {
+ // 64 bit pointer to 32 bit hash
+ Key = (~Key) + (Key << 21);
+ Key = Key ^ (Key >> 24);
+ Key = Key * 265;
+ Key = Key ^ (Key >> 14);
+ Key = Key * 21;
+ Key = Key ^ (Key >> 28);
+ Key = Key + (Key << 31);
+ return static_cast<uint32_t>(Key);
+ }
+ static void ClearEntries(FFunctionLookupSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FFunctionLookupSetEntry));
+ }
+ };
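+
+	// KeyHash above is a conventional 64-to-32 bit integer mix (shift/xor/multiply
+	// avalanche); any mix that spreads pointer bits evenly into 32 bits would do.
+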
+ typedef TGrowOnlyLockFreeHash<FFunctionLookupSetEntry, uint64_t, int32_t> FFunctionLookupSet;
+
+ const FFunction* LookupFunction(uintptr_t Address, FLookupState& State) const;
+ static FBacktracer* Instance;
+ mutable zen::RwLock Lock;
+ FModule* Modules;
+ int32_t ModulesNum;
+ int32_t ModulesCapacity;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+# if BACKTRACE_LOCK_FREE
+ mutable FFunctionLookupSet FunctionLookups;
+ mutable bool bReentranceCheck = false;
+# endif
+# if BACKTRACE_DBGLVL >= 1
+ mutable uint32_t NumFpTruncations = 0;
+ mutable uint32_t TotalFunctions = 0;
+# endif
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer* FBacktracer::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::FBacktracer(FMalloc* InMalloc)
+: Malloc(InMalloc)
+, CallstackTracer(InMalloc)
+# if BACKTRACE_LOCK_FREE
+, FunctionLookups(InMalloc)
+# endif
+{
+# if BACKTRACE_LOCK_FREE
+ FunctionLookups.Reserve(512 * 1024); // 4 MB
+# endif
+ ModulesCapacity = 8;
+ ModulesNum = 0;
+ Modules = (FModule*)Malloc->Malloc(sizeof(FModule) * ModulesCapacity);
+
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer::~FBacktracer()
+{
+ std::span<FModule> ModulesView(Modules, ModulesNum);
+ for (FModule& Module : ModulesView)
+ {
+ Malloc->Free(Module.Functions);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FBacktracer*
+FBacktracer::Get()
+{
+ return Instance;
+}
+
+bool GFullBacktraces = false;
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::AddModule(uintptr_t ModuleBase, const char16_t* Name)
+{
+ if (!GFullBacktraces)
+ {
+ const size_t NameLen = StringLength(Name);
+ if (!(NameLen > 4 && StringEquals(Name + NameLen - 4, u".exe")))
+ {
+ return;
+ }
+ }
+
+ const auto* DosHeader = (IMAGE_DOS_HEADER*)ModuleBase;
+ const auto* NtHeader = (IMAGE_NT_HEADERS*)(ModuleBase + DosHeader->e_lfanew);
+ const IMAGE_FILE_HEADER* FileHeader = &(NtHeader->FileHeader);
+
+ uint32_t NumSections = FileHeader->NumberOfSections;
+ const auto* Sections = (IMAGE_SECTION_HEADER*)(uintptr_t(&(NtHeader->OptionalHeader)) + FileHeader->SizeOfOptionalHeader);
+
+ // Find ".pdata" section
+ uintptr_t PdataBase = 0;
+ uintptr_t PdataEnd = 0;
+ for (uint32_t i = 0; i < NumSections; ++i)
+ {
+ const IMAGE_SECTION_HEADER* Section = Sections + i;
+ if (*(uint64_t*)(Section->Name) ==
+		    0x61'74'61'64'70'2eull) // Section names are eight bytes and zero padded. This constant is '.pdata'
+ {
+ PdataBase = ModuleBase + Section->VirtualAddress;
+ PdataEnd = PdataBase + Section->SizeOfRawData;
+ break;
+ }
+ }
+
+ if (PdataBase == 0)
+ {
+ return;
+ }
+
+ // Count the number of functions. The assumption here is that if we have got this far then there is at least one function
+ uint32_t NumFunctions = uint32_t(PdataEnd - PdataBase) / sizeof(RUNTIME_FUNCTION);
+ if (NumFunctions == 0)
+ {
+ return;
+ }
+
+ const auto* FunctionTables = (RUNTIME_FUNCTION*)PdataBase;
+ do
+ {
+ const RUNTIME_FUNCTION* Function = FunctionTables + NumFunctions - 1;
+ if (uint32_t(Function->BeginAddress) < uint32_t(Function->EndAddress))
+ {
+ break;
+ }
+
+ --NumFunctions;
+ } while (NumFunctions != 0);
+
+ // Allocate some space for the module's function-to-frame-size table
+ auto* OutTable = (FFunction*)Malloc->Malloc(sizeof(FFunction) * NumFunctions);
+ FFunction* OutTableCursor = OutTable;
+
+ // Extract frame size for each function from pdata's unwind codes.
+ uint32_t NumFpFuncs = 0;
+ for (uint32_t i = 0; i < NumFunctions; ++i)
+ {
+ const RUNTIME_FUNCTION* FunctionTable = FunctionTables + i;
+
+ uintptr_t UnwindInfoAddr = ModuleBase + FunctionTable->UnwindInfoAddress;
+ const auto* UnwindInfo = (FUnwindInfo*)UnwindInfoAddr;
+
+ if (UnwindInfo->Version != 1)
+ {
+ /* some v2s have been seen in msvc. Always seem to be assembly
+ * routines (memset, memcpy, etc) */
+ continue;
+ }
+
+ int32_t FpInfo = 0;
+ int32_t RspBias = 0;
+
+# if BACKTRACE_DBGLVL >= 2
+ uint32_t PrologVerify = UnwindInfo->PrologBytes;
+# endif
+
+ const auto* Code = (FUnwindCode*)(UnwindInfo + 1);
+ const auto* EndCode = Code + UnwindInfo->NumUnwindCodes;
+ while (Code < EndCode)
+ {
+# if BACKTRACE_DBGLVL >= 2
+ if (Code->PrologOffset > PrologVerify)
+ {
+ PLATFORM_BREAK();
+ }
+ PrologVerify = Code->PrologOffset;
+# endif
+
+ switch (Code->OpCode)
+ {
+ case UWOP_PUSH_NONVOL:
+ RspBias += 8;
+ Code += 1;
+ break;
+
+ case UWOP_ALLOC_LARGE:
+ if (Code->OpInfo)
+ {
+ RspBias += *(uint32_t*)(Code->Params);
+ Code += 3;
+ }
+ else
+ {
+ RspBias += Code->Params[0] * 8;
+ Code += 2;
+ }
+ break;
+
+ case UWOP_ALLOC_SMALL:
+ RspBias += (Code->OpInfo * 8) + 8;
+ Code += 1;
+ break;
+
+ case UWOP_SET_FPREG:
+ // Function will adjust RSP (e.g. through use of alloca()) so it
+ // uses a frame pointer register. There's instructions like;
+ //
+ // push FRAME_REG
+ // lea FRAME_REG, [rsp + (FRAME_RSP_BIAS * 16)]
+ // ...
+ // add rsp, rax
+ // ...
+ // sub rsp, FRAME_RSP_BIAS * 16
+ // pop FRAME_REG
+ // ret
+ //
+ // To recover the stack frame we would need to track non-volatile
+ // registers which adds a lot of overhead for a small subset of
+ // functions. Instead we'll end backtraces at these functions.
+
+ // MSB is set to detect variable sized frames that we can't proceed
+ // past when back-tracing.
+ NumFpFuncs++;
+ FpInfo |= 0x80000000 | (uint32_t(UnwindInfo->FrameReg) << 27) | (uint32_t(UnwindInfo->FrameRspBias) << 23);
+ Code += 1;
+ break;
+
+ case UWOP_PUSH_MACHFRAME:
+ RspBias = Code->OpInfo ? 48 : 40;
+ Code += 1;
+ break;
+
+ case UWOP_SAVE_NONVOL:
+ Code += 2;
+ break; /* saves are movs instead of pushes */
+ case UWOP_SAVE_NONVOL_FAR:
+ Code += 3;
+ break;
+ case UWOP_SAVE_XMM128:
+ Code += 2;
+ break;
+ case UWOP_SAVE_XMM128_FAR:
+ Code += 3;
+ break;
+
+ default:
+# if BACKTRACE_DBGLVL >= 2
+ PLATFORM_BREAK();
+# endif
+ break;
+ }
+ }
+
+		// "Chained" simply means that multiple RUNTIME_FUNCTIONs pertain to a
+ // single actual function in the .text segment.
+ bool bIsChained = (UnwindInfo->Flags & UNW_FLAG_CHAININFO);
+
+		RspBias /= sizeof(void*); // stack pushes/pops are in units of one machine word
+ RspBias += !bIsChained; // and one extra push for the ret address
+ RspBias |= FpInfo; // pack in details about possible frame pointer
+
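+		// Resulting packed RspBias layout (a breakdown of the statements above):
+		//   bit 31      : set when the function uses a frame pointer
+		//   bits 30..27 : frame pointer register (FrameReg)
+		//   bits 26..23 : frame pointer RSP bias (FrameRspBias)
+		//   low bits    : frame size in machine words (incl. the return-address
+		//                 slot for non-chained functions)
+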
+ if (bIsChained)
+ {
+ OutTableCursor[-1].RspBias += RspBias;
+# if BACKTRACE_DBGLVL >= 2
+ OutTableCursor[-1].Size += (FunctionTable->EndAddress - FunctionTable->BeginAddress);
+# endif
+ }
+ else
+ {
+ *OutTableCursor = {
+ FunctionTable->BeginAddress,
+ RspBias,
+# if BACKTRACE_DBGLVL >= 2
+ FunctionTable->EndAddress - FunctionTable->BeginAddress,
+ UnwindInfo,
+# endif
+ };
+
+ ++OutTableCursor;
+ }
+ }
+
+ uintptr_t ModuleSize = NtHeader->OptionalHeader.SizeOfImage;
+	ModuleSize += 0xffff; // align up to the next 64K page; it'll get shifted by AddressToId()
+
+ FModule Module = {
+ AddressToId(ModuleBase),
+ AddressToId(ModuleSize),
+ uint32_t(uintptr_t(OutTableCursor - OutTable)),
+# if BACKTRACE_DBGLVL >= 1
+		uint16_t(NumFpFuncs),
+# endif
+ OutTable,
+ };
+
+ {
+ zen::RwLock::ExclusiveLockScope _(Lock);
+
+ if (ModulesNum + 1 > ModulesCapacity)
+ {
+ ModulesCapacity += 8;
+ Modules = (FModule*)Malloc->Realloc(Modules, sizeof(FModule) * ModulesCapacity);
+ }
+ Modules[ModulesNum++] = Module;
+
+ std::sort(Modules, Modules + ModulesNum, [](const FModule& A, const FModule& B) { return A.Id < B.Id; });
+ }
+
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations += NumFpFuncs;
+ TotalFunctions += NumFunctions;
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FBacktracer::RemoveModule(uintptr_t ModuleBase)
+{
+ // When Windows' RequestExit() is called it hard-terminates all threads except
+ // the main thread and then proceeds to unload the process' DLLs. This hard
+	// thread termination can result in dangling locked locks. Not an issue as
+ // the rule is "do not do anything multithreaded in DLL load/unload". And here
+ // we are, taking write locks during DLL unload which is, quite unsurprisingly,
+ // deadlocking. In reality tracking Windows' DLL unloads doesn't tell us
+ // anything due to how DLLs and processes' address spaces work. So we will...
+# if ZEN_PLATFORM_WINDOWS
+ ZEN_UNUSED(ModuleBase);
+
+ return;
+# else
+
+ zen::RwLock::ExclusiveLockScope _(Lock);
+
+ uint32_t ModuleId = AddressToId(ModuleBase);
+ TArrayView<FModule> ModulesView(Modules, ModulesNum);
+ int32_t Index = Algo::LowerBound(ModulesView, ModuleId, FIdPredicate());
+ if (Index >= ModulesNum)
+ {
+ return;
+ }
+
+ const FModule& Module = Modules[Index];
+ if (Module.Id != ModuleId)
+ {
+ return;
+ }
+
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations -= Module.NumFpTypes;
+ TotalFunctions -= Module.NumFunctions;
+# endif
+
+ // no code should be executing at this point so we can safely free the
+	// table knowing no one is looking at it.
+ Malloc->Free(Module.Functions);
+
+	for (int32_t i = Index; i < ModulesNum - 1; i++)
+ {
+ Modules[i] = Modules[i + 1];
+ }
+
+ --ModulesNum;
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+const FBacktracer::FFunction*
+FBacktracer::LookupFunction(uintptr_t Address, FLookupState& State) const
+{
+ // This function caches the previous module look up. The theory here is that
+	// a series of return addresses in a backtrace often clusters around one module
+
+ FIdPredicate IdPredicate;
+
+ // Look up the module that Address belongs to.
+ uint32_t AddressId = AddressToId(Address);
+ if ((AddressId - State.Module.Id) >= State.Module.IdSize)
+ {
+ auto FindIt = std::upper_bound(Modules, Modules + ModulesNum, AddressId, IdPredicate);
+
+ if (FindIt == Modules)
+ {
+ return nullptr;
+ }
+
+ State.Module = *--FindIt;
+ }
+
+ // Check that the address is within the address space of the best-found module
+ const FModule* Module = &(State.Module);
+ if ((AddressId - Module->Id) >= Module->IdSize)
+ {
+ return nullptr;
+ }
+
+	// Now that we have a module, we have a table of functions and their stack sizes, so
+ // we can get the frame size for Address
+ uint32_t FuncId = uint32_t(Address - IdToAddress(Module->Id));
+ std::span<FFunction> FuncsView(Module->Functions, Module->NumFunctions);
+ auto FindIt = std::upper_bound(begin(FuncsView), end(FuncsView), FuncId, IdPredicate);
+ if (FindIt == begin(FuncsView))
+ {
+ return nullptr;
+ }
+
+ const FFunction* Function = &(*--FindIt);
+# if BACKTRACE_DBGLVL >= 2
+ if ((FuncId - Function->Id) >= Function->Size)
+ {
+ PLATFORM_BREAK();
+ return nullptr;
+ }
+# endif
+ return Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+{
+ FLookupState LookupState = {};
+ uint64_t Frames[256];
+
+ uintptr_t* StackPointer = (uintptr_t*)AddressOfReturnAddress;
+
+# if BACKTRACE_DBGLVL >= 3
+ uintptr_t TruthBacktrace[1024];
+ uint32_t NumTruth = RtlCaptureStackBackTrace(0, 1024, (void**)TruthBacktrace, nullptr);
+ uintptr_t* TruthCursor = TruthBacktrace;
+ for (; *TruthCursor != *StackPointer; ++TruthCursor)
+ ;
+# endif
+
+# if BACKTRACE_DBGLVL >= 2
+ struct
+ {
+ void* Sp;
+ void* Ip;
+ const FFunction* Function;
+ } Backtrace[1024] = {};
+ uint32_t NumBacktrace = 0;
+# endif
+
+ uint64_t BacktraceHash = 0;
+ uint32_t FrameIdx = 0;
+
+# if BACKTRACE_LOCK_FREE
+ // When running lock free, we defer the lock until a lock free function lookup fails
+ bool Locked = false;
+# else
+	zen::RwLock::ExclusiveLockScope _(Lock);
+# endif
+ do
+ {
+ uintptr_t RetAddr = *StackPointer;
+
+ Frames[FrameIdx++] = RetAddr;
+
+		// This is a simple order-dependent LCG. Should be sufficient for our purposes
+ BacktraceHash += RetAddr;
+ BacktraceHash *= 0x30be8efa499c249dull;
+
+# if BACKTRACE_LOCK_FREE
+ int32_t RspBias;
+ bool bIsAlreadyInTable;
+ FunctionLookups.Find(RetAddr, &RspBias, &bIsAlreadyInTable);
+ if (bIsAlreadyInTable)
+ {
+ if (RspBias < 0)
+ {
+ break;
+ }
+ else
+ {
+ StackPointer += RspBias;
+ continue;
+ }
+ }
+ if (!Locked)
+ {
+ Lock.AcquireExclusive();
+ Locked = true;
+
+ // If FunctionLookups.Emplace triggers a reallocation, it can cause an infinite recursion
+ // when the allocation reenters the stack trace code. We need to break out of the recursion
+ // in that case, and let the allocation complete, with the assumption that we don't care
+ // about call stacks for internal allocations in the memory reporting system. The "Lock()"
+ // above will only fall through with this flag set if it's a second lock in the same thread.
+ if (bReentranceCheck)
+ {
+ break;
+ }
+ }
+# endif // BACKTRACE_LOCK_FREE
+
+ const FFunction* Function = LookupFunction(RetAddr, LookupState);
+ if (Function == nullptr)
+ {
+# if BACKTRACE_LOCK_FREE
+ // LookupFunction fails when modules are not yet registered. In this case, we do not want the address
+ // to be added to the lookup map, but to retry the lookup later when modules are properly registered.
+ if (GModulesAreInitialized)
+ {
+ bReentranceCheck = true;
+ auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
+ FunctionLookups.Emplace(RetAddr, -1);
+ }
+# endif
+ break;
+ }
+
+# if BACKTRACE_LOCK_FREE
+ {
+ // This conversion improves probing performance for the hash set. Additionally it is critical
+ // to avoid incorrect values when RspBias is compressed into 16 bits in the hash map.
+ int32_t StoreBias = Function->RspBias < 0 ? -1 : Function->RspBias;
+ bReentranceCheck = true;
+ auto OnExit = zen::MakeGuard([&] { bReentranceCheck = false; });
+ FunctionLookups.Emplace(RetAddr, StoreBias);
+ }
+# endif
+
+# if BACKTRACE_DBGLVL >= 2
+ if (NumBacktrace < 1024)
+ {
+ Backtrace[NumBacktrace++] = {
+ StackPointer,
+ (void*)RetAddr,
+ Function,
+ };
+ }
+# endif
+
+ if (Function->RspBias < 0)
+ {
+ // This is a frame with a variable-sized stack pointer. We don't
+ // track enough information to proceed.
+# if BACKTRACE_DBGLVL >= 1
+ NumFpTruncations++;
+# endif
+ break;
+ }
+
+ StackPointer += Function->RspBias;
+ }
+	// Truncate callstacks that would exceed the capacity of Frames
+ while (*StackPointer && FrameIdx < ZEN_ARRAY_COUNT(Frames));
+
+ // Build the backtrace entry for submission
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ BacktraceEntry.Hash = BacktraceHash;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = Frames;
+
+# if BACKTRACE_DBGLVL >= 3
+ for (uint32_t i = 0; i < NumBacktrace; ++i)
+ {
+ if ((void*)TruthCursor[i] != Backtrace[i].Ip)
+ {
+ PLATFORM_BREAK();
+ break;
+ }
+ }
+# endif
+
+# if BACKTRACE_LOCK_FREE
+ if (Locked)
+ {
+ Lock.ReleaseExclusive();
+ }
+# endif
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+}
+} // namespace zen
+
+# else // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+ ////////////////////////////////////////////////////////////////////////////////
+ class FBacktracer
+ {
+ public:
+ FBacktracer(FMalloc* InMalloc);
+ ~FBacktracer();
+ static FBacktracer* Get();
+ inline uint32_t GetBacktraceId(void* AddressOfReturnAddress);
+ uint32_t GetBacktraceId(uint64_t ReturnAddress);
+ void AddModule(uintptr_t Base, const char16_t* Name) {}
+ void RemoveModule(uintptr_t Base) {}
+
+ private:
+ static FBacktracer* Instance;
+ FMalloc* Malloc;
+ FCallstackTracer CallstackTracer;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Instance = nullptr;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::FBacktracer(FMalloc* InMalloc) : Malloc(InMalloc), CallstackTracer(InMalloc) { Instance = this; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer::~FBacktracer() {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ FBacktracer* FBacktracer::Get() { return Instance; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(void* AddressOfReturnAddress)
+ {
+ const uint64_t ReturnAddress = *(uint64_t*)AddressOfReturnAddress;
+ return GetBacktraceId(ReturnAddress);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ uint32_t FBacktracer::GetBacktraceId(uint64_t ReturnAddress)
+ {
+# if !UE_BUILD_SHIPPING
+ uint64_t StackFrames[256];
+ int32_t NumStackFrames = FPlatformStackWalk::CaptureStackBackTrace(StackFrames, UE_ARRAY_COUNT(StackFrames));
+ if (NumStackFrames > 0)
+ {
+ FCallstackTracer::FBacktraceEntry BacktraceEntry;
+ uint64_t BacktraceId = 0;
+ uint32_t FrameIdx = 0;
+ bool bUseAddress = false;
+ for (int32_t Index = 0; Index < NumStackFrames; Index++)
+ {
+ if (!bUseAddress)
+ {
+ // start using backtrace only after ReturnAddress
+ if (StackFrames[Index] == (uint64_t)ReturnAddress)
+ {
+ bUseAddress = true;
+ }
+ }
+ if (bUseAddress || NumStackFrames == 1)
+ {
+ uint64_t RetAddr = StackFrames[Index];
+ StackFrames[FrameIdx++] = RetAddr;
+
+				// This is a simple order-dependent LCG. Should be sufficient for our purposes
+ BacktraceId += RetAddr;
+ BacktraceId *= 0x30be8efa499c249dull;
+ }
+ }
+
+ // Save the collected id
+ BacktraceEntry.Hash = BacktraceId;
+ BacktraceEntry.FrameCount = FrameIdx;
+ BacktraceEntry.Frames = StackFrames;
+
+ // Add to queue to be processed. This might block until there is room in the
+ // queue (i.e. the processing thread has caught up processing).
+ return CallstackTracer.AddCallstack(BacktraceEntry);
+ }
+# endif
+
+ return 0;
+ }
+
+} // namespace zen
+
+# endif // UE_CALLSTACK_TRACE_USE_UNWIND_TABLES
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_CreateInternal(FMalloc* Malloc)
+{
+ if (FBacktracer::Get() != nullptr)
+ {
+ return;
+ }
+
+ // Allocate, construct and intentionally leak backtracer
+ void* Alloc = Malloc->Malloc(sizeof(FBacktracer), alignof(FBacktracer));
+ new (Alloc) FBacktracer(Malloc);
+
+ Modules_Create(Malloc);
+ Modules_Subscribe([](bool bLoad, void* Module, const char16_t* Name) {
+ bLoad ? FBacktracer::Get()->AddModule(uintptr_t(Module), Name) //-V522
+ : FBacktracer::Get()->RemoveModule(uintptr_t(Module));
+ });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+CallstackTrace_InitializeInternal()
+{
+ Modules_Initialize();
+ GModulesAreInitialized = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+CallstackTrace_GetCurrentId()
+{
+ if (!UE_TRACE_CHANNELEXPR_IS_ENABLED(CallstackChannel))
+ {
+ return 0;
+ }
+
+ void* StackAddress = PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING();
+ if (FBacktracer* Instance = FBacktracer::Get())
+ {
+# if PLATFORM_USE_CALLSTACK_ADDRESS_POINTER
+ return Instance->GetBacktraceId(StackAddress);
+# else
+ return Instance->GetBacktraceId((uint64_t)StackAddress);
+# endif
+ }
+
+ return 0;
+}
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/memtrack/callstacktrace.h b/src/zencore/memtrack/callstacktrace.h
new file mode 100644
index 000000000..3e191490b
--- /dev/null
+++ b/src/zencore/memtrack/callstacktrace.h
@@ -0,0 +1,151 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/trace.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <intrin.h>
+
+# define PLATFORM_RETURN_ADDRESS() _ReturnAddress()
+# define PLATFORM_RETURN_ADDRESS_POINTER() _AddressOfReturnAddress()
+# define PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING PLATFORM_RETURN_ADDRESS_POINTER
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+#if !defined(UE_CALLSTACK_TRACE_ENABLED)
+# if UE_TRACE_ENABLED
+# if ZEN_PLATFORM_WINDOWS
+# define UE_CALLSTACK_TRACE_ENABLED 1
+# endif
+# endif
+#endif
+
+#if !defined(UE_CALLSTACK_TRACE_ENABLED)
+# define UE_CALLSTACK_TRACE_ENABLED 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+#if UE_CALLSTACK_TRACE_ENABLED
+
+# include "platformtls.h"
+
+namespace zen {
+
+/**
+ * Creates callstack tracing.
+ * @param Malloc Allocator instance to use.
+ */
+void CallstackTrace_Create(class FMalloc* Malloc);
+
+/**
+ * Initializes callstack tracing. On some platforms this has to be delayed due to initialization order.
+ */
+void CallstackTrace_Initialize();
+
+/**
+ * Capture the current callstack, and trace the definition if it has not already been encountered. The returned value
+ * can be used in trace events and be resolved in analysis.
+ * @return Unique id identifying the current callstack.
+ */
+uint32_t CallstackTrace_GetCurrentId();
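+
+// Illustrative use (MyGroup/MyEvent/MyChannel are hypothetical trace definitions):
+//
+//   uint32_t CallstackId = CallstackTrace_GetCurrentId();
+//   UE_TRACE_LOG(MyGroup, MyEvent, MyChannel) << MyEvent.CallstackId(CallstackId);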
+
+/**
+ * Scoped macro that limits callstack resolution to the caller address only.
+ * It can be used when external libraries are not compiled with frame pointers,
+ * which would otherwise prevent us from resolving the full callstack without
+ * crashing. Within the scope, the traced callstack is just the caller address.
+ */
+# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE() FCallStackTraceLimitResolveScope PREPROCESSOR_JOIN(FCTLMScope, __LINE__)
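+
+// Illustrative use (ThirdPartyCall is a hypothetical entry point built without
+// frame pointers):
+//
+//   {
+//       CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE();
+//       ThirdPartyCall();
+//   }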
+
+extern uint32_t GCallStackTracingTlsSlotIndex;
+
+/**
+ * @return the fallback callstack address
+ */
+inline void*
+CallstackTrace_GetFallbackPlatformReturnAddressData()
+{
+ if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+ return FPlatformTLS::GetTlsValue(GCallStackTracingTlsSlotIndex);
+ else
+ return nullptr;
+}
+
+/**
+ * @return Needs full callstack resolve
+ */
+inline bool
+CallstackTrace_ResolveFullCallStack()
+{
+ return CallstackTrace_GetFallbackPlatformReturnAddressData() == nullptr;
+}
+
+/*
+ * Scope object that overrides the traced callstack with only the caller's address.
+ */
+class FCallStackTraceLimitResolveScope
+{
+public:
+ ZEN_FORCENOINLINE FCallStackTraceLimitResolveScope()
+ {
+ if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+ {
+ FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, PLATFORM_RETURN_ADDRESS_FOR_CALLSTACKTRACING());
+ }
+ }
+
+ ZEN_FORCENOINLINE ~FCallStackTraceLimitResolveScope()
+ {
+ if (FPlatformTLS::IsValidTlsSlot(GCallStackTracingTlsSlotIndex))
+ {
+ FPlatformTLS::SetTlsValue(GCallStackTracingTlsSlotIndex, nullptr);
+ }
+ }
+};
+
+} // namespace zen
+
+#else // UE_CALLSTACK_TRACE_ENABLED
+
+namespace zen {
+
+inline void
+CallstackTrace_Create(class FMalloc* /*Malloc*/)
+{
+}
+
+inline void
+CallstackTrace_Initialize()
+{
+}
+
+inline uint32_t
+CallstackTrace_GetCurrentId()
+{
+ return 0;
+}
+
+inline void*
+CallstackTrace_GetCurrentReturnAddressData()
+{
+ return nullptr;
+}
+
+inline void*
+CallstackTrace_GetFallbackPlatformReturnAddressData()
+{
+ return nullptr;
+}
+
+inline bool
+CallstackTrace_ResolveFullCallStack()
+{
+ return true;
+}
+
+# define CALLSTACK_TRACE_LIMIT_CALLSTACKRESOLVE_SCOPE()
+
+} // namespace zen
+
+#endif // UE_CALLSTACK_TRACE_ENABLED
diff --git a/src/zencore/memtrack/growonlylockfreehash.h b/src/zencore/memtrack/growonlylockfreehash.h
new file mode 100644
index 000000000..d6ff4fc32
--- /dev/null
+++ b/src/zencore/memtrack/growonlylockfreehash.h
@@ -0,0 +1,255 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+#include <zencore/intmath.h>
+#include <zencore/thread.h>
+
+#include <zencore/memory/fmalloc.h>
+
+#include <atomic>
+
+namespace zen {
+
+// Hash table with fast lock free reads, that only supports insertion of items, and no modification of
+// values. KeyType must be an integer. EntryType should be a POD with an identifiable "empty" state
+// that can't occur in the table, and include the following member functions:
+//
+// KeyType GetKey() const; // Get the key from EntryType
+// ValueType GetValue() const; // Get the value from EntryType
+// bool IsEmpty() const; // Query whether EntryType is empty
+// void SetKeyValue(KeyType Key, ValueType Value); // Write key and value into EntryType (ATOMICALLY! See below)
+//      static uint32_t KeyHash(KeyType Key);  // Convert Key to a better-distributed hash
+//      static void ClearEntries(EntryType* Entries, int32_t EntryCount);  // Fill an array of entries with empty values
+//
+// The function "SetKeyValue" must be multi-thread safe when writing new items! This means writing the
+// Key last and atomically, or writing the entire EntryType in a single write (say if the key and value
+// are packed into a single integer word). Inline is recommended, since these functions are called a
+// lot in the inner loop of the algorithm. A simple implementation of "KeyHash" can just return the
+// Key (if it's already reasonable as a hash), or mix the bits if better distribution is required. A
+// simple implementation of "ClearEntries" can just be a memset, if zero represents an empty entry.
+//
+// A set can be approximated by making "GetValue" a nop function, and just paying attention to the bool
+// result from FindEntry, although you do need to either reserve a certain Key as invalid, or add
+// space to store a valid flag as the Value. This class should only be used for small value types, as
+// the values are embedded into the hash table, and not stored separately.
+//
+// Writes are implemented using a lock -- it would be possible to make writes lock free (or lock free
+// when resizing doesn't occur), but it adds complexity. If we were to go that route, it would make
+// sense to create a fully generic lock free set, which would be much more involved to implement and
+// validate than this simple class, and might also offer somewhat worse read perf. Lock free containers
+// that support item removal either need additional synchronization overhead on readers, so writers can
+// tell if a reader is active and spin, or need graveyard markers and a garbage collection pass called
+// periodically, which makes it no longer a simple standalone container.
+//
+// Lock free reads are accomplished by the reader atomically pulling the hash table pointer from the
+// class. The hash table is self contained, with its size stored in the table itself, and hash tables
+// are not freed until the class's destruction. So if the table needs to be reallocated due to a write,
+// active readers will still have valid memory. This does mean that tables leak, but worst case, you
+// end up with half of the memory being waste. It would be possible to garbage collect the excess
+// tables, but you'd need some kind of global synchronization to make sure no readers are active.
+//
+// Besides cleanup of wasted tables, it might be useful to provide a function to clear a table. This
+// would involve clearing the Key for all the elements in the table (but leaving the memory allocated),
+// and can be done safely with active readers. It's not possible to safely remove individual items due
+// to the need to potentially move other items, which would break an active reader that has already
+// searched past a moved item. But in the case of removing all items, we don't care when a reader fails,
+// it's expected that eventually all readers will fail, regardless of where they are searching. A clear
+// function could be useful if a lot of the data you are caching is no longer used, and you want to
+// reset the cache.
+//
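+// As an illustration only (not part of this change), a minimal entry could pack a
+// 32-bit key and a 32-bit value into a single atomic word so that SetKeyValue is
+// one atomic store, with key 0 reserved as the empty state:
+//
+//   struct FExampleEntry
+//   {
+//       std::atomic<uint64_t> Packed; // key in high 32 bits, value in low 32 bits
+//
+//       uint32_t GetKey() const   { return uint32_t(Packed.load(std::memory_order_relaxed) >> 32); }
+//       uint32_t GetValue() const { return uint32_t(Packed.load(std::memory_order_relaxed)); }
+//       bool     IsEmpty() const  { return Packed.load(std::memory_order_relaxed) == 0; }
+//       void     SetKeyValue(uint32_t Key, uint32_t Value) { Packed.store((uint64_t(Key) << 32) | Value, std::memory_order_release); }
+//       static uint32_t KeyHash(uint32_t Key) { return Key * 0x9e3779b9u; }
+//       static void ClearEntries(FExampleEntry* Entries, int32_t EntryCount) { memset(Entries, 0, EntryCount * sizeof(FExampleEntry)); }
+//   };
+//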
+template<typename EntryType, typename KeyType, typename ValueType>
+class TGrowOnlyLockFreeHash
+{
+public:
+ TGrowOnlyLockFreeHash(FMalloc* InMalloc) : Malloc(InMalloc), HashTable(nullptr) {}
+
+ ~TGrowOnlyLockFreeHash()
+ {
+ FHashHeader* HashTableNext;
+ for (FHashHeader* HashTableCurrent = HashTable; HashTableCurrent; HashTableCurrent = HashTableNext)
+ {
+ HashTableNext = HashTableCurrent->Next;
+
+ Malloc->Free(HashTableCurrent);
+ }
+ }
+
+ /**
+ * Preallocate the hash table to a certain size
+ * @param Count - Number of EntryType elements to allocate
+ * @warning Can only be called once, and only before any items have been added!
+ */
+ void Reserve(uint32_t Count)
+ {
+ zen::RwLock::ExclusiveLockScope _(WriteCriticalSection);
+ ZEN_ASSERT(HashTable.load(std::memory_order_relaxed) == nullptr);
+
+        if (Count == 0)
+ {
+ Count = DEFAULT_INITIAL_SIZE;
+ }
+ Count = uint32_t(zen::NextPow2(Count));
+ FHashHeader* HashTableLocal = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (Count - 1) * sizeof(EntryType));
+
+ HashTableLocal->Next = nullptr;
+ HashTableLocal->TableSize = Count;
+ HashTableLocal->Used = 0;
+ EntryType::ClearEntries(HashTableLocal->Elements, Count);
+
+ HashTable.store(HashTableLocal, std::memory_order_release);
+ }
+
+ /**
+ * Find an entry in the hash table
+ * @param Key - Key to search for
+ * @param OutValue - Memory location to write result value to. Left unmodified if Key isn't found.
+ * @param bIsAlreadyInTable - Optional result for whether key was found in table.
+ */
+ void Find(KeyType Key, ValueType* OutValue, bool* bIsAlreadyInTable = nullptr) const
+ {
+ FHashHeader* HashTableLocal = HashTable.load(std::memory_order_acquire);
+ if (HashTableLocal)
+ {
+ uint32_t TableMask = HashTableLocal->TableSize - 1;
+
+ // Linear probing
+ for (uint32_t TableIndex = EntryType::KeyHash(Key) & TableMask; !HashTableLocal->Elements[TableIndex].IsEmpty();
+ TableIndex = (TableIndex + 1) & TableMask)
+ {
+ if (HashTableLocal->Elements[TableIndex].GetKey() == Key)
+ {
+ if (OutValue)
+ {
+ *OutValue = HashTableLocal->Elements[TableIndex].GetValue();
+ }
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = true;
+ }
+ return;
+ }
+ }
+ }
+
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = false;
+ }
+ }
+
+ /**
+ * Add an entry with the given Key to the hash table, will do nothing if the item already exists
+ * @param Key - Key to add
+ * @param Value - Value to add for key
+ * @param bIsAlreadyInTable -- Optional result for whether item was already in table
+ */
+ void Emplace(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr)
+ {
+ zen::RwLock::ExclusiveLockScope _(WriteCriticalSection);
+
+ // After locking, check if the item is already in the hash table.
+ ValueType ValueIgnore;
+ bool bFindResult;
+ Find(Key, &ValueIgnore, &bFindResult);
+ if (bFindResult == true)
+ {
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = true;
+ }
+ return;
+ }
+
+ // Check if there is space in the hash table for a new item. We resize when the hash
+ // table gets half full or more. @todo: allow client to specify max load factor?
+ FHashHeader* HashTableLocal = HashTable;
+
+ if (!HashTableLocal || (HashTableLocal->Used >= HashTableLocal->TableSize / 2))
+ {
+ int32_t GrowCount = HashTableLocal ? HashTableLocal->TableSize * 2 : DEFAULT_INITIAL_SIZE;
+ FHashHeader* HashTableGrow = (FHashHeader*)Malloc->Malloc(sizeof(FHashHeader) + (GrowCount - 1) * sizeof(EntryType));
+
+ HashTableGrow->Next = HashTableLocal;
+ HashTableGrow->TableSize = GrowCount;
+ HashTableGrow->Used = 0;
+ EntryType::ClearEntries(HashTableGrow->Elements, GrowCount);
+
+ if (HashTableLocal)
+ {
+ // Copy existing elements from the old table to the new table
+ for (int32_t TableIndex = 0; TableIndex < HashTableLocal->TableSize; TableIndex++)
+ {
+ EntryType& Entry = HashTableLocal->Elements[TableIndex];
+ if (!Entry.IsEmpty())
+ {
+ HashInsertInternal(HashTableGrow, Entry.GetKey(), Entry.GetValue());
+ }
+ }
+ }
+
+ HashTableLocal = HashTableGrow;
+ HashTable.store(HashTableGrow, std::memory_order_release);
+ }
+
+ // Then add our new item
+ HashInsertInternal(HashTableLocal, Key, Value);
+
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = false;
+ }
+ }
+
+ void FindOrAdd(KeyType Key, ValueType Value, bool* bIsAlreadyInTable = nullptr)
+ {
+ // Attempt to find the item lock free, before calling "Emplace", which locks the container
+ bool bFindResult;
+ ValueType IgnoreResult;
+ Find(Key, &IgnoreResult, &bFindResult);
+ if (bFindResult)
+ {
+ if (bIsAlreadyInTable)
+ {
+ *bIsAlreadyInTable = true;
+ }
+ return;
+ }
+
+ Emplace(Key, Value, bIsAlreadyInTable);
+ }
+
+private:
+ struct FHashHeader
+ {
+ FHashHeader* Next; // Old buffers are stored in a linked list for cleanup
+ int32_t TableSize;
+ int32_t Used;
+ EntryType Elements[1]; // Variable sized
+ };
+
+ FMalloc* Malloc;
+ std::atomic<FHashHeader*> HashTable;
+ zen::RwLock WriteCriticalSection;
+
+ static constexpr int32_t DEFAULT_INITIAL_SIZE = 1024;
+
+ static void HashInsertInternal(FHashHeader* HashTableLocal, KeyType Key, ValueType Value)
+ {
+ int32_t TableMask = HashTableLocal->TableSize - 1;
+
+ // Linear probing
+ for (int32_t TableIndex = EntryType::KeyHash(Key) & TableMask;; TableIndex = (TableIndex + 1) & TableMask)
+ {
+ if (HashTableLocal->Elements[TableIndex].IsEmpty())
+ {
+ HashTableLocal->Elements[TableIndex].SetKeyValue(Key, Value);
+ HashTableLocal->Used++;
+ break;
+ }
+ }
+ }
+};
+
+} // namespace zen
diff --git a/src/zencore/memtrack/memorytrace.cpp b/src/zencore/memtrack/memorytrace.cpp
new file mode 100644
index 000000000..b147aee91
--- /dev/null
+++ b/src/zencore/memtrack/memorytrace.cpp
@@ -0,0 +1,829 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "callstacktrace.h"
+#include "tracemalloc.h"
+#include "vatrace.h"
+
+#include <zencore/commandline.h>
+#include <zencore/enumflags.h>
+#include <zencore/guardvalue.h>
+#include <zencore/intmath.h>
+#include <zencore/string.h>
+#include <zencore/trace.h>
+
+#include <atomic>
+#include <functional>
+#include <string.h>
+#include <string_view>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <shellapi.h>
+#endif
+
+class FMalloc;
+
+#if UE_TRACE_ENABLED
+namespace zen {
+UE_TRACE_CHANNEL_DEFINE(MemAllocChannel, "Memory allocations", true)
+}
+#endif
+
+#if UE_MEMORY_TRACE_ENABLED
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace zen {
+
+void MemoryTrace_InitTags(FMalloc*);
+void MemoryTrace_EnableTracePump();
+
+} // namespace zen
+
+////////////////////////////////////////////////////////////////////////////////
+namespace {
+// Controls how often time markers are emitted (a marker is emitted every 4096 allocation events).
+constexpr uint32_t MarkerSamplePeriod = (4 << 10) - 1;
+
+// Number of low bits of the allocation size that are packed into the AlignmentPow2_SizeLower field.
+constexpr uint32_t SizeShift = 3;
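+//
+// For example, Size = 27 (0b11011) with SizeShift = 3 traces Size >> 3 = 3 in the
+// 32-bit Size field and keeps the low bits 0b011 next to the alignment exponent,
+// letting analysis reconstruct (3 << 3) | 0b011 = 27.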
+
+// Counter to track when time marker is emitted
+std::atomic<uint32_t> GMarkerCounter(0);
+
+// If enabled, also pumps the Trace system itself. Used on process shutdown
+// when the worker thread has been killed but memory events still occur.
+bool GDoPumpTrace;
+
+// Temporarily disables any internal operation that causes allocations. Used to
+// avoid recursive behaviour when memory tracing needs to allocate memory through
+// TraceMalloc.
+thread_local bool GDoNotAllocateInTrace;
+
+// Set on initialization; on some platforms we hook allocator functions very early
+// before Trace has the ability to allocate memory.
+bool GTraceAllowed;
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+namespace UE { namespace Trace {
+ TRACELOG_API void Update();
+}} // namespace UE::Trace
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_EVENT_BEGIN(Memory, Init, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(uint64_t, PageSize) // new in UE 5.5
+ UE_TRACE_EVENT_FIELD(uint32_t, MarkerPeriod)
+ UE_TRACE_EVENT_FIELD(uint8, Version)
+ UE_TRACE_EVENT_FIELD(uint8, MinAlignment)
+ UE_TRACE_EVENT_FIELD(uint8, SizeShift)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Marker)
+ UE_TRACE_EVENT_FIELD(uint64_t, Cycle)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Alloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, AllocSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, AllocVideo)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, Free)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, FreeSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, FreeVideo)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocAlloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocAllocSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, Size)
+ UE_TRACE_EVENT_FIELD(uint8, AlignmentPow2_SizeLower)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocFree)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint8, RootHeap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, ReallocFreeSystem)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemorySwapOp)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address) // page fault real address
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint32_t, CompressedSize)
+ UE_TRACE_EVENT_FIELD(uint8, SwapOp)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapSpec, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(HeapId, Id)
+ UE_TRACE_EVENT_FIELD(HeapId, ParentId)
+ UE_TRACE_EVENT_FIELD(uint16, Flags)
+ UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapMarkAlloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(uint16, Flags)
+ UE_TRACE_EVENT_FIELD(HeapId, Heap)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, HeapUnmarkAlloc)
+ UE_TRACE_EVENT_FIELD(uint64_t, Address)
+ UE_TRACE_EVENT_FIELD(uint32_t, CallstackId)
+ UE_TRACE_EVENT_FIELD(HeapId, Heap)
+UE_TRACE_EVENT_END()
+
+// If the layout of the above events is changed, bump this version number.
+// version 1: Initial version (UE 5.0, UE 5.1)
+// version 2: Added CallstackId for Free events and also for HeapMarkAlloc, HeapUnmarkAlloc events (UE 5.2).
+constexpr uint8 MemoryTraceVersion = 2;
+
+////////////////////////////////////////////////////////////////////////////////
+class FMallocWrapper : public FMalloc
+{
+public:
+ FMallocWrapper(FMalloc* InMalloc);
+
+private:
+ struct FCookie
+ {
+ uint64_t Tag : 16;
+ uint64_t Bias : 8;
+ uint64_t Size : 40;
+ };
+
+ static uint32_t GetActualAlignment(SIZE_T Size, uint32_t Alignment);
+
+ virtual void* Malloc(SIZE_T Size, uint32_t Alignment) override;
+ virtual void* Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment) override;
+ virtual void Free(void* Address) override;
+ virtual bool GetAllocationSize(void* Address, SIZE_T& SizeOut) override { return InnerMalloc->GetAllocationSize(Address, SizeOut); }
+ virtual void OnMallocInitialized() override { InnerMalloc->OnMallocInitialized(); }
+
+ FMalloc* InnerMalloc;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FMallocWrapper::FMallocWrapper(FMalloc* InMalloc) : InnerMalloc(InMalloc)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint32_t
+FMallocWrapper::GetActualAlignment(SIZE_T Size, uint32_t Alignment)
+{
+ // Defaults; if size is < 16 then alignment is 8 else 16.
+ uint32_t DefaultAlignment = 8 << uint32_t(Size >= 16);
+ return (Alignment < DefaultAlignment) ? DefaultAlignment : Alignment;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void*
+FMallocWrapper::Malloc(SIZE_T Size, uint32_t Alignment)
+{
+ uint32_t ActualAlignment = GetActualAlignment(Size, Alignment);
+ void* Address = InnerMalloc->Malloc(Size, Alignment);
+
+ MemoryTrace_Alloc((uint64_t)Address, Size, ActualAlignment);
+
+ return Address;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void*
+FMallocWrapper::Realloc(void* PrevAddress, SIZE_T NewSize, uint32_t Alignment)
+{
+ // This simplifies things and means reallocs trace events are true reallocs
+ if (PrevAddress == nullptr)
+ {
+ return Malloc(NewSize, Alignment);
+ }
+
+ MemoryTrace_ReallocFree((uint64_t)PrevAddress);
+
+ void* RetAddress = InnerMalloc->Realloc(PrevAddress, NewSize, Alignment);
+
+ Alignment = GetActualAlignment(NewSize, Alignment);
+ MemoryTrace_ReallocAlloc((uint64_t)RetAddress, NewSize, Alignment);
+
+ return RetAddress;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FMallocWrapper::Free(void* Address)
+{
+ if (Address == nullptr)
+ {
+ return;
+ }
+
+ MemoryTrace_Free((uint64_t)Address);
+
+    InnerMalloc->Free(Address);
+}
+
+////////////////////////////////////////////////////////////////////////////////
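+// Holds storage for an object that is constructed in place on demand and
+// intentionally never destructed, so it stays usable regardless of static
+// initialization/shutdown order.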
+template<class T>
+class alignas(alignof(T)) FUndestructed
+{
+public:
+ template<typename... ArgTypes>
+ void Construct(ArgTypes... Args)
+ {
+ ::new (Buffer) T(Args...);
+ bIsConstructed = true;
+ }
+
+ bool IsConstructed() const { return bIsConstructed; }
+
+ T* operator&() { return (T*)Buffer; }
+ T* operator->() { return (T*)Buffer; }
+
+protected:
+ uint8 Buffer[sizeof(T)];
+ bool bIsConstructed;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+static FUndestructed<FTraceMalloc> GTraceMalloc;
+
+////////////////////////////////////////////////////////////////////////////////
+static EMemoryTraceInit
+MemoryTrace_ShouldEnable()
+{
+ EMemoryTraceInit Mode = EMemoryTraceInit::Disabled;
+
+ // Process any command line trace options
+ //
+ // Note that calls can come into this function before we enter the regular main function
+ // and we can therefore not rely on the regular command line parsing for the application
+
+ using namespace std::literals;
+
+ auto ProcessTraceArg = [&](const std::string_view& Arg) {
+ if (Arg == "memalloc"sv)
+ {
+ Mode |= EMemoryTraceInit::AllocEvents;
+ }
+ else if (Arg == "callstack"sv)
+ {
+ Mode |= EMemoryTraceInit::Callstacks;
+ }
+ else if (Arg == "memtag"sv)
+ {
+ Mode |= EMemoryTraceInit::Tags;
+ }
+ else if (Arg == "memory"sv)
+ {
+ Mode |= EMemoryTraceInit::Full;
+ }
+ else if (Arg == "memory_light"sv)
+ {
+ Mode |= EMemoryTraceInit::Light;
+ }
+ };
+
+ constexpr std::string_view TraceOption = "--trace="sv;
+
+ std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+ if (Arg.starts_with(TraceOption))
+ {
+ const std::string_view OptionArgs = Arg.substr(TraceOption.size());
+
+ IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg);
+ }
+ };
+
+ IterateCommandlineArgs(ProcessArg);
+
+ return Mode;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMalloc*
+MemoryTrace_CreateInternal(FMalloc* InMalloc, EMemoryTraceInit Mode)
+{
+ using namespace zen;
+
+    // If allocation events are not desired we don't need to do anything; even
+    // if the user has enabled only callstacks, those will be enabled later.
+ if (!EnumHasAnyFlags(Mode, EMemoryTraceInit::AllocEvents))
+ {
+ return InMalloc;
+ }
+
+    // Some OSes (e.g. Windows) will terminate all threads except the main
+    // one as part of static deinit, so we may still receive memory trace
+    // events that would otherwise be lost because Trace's worker thread has
+    // been terminated. To flush the last remaining memory events, Trace needs
+    // to be updated, which we do in response to memory events. We use an
+    // atexit callback to know when Trace is probably no longer getting ticked.
+ atexit([]() { MemoryTrace_EnableTracePump(); });
+
+ GTraceMalloc.Construct(InMalloc);
+
+ // Both tag and callstack tracing need to use the wrapped trace malloc
+ // so we can break out tracing memory overhead (and not cause recursive behaviour).
+ if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Tags))
+ {
+ MemoryTrace_InitTags(&GTraceMalloc);
+ }
+
+ if (EnumHasAnyFlags(Mode, EMemoryTraceInit::Callstacks))
+ {
+ CallstackTrace_Create(&GTraceMalloc);
+ }
+
+ static FUndestructed<FMallocWrapper> SMallocWrapper;
+ SMallocWrapper.Construct(InMalloc);
+
+ return &SMallocWrapper;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMalloc*
+MemoryTrace_CreateInternal(FMalloc* InMalloc)
+{
+ const EMemoryTraceInit Mode = MemoryTrace_ShouldEnable();
+ return MemoryTrace_CreateInternal(InMalloc, Mode);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMalloc*
+MemoryTrace_Create(FMalloc* InMalloc)
+{
+ FMalloc* OutMalloc = MemoryTrace_CreateInternal(InMalloc);
+
+ if (OutMalloc != InMalloc)
+ {
+# if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+ FVirtualWinApiHooks::Initialize(false);
+# endif
+ }
+
+ return OutMalloc;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_Initialize()
+{
+    // At this point the system is initialized enough to allow tracing.
+ GTraceAllowed = true;
+
+ const int MIN_ALIGNMENT = 8;
+
+ UE_TRACE_LOG(Memory, Init, MemAllocChannel)
+ << Init.PageSize(4096) << Init.MarkerPeriod(MarkerSamplePeriod + 1) << Init.Version(MemoryTraceVersion)
+ << Init.MinAlignment(uint8(MIN_ALIGNMENT)) << Init.SizeShift(uint8(SizeShift));
+
+ const HeapId SystemRootHeap = MemoryTrace_RootHeapSpec(u"System memory");
+ ZEN_ASSERT(SystemRootHeap == EMemoryTraceRootHeap::SystemMemory);
+ const HeapId VideoRootHeap = MemoryTrace_RootHeapSpec(u"Video memory");
+ ZEN_ASSERT(VideoRootHeap == EMemoryTraceRootHeap::VideoMemory);
+
+ static_assert((1 << SizeShift) - 1 <= MIN_ALIGNMENT, "Not enough bits to pack size fields");
+
+# if !UE_MEMORY_TRACE_LATE_INIT
+ // On some platforms callstack initialization cannot happen this early in the process. It is initialized
+ // in other locations when UE_MEMORY_TRACE_LATE_INIT is defined. Until that point allocations cannot have
+ // callstacks.
+ CallstackTrace_Initialize();
+# endif
+}
+
+void
+MemoryTrace_Shutdown()
+{
+ // Disable any further activity
+ GTraceAllowed = false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+bool
+MemoryTrace_IsActive()
+{
+ return GTraceAllowed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_EnableTracePump()
+{
+ GDoPumpTrace = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_UpdateInternal()
+{
+ const uint32_t TheCount = GMarkerCounter.fetch_add(1, std::memory_order_relaxed);
+ if ((TheCount & MarkerSamplePeriod) == 0)
+ {
+ UE_TRACE_LOG(Memory, Marker, MemAllocChannel) << Marker.Cycle(UE::Trace::Private::TimeGetTimestamp());
+ }
+
+ if (GDoPumpTrace)
+ {
+ UE::Trace::Update();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_Alloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, AllocSystem, MemAllocChannel)
+ << AllocSystem.Address(uint64_t(Address)) << AllocSystem.CallstackId(CallstackId)
+ << AllocSystem.Size(uint32_t(Size >> SizeShift)) << AllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+ break;
+ }
+
+ case EMemoryTraceRootHeap::VideoMemory:
+ {
+ UE_TRACE_LOG(Memory, AllocVideo, MemAllocChannel)
+ << AllocVideo.Address(uint64_t(Address)) << AllocVideo.CallstackId(CallstackId)
+ << AllocVideo.Size(uint32_t(Size >> SizeShift)) << AllocVideo.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, Alloc, MemAllocChannel)
+ << Alloc.Address(uint64_t(Address)) << Alloc.CallstackId(CallstackId) << Alloc.Size(uint32_t(Size >> SizeShift))
+ << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_Free(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, FreeSystem, MemAllocChannel)
+ << FreeSystem.Address(uint64_t(Address)) << FreeSystem.CallstackId(CallstackId);
+ break;
+ }
+ case EMemoryTraceRootHeap::VideoMemory:
+ {
+ UE_TRACE_LOG(Memory, FreeVideo, MemAllocChannel)
+ << FreeVideo.Address(uint64_t(Address)) << FreeVideo.CallstackId(CallstackId);
+ break;
+ }
+ default:
+ {
+ UE_TRACE_LOG(Memory, Free, MemAllocChannel)
+ << Free.Address(uint64_t(Address)) << Free.CallstackId(CallstackId) << Free.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_ReallocAlloc(uint64_t Address, uint64_t Size, uint32_t Alignment, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, ReallocAllocSystem, MemAllocChannel)
+ << ReallocAllocSystem.Address(uint64_t(Address)) << ReallocAllocSystem.CallstackId(CallstackId)
+ << ReallocAllocSystem.Size(uint32_t(Size >> SizeShift))
+ << ReallocAllocSystem.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower));
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
+ << ReallocAlloc.Address(uint64_t(Address)) << ReallocAlloc.CallstackId(CallstackId)
+ << ReallocAlloc.Size(uint32_t(Size >> SizeShift)) << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
+ << ReallocAlloc.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_ReallocFree(uint64_t Address, HeapId RootHeap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ ZEN_ASSERT_SLOW(RootHeap < 16);
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ switch (RootHeap)
+ {
+ case EMemoryTraceRootHeap::SystemMemory:
+ {
+ UE_TRACE_LOG(Memory, ReallocFreeSystem, MemAllocChannel)
+ << ReallocFreeSystem.Address(uint64_t(Address)) << ReallocFreeSystem.CallstackId(CallstackId);
+ break;
+ }
+
+ default:
+ {
+ UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
+ << ReallocFree.Address(uint64_t(Address)) << ReallocFree.CallstackId(CallstackId)
+ << ReallocFree.RootHeap(uint8(RootHeap));
+ break;
+ }
+ }
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_SwapOp(uint64_t PageAddress, EMemoryTraceSwapOperation SwapOperation, uint32_t CompressedSize, uint32_t CallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ UE_TRACE_LOG(Memory, MemorySwapOp, MemAllocChannel)
+ << MemorySwapOp.Address(PageAddress) << MemorySwapOp.CallstackId(CallstackId) << MemorySwapOp.CompressedSize(CompressedSize)
+ << MemorySwapOp.SwapOp((uint8)SwapOperation);
+
+ MemoryTrace_UpdateInternal();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+HeapId
+MemoryTrace_HeapSpec(HeapId ParentId, const char16_t* Name, EMemoryTraceHeapFlags Flags)
+{
+ if (!GTraceAllowed)
+ {
+ return 0;
+ }
+
+ static std::atomic<HeapId> HeapIdCount(EMemoryTraceRootHeap::EndReserved + 1); // Reserve indexes for root heaps
+ const HeapId Id = HeapIdCount.fetch_add(1);
+ const uint32_t NameLen = uint32_t(zen::StringLength(Name));
+ const uint32_t DataSize = NameLen * sizeof(char16_t);
+ ZEN_ASSERT(ParentId < Id);
+
+ UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize)
+ << HeapSpec.Id(Id) << HeapSpec.ParentId(ParentId) << HeapSpec.Name(Name, NameLen) << HeapSpec.Flags(uint16(Flags));
+
+ return Id;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+HeapId
+MemoryTrace_RootHeapSpec(const char16_t* Name, EMemoryTraceHeapFlags Flags)
+{
+ if (!GTraceAllowed)
+ {
+ return 0;
+ }
+
+ static std::atomic<HeapId> RootHeapCount(0);
+ const HeapId Id = RootHeapCount.fetch_add(1);
+ ZEN_ASSERT(Id <= EMemoryTraceRootHeap::EndReserved);
+
+ const uint32_t NameLen = uint32_t(zen::StringLength(Name));
+ const uint32_t DataSize = NameLen * sizeof(char16_t);
+
+ UE_TRACE_LOG(Memory, HeapSpec, MemAllocChannel, DataSize)
+ << HeapSpec.Id(Id) << HeapSpec.ParentId(HeapId(~0)) << HeapSpec.Name(Name, NameLen)
+ << HeapSpec.Flags(uint16(EMemoryTraceHeapFlags::Root | Flags));
+
+ return Id;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_MarkAllocAsHeap(uint64_t Address, HeapId Heap, EMemoryTraceHeapAllocationFlags Flags, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ UE_TRACE_LOG(Memory, HeapMarkAlloc, MemAllocChannel)
+ << HeapMarkAlloc.Address(uint64_t(Address)) << HeapMarkAlloc.CallstackId(CallstackId)
+ << HeapMarkAlloc.Flags(uint16(EMemoryTraceHeapAllocationFlags::Heap | Flags)) << HeapMarkAlloc.Heap(Heap);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_UnmarkAllocAsHeap(uint64_t Address, HeapId Heap, uint32_t ExternalCallstackId)
+{
+ if (!GTraceAllowed)
+ {
+ return;
+ }
+
+ const uint32_t CallstackId = ExternalCallstackId ? ExternalCallstackId : GDoNotAllocateInTrace ? 0 : CallstackTrace_GetCurrentId();
+
+ // Sets all flags to zero
+ UE_TRACE_LOG(Memory, HeapUnmarkAlloc, MemAllocChannel)
+ << HeapUnmarkAlloc.Address(uint64_t(Address)) << HeapUnmarkAlloc.CallstackId(CallstackId) << HeapUnmarkAlloc.Heap(Heap);
+}
+
+} // namespace zen
+
+#else // UE_MEMORY_TRACE_ENABLED
+
+namespace zen {
+
+/////////////////////////////////////////////////////////////////////////////
+bool
+MemoryTrace_IsActive()
+{
+    return false;
+}
+
+} // namespace zen
+
+#endif // UE_MEMORY_TRACE_ENABLED
+
+namespace zen {
+
+/////////////////////////////////////////////////////////////////////////////
+FTraceMalloc::FTraceMalloc(FMalloc* InMalloc)
+{
+ WrappedMalloc = InMalloc;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+FTraceMalloc::~FTraceMalloc()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////
+void*
+FTraceMalloc::Malloc(SIZE_T Count, uint32_t Alignment)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ // UE_TRACE_METADATA_CLEAR_SCOPE();
+ UE_MEMSCOPE(TRACE_TAG);
+
+ void* NewPtr;
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ NewPtr = WrappedMalloc->Malloc(Count, Alignment);
+ }
+
+ const uint64_t Size = Count;
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+
+ UE_TRACE_LOG(Memory, Alloc, MemAllocChannel)
+ << Alloc.Address(uint64_t(NewPtr)) << Alloc.CallstackId(0) << Alloc.Size(uint32_t(Size >> SizeShift))
+ << Alloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower)) << Alloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ return NewPtr;
+#else
+ return WrappedMalloc->Malloc(Count, Alignment);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+/////////////////////////////////////////////////////////////////////////////
+void*
+FTraceMalloc::Realloc(void* Original, SIZE_T Count, uint32_t Alignment)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ // UE_TRACE_METADATA_CLEAR_SCOPE();
+ UE_MEMSCOPE(TRACE_TAG);
+
+ UE_TRACE_LOG(Memory, ReallocFree, MemAllocChannel)
+ << ReallocFree.Address(uint64_t(Original)) << ReallocFree.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ void* NewPtr;
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ NewPtr = WrappedMalloc->Realloc(Original, Count, Alignment);
+ }
+
+ const uint64_t Size = Count;
+ const uint32_t AlignmentPow2 = uint32_t(zen::CountTrailingZeros64(Alignment));
+ const uint32_t Alignment_SizeLower = (AlignmentPow2 << SizeShift) | uint32_t(Size & ((1 << SizeShift) - 1));
+
+ UE_TRACE_LOG(Memory, ReallocAlloc, MemAllocChannel)
+ << ReallocAlloc.Address(uint64_t(NewPtr)) << ReallocAlloc.CallstackId(0) << ReallocAlloc.Size(uint32_t(Size >> SizeShift))
+ << ReallocAlloc.AlignmentPow2_SizeLower(uint8(Alignment_SizeLower))
+ << ReallocAlloc.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ return NewPtr;
+#else
+ return WrappedMalloc->Realloc(Original, Count, Alignment);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+/////////////////////////////////////////////////////////////////////////////
+void
+FTraceMalloc::Free(void* Original)
+{
+#if UE_MEMORY_TRACE_ENABLED
+ UE_TRACE_LOG(Memory, Free, MemAllocChannel)
+ << Free.Address(uint64_t(Original)) << Free.RootHeap(uint8(EMemoryTraceRootHeap::SystemMemory));
+
+ {
+ zen::TGuardValue<bool> _(GDoNotAllocateInTrace, true);
+ WrappedMalloc->Free(Original);
+ }
+#else
+ WrappedMalloc->Free(Original);
+#endif // UE_MEMORY_TRACE_ENABLED
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace.cpp b/src/zencore/memtrack/moduletrace.cpp
new file mode 100644
index 000000000..51280ff3a
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.cpp
@@ -0,0 +1,296 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenbase/zenbase.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/tagtrace.h>
+
+#if ZEN_PLATFORM_WINDOWS
+# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 1
+#else
+# define PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS 0
+#endif
+
+#include "moduletrace_events.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
+
+# include <zencore/windows.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <winternl.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+# include <zencore/trace.h>
+
+# include <array>
+
+namespace zen {
+
+class FMalloc;
+
+typedef uint32_t HeapId;
+
+////////////////////////////////////////////////////////////////////////////////
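+// Helper that resolves an export from ntdll.dll by name; operator() forwards an
+// arbitrary argument list to the resolved function.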
+struct FNtDllFunction
+{
+ FARPROC Addr;
+
+ FNtDllFunction(const char* Name)
+ {
+ HMODULE NtDll = LoadLibraryW(L"ntdll.dll");
+ ZEN_ASSERT(NtDll);
+ Addr = GetProcAddress(NtDll, Name);
+ }
+
+ template<typename... ArgTypes>
+ unsigned int operator()(ArgTypes... Args)
+ {
+ typedef unsigned int(NTAPI * Prototype)(ArgTypes...);
+ return (Prototype((void*)Addr))(Args...);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+class FModuleTrace
+{
+public:
+ typedef void (*SubscribeFunc)(bool, void*, const char16_t*);
+
+ FModuleTrace(FMalloc* InMalloc);
+ ~FModuleTrace();
+ static FModuleTrace* Get();
+ void Initialize();
+ void Subscribe(SubscribeFunc Function);
+
+private:
+ void OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base);
+ void OnDllUnloaded(uintptr_t Base);
+ void OnDllNotification(unsigned int Reason, const void* DataPtr);
+ static FModuleTrace* Instance;
+ SubscribeFunc Subscribers[64];
+ int SubscriberCount = 0;
+ void* CallbackCookie = nullptr;
+ HeapId ProgramHeapId = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace* FModuleTrace::Instance = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace::FModuleTrace(FMalloc* InMalloc)
+{
+ ZEN_UNUSED(InMalloc);
+ Instance = this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace::~FModuleTrace()
+{
+ if (CallbackCookie)
+ {
+ FNtDllFunction UnregisterFunc("LdrUnregisterDllNotification");
+ UnregisterFunc(CallbackCookie);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FModuleTrace*
+FModuleTrace::Get()
+{
+ return Instance;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::Initialize()
+{
+ using namespace UE::Trace;
+
+    ProgramHeapId = MemoryTrace_HeapSpec(EMemoryTraceRootHeap::SystemMemory, u"Module", EMemoryTraceHeapFlags::None);
+
+ UE_TRACE_LOG(Diagnostics, ModuleInit, ModuleChannel, sizeof(char) * 3)
+ << ModuleInit.SymbolFormat("pdb", 3) << ModuleInit.ModuleBaseShift(uint8(0));
+
+ // Register for DLL load/unload notifications.
+ auto Thunk = [](ULONG Reason, const void* Data, void* Context) {
+ auto* Self = (FModuleTrace*)Context;
+ Self->OnDllNotification(Reason, Data);
+ };
+
+ typedef void(CALLBACK * ThunkType)(ULONG, const void*, void*);
+ auto ThunkImpl = ThunkType(Thunk);
+
+ FNtDllFunction RegisterFunc("LdrRegisterDllNotification");
+ RegisterFunc(0, ThunkImpl, this, &CallbackCookie);
+
+ // Enumerate already loaded modules.
+ const TEB* ThreadEnvBlock = NtCurrentTeb();
+ const PEB* ProcessEnvBlock = ThreadEnvBlock->ProcessEnvironmentBlock;
+ const LIST_ENTRY* ModuleIter = ProcessEnvBlock->Ldr->InMemoryOrderModuleList.Flink;
+ const LIST_ENTRY* ModuleIterEnd = ModuleIter->Blink;
+ do
+ {
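+        // The InMemoryOrderModuleList links point at LDR_DATA_TABLE_ENTRY::InMemoryOrderLinks,
+        // which sits one LIST_ENTRY past the start of the entry, hence the -1 below.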
+ const auto& ModuleData = *(LDR_DATA_TABLE_ENTRY*)(ModuleIter - 1);
+ if (ModuleData.DllBase == 0)
+ {
+ break;
+ }
+
+        OnDllLoaded(ModuleData.FullDllName, uintptr_t(ModuleData.DllBase));
+ ModuleIter = ModuleIter->Flink;
+ } while (ModuleIter != ModuleIterEnd);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::Subscribe(SubscribeFunc Function)
+{
+ ZEN_ASSERT(SubscriberCount < ZEN_ARRAY_COUNT(Subscribers));
+ Subscribers[SubscriberCount++] = Function;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::OnDllNotification(unsigned int Reason, const void* DataPtr)
+{
+ enum
+ {
+ LDR_DLL_NOTIFICATION_REASON_LOADED = 1,
+ LDR_DLL_NOTIFICATION_REASON_UNLOADED = 2,
+ };
+
+ struct FNotificationData
+ {
+ uint32_t Flags;
+ const UNICODE_STRING& FullPath;
+ const UNICODE_STRING& BaseName;
+ uintptr_t Base;
+ };
+ const auto& Data = *(FNotificationData*)DataPtr;
+
+ switch (Reason)
+ {
+ case LDR_DLL_NOTIFICATION_REASON_LOADED:
+ OnDllLoaded(Data.FullPath, Data.Base);
+ break;
+ case LDR_DLL_NOTIFICATION_REASON_UNLOADED:
+ OnDllUnloaded(Data.Base);
+ break;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::OnDllLoaded(const UNICODE_STRING& Name, uintptr_t Base)
+{
+ const auto* DosHeader = (IMAGE_DOS_HEADER*)Base;
+ const auto* NtHeaders = (IMAGE_NT_HEADERS*)(Base + DosHeader->e_lfanew);
+ const IMAGE_OPTIONAL_HEADER& OptionalHeader = NtHeaders->OptionalHeader;
+ uint8_t ImageId[20];
+
+ // Find the guid and age of the binary, used to match debug files
+ const IMAGE_DATA_DIRECTORY& DebugInfoEntry = OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
+ const auto* DebugEntries = (IMAGE_DEBUG_DIRECTORY*)(Base + DebugInfoEntry.VirtualAddress);
+ for (uint32_t i = 0, n = DebugInfoEntry.Size / sizeof(DebugEntries[0]); i < n; ++i)
+ {
+ const IMAGE_DEBUG_DIRECTORY& Entry = DebugEntries[i];
+ if (Entry.Type == IMAGE_DEBUG_TYPE_CODEVIEW)
+ {
+ struct FCodeView7
+ {
+ uint32_t Signature;
+ uint32_t Guid[4];
+ uint32_t Age;
+ };
+
+ if (Entry.SizeOfData < sizeof(FCodeView7))
+ {
+ continue;
+ }
+
+ const auto* CodeView7 = (FCodeView7*)(Base + Entry.AddressOfRawData);
+ if (CodeView7->Signature != 'SDSR')
+ {
+ continue;
+ }
+
+ memcpy(ImageId, (uint8_t*)&CodeView7->Guid, sizeof(uint32_t) * 4);
+ memcpy(&ImageId[16], (uint8_t*)&CodeView7->Age, sizeof(uint32_t));
+ break;
+ }
+ }
+
+ // Note: UNICODE_STRING.Length is the size in bytes of the string buffer.
+ UE_TRACE_LOG(Diagnostics, ModuleLoad, ModuleChannel, uint32_t(Name.Length + sizeof(ImageId)))
+ << ModuleLoad.Name((const char16_t*)Name.Buffer, Name.Length / 2) << ModuleLoad.Base(uint64_t(Base))
+ << ModuleLoad.Size(OptionalHeader.SizeOfImage) << ModuleLoad.ImageId(ImageId, uint32_t(sizeof(ImageId)));
+
+# if UE_MEMORY_TRACE_ENABLED
+ {
+ UE_MEMSCOPE(ELLMTag::ProgramSize);
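+        // Record the image as an allocation in the system root heap, promote that
+        // allocation to its own heap, then record the image again inside that heap.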
+ MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap(Base, ProgramHeapId);
+ MemoryTrace_Alloc(Base, OptionalHeader.SizeOfImage, 4 * 1024, EMemoryTraceRootHeap::SystemMemory);
+ }
+# endif // UE_MEMORY_TRACE_ENABLED
+
+ for (int i = 0; i < SubscriberCount; ++i)
+ {
+ Subscribers[i](true, (void*)Base, (const char16_t*)Name.Buffer);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FModuleTrace::OnDllUnloaded(uintptr_t Base)
+{
+# if UE_MEMORY_TRACE_ENABLED
+ MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_UnmarkAllocAsHeap(Base, ProgramHeapId);
+ MemoryTrace_Free(Base, EMemoryTraceRootHeap::SystemMemory);
+# endif // UE_MEMORY_TRACE_ENABLED
+
+ UE_TRACE_LOG(Diagnostics, ModuleUnload, ModuleChannel) << ModuleUnload.Base(uint64(Base));
+
+ for (int i = 0; i < SubscriberCount; ++i)
+ {
+ Subscribers[i](false, (void*)Base, nullptr);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+Modules_Create(FMalloc* Malloc)
+{
+ if (FModuleTrace::Get() != nullptr)
+ {
+ return;
+ }
+
+ static FModuleTrace Instance(Malloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+Modules_Initialize()
+{
+ if (FModuleTrace* Instance = FModuleTrace::Get())
+ {
+ Instance->Initialize();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+Modules_Subscribe(void (*Function)(bool, void*, const char16_t*))
+{
+ if (FModuleTrace* Instance = FModuleTrace::Get())
+ {
+ Instance->Subscribe(Function);
+ }
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_MODULE_DIAGNOSTICS
diff --git a/src/zencore/memtrack/moduletrace.h b/src/zencore/memtrack/moduletrace.h
new file mode 100644
index 000000000..5e7374faa
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace.h
@@ -0,0 +1,11 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+namespace zen {
+
+void Modules_Create(class FMalloc*);
+void Modules_Subscribe(void (*)(bool, void*, const char16_t*));
+void Modules_Initialize();
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.cpp b/src/zencore/memtrack/moduletrace_events.cpp
new file mode 100644
index 000000000..9c6a9b648
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.cpp
@@ -0,0 +1,16 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/trace.h>
+
+#include "moduletrace_events.h"
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_DEFINE(ModuleChannel, "Module information needed for symbols resolution", true)
+
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleInit)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleLoad)
+UE_TRACE_EVENT_DEFINE(Diagnostics, ModuleUnload)
+
+} // namespace zen
diff --git a/src/zencore/memtrack/moduletrace_events.h b/src/zencore/memtrack/moduletrace_events.h
new file mode 100644
index 000000000..1bda42fe8
--- /dev/null
+++ b/src/zencore/memtrack/moduletrace_events.h
@@ -0,0 +1,27 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/trace.h>
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+UE_TRACE_CHANNEL_EXTERN(ModuleChannel)
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleInit, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, SymbolFormat)
+ UE_TRACE_EVENT_FIELD(uint8, ModuleBaseShift)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleLoad, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(UE::Trace::WideString, Name)
+ UE_TRACE_EVENT_FIELD(uint64, Base)
+ UE_TRACE_EVENT_FIELD(uint32, Size)
+    UE_TRACE_EVENT_FIELD(uint8[], ImageId) // Platform-specific id for this image, used to match debug files where available
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN_EXTERN(Diagnostics, ModuleUnload, NoSync | Important)
+ UE_TRACE_EVENT_FIELD(uint64, Base)
+UE_TRACE_EVENT_END()
+
+} // namespace zen
diff --git a/src/zencore/memtrack/platformtls.h b/src/zencore/memtrack/platformtls.h
new file mode 100644
index 000000000..f134e68a8
--- /dev/null
+++ b/src/zencore/memtrack/platformtls.h
@@ -0,0 +1,107 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+/**
+ * It should be possible to provide a generic implementation as long as a threadID is provided. We don't do that yet.
+ */
+struct FGenericPlatformTLS
+{
+ static const uint32_t InvalidTlsSlot = 0xFFFFFFFF;
+
+ /**
+ * Return false if this is an invalid TLS slot
+ * @param SlotIndex the TLS index to check
+ * @return true if this looks like a valid slot
+ */
+ static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; }
+};
+
+#if ZEN_PLATFORM_WINDOWS
+
+# include <zencore/windows.h>
+
+class FWindowsPlatformTLS : public FGenericPlatformTLS
+{
+public:
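+    // ::TlsAlloc() returns TLS_OUT_OF_INDEXES (0xFFFFFFFF) on failure, which
+    // matches InvalidTlsSlot.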
+ static uint32_t AllocTlsSlot() { return ::TlsAlloc(); }
+
+ static void FreeTlsSlot(uint32_t SlotIndex) { ::TlsFree(SlotIndex); }
+
+ static void SetTlsValue(uint32_t SlotIndex, void* Value) { ::TlsSetValue(SlotIndex, Value); }
+
+ /**
+ * Reads the value stored at the specified TLS slot
+ *
+ * @return the value stored in the slot
+ */
+ static void* GetTlsValue(uint32_t SlotIndex) { return ::TlsGetValue(SlotIndex); }
+
+ /**
+ * Return false if this is an invalid TLS slot
+ * @param SlotIndex the TLS index to check
+ * @return true if this looks like a valid slot
+ */
+ static bool IsValidTlsSlot(uint32_t SlotIndex) { return SlotIndex != InvalidTlsSlot; }
+};
+
+typedef FWindowsPlatformTLS FPlatformTLS;
+
+#elif ZEN_PLATFORM_MAC
+
+# include <pthread.h>
+
+/**
+ * Apple implementation of the TLS OS functions
+ **/
+struct FApplePlatformTLS : public FGenericPlatformTLS
+{
+ /**
+ * Returns the currently executing thread's id
+ */
+ static uint32_t GetCurrentThreadId(void) { return (uint32_t)pthread_mach_thread_np(pthread_self()); }
+
+ /**
+ * Allocates a thread local store slot
+ */
+ static uint32_t AllocTlsSlot(void)
+ {
+ // allocate a per-thread mem slot
+ pthread_key_t SlotKey = 0;
+ if (pthread_key_create(&SlotKey, NULL) != 0)
+ {
+ SlotKey = InvalidTlsSlot; // matches the Windows TlsAlloc() retval.
+ }
+ return SlotKey;
+ }
+
+ /**
+ * Sets a value in the specified TLS slot
+ *
+ * @param SlotIndex the TLS index to store it in
+ * @param Value the value to store in the slot
+ */
+ static void SetTlsValue(uint32_t SlotIndex, void* Value) { pthread_setspecific((pthread_key_t)SlotIndex, Value); }
+
+ /**
+ * Reads the value stored at the specified TLS slot
+ *
+ * @return the value stored in the slot
+ */
+ static void* GetTlsValue(uint32_t SlotIndex) { return pthread_getspecific((pthread_key_t)SlotIndex); }
+
+ /**
+ * Frees a previously allocated TLS slot
+ *
+ * @param SlotIndex the TLS index to store it in
+ */
+ static void FreeTlsSlot(uint32_t SlotIndex) { pthread_key_delete((pthread_key_t)SlotIndex); }
+};
+
+typedef FApplePlatformTLS FPlatformTLS;
+
+#else
+# error Platform not yet supported
+#endif
diff --git a/src/zencore/memtrack/tagtrace.cpp b/src/zencore/memtrack/tagtrace.cpp
new file mode 100644
index 000000000..15ba78ae4
--- /dev/null
+++ b/src/zencore/memtrack/tagtrace.cpp
@@ -0,0 +1,237 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/tagtrace.h>
+
+#include "growonlylockfreehash.h"
+
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+# include <string.h> // memset used by FTagNameSetEntry::ClearEntries
+
+# include <zencore/string.h>
+
+namespace zen {
+////////////////////////////////////////////////////////////////////////////////
+
+UE_TRACE_CHANNEL_EXTERN(MemAllocChannel);
+
+UE_TRACE_EVENT_BEGIN(Memory, TagSpec, Important | NoSync)
+ UE_TRACE_EVENT_FIELD(int32, Tag)
+ UE_TRACE_EVENT_FIELD(int32, Parent)
+ UE_TRACE_EVENT_FIELD(UE::Trace::AnsiString, Display)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScope, NoSync)
+ UE_TRACE_EVENT_FIELD(int32, Tag)
+UE_TRACE_EVENT_END()
+
+UE_TRACE_EVENT_BEGIN(Memory, MemoryScopePtr, NoSync)
+ UE_TRACE_EVENT_FIELD(uint64, Ptr)
+UE_TRACE_EVENT_END()
+
+////////////////////////////////////////////////////////////////////////////////
+// Per thread active tag, i.e. the top level FMemScope
+thread_local int32 GActiveTag;
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope()
+{
+}
+
+FMemScope::FMemScope(int32_t InTag, bool bShouldActivate /*= true*/)
+{
+ if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+ {
+ ActivateScope(InTag);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::FMemScope(ELLMTag InTag, bool bShouldActivate /*= true*/)
+{
+ if (UE_TRACE_CHANNELEXPR_IS_ENABLED(MemAllocChannel) & bShouldActivate)
+ {
+ ActivateScope(static_cast<int32>(InTag));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FMemScope::ActivateScope(int32_t InTag)
+{
+ if (auto LogScope = FMemoryMemoryScopeFields::LogScopeType::ScopedEnter<FMemoryMemoryScopeFields>())
+ {
+ if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopeFields*)(&LogScope))
+ {
+ Inner.SetActive();
+ LogScope += LogScope << MemoryScope.Tag(InTag);
+ PrevTag = GActiveTag;
+ GActiveTag = InTag;
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScope::~FMemScope()
+{
+ if (Inner.bActive)
+ {
+ GActiveTag = PrevTag;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::FMemScopePtr(uint64_t InPtr)
+{
+ if (InPtr != 0 && TRACE_PRIVATE_CHANNELEXPR_IS_ENABLED(MemAllocChannel))
+ {
+ if (auto LogScope = FMemoryMemoryScopePtrFields::LogScopeType::ScopedEnter<FMemoryMemoryScopePtrFields>())
+ {
+ if (const auto& __restrict MemoryScope = *(FMemoryMemoryScopePtrFields*)(&LogScope))
+ {
+ Inner.SetActive(), LogScope += LogScope << MemoryScope.Ptr(InPtr);
+ }
+ }
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+FMemScopePtr::~FMemScopePtr()
+{
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Utility class that manages tracing the specification of unique LLM tags
+ * and custom name based tags.
+ */
+class FTagTrace
+{
+public:
+ FTagTrace(FMalloc* InMalloc);
+ void AnnounceGenericTags() const;
+ void AnnounceSpecialTags() const;
+ int32 AnnounceCustomTag(int32 Tag, int32 ParentTag, const ANSICHAR* Display) const;
+
+private:
+ struct FTagNameSetEntry
+ {
+ std::atomic_int32_t Data;
+
+ int32_t GetKey() const { return Data.load(std::memory_order_relaxed); }
+ bool GetValue() const { return true; }
+ bool IsEmpty() const { return Data.load(std::memory_order_relaxed) == 0; } // NAME_None is treated as empty
+ void SetKeyValue(int32_t Key, bool Value)
+ {
+ ZEN_UNUSED(Value);
+ Data.store(Key, std::memory_order_relaxed);
+ }
+ static uint32_t KeyHash(int32_t Key) { return static_cast<uint32>(Key); }
+ static void ClearEntries(FTagNameSetEntry* Entries, int32_t EntryCount)
+ {
+ memset(Entries, 0, EntryCount * sizeof(FTagNameSetEntry));
+ }
+ };
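+	// A note on the entry protocol (inferred from how TGrowOnlyLockFreeHash is
+	// used here): the hash probes IsEmpty() for free buckets, publishes entries
+	// via SetKeyValue()/GetKey() with relaxed atomics (the table only grows,
+	// never rehashes in place), and buckets by KeyHash().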
+ typedef TGrowOnlyLockFreeHash<FTagNameSetEntry, int32_t, bool> FTagNameSet;
+
+ FTagNameSet AnnouncedNames;
+ static FMalloc* Malloc;
+};
+
+FMalloc* FTagTrace::Malloc = nullptr;
+static FTagTrace* GTagTrace = nullptr;
+
+////////////////////////////////////////////////////////////////////////////////
+FTagTrace::FTagTrace(FMalloc* InMalloc) : AnnouncedNames(InMalloc)
+{
+ Malloc = InMalloc;
+ AnnouncedNames.Reserve(1024);
+ AnnounceGenericTags();
+ AnnounceSpecialTags();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void
+FTagTrace::AnnounceGenericTags() const
+{
+# define TRACE_TAG_SPEC(Enum, Str, ParentTag) \
+ { \
+ const uint32_t DisplayLen = (uint32_t)StringLength(Str); \
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR)) \
+ << TagSpec.Tag((int32_t)ELLMTag::Enum) << TagSpec.Parent((int32_t)ParentTag) << TagSpec.Display(Str, DisplayLen); \
+ }
+ LLM_ENUM_GENERIC_TAGS(TRACE_TAG_SPEC);
+# undef TRACE_TAG_SPEC
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void
+FTagTrace::AnnounceSpecialTags() const
+{
+ auto EmitTag = [](const char16_t* DisplayString, int32_t Tag, int32_t ParentTag) {
+ const uint32_t DisplayLen = (uint32_t)StringLength(DisplayString);
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+ << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(DisplayString, DisplayLen);
+ };
+
+ EmitTag(u"Trace", TRACE_TAG, -1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+FTagTrace::AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const ANSICHAR* Display) const
+{
+ const uint32_t DisplayLen = (uint32_t)StringLength(Display);
+ UE_TRACE_LOG(Memory, TagSpec, MemAllocChannel, DisplayLen * sizeof(ANSICHAR))
+ << TagSpec.Tag(Tag) << TagSpec.Parent(ParentTag) << TagSpec.Display(Display, DisplayLen);
+ return Tag;
+}
+
+} // namespace zen
+
+#endif // UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+void
+MemoryTrace_InitTags(FMalloc* InMalloc)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
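+	// The tag table is allocated and constructed in place via the passed-in
+	// allocator rather than global new, presumably so tag bookkeeping does not
+	// re-enter the (possibly wrapped) global allocator during startup.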
+ GTagTrace = (FTagTrace*)InMalloc->Malloc(sizeof(FTagTrace), alignof(FTagTrace));
+ new (GTagTrace) FTagTrace(InMalloc);
+#else
+ ZEN_UNUSED(InMalloc);
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+MemoryTrace_AnnounceCustomTag(int32_t Tag, int32_t ParentTag, const char* Display)
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ // todo: How do we check if tag trace is active?
+ if (GTagTrace)
+ {
+ return GTagTrace->AnnounceCustomTag(Tag, ParentTag, Display);
+ }
+#else
+ ZEN_UNUSED(Tag, ParentTag, Display);
+#endif
+ return -1;
+}
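+
+// Illustrative only: a caller would announce a custom tag once and then tag
+// scopes with it. The tag value, the parent of -1 (no parent), and the
+// UE_MEMSCOPE usage are assumptions based on how the built-in tags are
+// announced above.
+//
+//   static const int32_t CacheTag = MemoryTrace_AnnounceCustomTag(1000, -1, "ZenCache");
+//   {
+//       UE_MEMSCOPE(CacheTag);
+//       // ... allocations attributed to "ZenCache" ...
+//   }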
+
+////////////////////////////////////////////////////////////////////////////////
+int32_t
+MemoryTrace_GetActiveTag()
+{
+#if UE_MEMORY_TAGS_TRACE_ENABLED && UE_TRACE_ENABLED
+ return GActiveTag;
+#else
+ return -1;
+#endif
+}
+
+} // namespace zen
diff --git a/src/zencore/memtrack/tracemalloc.h b/src/zencore/memtrack/tracemalloc.h
new file mode 100644
index 000000000..54606ac45
--- /dev/null
+++ b/src/zencore/memtrack/tracemalloc.h
@@ -0,0 +1,24 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+#pragma once
+
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memorytrace.h>
+
+namespace zen {
+
+class FTraceMalloc : public FMalloc
+{
+public:
+ FTraceMalloc(FMalloc* InMalloc);
+ virtual ~FTraceMalloc();
+
+ virtual void* Malloc(SIZE_T Count, uint32 Alignment) override;
+ virtual void* Realloc(void* Original, SIZE_T Count, uint32 Alignment) override;
+ virtual void Free(void* Original) override;
+
+ virtual void OnMallocInitialized() override { WrappedMalloc->OnMallocInitialized(); }
+
+ FMalloc* WrappedMalloc;
+};
+
+} // namespace zen
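+
+// Wiring sketch (taken from main.cpp in this change): MemoryTrace_Create()
+// returns an FTraceMalloc wrapping the current allocator, and the caller
+// swaps it in before initializing the trace:
+//
+//   FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc);
+//   if (TraceMalloc != GMalloc)
+//   {
+//       GMalloc = TraceMalloc;
+//       MemoryTrace_Initialize();
+//   }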
diff --git a/src/zencore/memtrack/vatrace.cpp b/src/zencore/memtrack/vatrace.cpp
new file mode 100644
index 000000000..4dea27f1b
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.cpp
@@ -0,0 +1,361 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "vatrace.h"
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+
+# include <zencore/memory/memorytrace.h>
+
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+# pragma comment(lib, "mincore.lib") // VirtualAlloc2
+# endif
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+class FTextSectionEditor
+{
+public:
+ ~FTextSectionEditor();
+ template<typename T>
+ T* Hook(T* Target, T* HookFunction);
+
+private:
+ struct FTrampolineBlock
+ {
+ FTrampolineBlock* Next;
+ uint32_t Size;
+ uint32_t Used;
+ };
+
+ static void* GetActualAddress(void* Function);
+ FTrampolineBlock* AllocateTrampolineBlock(void* Reference);
+ uint8_t* AllocateTrampoline(void* Reference, unsigned int Size);
+ void* HookImpl(void* Target, void* HookFunction);
+ FTrampolineBlock* HeadBlock = nullptr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+FTextSectionEditor::~FTextSectionEditor()
+{
+ for (FTrampolineBlock* Block = HeadBlock; Block != nullptr; Block = Block->Next)
+ {
+ DWORD Unused;
+ VirtualProtect(Block, Block->Size, PAGE_EXECUTE_READ, &Unused);
+ }
+
+ FlushInstructionCache(GetCurrentProcess(), nullptr, 0);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void*
+FTextSectionEditor::GetActualAddress(void* Function)
+{
+	// Follows a jmp instruction (only 0xff /4, i.e. jmp [rip+disp32], for now)
+	// at Function and returns the address it would jmp to.
+
+ uint8_t* Addr = (uint8_t*)Function;
+ int Offset = unsigned(Addr[0] & 0xf0) == 0x40; // REX prefix
+ if (Addr[Offset + 0] == 0xff && Addr[Offset + 1] == 0x25)
+ {
+ Addr += Offset;
+ Addr = *(uint8_t**)(Addr + 6 + *(uint32_t*)(Addr + 2));
+ }
+ return Addr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+FTextSectionEditor::FTrampolineBlock*
+FTextSectionEditor::AllocateTrampolineBlock(void* Reference)
+{
+	static const size_t BlockSize = 0x10000; // 64KB is Windows' canonical allocation granularity
+
+ // Find the start of the main allocation that mapped Reference
+ MEMORY_BASIC_INFORMATION MemInfo;
+ VirtualQuery(Reference, &MemInfo, sizeof(MemInfo));
+ auto* Ptr = (uint8_t*)(MemInfo.AllocationBase);
+
+	// Step backwards one block at a time and try to allocate that address
+ while (true)
+ {
+ Ptr -= BlockSize;
+ if (VirtualAlloc(Ptr, BlockSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) != nullptr)
+ {
+ break;
+ }
+
+ uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Ptr);
+ if (Distance >= 1ull << 31)
+ {
+ ZEN_ASSERT(!"Failed to allocate trampoline blocks for memory tracing hooks");
+ }
+ }
+
+ auto* Block = (FTrampolineBlock*)Ptr;
+ Block->Next = HeadBlock;
+ Block->Size = BlockSize;
+ Block->Used = sizeof(FTrampolineBlock);
+ HeadBlock = Block;
+
+ return Block;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+uint8_t*
+FTextSectionEditor::AllocateTrampoline(void* Reference, unsigned int Size)
+{
+	// Try to find an existing block within 2^31 bytes before Reference so a
+	// 32-bit relative displacement can reach it
+ FTrampolineBlock* Block;
+ for (Block = HeadBlock; Block != nullptr; Block = Block->Next)
+ {
+ uintptr_t Distance = uintptr_t(Reference) - uintptr_t(Block);
+ if (Distance < 1ull << 31)
+ {
+ break;
+ }
+ }
+
+ // If we didn't find a block then we need to allocate a new one.
+ if (Block == nullptr)
+ {
+ Block = AllocateTrampolineBlock(Reference);
+ }
+
+ // Allocate space for the trampoline.
+ uint32_t NextUsed = Block->Used + Size;
+ if (NextUsed > Block->Size)
+ {
+		// Block is full. We could allocate a new block here, but since only a
+		// handful of hooks are expected this path should never be hit.
+ ZEN_ASSERT(!"Unable to allocate memory for memory tracing's hooks");
+ }
+
+ uint8_t* Out = (uint8_t*)Block + Block->Used;
+ Block->Used = NextUsed;
+
+ return Out;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+template<typename T>
+T*
+FTextSectionEditor::Hook(T* Target, T* HookFunction)
+{
+ return (T*)HookImpl((void*)Target, (void*)HookFunction);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+void*
+FTextSectionEditor::HookImpl(void* Target, void* HookFunction)
+{
+ Target = GetActualAddress(Target);
+
+ // Very rudimentary x86_64 instruction length decoding that only supports op
+ // code ranges (0x80,0x8b) and (0x50,0x5f). Enough for simple prologues
+ uint8_t* __restrict Start = (uint8_t*)Target;
+ const uint8_t* Read = Start;
+ do
+ {
+ Read += (Read[0] & 0xf0) == 0x40; // REX prefix
+ uint8_t Inst = *Read++;
+ if (unsigned(Inst - 0x80) < 0x0cu)
+ {
+ uint8_t ModRm = *Read++;
+ Read += ((ModRm & 0300) < 0300) & ((ModRm & 0007) == 0004); // SIB
+ switch (ModRm & 0300) // Disp[8|32]
+ {
+ case 0100:
+ Read += 1;
+ break;
+ case 0200:
+ Read += 5;
+ break;
+ }
+ Read += (Inst == 0x83);
+ }
+ else if (unsigned(Inst - 0x50) >= 0x10u)
+ {
+ ZEN_ASSERT(!"Unknown instruction");
+ }
+ } while (Read - Start < 6);
+
+ static const int TrampolineSize = 24;
+ int PatchSize = int(Read - Start);
+ uint8_t* TrampolinePtr = AllocateTrampoline(Start, PatchSize + TrampolineSize);
+
+ // Write the trampoline
+ *(void**)TrampolinePtr = HookFunction;
+
+ uint8_t* PatchJmp = TrampolinePtr + sizeof(void*);
+ memcpy(PatchJmp, Start, PatchSize);
+
+ PatchJmp += PatchSize;
+ *PatchJmp = 0xe9;
+ *(int32_t*)(PatchJmp + 1) = int32_t(intptr_t(Start + PatchSize) - intptr_t(PatchJmp)) - 5;
+
+ // Need to make the text section writeable
+ DWORD ProtPrev;
+ uintptr_t ProtBase = uintptr_t(Target) & ~0x0fff; // 0x0fff is mask of VM page size
+ size_t ProtSize = ((ProtBase + 16 + 0x1000) & ~0x0fff) - ProtBase; // 16 is enough for one x86 instruction
+ VirtualProtect((void*)ProtBase, ProtSize, PAGE_EXECUTE_READWRITE, &ProtPrev);
+
+ // Patch function to jmp to the hook
+ uint16_t* HookJmp = (uint16_t*)Target;
+ HookJmp[0] = 0x25ff;
+ *(int32_t*)(HookJmp + 1) = int32_t(intptr_t(TrampolinePtr) - intptr_t(HookJmp + 3));
+
+ // Put the protection back the way it was
+ VirtualProtect((void*)ProtBase, ProtSize, ProtPrev, &ProtPrev);
+
+ return PatchJmp - PatchSize;
+}
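+
+////////////////////////////////////////////////////////////////////////////////
+// Resulting layout (derived from the code above): the trampoline block holds
+// an 8-byte pointer to the hook followed by the relocated prologue and a jmp
+// back, while the target's prologue is replaced with an indirect jmp through
+// that pointer.
+//
+//   Target:      ff 25 <disp32>          ; jmp [rip+disp32] -> *TrampolinePtr = HookFunction
+//   Trampoline:  [HookFunction ptr (8)]  ; read by the patched jmp
+//                [copied prologue]       ; original first instructions
+//                e9 <rel32>              ; jmp back to Target+PatchSize
+//
+// HookImpl returns the copied-prologue address so the hook can still call the
+// original function.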
+
+//////////////////////////////////////////////////////////////////////////
+
+bool FVirtualWinApiHooks::bLight;
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+PVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+# else
+LPVOID(WINAPI* FVirtualWinApiHooks::VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+# endif
+BOOL(WINAPI* FVirtualWinApiHooks::VmFreeOrig)(LPVOID, SIZE_T, DWORD);
+BOOL(WINAPI* FVirtualWinApiHooks::VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
+
+void
+FVirtualWinApiHooks::Initialize(bool bInLight)
+{
+ bLight = bInLight;
+
+ FTextSectionEditor Editor;
+
+ // Note that hooking alloc functions is done last as applying the hook can
+ // allocate some memory pages.
+
+ VmFreeOrig = Editor.Hook(VirtualFree, &FVirtualWinApiHooks::VmFree);
+ VmFreeExOrig = Editor.Hook(VirtualFreeEx, &FVirtualWinApiHooks::VmFreeEx);
+
+# if ZEN_PLATFORM_WINDOWS
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+ {
+ VmAlloc2Orig = Editor.Hook(VirtualAlloc2, &FVirtualWinApiHooks::VmAlloc2);
+ }
+# else // NTDDI_VERSION
+ {
+ VmAlloc2Orig = nullptr;
+ HINSTANCE DllInstance;
+ DllInstance = LoadLibrary(TEXT("kernelbase.dll"));
+ if (DllInstance != NULL)
+ {
+# pragma warning(push)
+# pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'FVirtualWinApiHooks::FnVirtualAlloc2'
+ VmAlloc2Orig = (FnVirtualAlloc2)GetProcAddress(DllInstance, "VirtualAlloc2");
+# pragma warning(pop)
+ FreeLibrary(DllInstance);
+ }
+ if (VmAlloc2Orig)
+ {
+ VmAlloc2Orig = Editor.Hook(VmAlloc2Orig, &FVirtualWinApiHooks::VmAlloc2);
+ }
+ }
+# endif // NTDDI_VERSION
+# endif // PLATFORM_WINDOWS
+
+ VmAllocExOrig = Editor.Hook(VirtualAllocEx, &FVirtualWinApiHooks::VmAllocEx);
+ VmAllocOrig = Editor.Hook(VirtualAlloc, &FVirtualWinApiHooks::VmAlloc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
+{
+ LPVOID Ret = VmAllocOrig(Address, Size, Type, Protect);
+
+	// For now we track any reserve. Going forward we need events that
+	// differentiate reserves from commits, plus corresponding information on frees.
+ if (Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+BOOL WINAPI
+FVirtualWinApiHooks::VmFree(LPVOID Address, SIZE_T Size, DWORD Type)
+{
+ if (Type & MEM_RELEASE)
+ {
+ MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return VmFreeOrig(Address, Size, Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect)
+{
+ LPVOID Ret = VmAllocExOrig(Process, Address, Size, Type, Protect);
+
+ if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && Address == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+BOOL WINAPI
+FVirtualWinApiHooks::VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type)
+{
+ if (Process == GetCurrentProcess() && (Type & MEM_RELEASE))
+ {
+ MemoryTrace_UnmarkAllocAsHeap((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_Free((uint64_t)Address, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return VmFreeExOrig(Process, Address, Size, Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+PVOID WINAPI
+FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
+ PVOID BaseAddress,
+ SIZE_T Size,
+ ULONG Type,
+ ULONG PageProtection,
+ MEM_EXTENDED_PARAMETER* ExtendedParameters,
+ ULONG ParameterCount)
+# else
+LPVOID WINAPI
+FVirtualWinApiHooks::VmAlloc2(HANDLE Process,
+ LPVOID BaseAddress,
+ SIZE_T Size,
+ ULONG Type,
+ ULONG PageProtection,
+ /*MEM_EXTENDED_PARAMETER* */ void* ExtendedParameters,
+ ULONG ParameterCount)
+# endif
+{
+ LPVOID Ret = VmAlloc2Orig(Process, BaseAddress, Size, Type, PageProtection, ExtendedParameters, ParameterCount);
+
+ if (Process == GetCurrentProcess() && Ret != nullptr && ((Type & MEM_RESERVE) || ((Type & MEM_COMMIT) && BaseAddress == nullptr)))
+ {
+ MemoryTrace_Alloc((uint64_t)Ret, Size, 0, EMemoryTraceRootHeap::SystemMemory);
+ MemoryTrace_MarkAllocAsHeap((uint64_t)Ret, EMemoryTraceRootHeap::SystemMemory);
+ }
+
+ return Ret;
+}
+
+} // namespace zen
+
+#endif // PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
diff --git a/src/zencore/memtrack/vatrace.h b/src/zencore/memtrack/vatrace.h
new file mode 100644
index 000000000..59cc7fe97
--- /dev/null
+++ b/src/zencore/memtrack/vatrace.h
@@ -0,0 +1,61 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenbase/zenbase.h>
+
+#if ZEN_PLATFORM_WINDOWS && !defined(PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS)
+# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 1
+#endif
+
+#ifndef PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+# define PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS 0
+#endif
+
+#if PLATFORM_SUPPORTS_TRACE_WIN32_VIRTUAL_MEMORY_HOOKS
+# include <zencore/windows.h>
+
+namespace zen {
+
+class FVirtualWinApiHooks
+{
+public:
+ static void Initialize(bool bInLight);
+
+private:
+ FVirtualWinApiHooks();
+ static bool bLight;
+ static LPVOID WINAPI VmAlloc(LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
+ static LPVOID WINAPI VmAllocEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type, DWORD Protect);
+# if (NTDDI_VERSION >= NTDDI_WIN10_RS4)
+ static PVOID WINAPI VmAlloc2(HANDLE Process,
+ PVOID BaseAddress,
+ SIZE_T Size,
+ ULONG AllocationType,
+ ULONG PageProtection,
+ MEM_EXTENDED_PARAMETER* ExtendedParameters,
+ ULONG ParameterCount);
+ static PVOID(WINAPI* VmAlloc2Orig)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+ typedef PVOID(__stdcall* FnVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
+# else
+ static LPVOID WINAPI VmAlloc2(HANDLE Process,
+ LPVOID BaseAddress,
+ SIZE_T Size,
+ ULONG AllocationType,
+ ULONG PageProtection,
+ void* ExtendedParameters,
+ ULONG ParameterCount);
+ static LPVOID(WINAPI* VmAlloc2Orig)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /*MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+ typedef LPVOID(__stdcall* FnVirtualAlloc2)(HANDLE, LPVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
+# endif
+ static BOOL WINAPI VmFree(LPVOID Address, SIZE_T Size, DWORD Type);
+ static BOOL WINAPI VmFreeEx(HANDLE Process, LPVOID Address, SIZE_T Size, DWORD Type);
+ static LPVOID(WINAPI* VmAllocOrig)(LPVOID, SIZE_T, DWORD, DWORD);
+ static LPVOID(WINAPI* VmAllocExOrig)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD);
+ static BOOL(WINAPI* VmFreeOrig)(LPVOID, SIZE_T, DWORD);
+ static BOOL(WINAPI* VmFreeExOrig)(HANDLE, LPVOID, SIZE_T, DWORD);
+};
+
+} // namespace zen
+
+#endif
diff --git a/src/zencore/sharedbuffer.cpp b/src/zencore/sharedbuffer.cpp
index 993ca40e6..78efb9d42 100644
--- a/src/zencore/sharedbuffer.cpp
+++ b/src/zencore/sharedbuffer.cpp
@@ -2,6 +2,7 @@
#include <zencore/except.h>
#include <zencore/fmtutils.h>
+#include <zencore/memory/memory.h>
#include <zencore/sharedbuffer.h>
#include <zencore/testing.h>
diff --git a/src/zencore/stats.cpp b/src/zencore/stats.cpp
index 7c1a9e086..6be16688b 100644
--- a/src/zencore/stats.cpp
+++ b/src/zencore/stats.cpp
@@ -3,9 +3,11 @@
#include "zencore/stats.h"
#include <zencore/compactbinarybuilder.h>
-#include "zencore/intmath.h"
-#include "zencore/thread.h"
-#include "zencore/timer.h"
+#include <zencore/intmath.h>
+#include <zencore/memory/llm.h>
+#include <zencore/memory/tagtrace.h>
+#include <zencore/thread.h>
+#include <zencore/timer.h>
#include <cmath>
#include <gsl/gsl-lite.hpp>
@@ -222,8 +224,10 @@ thread_local xoshiro256 ThreadLocalRng;
//////////////////////////////////////////////////////////////////////////
-UniformSample::UniformSample(uint32_t ReservoirSize) : m_Values(ReservoirSize)
+UniformSample::UniformSample(uint32_t ReservoirSize)
{
+ UE_MEMSCOPE(ELLMTag::Metrics);
+ m_Values = std::vector<std::atomic<int64_t>>(ReservoirSize);
}
UniformSample::~UniformSample()
@@ -273,6 +277,8 @@ UniformSample::Update(int64_t Value)
SampleSnapshot
UniformSample::Snapshot() const
{
+ UE_MEMSCOPE(ELLMTag::Metrics);
+
uint64_t ValuesSize = Size();
std::vector<double> Values(ValuesSize);
diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp
index ad6ee78fc..263c49f7e 100644
--- a/src/zencore/string.cpp
+++ b/src/zencore/string.cpp
@@ -1,6 +1,7 @@
// Copyright Epic Games, Inc. All Rights Reserved.
#include <zencore/memory.h>
+#include <zencore/memory/memory.h>
#include <zencore/string.h>
#include <zencore/testing.h>
diff --git a/src/zencore/system.cpp b/src/zencore/system.cpp
index f51273e0d..f37bdf423 100644
--- a/src/zencore/system.cpp
+++ b/src/zencore/system.cpp
@@ -4,6 +4,7 @@
#include <zencore/compactbinarybuilder.h>
#include <zencore/except.h>
+#include <zencore/memory/memory.h>
#include <zencore/string.h>
#if ZEN_PLATFORM_WINDOWS
diff --git a/src/zencore/trace.cpp b/src/zencore/trace.cpp
index f7e4c4b68..ef7cbf596 100644
--- a/src/zencore/trace.cpp
+++ b/src/zencore/trace.cpp
@@ -4,10 +4,86 @@
# include <zencore/config.h>
# include <zencore/zencore.h>
+# include <zencore/commandline.h>
+# include <zencore/logging.h>
+# include <zencore/string.h>
# define TRACE_IMPLEMENT 1
# include <zencore/trace.h>
+# include <zencore/memory/memorytrace.h>
+
+namespace zen {
+
+void
+TraceConfigure()
+{
+ // Configure channels based on command line options
+
+ using namespace std::literals;
+
+ constexpr std::string_view TraceOption = "--trace="sv;
+
+ std::function<void(const std::string_view&)> ProcessChannelList;
+
+ auto ProcessTraceArg = [&](const std::string_view& Arg) {
+ if (Arg == "default"sv)
+ {
+ ProcessChannelList("cpu,log"sv);
+ }
+ else if (Arg == "memory"sv)
+ {
+ ProcessChannelList("memtag,memalloc,callstack,module"sv);
+ }
+ else if (Arg == "memory_light"sv)
+ {
+ ProcessChannelList("memtag,memalloc"sv);
+ }
+ else if (Arg == "memtag"sv)
+ {
+ // memtag actually traces to the memalloc channel
+ ProcessChannelList("memalloc"sv);
+ }
+ else
+ {
+ // Presume that the argument is a trace channel name
+
+ StringBuilder<128> AnsiChannel;
+ AnsiChannel << Arg;
+
+ const bool IsEnabled = trace::ToggleChannel(AnsiChannel.c_str(), true);
+
+ if (IsEnabled == false)
+ {
+				// Logging from here could be iffy, but some feedback mechanism
+				// is needed to tell users that an unrecognized channel name was ignored
+ }
+ }
+ };
+
+ ProcessChannelList = [&](const std::string_view& OptionArgs) { IterateCommaSeparatedValue(OptionArgs, ProcessTraceArg); };
+
+ bool TraceOptionPresent = false;
+
+ std::function<void(const std::string_view&)> ProcessArg = [&](const std::string_view& Arg) {
+ if (Arg.starts_with(TraceOption))
+ {
+ const std::string_view OptionArgs = Arg.substr(TraceOption.size());
+
+ TraceOptionPresent = true;
+
+ ProcessChannelList(OptionArgs);
+ }
+ };
+
+ IterateCommandlineArgs(ProcessArg);
+
+ if (!TraceOptionPresent)
+ {
+ ProcessTraceArg("default"sv);
+ }
+}
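+
+////////////////////////////////////////////////////////////////////////////////
+// Examples (illustrative): the aliases handled above make these equivalent:
+//
+//   --trace=memory        -> --trace=memtag,memalloc,callstack,module
+//   --trace=memory_light  -> --trace=memtag,memalloc
+//   (no --trace option)   -> --trace=default -> --trace=cpu,log
+//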
+
void
TraceInit(std::string_view ProgramName)
{
@@ -38,6 +114,16 @@ TraceInit(std::string_view ProgramName)
# endif
CommandLineString,
ZEN_CFG_VERSION_BUILD_STRING);
+
+ atexit([] {
+# if ZEN_WITH_MEMTRACK
+ zen::MemoryTrace_Shutdown();
+# endif
+ trace::Update();
+ TraceShutdown();
+ });
+
+ TraceConfigure();
}
void
@@ -70,13 +156,11 @@ TraceStart(std::string_view ProgramName, const char* HostOrPath, TraceType Type)
case TraceType::None:
break;
}
- trace::ToggleChannel("cpu", true);
}
bool
TraceStop()
{
- trace::ToggleChannel("cpu", false);
if (trace::Stop())
{
return true;
@@ -84,4 +168,6 @@ TraceStop()
return false;
}
+} // namespace zen
+
#endif // ZEN_WITH_TRACE
diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua
index 5f2d95e16..21b47b484 100644
--- a/src/zencore/xmake.lua
+++ b/src/zencore/xmake.lua
@@ -3,6 +3,7 @@
target('zencore')
set_kind("static")
set_group("libs")
+ add_options("zentrace", "zenmimalloc", "zenrpmalloc")
add_headerfiles("**.h")
add_configfiles("include/zencore/config.h.in")
on_load(function (target)
@@ -12,10 +13,25 @@ target('zencore')
end)
set_configdir("include/zencore")
add_files("**.cpp")
+
+ if has_config("zenrpmalloc") then
+ set_languages("c17", "cxx20")
+ if is_os("windows") then
+ add_cflags("/experimental:c11atomics")
+ end
+ add_defines("RPMALLOC_FIRST_CLASS_HEAPS=1", "ENABLE_STATISTICS=1", "ENABLE_OVERRIDE=0")
+ add_files("$(projectdir)/thirdparty/rpmalloc/rpmalloc.c")
+ end
+
+ if has_config("zenmimalloc") then
+ add_packages("vcpkg::mimalloc")
+ end
+
add_includedirs("include", {public=true})
add_includedirs("$(projectdir)/thirdparty/utfcpp/source")
add_includedirs("$(projectdir)/thirdparty/Oodle/include")
add_includedirs("$(projectdir)/thirdparty/trace", {public=true})
+ add_includedirs("$(projectdir)/thirdparty/rpmalloc")
if is_os("windows") then
add_linkdirs("$(projectdir)/thirdparty/Oodle/lib/Win64")
add_links("oo2core_win64")
@@ -27,14 +43,12 @@ target('zencore')
add_linkdirs("$(projectdir)/thirdparty/Oodle/lib/Mac_x64")
add_links("oo2coremac64")
end
- add_options("zentrace")
add_deps("zenbase")
add_packages(
"vcpkg::blake3",
"vcpkg::json11",
"vcpkg::ryml",
"vcpkg::c4core",
- "vcpkg::mimalloc",
"vcpkg::openssl", -- required for crypto
"vcpkg::spdlog")
diff --git a/src/zenhttp-test/zenhttp-test.cpp b/src/zenhttp-test/zenhttp-test.cpp
index 440e85a9f..49db1ba54 100644
--- a/src/zenhttp-test/zenhttp-test.cpp
+++ b/src/zenhttp-test/zenhttp-test.cpp
@@ -2,14 +2,9 @@
#include <zencore/filesystem.h>
#include <zencore/logging.h>
+#include <zencore/memory/newdelete.h>
#include <zenhttp/zenhttp.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
# include <zencore/testing.h>
@@ -18,9 +13,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zenhttp_forcelinktests();
diff --git a/src/zennet-test/zennet-test.cpp b/src/zennet-test/zennet-test.cpp
index f7f54e6ad..482d3c617 100644
--- a/src/zennet-test/zennet-test.cpp
+++ b/src/zennet-test/zennet-test.cpp
@@ -4,11 +4,7 @@
#include <zencore/logging.h>
#include <zennet/zennet.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char** argv)
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zennet_forcelinktests();
diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp
index ca2257361..e3f701be1 100644
--- a/src/zenserver-test/zenserver-test.cpp
+++ b/src/zenserver-test/zenserver-test.cpp
@@ -54,11 +54,7 @@ ZEN_THIRD_PARTY_INCLUDES_END
# include <process.h>
#endif
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
//////////////////////////////////////////////////////////////////////////
@@ -101,9 +97,6 @@ zen::ZenServerEnvironment TestEnv;
int
main(int argc, char** argv)
{
-# if ZEN_USE_MIMALLOC
- mi_version();
-# endif
using namespace std::literals;
using namespace zen;
diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp
index bedab7049..0108e8b9f 100644
--- a/src/zenserver/config.cpp
+++ b/src/zenserver/config.cpp
@@ -593,6 +593,9 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions)
options.add_options()("detach",
"Indicate whether zenserver should detach from parent process group",
cxxopts::value<bool>(ServerOptions.Detach)->default_value("true"));
+ options.add_options()("malloc",
+ "Configure memory allocator subsystem",
+ cxxopts::value(ServerOptions.MemoryOptions)->default_value("mimalloc"));
// clang-format off
options.add_options("logging")
@@ -713,6 +716,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions)
#if ZEN_WITH_TRACE
options.add_option("ue-trace",
"",
+ "trace",
+ "Specify which trace channels should be enabled",
+ cxxopts::value<std::string>(ServerOptions.TraceChannels)->default_value(""),
+ "");
+
+ options.add_option("ue-trace",
+ "",
"tracehost",
"Hostname to send the trace to",
cxxopts::value<std::string>(ServerOptions.TraceHost)->default_value(""),
diff --git a/src/zenserver/config.h b/src/zenserver/config.h
index 5c56695f3..c7781aada 100644
--- a/src/zenserver/config.h
+++ b/src/zenserver/config.h
@@ -176,9 +176,11 @@ struct ZenServerOptions
std::string Loggers[zen::logging::level::LogLevelCount];
std::string ScrubOptions;
#if ZEN_WITH_TRACE
- std::string TraceHost; // Host name or IP address to send trace data to
- std::string TraceFile; // Path of a file to write a trace
+ std::string TraceChannels; // Trace channels to enable
+ std::string TraceHost; // Host name or IP address to send trace data to
+ std::string TraceFile; // Path of a file to write a trace
#endif
+ std::string MemoryOptions; // Memory allocation options
std::string CommandLine;
};
diff --git a/src/zenserver/diag/logging.cpp b/src/zenserver/diag/logging.cpp
index 595be70cb..0d96cd8d6 100644
--- a/src/zenserver/diag/logging.cpp
+++ b/src/zenserver/diag/logging.cpp
@@ -6,6 +6,7 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
+#include <zencore/memory/llm.h>
#include <zencore/session.h>
#include <zencore/string.h>
#include <zenutil/logging.h>
@@ -20,6 +21,8 @@ namespace zen {
void
InitializeServerLogging(const ZenServerOptions& InOptions)
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
const LoggingOptions LogOptions = {.IsDebug = InOptions.IsDebug,
.IsVerbose = false,
.IsTest = InOptions.IsTest,
@@ -79,6 +82,8 @@ InitializeServerLogging(const ZenServerOptions& InOptions)
void
ShutdownServerLogging()
{
+ UE_MEMSCOPE(ELLMTag::Logging);
+
zen::ShutdownLogging();
}
diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp
index 2fb01ebf1..4444241cc 100644
--- a/src/zenserver/main.cpp
+++ b/src/zenserver/main.cpp
@@ -17,16 +17,15 @@
#include <zencore/trace.h>
#include <zenhttp/httpserver.h>
+#include <zencore/memory/fmalloc.h>
+#include <zencore/memory/memory.h>
+#include <zencore/memory/memorytrace.h>
+#include <zencore/memory/newdelete.h>
+
#include "config.h"
#include "diag/logging.h"
#include "sentryintegration.h"
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
-
#if ZEN_PLATFORM_WINDOWS
# include <zencore/windows.h>
# include "windows/service.h"
@@ -354,9 +353,6 @@ test_main(int argc, char** argv)
int
main(int argc, char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
using namespace zen;
if (argc >= 2)
@@ -433,9 +429,17 @@ main(int argc, char* argv[])
{
TraceInit("zenserver");
}
- atexit(TraceShutdown);
#endif // ZEN_WITH_TRACE
+#if ZEN_WITH_MEMTRACK
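+	// Wrap the current allocator so allocations are reported as trace events.
+	// MemoryTrace_Create() presumably returns GMalloc unchanged when memory
+	// tracing was not requested, in which case nothing is swapped in.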
+ FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc);
+ if (TraceMalloc != GMalloc)
+ {
+ GMalloc = TraceMalloc;
+ MemoryTrace_Initialize();
+ }
+#endif
+
#if ZEN_PLATFORM_WINDOWS
if (ServerOptions.InstallService)
{
diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp
index 3ad9e620b..e5b312984 100644
--- a/src/zenstore-test/zenstore-test.cpp
+++ b/src/zenstore-test/zenstore-test.cpp
@@ -4,11 +4,7 @@
#include <zencore/logging.h>
#include <zenstore/zenstore.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zenstore_forcelinktests();
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 2031804c9..62ed44bbb 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -7,7 +7,7 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
-#include <zencore/memory.h>
+#include <zencore/memory/memory.h>
#include <zencore/scopeguard.h>
#include <zencore/string.h>
#include <zencore/testing.h>
diff --git a/src/zenutil-test/zenutil-test.cpp b/src/zenutil-test/zenutil-test.cpp
index f95b7e888..fadaf0995 100644
--- a/src/zenutil-test/zenutil-test.cpp
+++ b/src/zenutil-test/zenutil-test.cpp
@@ -4,11 +4,7 @@
#include <zencore/logging.h>
#include <zenutil/zenutil.h>
-#if ZEN_USE_MIMALLOC
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <mimalloc-new-delete.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-#endif
+#include <zencore/memory/newdelete.h>
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
@@ -18,9 +14,6 @@ ZEN_THIRD_PARTY_INCLUDES_END
int
main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
{
-#if ZEN_USE_MIMALLOC
- mi_version();
-#endif
#if ZEN_WITH_TESTS
zen::zenutil_forcelinktests();
diff --git a/src/zenutil/basicfile.cpp b/src/zenutil/basicfile.cpp
index 73f27b587..391c150c6 100644
--- a/src/zenutil/basicfile.cpp
+++ b/src/zenutil/basicfile.cpp
@@ -6,6 +6,7 @@
#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
+#include <zencore/memory/memory.h>
#include <zencore/testing.h>
#include <zencore/testutils.h>
diff --git a/thirdparty/rpmalloc/malloc.c b/thirdparty/rpmalloc/malloc.c
new file mode 100644
index 000000000..835eff18e
--- /dev/null
+++ b/thirdparty/rpmalloc/malloc.c
@@ -0,0 +1,367 @@
+/* malloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc implementation in C11.
+ * The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
+ *
+ */
+
+/* clang-format off */
+
+//
+// This file provides overrides for the standard library malloc entry points for C and new/delete operators for C++
+// It also provides automatic initialization/finalization of process and threads
+//
+
+#if defined(__TINYC__)
+#include <sys/types.h>
+#endif
+
+#if (defined(__GNUC__) || defined(__clang__))
+#pragma GCC visibility push(default)
+#endif
+
+#define USE_IMPLEMENT 1
+#define USE_INTERPOSE 0
+#define USE_ALIAS 0
+
+#if defined(__APPLE__)
+#undef USE_INTERPOSE
+#define USE_INTERPOSE 1
+
+typedef struct interpose_t {
+ void* new_func;
+ void* orig_func;
+} interpose_t;
+
+#define MAC_INTERPOSE_PAIR(newf, oldf) { (void*)newf, (void*)oldf }
+#define MAC_INTERPOSE_SINGLE(newf, oldf) \
+__attribute__((used)) static const interpose_t macinterpose##newf##oldf \
+__attribute__ ((section("__DATA, __interpose"))) = MAC_INTERPOSE_PAIR(newf, oldf)
+
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#undef USE_IMPLEMENT
+#undef USE_ALIAS
+#define USE_IMPLEMENT 0
+#define USE_ALIAS 1
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable : 4100)
+#undef malloc
+#undef free
+#undef calloc
+#endif
+
+#if ENABLE_OVERRIDE
+
+typedef struct rp_nothrow_t { int __dummy; } rp_nothrow_t;
+
+#if USE_INTERPOSE || USE_ALIAS
+
+static void* rpmalloc_nothrow(size_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+static void* rpaligned_alloc_reverse(size_t size, size_t align) { return rpaligned_alloc(align, size); }
+static void* rpaligned_alloc_reverse_nothrow(size_t size, size_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+static void rpfree_size(void* p, size_t size) { (void)sizeof(size); rpfree(p); }
+static void rpfree_aligned(void* p, size_t align) { (void)sizeof(align); rpfree(p); }
+static void rpfree_size_aligned(void* p, size_t size, size_t align) { (void)sizeof(size); (void)sizeof(align); rpfree(p); }
+
+#endif
+
+extern inline void* RPMALLOC_CDECL rpvalloc(size_t size) {
+ return rpaligned_alloc(os_page_size, size);
+}
+
+extern inline void* RPMALLOC_CDECL
+rppvalloc(size_t size) {
+ const size_t page_size = os_page_size;
+ const size_t aligned_size = ((size + page_size - 1) / page_size) * page_size;
+#if ENABLE_VALIDATE_ARGS
+ if (aligned_size < size) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return rpaligned_alloc(os_page_size, aligned_size);
+}
+
+extern inline void* RPMALLOC_CDECL
+rpreallocarray(void* ptr, size_t count, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#ifdef _MSC_VER
+ int err = SizeTMult(count, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(count, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = count * size;
+#endif
+ return rprealloc(ptr, total);
+}
+
+#if USE_IMPLEMENT
+
+extern inline void* RPMALLOC_CDECL malloc(size_t size) { return rpmalloc(size); }
+extern inline void* RPMALLOC_CDECL calloc(size_t count, size_t size) { return rpcalloc(count, size); }
+extern inline void* RPMALLOC_CDECL realloc(void* ptr, size_t size) { return rprealloc(ptr, size); }
+extern inline void* RPMALLOC_CDECL reallocf(void* ptr, size_t size) { return rprealloc(ptr, size); }
+extern inline void* RPMALLOC_CDECL aligned_alloc(size_t alignment, size_t size) { return rpaligned_alloc(alignment, size); }
+extern inline void* RPMALLOC_CDECL memalign(size_t alignment, size_t size) { return rpmemalign(alignment, size); }
+extern inline int RPMALLOC_CDECL posix_memalign(void** memptr, size_t alignment, size_t size) { return rpposix_memalign(memptr, alignment, size); }
+extern inline void RPMALLOC_CDECL free(void* ptr) { rpfree(ptr); }
+extern inline void RPMALLOC_CDECL cfree(void* ptr) { rpfree(ptr); }
+extern inline size_t RPMALLOC_CDECL malloc_usable_size(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline size_t RPMALLOC_CDECL malloc_size(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline void* RPMALLOC_CDECL valloc(size_t size) { return rpvalloc(size); }
+extern inline void* RPMALLOC_CDECL pvalloc(size_t size) { return rppvalloc(size); }
+extern inline void* RPMALLOC_CDECL reallocarray(void* ptr, size_t count, size_t size) { return rpreallocarray(ptr, count, size); }
+
+#ifdef _WIN32
+extern inline void* RPMALLOC_CDECL _malloc_base(size_t size) { return rpmalloc(size); }
+extern inline void RPMALLOC_CDECL _free_base(void* ptr) { rpfree(ptr); }
+extern inline void* RPMALLOC_CDECL _calloc_base(size_t count, size_t size) { return rpcalloc(count, size); }
+extern inline size_t RPMALLOC_CDECL _msize(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline size_t RPMALLOC_CDECL _msize_base(void* ptr) { return rpmalloc_usable_size(ptr); }
+extern inline void* RPMALLOC_CDECL _realloc_base(void* ptr, size_t size) { return rprealloc(ptr, size); }
+#endif
+
+#ifdef _WIN32
+// For Windows, #include <rpnew.h> in one source file to get the C++ operator overrides implemented in your module
+#else
+// Overload the C++ operators using the mangled names (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
+// operators delete and delete[]
+#define RPDEFVIS __attribute__((visibility("default")))
+extern void _ZdlPv(void* p); void RPDEFVIS _ZdlPv(void* p) { rpfree(p); }
+extern void _ZdaPv(void* p); void RPDEFVIS _ZdaPv(void* p) { rpfree(p); }
+#if ARCH_64BIT
+// 64-bit operators new and new[], normal and aligned
+extern void* _Znwm(uint64_t size); void* RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); }
+extern void* _Znam(uint64_t size); void* RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); }
+extern void* _Znwmm(uint64_t size, uint64_t align); void* RPDEFVIS _Znwmm(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _Znamm(uint64_t size, uint64_t align); void* RPDEFVIS _Znamm(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwmSt11align_val_t(uint64_t size, uint64_t align); void* RPDEFVIS _ZnwmSt11align_val_t(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnamSt11align_val_t(uint64_t size, uint64_t align); void* RPDEFVIS _ZnamSt11align_val_t(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); void* RPDEFVIS _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); void* RPDEFVIS _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t); void* RPDEFVIS _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+extern void* _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t); void* RPDEFVIS _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+// 64-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvm(void* p, uint64_t size); void RPDEFVIS _ZdlPvm(void* p, uint64_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdaPvm(void* p, uint64_t size); void RPDEFVIS _ZdaPvm(void* p, uint64_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdlPvSt11align_val_t(void* p, uint64_t align); void RPDEFVIS _ZdlPvSt11align_val_t(void* p, uint64_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdaPvSt11align_val_t(void* p, uint64_t align); void RPDEFVIS _ZdaPvSt11align_val_t(void* p, uint64_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdlPvmSt11align_val_t(void* p, uint64_t size, uint64_t align); void RPDEFVIS _ZdlPvmSt11align_val_t(void* p, uint64_t size, uint64_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+extern void _ZdaPvmSt11align_val_t(void* p, uint64_t size, uint64_t align); void RPDEFVIS _ZdaPvmSt11align_val_t(void* p, uint64_t size, uint64_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+#else
+// 32-bit operators new and new[], normal and aligned
+extern void* _Znwj(uint32_t size); void* RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); }
+extern void* _Znaj(uint32_t size); void* RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); }
+extern void* _Znwjj(uint32_t size, uint32_t align); void* RPDEFVIS _Znwjj(uint32_t size, uint32_t align) { return rpaligned_alloc(align, size); }
+extern void* _Znajj(uint32_t size, uint32_t align); void* RPDEFVIS _Znajj(uint32_t size, uint32_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwjSt11align_val_t(size_t size, size_t align); void* RPDEFVIS _ZnwjSt11align_val_t(size_t size, size_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnajSt11align_val_t(size_t size, size_t align); void* RPDEFVIS _ZnajSt11align_val_t(size_t size, size_t align) { return rpaligned_alloc(align, size); }
+extern void* _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t); void* RPDEFVIS _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t); void* RPDEFVIS _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) { (void)sizeof(t); return rpmalloc(size); }
+extern void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t); void* RPDEFVIS _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+extern void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t); void* RPDEFVIS _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) { (void)sizeof(t); return rpaligned_alloc(align, size); }
+// 32-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvj(void* p, uint32_t size); void RPDEFVIS _ZdlPvj(void* p, uint32_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdaPvj(void* p, uint32_t size); void RPDEFVIS _ZdaPvj(void* p, uint32_t size) { rpfree(p); (void)sizeof(size); }
+extern void _ZdlPvSt11align_val_t(void* p, uint32_t align); void RPDEFVIS _ZdlPvSt11align_val_t(void* p, uint32_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdaPvSt11align_val_t(void* p, uint32_t align); void RPDEFVIS _ZdaPvSt11align_val_t(void* p, uint32_t align) { rpfree(p); (void)sizeof(align); }
+extern void _ZdlPvjSt11align_val_t(void* p, uint32_t size, uint32_t align); void RPDEFVIS _ZdlPvjSt11align_val_t(void* p, uint32_t size, uint32_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+extern void _ZdaPvjSt11align_val_t(void* p, uint32_t size, uint32_t align); void RPDEFVIS _ZdaPvjSt11align_val_t(void* p, uint32_t size, uint32_t align) { rpfree(p); (void)sizeof(size); (void)sizeof(align); }
+#endif
+#endif
+#endif
+
+#if USE_INTERPOSE
+
+__attribute__((used)) static const interpose_t macinterpose_malloc[]
+__attribute__ ((section("__DATA, __interpose"))) = {
+ //new and new[]
+ MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
+ MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znwmm),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znamm),
+ MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t),
+ MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnwmSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnamSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, _ZnwmSt11align_val_tRKSt9nothrow_t),
+ MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, _ZnamSt11align_val_tRKSt9nothrow_t),
+ //delete and delete[]
+ MAC_INTERPOSE_PAIR(rpfree, _ZdlPv),
+ MAC_INTERPOSE_PAIR(rpfree, _ZdaPv),
+ MAC_INTERPOSE_PAIR(rpfree_size, _ZdlPvm),
+ MAC_INTERPOSE_PAIR(rpfree_size, _ZdaPvm),
+ MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdlPvSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdaPvSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdlPvmSt11align_val_t),
+ MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdaPvmSt11align_val_t),
+ //libc entry points
+ MAC_INTERPOSE_PAIR(rpmalloc, malloc),
+ MAC_INTERPOSE_PAIR(rpmalloc, calloc),
+ MAC_INTERPOSE_PAIR(rprealloc, realloc),
+ MAC_INTERPOSE_PAIR(rprealloc, reallocf),
+ MAC_INTERPOSE_PAIR(rpvalloc, valloc),
+#if defined(__MAC_10_15) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_15
+ MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc),
+#endif
+ MAC_INTERPOSE_PAIR(rpmemalign, memalign),
+ MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign),
+ MAC_INTERPOSE_PAIR(rpfree, free),
+ MAC_INTERPOSE_PAIR(rpfree, cfree),
+ MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
+ MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)
+};
+
+#endif
+
+#if USE_ALIAS
+
+#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default")));
+
+// Alias the C++ operators using the mangled names (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
+
+// operators delete and delete[]
+void _ZdlPv(void* p) RPALIAS(rpfree)
+void _ZdaPv(void* p) RPALIAS(rpfree)
+
+#if ARCH_64BIT
+// 64-bit operators new and new[], normal and aligned
+void* _Znwm(uint64_t size) RPALIAS(rpmalloc)
+void* _Znam(uint64_t size) RPALIAS(rpmalloc)
+void* _Znwmm(uint64_t size, uint64_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _Znamm(uint64_t size, uint64_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwmSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnamSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwmRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnamRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+// 64-bit operators delete and delete[], sized and aligned
+void _ZdlPvm(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdaPvm(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdlPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdaPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+#else
+// 32-bit operators new and new[], normal and aligned
+void* _Znwj(uint32_t size) RPALIAS(rpmalloc)
+void* _Znaj(uint32_t size) RPALIAS(rpmalloc)
+void* _Znwjj(uint32_t size, uint32_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _Znajj(uint32_t size, uint32_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwjSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnajSt11align_val_t(size_t size, size_t align) RPALIAS(rpaligned_alloc_reverse)
+void* _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) RPALIAS(rpmalloc_nothrow)
+void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, rp_nothrow_t t) RPALIAS(rpaligned_alloc_reverse_nothrow)
+// 32-bit operators delete and delete[], sized and aligned
+void _ZdlPvj(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdaPvj(void* p, size_t n) RPALIAS(rpfree_size)
+void _ZdlPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdaPvSt11align_val_t(void* p, size_t a) RPALIAS(rpfree_aligned)
+void _ZdlPvjSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+void _ZdaPvjSt11align_val_t(void* p, size_t n, size_t a) RPALIAS(rpfree_size_aligned)
+#endif
+
+void* malloc(size_t size) RPALIAS(rpmalloc)
+void* calloc(size_t count, size_t size) RPALIAS(rpcalloc)
+void* realloc(void* ptr, size_t size) RPALIAS(rprealloc)
+void* reallocf(void* ptr, size_t size) RPALIAS(rprealloc)
+void* aligned_alloc(size_t alignment, size_t size) RPALIAS(rpaligned_alloc)
+void* memalign(size_t alignment, size_t size) RPALIAS(rpmemalign)
+int posix_memalign(void** memptr, size_t alignment, size_t size) RPALIAS(rpposix_memalign)
+void free(void* ptr) RPALIAS(rpfree)
+void cfree(void* ptr) RPALIAS(rpfree)
+void* reallocarray(void* ptr, size_t count, size_t size) RPALIAS(rpreallocarray)
+void* valloc(size_t size) RPALIAS(rpvalloc)
+void* pvalloc(size_t size) RPALIAS(rppvalloc)
+#if defined(__ANDROID__) || defined(__FreeBSD__)
+size_t malloc_usable_size(const void* ptr) RPALIAS(rpmalloc_usable_size)
+#else
+size_t malloc_usable_size(void* ptr) RPALIAS(rpmalloc_usable_size)
+#endif
+size_t malloc_size(void* ptr) RPALIAS(rpmalloc_usable_size)
+
+// end USE_ALIAS
+#endif
+
+#if defined(__GLIBC__) && defined(__linux__) && 0
+
+void* __libc_malloc(size_t size) RPALIAS(rpmalloc)
+void* __libc_calloc(size_t count, size_t size) RPALIAS(rpcalloc)
+void* __libc_realloc(void* p, size_t size) RPALIAS(rprealloc)
+void __libc_free(void* p) RPALIAS(rpfree)
+void __libc_cfree(void* p) RPALIAS(rpfree)
+
+extern void* __libc_valloc(size_t size);
+extern void* __libc_pvalloc(size_t size);
+
+void*
+__libc_valloc(size_t size) {
+ return valloc(size);
+}
+
+void*
+__libc_pvalloc(size_t size) {
+ return pvalloc(size);
+}
+void* __libc_memalign(size_t align, size_t size) RPALIAS(rpmemalign)
+
+int __posix_memalign(void** p, size_t align, size_t size) RPALIAS(rpposix_memalign)
+
+#endif
+
+// end ENABLE_OVERRIDE
+#endif
+
+#if ENABLE_DYNAMIC_LINK
+
+#ifdef _WIN32
+
+extern __declspec(dllexport) BOOL WINAPI
+DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved);
+
+extern __declspec(dllexport) BOOL WINAPI
+DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved) {
+ (void)sizeof(reserved);
+ (void)sizeof(instance);
+ if (reason == DLL_PROCESS_ATTACH)
+ rpmalloc_initialize(0);
+ else if (reason == DLL_PROCESS_DETACH)
+ rpmalloc_finalize();
+ else if (reason == DLL_THREAD_ATTACH)
+ rpmalloc_thread_initialize();
+ else if (reason == DLL_THREAD_DETACH)
+ rpmalloc_thread_finalize();
+ return TRUE;
+}
+
+#endif
+
+// end ENABLE_DYNAMIC_LINK
+#endif
+
+#if (defined(__GNUC__) || defined(__clang__))
+#pragma GCC visibility pop
+#endif
+
+/* clang-format on */
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c
new file mode 100644
index 000000000..7aecfb0f4
--- /dev/null
+++ b/thirdparty/rpmalloc/rpmalloc.c
@@ -0,0 +1,2341 @@
+/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias
+ * Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc
+ * implementation in C11. The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or
+ * modify it without any restrictions.
+ *
+ */
+
+#include "rpmalloc.h"
+
+#include <errno.h>
+#include <string.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdatomic.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wunused-macros"
+#pragma clang diagnostic ignored "-Wunused-function"
+#if __has_warning("-Wreserved-identifier")
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
+#if __has_warning("-Wstatic-in-inline")
+#pragma clang diagnostic ignored "-Wstatic-in-inline"
+#endif
+#if __has_warning("-Wunsafe-buffer-usage")
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
+#define PLATFORM_WINDOWS 1
+#define PLATFORM_POSIX 0
+#else
+#define PLATFORM_WINDOWS 0
+#define PLATFORM_POSIX 1
+#endif
+
+#if defined(_MSC_VER)
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE __attribute__((noinline))
+#endif
+
+#if PLATFORM_WINDOWS
+#include <windows.h>
+#include <fibersapi.h>
+static DWORD fls_key;
+#endif
+#if PLATFORM_POSIX
+#include <sys/mman.h>
+#include <sched.h>
+#include <unistd.h>
+#include <pthread.h>
+static pthread_key_t pthread_key;
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#define MAP_HUGETLB MAP_ALIGNED_SUPER
+#ifndef PROT_MAX
+#define PROT_MAX(f) 0
+#endif
+#else
+#define PROT_MAX(f) 0
+#endif
+#ifdef __sun
+extern int
+madvise(caddr_t, size_t, int);
+#endif
+#ifndef MAP_UNINITIALIZED
+#define MAP_UNINITIALIZED 0
+#endif
+#endif
+
+#if defined(__linux__) || defined(__ANDROID__)
+#include <sys/prctl.h>
+#if !defined(PR_SET_VMA)
+#define PR_SET_VMA 0x53564d41
+#define PR_SET_VMA_ANON_NAME 0
+#endif
+#endif
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+#include <mach/mach_vm.h>
+#include <mach/vm_statistics.h>
+#endif
+#include <pthread.h>
+#endif
+#if defined(__HAIKU__) || defined(__TINYC__)
+#include <pthread.h>
+#endif
+
+#include <limits.h>
+#if (INTPTR_MAX > INT32_MAX)
+#define ARCH_64BIT 1
+#define ARCH_32BIT 0
+#else
+#define ARCH_64BIT 0
+#define ARCH_32BIT 1
+#endif
+
+#if !defined(__has_builtin)
+#define __has_builtin(b) 0
+#endif
+
+#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
+#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
+
+////////////
+///
+/// Build time configurable limits
+///
+//////
+
+#ifndef ENABLE_VALIDATE_ARGS
+//! Enable validation of args to public entry points
+#define ENABLE_VALIDATE_ARGS 0
+#endif
+#ifndef ENABLE_ASSERTS
+//! Enable asserts
+#define ENABLE_ASSERTS 0
+#endif
+#ifndef ENABLE_UNMAP
+//! Enable unmapping memory pages
+#define ENABLE_UNMAP 1
+#endif
+#ifndef ENABLE_DECOMMIT
+//! Enable decommitting memory pages
+#define ENABLE_DECOMMIT 1
+#endif
+#ifndef ENABLE_DYNAMIC_LINK
+//! Enable building as dynamic library
+#define ENABLE_DYNAMIC_LINK 0
+#endif
+#ifndef ENABLE_OVERRIDE
+//! Enable standard library malloc/free/new/delete overrides
+#define ENABLE_OVERRIDE 1
+#endif
+#ifndef ENABLE_STATISTICS
+//! Enable statistics
+#define ENABLE_STATISTICS 0
+#endif
+
+////////////
+///
+/// Built in size configurations
+///
+//////
+
+#define PAGE_HEADER_SIZE 128
+#define SPAN_HEADER_SIZE PAGE_HEADER_SIZE
+
+#define SMALL_GRANULARITY 16
+
+#define SMALL_BLOCK_SIZE_LIMIT (4 * 1024)
+#define MEDIUM_BLOCK_SIZE_LIMIT (256 * 1024)
+#define LARGE_BLOCK_SIZE_LIMIT (8 * 1024 * 1024)
+
+#define SMALL_SIZE_CLASS_COUNT 73
+#define MEDIUM_SIZE_CLASS_COUNT 24
+#define LARGE_SIZE_CLASS_COUNT 20
+#define SIZE_CLASS_COUNT (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT + LARGE_SIZE_CLASS_COUNT)
+
+#define SMALL_PAGE_SIZE_SHIFT 16
+#define SMALL_PAGE_SIZE (1 << SMALL_PAGE_SIZE_SHIFT)
+#define SMALL_PAGE_MASK (~((uintptr_t)SMALL_PAGE_SIZE - 1))
+#define MEDIUM_PAGE_SIZE_SHIFT 22
+#define MEDIUM_PAGE_SIZE (1 << MEDIUM_PAGE_SIZE_SHIFT)
+#define MEDIUM_PAGE_MASK (~((uintptr_t)MEDIUM_PAGE_SIZE - 1))
+#define LARGE_PAGE_SIZE_SHIFT 26
+#define LARGE_PAGE_SIZE (1 << LARGE_PAGE_SIZE_SHIFT)
+#define LARGE_PAGE_MASK (~((uintptr_t)LARGE_PAGE_SIZE - 1))
+
+#define SPAN_SIZE (256 * 1024 * 1024)
+#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1)))
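+// Illustrative note (not part of the upstream source): spans are mapped with
+// 256MiB alignment, so masking away the low 28 bits of any block pointer
+// recovers the owning span header. For example, a block at 0x7f3a12345678
+// yields 0x7f3a12345678 & SPAN_MASK == 0x7f3a10000000.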
+
+////////////
+///
+/// Utility macros
+///
+//////
+
+#if ENABLE_ASSERTS
+#undef NDEBUG
+#if defined(_MSC_VER) && !defined(_DEBUG)
+#define _DEBUG
+#endif
+#include <assert.h>
+#define RPMALLOC_TOSTRING_M(x) #x
+#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
+#define rpmalloc_assert(truth, message) \
+ do { \
+ if (!(truth)) { \
+ assert((truth) && message); \
+ } \
+ } while (0)
+#else
+#define rpmalloc_assert(truth, message) \
+ do { \
+ } while (0)
+#endif
+
+#if __has_builtin(__builtin_assume)
+#define rpmalloc_assume(cond) __builtin_assume(cond)
+#elif defined(__GNUC__)
+#define rpmalloc_assume(cond) \
+ do { \
+ if (!__builtin_expect(cond, 0)) \
+ __builtin_unreachable(); \
+ } while (0)
+#elif defined(_MSC_VER)
+#define rpmalloc_assume(cond) __assume(cond)
+#else
+#define rpmalloc_assume(cond) 0
+#endif
+
+////////////
+///
+/// Statistics
+///
+//////
+
+#if ENABLE_STATISTICS
+
+typedef struct rpmalloc_statistics_t {
+ atomic_size_t page_mapped;
+ atomic_size_t page_mapped_peak;
+ atomic_size_t page_commit;
+ atomic_size_t page_decommit;
+ atomic_size_t page_active;
+ atomic_size_t page_active_peak;
+ atomic_size_t heap_count;
+} rpmalloc_statistics_t;
+
+static rpmalloc_statistics_t global_statistics;
+
+#else
+
+#endif
+
+////////////
+///
+/// Low level abstractions
+///
+//////
+
+static inline size_t
+rpmalloc_clz(uintptr_t x) {
+#if ARCH_64BIT
+#if defined(_MSC_VER) && !defined(__clang__)
+ return (size_t)_lzcnt_u64(x);
+#else
+ return (size_t)__builtin_clzll(x);
+#endif
+#else
+#if defined(_MSC_VER) && !defined(__clang__)
+ return (size_t)_lzcnt_u32(x);
+#else
+ return (size_t)__builtin_clzl(x);
+#endif
+#endif
+}
+
+static inline void
+wait_spin(void) {
+#if defined(_MSC_VER)
+#if defined(_M_ARM64)
+ __yield();
+#else
+ _mm_pause();
+#endif
+#elif defined(__x86_64__) || defined(__i386__)
+ __asm__ volatile("pause" ::: "memory");
+#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
+ __asm__ volatile("yield" ::: "memory");
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ // Unclear whether this has ever been compiled for these architectures, but kept as a precaution
+ __asm__ volatile("or 27,27,27");
+#elif defined(__sparc__)
+ __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
+#else
+ struct timespec ts = {0};
+ nanosleep(&ts, 0);
+#endif
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+
+#define EXPECTED(x) __builtin_expect((x), 1)
+#define UNEXPECTED(x) __builtin_expect((x), 0)
+
+#else
+
+#define EXPECTED(x) x
+#define UNEXPECTED(x) x
+
+#endif
+#if defined(__GNUC__) || defined(__clang__)
+
+#if __has_builtin(__builtin_memcpy_inline)
+#define memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
+#else
+#define memcpy_const(x, y, s) \
+ do { \
+ _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \
+ memcpy(x, y, s); \
+ } while (0)
+#endif
+
+#if __has_builtin(__builtin_memset_inline)
+#define memset_const(x, y, s) __builtin_memset_inline(x, y, s)
+#else
+#define memset_const(x, y, s) \
+ do { \
+ _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \
+ memset(x, y, s); \
+ } while (0)
+#endif
+#else
+#define memcpy_const(x, y, s) memcpy(x, y, s)
+#define memset_const(x, y, s) memset(x, y, s)
+#endif
+
+////////////
+///
+/// Data types
+///
+//////
+
+//! A memory heap, per thread
+typedef struct heap_t heap_t;
+//! Span of memory pages
+typedef struct span_t span_t;
+//! Memory page
+typedef struct page_t page_t;
+//! Memory block
+typedef struct block_t block_t;
+//! Size class for a memory block
+typedef struct size_class_t size_class_t;
+
+//! Memory page type
+typedef enum page_type_t {
+ PAGE_SMALL, // 64KiB
+ PAGE_MEDIUM, // 4MiB
+ PAGE_LARGE, // 64MiB
+ PAGE_HUGE
+} page_type_t;
+
+//! Block size class
+struct size_class_t {
+ //! Size of blocks in this class
+ uint32_t block_size;
+ //! Number of blocks in each chunk
+ uint32_t block_count;
+};
+
+//! A memory block
+struct block_t {
+ //! Next block in list
+ block_t* next;
+};
+
+//! A page contains blocks of a given size
+struct page_t {
+ //! Size class of blocks
+ uint32_t size_class;
+ //! Block size
+ uint32_t block_size;
+ //! Block count
+ uint32_t block_count;
+ //! Block initialized count
+ uint32_t block_initialized;
+ //! Block used count
+ uint32_t block_used;
+ //! Page type
+ page_type_t page_type;
+ //! Flag set if part of heap full list
+ uint32_t is_full : 1;
+ //! Flag set if part of heap free list
+ uint32_t is_free : 1;
+ //! Flag set if blocks are zero initialized
+ uint32_t is_zero : 1;
+ //! Flag set if memory pages have been decommitted
+ uint32_t is_decommitted : 1;
+ //! Flag set if containing aligned blocks
+ uint32_t has_aligned_block : 1;
+ //! Fast combination flag for either huge, fully allocated or has aligned blocks
+ uint32_t generic_free : 1;
+ //! Local free list count
+ uint32_t local_free_count;
+ //! Local free list
+ block_t* local_free;
+ //! Owning heap
+ heap_t* heap;
+ //! Next page in list
+ page_t* next;
+ //! Previous page in list
+ page_t* prev;
+ //! Multithreaded free list; block index in the low 32 bits, list count in the high 32 bits
+ atomic_ullong thread_free;
+};
+
+//! A span contains pages of a given type
+struct span_t {
+ //! Page header
+ page_t page;
+ //! Owning heap
+ heap_t* heap;
+ //! Page address mask
+ uintptr_t page_address_mask;
+ //! Number of pages initialized
+ uint32_t page_initialized;
+ //! Number of pages in use
+ uint32_t page_count;
+ //! Number of bytes per page
+ uint32_t page_size;
+ //! Page type
+ page_type_t page_type;
+ //! Offset to start of mapped memory region
+ uint32_t offset;
+ //! Mapped size
+ uint64_t mapped_size;
+ //! Next span in list
+ span_t* next;
+};
+
+//! Control structure for a heap, either a thread heap or a first class heap if enabled
+struct heap_t {
+ //! Owning thread ID
+ uintptr_t owner_thread;
+ //! Heap local free list for small size classes
+ block_t* local_free[SIZE_CLASS_COUNT];
+ //! Available non-full pages for each size class
+ page_t* page_available[SIZE_CLASS_COUNT];
+ //! Free pages for each page type
+ page_t* page_free[3];
+ //! Free but still committed page count for each page type
+ uint32_t page_free_commit_count[3];
+ //! Multithreaded free list
+ atomic_uintptr_t thread_free[3];
+ //! Available partially initialized spans for each page type
+ span_t* span_partial[3];
+ //! Spans in full use for each page type
+ span_t* span_used[4];
+ //! Next heap in queue
+ heap_t* next;
+ //! Previous heap in queue
+ heap_t* prev;
+ //! Heap ID
+ uint32_t id;
+ //! Finalization state flag
+ uint32_t finalize;
+ //! Memory map region offset
+ uint32_t offset;
+ //! Memory map size
+ size_t mapped_size;
+};
+
+_Static_assert(sizeof(page_t) <= PAGE_HEADER_SIZE, "Invalid page header size");
+_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "Invalid span header size");
+_Static_assert(sizeof(heap_t) <= 4096, "Invalid heap size");
+
+////////////
+///
+/// Global data
+///
+//////
+
+//! Fallback heap
+static RPMALLOC_CACHE_ALIGNED heap_t global_heap_fallback;
+//! Default heap
+static heap_t* global_heap_default = &global_heap_fallback;
+//! Available heaps
+static heap_t* global_heap_queue;
+//! In use heaps
+static heap_t* global_heap_used;
+//! Lock for heap queue
+static atomic_uintptr_t global_heap_lock;
+//! Heap ID counter
+static atomic_uint global_heap_id = 1;
+//! Initialized flag
+static int global_rpmalloc_initialized;
+//! Memory interface
+static rpmalloc_interface_t* global_memory_interface;
+//! Default memory interface
+static rpmalloc_interface_t global_memory_interface_default;
+//! Current configuration
+static rpmalloc_config_t global_config = {0};
+//! Main thread ID
+static uintptr_t global_main_thread_id;
+
+//! Size classes
+#define SCLASS(n) \
+ { (n * SMALL_GRANULARITY), (SMALL_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+#define MCLASS(n) \
+ { (n * SMALL_GRANULARITY), (MEDIUM_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+#define LCLASS(n) \
+ { (n * SMALL_GRANULARITY), (LARGE_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+static const size_class_t global_size_class[SIZE_CLASS_COUNT] = {
+ SCLASS(1), SCLASS(1), SCLASS(2), SCLASS(3), SCLASS(4), SCLASS(5), SCLASS(6),
+ SCLASS(7), SCLASS(8), SCLASS(9), SCLASS(10), SCLASS(11), SCLASS(12), SCLASS(13),
+ SCLASS(14), SCLASS(15), SCLASS(16), SCLASS(17), SCLASS(18), SCLASS(19), SCLASS(20),
+ SCLASS(21), SCLASS(22), SCLASS(23), SCLASS(24), SCLASS(25), SCLASS(26), SCLASS(27),
+ SCLASS(28), SCLASS(29), SCLASS(30), SCLASS(31), SCLASS(32), SCLASS(33), SCLASS(34),
+ SCLASS(35), SCLASS(36), SCLASS(37), SCLASS(38), SCLASS(39), SCLASS(40), SCLASS(41),
+ SCLASS(42), SCLASS(43), SCLASS(44), SCLASS(45), SCLASS(46), SCLASS(47), SCLASS(48),
+ SCLASS(49), SCLASS(50), SCLASS(51), SCLASS(52), SCLASS(53), SCLASS(54), SCLASS(55),
+ SCLASS(56), SCLASS(57), SCLASS(58), SCLASS(59), SCLASS(60), SCLASS(61), SCLASS(62),
+ SCLASS(63), SCLASS(64), SCLASS(80), SCLASS(96), SCLASS(112), SCLASS(128), SCLASS(160),
+ SCLASS(192), SCLASS(224), SCLASS(256), MCLASS(320), MCLASS(384), MCLASS(448), MCLASS(512),
+ MCLASS(640), MCLASS(768), MCLASS(896), MCLASS(1024), MCLASS(1280), MCLASS(1536), MCLASS(1792),
+ MCLASS(2048), MCLASS(2560), MCLASS(3072), MCLASS(3584), MCLASS(4096), MCLASS(5120), MCLASS(6144),
+ MCLASS(7168), MCLASS(8192), MCLASS(10240), MCLASS(12288), MCLASS(14336), MCLASS(16384), LCLASS(20480),
+ LCLASS(24576), LCLASS(28672), LCLASS(32768), LCLASS(40960), LCLASS(49152), LCLASS(57344), LCLASS(65536),
+ LCLASS(81920), LCLASS(98304), LCLASS(114688), LCLASS(131072), LCLASS(163840), LCLASS(196608), LCLASS(229376),
+ LCLASS(262144), LCLASS(327680), LCLASS(393216), LCLASS(458752), LCLASS(524288)};
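+// Worked example (illustrative only): SCLASS(4) expands to
+// { 4 * 16, (65536 - 128) / (4 * 16) } == { 64, 1022 }, i.e. 64-byte blocks
+// with 1022 blocks per 64KiB small page; MCLASS(1024) expands to
+// { 16384, (4194304 - 128) / 16384 } == { 16384, 255 }.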
+
+//! Threshold number of pages for when free pages are decommitted
+static uint32_t global_page_free_overflow[4] = {16, 8, 2, 0};
+
+//! Number of pages to retain when free page threshold overflows
+static uint32_t global_page_free_retain[4] = {4, 2, 1, 0};
+
+//! OS huge page support
+static int os_huge_pages;
+//! OS memory map granularity
+static size_t os_map_granularity;
+//! OS memory page size
+static size_t os_page_size;
+
+////////////
+///
+/// Thread local heap and ID
+///
+//////
+
+//! Current thread heap
+#if defined(_MSC_VER) && !defined(__clang__)
+#define TLS_MODEL
+#define _Thread_local __declspec(thread)
+#else
+// #define TLS_MODEL __attribute__((tls_model("initial-exec")))
+#define TLS_MODEL
+#endif
+static _Thread_local heap_t* global_thread_heap TLS_MODEL = &global_heap_fallback;
+
+static heap_t*
+heap_allocate(int first_class);
+
+static void
+heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count);
+
+//! Fast thread ID
+static inline uintptr_t
+get_thread_id(void) {
+#if defined(_WIN32)
+ return (uintptr_t)((void*)NtCurrentTeb());
+#else
+ void* thp = __builtin_thread_pointer();
+ return (uintptr_t)thp;
+#endif
+ /*
+ #elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
+ uintptr_t tid;
+ #if defined(__i386__)
+ __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
+ #elif defined(__x86_64__)
+ #if defined(__MACH__)
+ __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
+ #else
+ __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
+ #endif
+ #elif defined(__arm__)
+ __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
+ #elif defined(__aarch64__)
+ #if defined(__MACH__)
+ // tpidr_el0 likely unused, always return 0 on iOS
+ __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
+ #else
+ __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
+ #endif
+ #else
+ #error This platform needs implementation of get_thread_id()
+ #endif
+ return tid;
+ #else
+ #error This platform needs implementation of get_thread_id()
+ #endif
+ */
+}
+
+//! Set the current thread heap
+static void
+set_thread_heap(heap_t* heap) {
+ global_thread_heap = heap;
+ if (heap && (heap->id != 0)) {
+ rpmalloc_assert(heap->id != 0, "Default heap being used");
+ heap->owner_thread = get_thread_id();
+ }
+#if PLATFORM_WINDOWS
+ FlsSetValue(fls_key, heap);
+#else
+ pthread_setspecific(pthread_key, heap);
+#endif
+}
+
+static heap_t*
+get_thread_heap_allocate(void) {
+ heap_t* heap = heap_allocate(0);
+ set_thread_heap(heap);
+ return heap;
+}
+
+//! Get the current thread heap
+static inline heap_t*
+get_thread_heap(void) {
+ return global_thread_heap;
+}
+
+//! Get the size class from given size in bytes for tiny blocks (below 16 times the minimum granularity)
+static inline uint32_t
+get_size_class_tiny(size_t size) {
+ return (((uint32_t)size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY);
+}
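+// Worked example (illustrative only): get_size_class_tiny(40) returns
+// (40 + 15) / 16 == 3, and global_size_class[3] is SCLASS(3) with a
+// 48-byte block size, the smallest class that fits 40 bytes.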
+
+//! Get the size class from given size in bytes
+static inline uint32_t
+get_size_class(size_t size) {
+ uintptr_t minblock_count = (size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY;
+ // For sizes up to 64 times the minimum granularity (i.e. 1024 bytes) the size class is equal to the number of
+ // such blocks
+ if (size <= (SMALL_GRANULARITY * 64)) {
+ rpmalloc_assert(global_size_class[minblock_count].block_size >= size, "Size class misconfiguration");
+ return (uint32_t)(minblock_count ? minblock_count : 1);
+ }
+ --minblock_count;
+ // Calculate the position of the most significant bit; since minblock_count is now guaranteed to be >= 64, this
+ // position is guaranteed to be >= 6
+#if ARCH_64BIT
+ const uint32_t most_significant_bit = (uint32_t)(63 - (int)rpmalloc_clz(minblock_count));
+#else
+ const uint32_t most_significant_bit = (uint32_t)(31 - (int)rpmalloc_clz(minblock_count));
+#endif
+ // Class sizes are of the bit format [..]000xxx000[..]; we already have the position of the most significant
+ // bit, now calculate the subclass from the remaining two bits
+ const uint32_t subclass_bits = (minblock_count >> (most_significant_bit - 2)) & 0x03;
+ const uint32_t class_idx = (uint32_t)((most_significant_bit << 2) + subclass_bits) + 41;
+ rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx].block_size >= size),
+ "Size class misconfiguration");
+ rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx - 1].block_size < size),
+ "Size class misconfiguration");
+ return class_idx;
+}
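+// Worked example (illustrative only): for size == 2048, minblock_count is
+// 128 and the size exceeds 1024 bytes, so minblock_count is decremented to
+// 127; the most significant bit position is 6, the subclass bits are
+// (127 >> 4) & 3 == 3, and class_idx == (6 << 2) + 3 + 41 == 68, which maps
+// to SCLASS(128) with a 2048-byte block size.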
+
+static inline page_type_t
+get_page_type(uint32_t size_class) {
+ if (size_class < SMALL_SIZE_CLASS_COUNT)
+ return PAGE_SMALL;
+ else if (size_class < (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT))
+ return PAGE_MEDIUM;
+ else if (size_class < SIZE_CLASS_COUNT)
+ return PAGE_LARGE;
+ return PAGE_HUGE;
+}
+
+static inline size_t
+get_page_aligned_size(size_t size) {
+ size_t unalign = size % global_config.page_size;
+ if (unalign)
+ size += global_config.page_size - unalign;
+ return size;
+}
+
+////////////
+///
+/// OS entry points
+///
+//////
+
+static void
+os_set_page_name(void* address, size_t size) {
+#if defined(__linux__) || defined(__ANDROID__)
+ const char* name = os_huge_pages ? global_config.huge_page_name : global_config.page_name;
+ if ((address == MAP_FAILED) || !name)
+ return;
+ // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails
+ // (e.g. an invalid name), this is essentially a no-op.
+ (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name);
+#else
+ (void)sizeof(size);
+ (void)sizeof(address);
+#endif
+}
+
+static void*
+os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
+ size_t map_size = size + alignment;
+#if PLATFORM_WINDOWS
+ // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses
+ // are actually accessed". But if decommit is enabled it is better to not commit immediately and instead commit per
+ // page, to avoid saturating the OS commit limit
+#if ENABLE_DECOMMIT
+ DWORD do_commit = 0;
+#else
+ DWORD do_commit = MEM_COMMIT;
+#endif
+ void* ptr =
+ VirtualAlloc(0, map_size, (os_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | do_commit, PAGE_READWRITE);
+#else
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
+#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+ int fd = (int)VM_MAKE_TAG(240U);
+ if (os_huge_pages)
+ fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, fd, 0);
+#elif defined(MAP_HUGETLB)
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
+ (os_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
+#if defined(MADV_HUGEPAGE)
+ // In some configurations huge page allocations can fail, so fall back to
+ // normal allocations and promote the region to a transparent huge page
+ if ((ptr == MAP_FAILED || !ptr) && os_huge_pages) {
+ ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (ptr && ptr != MAP_FAILED) {
+ int prm = madvise(ptr, size, MADV_HUGEPAGE);
+ (void)prm;
+ rpmalloc_assert((prm == 0), "Failed to promote the page to transparent huge page");
+ }
+ }
+#endif
+ os_set_page_name(ptr, map_size);
+#elif defined(MAP_ALIGNED)
+ const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
+#elif defined(MAP_ALIGN)
+ caddr_t base = (os_huge_pages ? (caddr_t)(4 << 20) : 0);
+ void* ptr = mmap(base, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
+#else
+ void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+#endif
+ if (ptr == MAP_FAILED)
+ ptr = 0;
+#endif
+ if (!ptr) {
+ if (global_memory_interface->map_fail_callback) {
+ if (global_memory_interface->map_fail_callback(map_size))
+ return os_mmap(size, alignment, offset, mapped_size);
+ } else {
+ rpmalloc_assert(ptr != 0, "Failed to map more virtual memory");
+ }
+ return 0;
+ }
+ if (alignment) {
+ size_t padding = ((uintptr_t)ptr & (uintptr_t)(alignment - 1));
+ if (padding)
+ padding = alignment - padding;
+ rpmalloc_assert(padding <= alignment, "Internal failure in padding");
+ rpmalloc_assert(!(padding % 8), "Internal failure in padding");
+ ptr = pointer_offset(ptr, padding);
+ *offset = padding;
+ }
+ *mapped_size = map_size;
+#if ENABLE_STATISTICS
+ size_t page_count = map_size / global_config.page_size;
+ size_t page_mapped_current =
+ atomic_fetch_add_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed) + page_count;
+ size_t page_mapped_peak = atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed);
+ while (page_mapped_current > page_mapped_peak) {
+ if (atomic_compare_exchange_weak_explicit(&global_statistics.page_mapped_peak, &page_mapped_peak,
+ page_mapped_current, memory_order_relaxed, memory_order_relaxed))
+ break;
+ }
+#if ENABLE_DECOMMIT
+ size_t page_active_current =
+ atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
+ size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
+ while (page_active_current > page_active_peak) {
+ if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
+ page_active_current, memory_order_relaxed, memory_order_relaxed))
+ break;
+ }
+#endif
+#endif
+ return ptr;
+}
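+// Alignment example (illustrative only): with alignment == SPAN_SIZE and a
+// raw mapping at 0x7f3a12345678, padding == 0x10000000 - 0x2345678 ==
+// 0xdcba988, so the returned pointer is 0x7f3a20000000 and *offset records
+// the padding so the full region can be unmapped later.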
+
+static void
+os_mcommit(void* address, size_t size) {
+#if ENABLE_DECOMMIT
+ if (global_config.disable_decommit)
+ return;
+#if PLATFORM_WINDOWS
+ if (!VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE)) {
+ rpmalloc_assert(0, "Failed to commit virtual memory block");
+ }
+#else
+ /*
+ if (mprotect(address, size, PROT_READ | PROT_WRITE)) {
+ rpmalloc_assert(0, "Failed to commit virtual memory block");
+ }
+ */
+#endif
+#if ENABLE_STATISTICS
+ size_t page_count = size / global_config.page_size;
+ atomic_fetch_add_explicit(&global_statistics.page_commit, page_count, memory_order_relaxed);
+ size_t page_active_current =
+ atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
+ size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
+ while (page_active_current > page_active_peak) {
+ if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
+ page_active_current, memory_order_relaxed, memory_order_relaxed))
+ break;
+ }
+#endif
+#endif
+ (void)sizeof(address);
+ (void)sizeof(size);
+}
+
+static void
+os_mdecommit(void* address, size_t size) {
+#if ENABLE_DECOMMIT
+ if (global_config.disable_decommit)
+ return;
+#if PLATFORM_WINDOWS
+ if (!VirtualFree(address, size, MEM_DECOMMIT)) {
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+#else
+ /*
+ if (mprotect(address, size, PROT_NONE)) {
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+ */
+#if defined(MADV_DONTNEED)
+ if (madvise(address, size, MADV_DONTNEED)) {
+#elif defined(MADV_FREE_REUSABLE)
+ int ret;
+ while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN))
+ errno = 0;
+ if ((ret == -1) && (errno != 0)) {
+#elif defined(MADV_PAGEOUT)
+ if (madvise(address, size, MADV_PAGEOUT)) {
+#elif defined(MADV_FREE)
+ if (madvise(address, size, MADV_FREE)) {
+#else
+ if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
+#endif
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+#endif
+#if ENABLE_STATISTICS
+ size_t page_count = size / global_config.page_size;
+ atomic_fetch_add_explicit(&global_statistics.page_decommit, page_count, memory_order_relaxed);
+ size_t page_active_current =
+ atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed);
+ rpmalloc_assert(page_active_current >= page_count, "Decommit counter out of sync");
+ (void)sizeof(page_active_current);
+#endif
+#else
+ (void)sizeof(address);
+ (void)sizeof(size);
+#endif
+}
+
+static void
+os_munmap(void* address, size_t offset, size_t mapped_size) {
+ (void)sizeof(mapped_size);
+ address = pointer_offset(address, -(int32_t)offset);
+#if ENABLE_UNMAP
+#if PLATFORM_WINDOWS
+ if (!VirtualFree(address, 0, MEM_RELEASE)) {
+ rpmalloc_assert(0, "Failed to unmap virtual memory block");
+ }
+#else
+ if (munmap(address, mapped_size))
+ rpmalloc_assert(0, "Failed to unmap virtual memory block");
+#endif
+#if ENABLE_STATISTICS
+ size_t page_count = mapped_size / global_config.page_size;
+ atomic_fetch_sub_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed);
+ atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed);
+#endif
+#endif
+}
+
+////////////
+///
+/// Page interface
+///
+//////
+
+static inline span_t*
+page_get_span(page_t* page) {
+ return (span_t*)((uintptr_t)page & SPAN_MASK);
+}
+
+static inline size_t
+page_get_size(page_t* page) {
+ if (page->page_type == PAGE_SMALL)
+ return SMALL_PAGE_SIZE;
+ else if (page->page_type == PAGE_MEDIUM)
+ return MEDIUM_PAGE_SIZE;
+ else if (page->page_type == PAGE_LARGE)
+ return LARGE_PAGE_SIZE;
+ else
+ return page_get_span(page)->page_size;
+}
+
+static inline int
+page_is_thread_heap(page_t* page) {
+#if RPMALLOC_FIRST_CLASS_HEAPS
+ return (!page->heap->owner_thread || (page->heap->owner_thread == get_thread_id()));
+#else
+ return (page->heap->owner_thread == get_thread_id());
+#endif
+}
+
+static inline block_t*
+page_block_start(page_t* page) {
+ return pointer_offset(page, PAGE_HEADER_SIZE);
+}
+
+static inline block_t*
+page_block(page_t* page, uint32_t block_index) {
+ return pointer_offset(page, PAGE_HEADER_SIZE + (page->block_size * block_index));
+}
+
+static inline uint32_t
+page_block_index(page_t* page, block_t* block) {
+ block_t* block_first = page_block_start(page);
+ return (uint32_t)pointer_diff(block, block_first) / page->block_size;
+}
+
+static inline uint32_t
+page_block_from_thread_free_list(page_t* page, uint64_t token, block_t** block) {
+ uint32_t block_index = (uint32_t)(token & 0xFFFFFFFFULL);
+ uint32_t list_count = (uint32_t)((token >> 32ULL) & 0xFFFFFFFFULL);
+ *block = list_count ? page_block(page, block_index) : 0;
+ return list_count;
+}
+
+static inline uint64_t
+page_block_to_thread_free_list(page_t* page, uint32_t block_index, uint32_t list_count) {
+ (void)sizeof(page);
+ return ((uint64_t)list_count << 32ULL) | (uint64_t)block_index;
+}
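+// Token layout example (illustrative only): pushing block index 5 with a
+// list count of 3 produces the token 0x0000000300000005; unpacking reads
+// list_count == 3 from the high 32 bits and block_index == 5 from the low
+// 32 bits.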
+
+static inline block_t*
+page_block_realign(page_t* page, block_t* block) {
+ void* blocks_start = page_block_start(page);
+ uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start);
+ return pointer_offset(block, -(int32_t)(block_offset % page->block_size));
+}
+
+static block_t*
+page_get_local_free_block(page_t* page) {
+ block_t* block = page->local_free;
+ page->local_free = block->next;
+ --page->local_free_count;
+ ++page->block_used;
+ return block;
+}
+
+static inline void
+page_decommit_memory_pages(page_t* page) {
+ if (page->is_decommitted)
+ return;
+ void* extra_page = pointer_offset(page, global_config.page_size);
+ size_t extra_page_size = page_get_size(page) - global_config.page_size;
+ global_memory_interface->memory_decommit(extra_page, extra_page_size);
+ page->is_decommitted = 1;
+}
+
+static inline void
+page_commit_memory_pages(page_t* page) {
+ if (!page->is_decommitted)
+ return;
+ void* extra_page = pointer_offset(page, global_config.page_size);
+ size_t extra_page_size = page_get_size(page) - global_config.page_size;
+ global_memory_interface->memory_commit(extra_page, extra_page_size);
+ page->is_decommitted = 0;
+#if ENABLE_DECOMMIT
+#if !defined(__APPLE__)
+ // When the page is recommitted, blocks in the second memory page and onward
+ // will be zeroed out by the OS - take advantage of this in zalloc/calloc calls
+ // and make sure blocks in the first page are zeroed out as well
+ void* first_page = pointer_offset(page, PAGE_HEADER_SIZE);
+ memset(first_page, 0, global_config.page_size - PAGE_HEADER_SIZE);
+ page->is_zero = 1;
+#endif
+#endif
+}
+
+static void
+page_available_to_free(page_t* page) {
+ rpmalloc_assert(page->is_full == 0, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ heap_t* heap = page->heap;
+ if (heap->page_available[page->size_class] == page) {
+ heap->page_available[page->size_class] = page->next;
+ } else {
+ page->prev->next = page->next;
+ if (page->next)
+ page->next->prev = page->prev;
+ }
+ page->is_free = 1;
+ page->is_zero = 0;
+ page->next = heap->page_free[page->page_type];
+ heap->page_free[page->page_type] = page;
+ if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
+ heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]);
+}
+
+static void
+page_full_to_available(page_t* page) {
+ rpmalloc_assert(page->is_full == 1, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ heap_t* heap = page->heap;
+ page->next = heap->page_available[page->size_class];
+ if (page->next)
+ page->next->prev = page;
+ heap->page_available[page->size_class] = page;
+ page->is_full = 0;
+ if (page->has_aligned_block == 0)
+ page->generic_free = 0;
+}
+
+static void
+page_full_to_free_on_new_heap(page_t* page, heap_t* heap) {
+ rpmalloc_assert(heap->id, "Page full to free on default heap");
+ rpmalloc_assert(page->is_full == 1, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ page->is_full = 0;
+ page->is_free = 1;
+ page->heap = heap;
+ atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ page->next = heap->page_free[page->page_type];
+ heap->page_free[page->page_type] = page;
+ if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
+ heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]);
+}
+
+static void
+page_available_to_full(page_t* page) {
+ heap_t* heap = page->heap;
+ if (heap->page_available[page->size_class] == page) {
+ heap->page_available[page->size_class] = page->next;
+ } else {
+ page->prev->next = page->next;
+ if (page->next)
+ page->next->prev = page->prev;
+ }
+ page->is_full = 1;
+ page->is_zero = 0;
+ page->generic_free = 1;
+}
+
+static inline void
+page_put_local_free_block(page_t* page, block_t* block) {
+ block->next = page->local_free;
+ page->local_free = block;
+ ++page->local_free_count;
+ if (UNEXPECTED(--page->block_used == 0)) {
+ page_available_to_free(page);
+ } else if (UNEXPECTED(page->is_full != 0)) {
+ page_full_to_available(page);
+ }
+}
+
+static NOINLINE void
+page_adopt_thread_free_block_list(page_t* page) {
+ if (page->local_free)
+ return;
+ unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
+ if (thread_free != 0) {
+ // Other threads can only replace with another valid list head; this will never change to 0 in another thread
+ while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_relaxed,
+ memory_order_relaxed))
+ wait_spin();
+ page->local_free_count = page_block_from_thread_free_list(page, thread_free, &page->local_free);
+ rpmalloc_assert(page->local_free_count <= page->block_used, "Page thread free list count internal failure");
+ page->block_used -= page->local_free_count;
+ }
+}
+
+static NOINLINE void
+page_put_thread_free_block(page_t* page, block_t* block) {
+ atomic_thread_fence(memory_order_acquire);
+ if (page->is_full) {
+ // Page is full, so put the block in the heap thread free list instead; otherwise
+ // the heap would not pick up the free blocks until a thread local free happens
+ heap_t* heap = page->heap;
+ uintptr_t prev_head = atomic_load_explicit(&heap->thread_free[page->page_type], memory_order_relaxed);
+ block->next = (void*)prev_head;
+ while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page->page_type], &prev_head, (uintptr_t)block,
+ memory_order_relaxed, memory_order_relaxed)) {
+ block->next = (void*)prev_head;
+ wait_spin();
+ }
+ } else {
+ unsigned long long prev_thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
+ uint32_t block_index = page_block_index(page, block);
+ rpmalloc_assert(page_block(page, block_index) == block, "Block pointer is not aligned to start of block");
+ uint32_t list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
+ uint64_t thread_free = page_block_to_thread_free_list(page, block_index, list_size);
+ while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &prev_thread_free, thread_free,
+ memory_order_relaxed, memory_order_relaxed)) {
+ list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
+ thread_free = page_block_to_thread_free_list(page, block_index, list_size);
+ wait_spin();
+ }
+ }
+}
+
+static void
+page_push_local_free_to_heap(page_t* page) {
+ // Push the page free list as the fast track list of free blocks for heap
+ page->heap->local_free[page->size_class] = page->local_free;
+ page->block_used += page->local_free_count;
+ page->local_free = 0;
+ page->local_free_count = 0;
+}
+
+static NOINLINE void*
+page_initialize_blocks(page_t* page) {
+ rpmalloc_assert(page->block_initialized < page->block_count, "Block initialization internal failure");
+ block_t* block = page_block(page, page->block_initialized);
+ ++page->block_initialized;
+ ++page->block_used;
+
+ if ((page->page_type == PAGE_SMALL) && (page->block_size < (global_config.page_size >> 1))) {
+ // Link up until next memory page in free list
+ void* memory_page_start = (void*)((uintptr_t)block & ~(uintptr_t)(global_config.page_size - 1));
+ void* memory_page_next = pointer_offset(memory_page_start, global_config.page_size);
+ block_t* free_block = pointer_offset(block, page->block_size);
+ block_t* first_block = free_block;
+ block_t* last_block = free_block;
+ uint32_t list_count = 0;
+ uint32_t max_list_count = page->block_count - page->block_initialized;
+ while (((void*)free_block < memory_page_next) && (list_count < max_list_count)) {
+ last_block = free_block;
+ free_block->next = pointer_offset(free_block, page->block_size);
+ free_block = free_block->next;
+ ++list_count;
+ }
+ if (list_count) {
+ last_block->next = 0;
+ page->local_free = first_block;
+ page->block_initialized += list_count;
+ page->local_free_count = list_count;
+ }
+ }
+
+ return block;
+}
+
+static inline RPMALLOC_ALLOCATOR void*
+page_allocate_block(page_t* page, unsigned int zero) {
+ unsigned int is_zero = 0;
+ block_t* block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
+ if (UNEXPECTED(block == 0)) {
+ if (atomic_load_explicit(&page->thread_free, memory_order_relaxed) != 0) {
+ page_adopt_thread_free_block_list(page);
+ block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
+ }
+ if (block == 0) {
+ block = page_initialize_blocks(page);
+ is_zero = page->is_zero;
+ }
+ }
+
+ rpmalloc_assert(page->block_used <= page->block_count, "Page block use counter out of sync");
+ if (page->local_free && !page->heap->local_free[page->size_class])
+ page_push_local_free_to_heap(page);
+
+ // The page might be full when the free list has been pushed to the heap local free list,
+ // so check if there is a thread free list to adopt
+ if (page->block_used == page->block_count)
+ page_adopt_thread_free_block_list(page);
+
+ if (page->block_used == page->block_count) {
+ // Page is now fully utilized
+ rpmalloc_assert(!page->is_full, "Page block use counter out of sync with full flag");
+ page_available_to_full(page);
+ }
+
+ if (zero) {
+ if (!is_zero)
+ memset(block, 0, page->block_size);
+ else
+ *(uintptr_t*)block = 0;
+ }
+
+ return block;
+}
+
+////////////
+///
+/// Span interface
+///
+//////
+
+static inline int
+span_is_thread_heap(span_t* span) {
+#if RPMALLOC_FIRST_CLASS_HEAPS
+ return (!span->heap->owner_thread || (span->heap->owner_thread == get_thread_id()));
+#else
+ return (span->heap->owner_thread == get_thread_id());
+#endif
+}
+
+static inline page_t*
+span_get_page_from_block(span_t* span, void* block) {
+ return (page_t*)((uintptr_t)block & span->page_address_mask);
+}
+
+//! Find or allocate a page from the given span
+static inline page_t*
+span_allocate_page(span_t* span) {
+ // Allocate path, initialize a new chunk of memory for a page in the given span
+ rpmalloc_assert(span->page_initialized < span->page_count, "Page initialization internal failure");
+ heap_t* heap = span->heap;
+ page_t* page = pointer_offset(span, span->page_size * span->page_initialized);
+
+#if ENABLE_DECOMMIT
+ // The first page is always committed when the span memory is initially mapped
+ if (span->page_initialized)
+ global_memory_interface->memory_commit(page, span->page_size);
+#endif
+ ++span->page_initialized;
+
+ page->page_type = span->page_type;
+ page->is_zero = 1;
+ page->heap = heap;
+ rpmalloc_assert(page_is_thread_heap(page), "Page owner thread mismatch");
+
+ if (span->page_initialized == span->page_count) {
+ // Span fully utilized
+ rpmalloc_assert(span == heap->span_partial[span->page_type], "Span partial tracking out of sync");
+ heap->span_partial[span->page_type] = 0;
+
+ span->next = heap->span_used[span->page_type];
+ heap->span_used[span->page_type] = span;
+ }
+
+ return page;
+}
+
+static NOINLINE void
+span_deallocate_block(span_t* span, page_t* page, void* block) {
+ if (UNEXPECTED(page->page_type == PAGE_HUGE)) {
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ return;
+ }
+
+ if (page->has_aligned_block) {
+ // Realign pointer to block start
+ block = page_block_realign(page, block);
+ }
+
+ int is_thread_local = page_is_thread_heap(page);
+ if (EXPECTED(is_thread_local != 0)) {
+ page_put_local_free_block(page, block);
+ } else {
+ // Multithreaded deallocation, push to deferred deallocation list.
+ page_put_thread_free_block(page, block);
+ }
+}
+
+////////////
+///
+/// Block interface
+///
+//////
+
+static inline span_t*
+block_get_span(block_t* block) {
+ return (span_t*)((uintptr_t)block & SPAN_MASK);
+}
+
+static inline void
+block_deallocate(block_t* block) {
+ span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK);
+ page_t* page = span_get_page_from_block(span, block);
+ const int is_thread_local = page_is_thread_heap(page);
+
+ // Optimized path for thread local free with non-huge block in page
+ // that has no aligned blocks
+ if (EXPECTED(is_thread_local != 0)) {
+ if (EXPECTED(page->generic_free == 0)) {
+ // Page is not huge, not full and has no aligned block - fast path
+ block->next = page->local_free;
+ page->local_free = block;
+ ++page->local_free_count;
+ if (UNEXPECTED(--page->block_used == 0))
+ page_available_to_free(page);
+ } else {
+ span_deallocate_block(span, page, block);
+ }
+ } else {
+ span_deallocate_block(span, page, block);
+ }
+}
+
+static inline size_t
+block_usable_size(block_t* block) {
+ span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK);
+ if (EXPECTED(span->page_type <= PAGE_LARGE)) {
+ page_t* page = span_get_page_from_block(span, block);
+ void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE);
+ return page->block_size - ((size_t)pointer_diff(block, blocks_start) % page->block_size);
+ } else {
+ return ((size_t)span->page_size * (size_t)span->page_count) - (size_t)pointer_diff(block, span);
+ }
+}
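+// Worked example (illustrative only): for a page with block_size 64 and a
+// pointer 16 bytes into its block (as can happen after an aligned
+// allocation), the usable size is 64 - (16 % 64) == 48 bytes.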
+
+////////////
+///
+/// Heap interface
+///
+//////
+
+static inline void
+heap_lock_acquire(void) {
+ uintptr_t lock = 0;
+ uintptr_t this_lock = get_thread_id();
+ while (!atomic_compare_exchange_strong(&global_heap_lock, &lock, this_lock)) {
+ lock = 0;
+ wait_spin();
+ }
+}
+
+static inline void
+heap_lock_release(void) {
+ rpmalloc_assert((uintptr_t)atomic_load_explicit(&global_heap_lock, memory_order_relaxed) == get_thread_id(),
+ "Bad heap lock");
+ atomic_store_explicit(&global_heap_lock, 0, memory_order_release);
+}
+
+static inline heap_t*
+heap_initialize(void* block) {
+ heap_t* heap = block;
+ memset_const(heap, 0, sizeof(heap_t));
+ heap->id = 1 + atomic_fetch_add_explicit(&global_heap_id, 1, memory_order_relaxed);
+ return heap;
+}
+
+static heap_t*
+heap_allocate_new(void) {
+ if (!global_config.page_size)
+ rpmalloc_initialize(0);
+ size_t heap_size = get_page_aligned_size(sizeof(heap_t));
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ block_t* block = global_memory_interface->memory_map(heap_size, 0, &offset, &mapped_size);
+#if ENABLE_DECOMMIT
+ global_memory_interface->memory_commit(block, heap_size);
+#endif
+ heap_t* heap = heap_initialize((void*)block);
+ heap->offset = (uint32_t)offset;
+ heap->mapped_size = mapped_size;
+#if ENABLE_STATISTICS
+ atomic_fetch_add_explicit(&global_statistics.heap_count, 1, memory_order_relaxed);
+#endif
+ return heap;
+}
+
+static void
+heap_unmap(heap_t* heap) {
+ global_memory_interface->memory_unmap(heap, heap->offset, heap->mapped_size);
+}
+
+static heap_t*
+heap_allocate(int first_class) {
+ heap_t* heap = 0;
+ if (!first_class) {
+ heap_lock_acquire();
+ heap = global_heap_queue;
+ global_heap_queue = heap ? heap->next : 0;
+ heap_lock_release();
+ }
+ if (!heap)
+ heap = heap_allocate_new();
+ if (heap) {
+ uintptr_t current_thread_id = get_thread_id();
+ heap_lock_acquire();
+ heap->next = global_heap_used;
+ heap->prev = 0;
+ if (global_heap_used)
+ global_heap_used->prev = heap;
+ global_heap_used = heap;
+ heap_lock_release();
+ heap->owner_thread = current_thread_id;
+ }
+ return heap;
+}
+
+static inline void
+heap_release(heap_t* heap) {
+ heap_lock_acquire();
+ if (heap->prev)
+ heap->prev->next = heap->next;
+ if (heap->next)
+ heap->next->prev = heap->prev;
+ if (global_heap_used == heap)
+ global_heap_used = heap->next;
+ heap->next = global_heap_queue;
+ global_heap_queue = heap;
+ heap_lock_release();
+}
+
+static void
+heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count) {
+ page_t* page = heap->page_free[page_type];
+ while (page && page_retain_count) {
+ page = page->next;
+ --page_retain_count;
+ }
+ while (page && (page->is_decommitted == 0)) {
+ page_decommit_memory_pages(page);
+ --heap->page_free_commit_count[page_type];
+ page = page->next;
+ }
+}
+
+static inline void
+heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) {
+ page->size_class = size_class;
+ page->block_size = global_size_class[size_class].block_size;
+ page->block_count = global_size_class[size_class].block_count;
+ page->block_used = 0;
+ page->block_initialized = 0;
+ page->local_free = 0;
+ page->local_free_count = 0;
+ page->is_full = 0;
+ page->is_free = 0;
+ page->has_aligned_block = 0;
+ page->generic_free = 0;
+ page->heap = heap;
+ page_t* head = heap->page_available[size_class];
+ page->next = head;
+ page->prev = 0;
+ atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ if (head)
+ head->prev = page;
+ heap->page_available[size_class] = page;
+ if (page->is_decommitted)
+ page_commit_memory_pages(page);
+}
+
+//! Find or allocate a span for the given page type with the given size class
+static inline span_t*
+heap_get_span(heap_t* heap, page_type_t page_type) {
+ // Fast path, available span for given page type
+ if (EXPECTED(heap->span_partial[page_type] != 0))
+ return heap->span_partial[page_type];
+
+ // Fallback path, map more memory
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ span_t* span = global_memory_interface->memory_map(SPAN_SIZE, SPAN_SIZE, &offset, &mapped_size);
+ if (EXPECTED(span != 0)) {
+ uint32_t page_count = 0;
+ uint32_t page_size = 0;
+ uintptr_t page_address_mask = 0;
+ if (page_type == PAGE_SMALL) {
+ page_count = SPAN_SIZE / SMALL_PAGE_SIZE;
+ page_size = SMALL_PAGE_SIZE;
+ page_address_mask = SMALL_PAGE_MASK;
+ } else if (page_type == PAGE_MEDIUM) {
+ page_count = SPAN_SIZE / MEDIUM_PAGE_SIZE;
+ page_size = MEDIUM_PAGE_SIZE;
+ page_address_mask = MEDIUM_PAGE_MASK;
+ } else {
+ page_count = SPAN_SIZE / LARGE_PAGE_SIZE;
+ page_size = LARGE_PAGE_SIZE;
+ page_address_mask = LARGE_PAGE_MASK;
+ }
+#if ENABLE_DECOMMIT
+ global_memory_interface->memory_commit(span, page_size);
+#endif
+ span->heap = heap;
+ span->page_type = page_type;
+ span->page_count = page_count;
+ span->page_size = page_size;
+ span->page_address_mask = page_address_mask;
+ span->offset = (uint32_t)offset;
+ span->mapped_size = mapped_size;
+
+ heap->span_partial[page_type] = span;
+ }
+
+ return span;
+}
+
+static page_t*
+heap_get_page(heap_t* heap, uint32_t size_class);
+
+static void
+block_deallocate(block_t* block);
+
+static page_t*
+heap_get_page_generic(heap_t* heap, uint32_t size_class) {
+ page_type_t page_type = get_page_type(size_class);
+
+ // Check if there is a free page from multithreaded deallocations
+ uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed);
+ if (UNEXPECTED(block_mt != 0)) {
+ while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_relaxed,
+ memory_order_relaxed)) {
+ wait_spin();
+ }
+ block_t* block = (void*)block_mt;
+ while (block) {
+ block_t* next_block = block->next;
+ block_deallocate(block);
+ block = next_block;
+ }
+ // Retry after processing deferred thread frees
+ return heap_get_page(heap, size_class);
+ }
+
+ // Check if there is a free page
+ page_t* page = heap->page_free[page_type];
+ if (EXPECTED(page != 0)) {
+ heap->page_free[page_type] = page->next;
+ if (page->is_decommitted == 0) {
+ rpmalloc_assert(heap->page_free_commit_count[page_type] > 0, "Free committed page count out of sync");
+ --heap->page_free_commit_count[page_type];
+ }
+ heap_make_free_page_available(heap, size_class, page);
+ return page;
+ }
+ rpmalloc_assert(heap->page_free_commit_count[page_type] == 0, "Free committed page count out of sync");
+
+ if (heap->id == 0) {
+ // Thread has not yet initialized, assign heap and try again
+ rpmalloc_initialize(0);
+ return heap_get_page(get_thread_heap(), size_class);
+ }
+
+ // Fallback path, find or allocate a span for the given size class.
+ // If the thread was not initialized, the heap for the new span
+ // will differ from the local heap variable in this scope
+ // (which is the default heap), so use the span page heap instead
+ span_t* span = heap_get_span(heap, page_type);
+ if (EXPECTED(span != 0)) {
+ page = span_allocate_page(span);
+ heap_make_free_page_available(page->heap, size_class, page);
+ }
+
+ return page;
+}
+
+//! Find or allocate a page for the given size class
+static page_t*
+heap_get_page(heap_t* heap, uint32_t size_class) {
+ // Fast path, available page for given size class
+ page_t* page = heap->page_available[size_class];
+ if (EXPECTED(page != 0))
+ return page;
+ return heap_get_page_generic(heap, size_class);
+}
+
+//! Pop a block from the heap local free list
+static inline RPMALLOC_ALLOCATOR void*
+heap_pop_local_free(heap_t* heap, uint32_t size_class) {
+ block_t** free_list = heap->local_free + size_class;
+ block_t* block = *free_list;
+ if (EXPECTED(block != 0))
+ *free_list = block->next;
+ return block;
+}
+
+//! Generic allocation path from heap pages, spans or new mapping
+static NOINLINE RPMALLOC_ALLOCATOR void*
+heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned int zero) {
+ page_t* page = heap_get_page(heap, size_class);
+ if (EXPECTED(page != 0))
+ return page_allocate_block(page, zero);
+ return 0;
+}
+
+//! Generic allocation path from heap pages, spans or new mapping
+static NOINLINE RPMALLOC_ALLOCATOR void*
+heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
+ (void)sizeof(heap);
+ size_t alloc_size = get_page_aligned_size(size + SPAN_HEADER_SIZE);
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ void* block = global_memory_interface->memory_map(alloc_size, SPAN_SIZE, &offset, &mapped_size);
+ if (block) {
+ span_t* span = block;
+#if ENABLE_DECOMMIT
+ global_memory_interface->memory_commit(span, alloc_size);
+#endif
+ span->heap = heap;
+ span->page_type = PAGE_HUGE;
+ span->page_size = (uint32_t)global_config.page_size;
+ span->page_count = (uint32_t)(alloc_size / global_config.page_size);
+ span->page_address_mask = LARGE_PAGE_MASK;
+ span->offset = (uint32_t)offset;
+ span->mapped_size = mapped_size;
+ span->page.heap = heap;
+ span->page.is_full = 1;
+ span->page.generic_free = 1;
+ span->page.page_type = PAGE_HUGE;
+ // Keep track of span if first class heap
+ if (!heap->owner_thread) {
+ span->next = heap->span_used[PAGE_HUGE];
+ heap->span_used[PAGE_HUGE] = span;
+ }
+ void* ptr = pointer_offset(block, SPAN_HEADER_SIZE);
+ if (zero)
+ memset(ptr, 0, size);
+ return ptr;
+ }
+ return 0;
+}
+
+static RPMALLOC_ALLOCATOR NOINLINE void*
+heap_allocate_block_generic(heap_t* heap, size_t size, unsigned int zero) {
+ uint32_t size_class = get_size_class(size);
+ if (EXPECTED(size_class < SIZE_CLASS_COUNT)) {
+ block_t* block = heap_pop_local_free(heap, size_class);
+ if (EXPECTED(block != 0)) {
+ // Fast track with small block available in heap level local free list
+ if (zero)
+ memset(block, 0, global_size_class[size_class].block_size);
+ return block;
+ }
+
+ return heap_allocate_block_small_to_large(heap, size_class, zero);
+ }
+
+ return heap_allocate_block_huge(heap, size, zero);
+}
+
+//! Find or allocate a block of the given size
+static inline RPMALLOC_ALLOCATOR void*
+heap_allocate_block(heap_t* heap, size_t size, unsigned int zero) {
+ if (size <= (SMALL_GRANULARITY * 64)) {
+ uint32_t size_class = get_size_class_tiny(size);
+ block_t* block = heap_pop_local_free(heap, size_class);
+ if (EXPECTED(block != 0)) {
+ // Fast track with small block available in heap level local free list
+ if (zero)
+ memset(block, 0, global_size_class[size_class].block_size);
+ return block;
+ }
+ }
+ return heap_allocate_block_generic(heap, size, zero);
+}
+
+static RPMALLOC_ALLOCATOR void*
+heap_allocate_block_aligned(heap_t* heap, size_t alignment, size_t size, unsigned int zero) {
+ if (alignment <= SMALL_GRANULARITY)
+ return heap_allocate_block(heap, size, zero);
+
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment) < size) {
+ errno = EINVAL;
+ return 0;
+ }
+ if (alignment & (alignment - 1)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ if (alignment >= RPMALLOC_MAX_ALIGNMENT) {
+ errno = EINVAL;
+ return 0;
+ }
+
+ size_t align_mask = alignment - 1;
+ block_t* block = heap_allocate_block(heap, size + alignment, zero);
+ if ((uintptr_t)block & align_mask) {
+ block = (void*)(((uintptr_t)block & ~(uintptr_t)align_mask) + alignment);
+ // Mark as having aligned blocks
+ span_t* span = block_get_span(block);
+ page_t* page = span_get_page_from_block(span, block);
+ page->has_aligned_block = 1;
+ page->generic_free = 1;
+ }
+ return block;
+}
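+// Alignment example (illustrative only): a request for 200 bytes with
+// 64-byte alignment allocates a 264-byte block; if that block is not
+// already 64-byte aligned, the pointer is rounded up to the next 64-byte
+// boundary and the page is flagged as containing aligned blocks so frees
+// can realign the pointer to the block start.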
+
+static void*
+heap_reallocate_block(heap_t* heap, void* block, size_t size, size_t old_size, unsigned int flags) {
+ if (block) {
+ // Grab the span using guaranteed span alignment
+ span_t* span = block_get_span(block);
+ if (EXPECTED(span->page_type <= PAGE_LARGE)) {
+ // Normal sized block
+ page_t* page = span_get_page_from_block(span, block);
+ void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE);
+ uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start);
+ uint32_t block_idx = block_offset / page->block_size;
+ void* block_origin = pointer_offset(blocks_start, (size_t)block_idx * page->block_size);
+ if (!old_size)
+ old_size = (size_t)((ptrdiff_t)page->block_size - pointer_diff(block, block_origin));
+ if ((size_t)page->block_size >= size) {
+ // Still fits in block, never mind trying to save memory, but preserve data if alignment changed
+ if ((block != block_origin) && !(flags & RPMALLOC_NO_PRESERVE))
+ memmove(block_origin, block, old_size);
+ return block_origin;
+ }
+ } else {
+ // Huge block
+ void* block_start = pointer_offset(span, SPAN_HEADER_SIZE);
+ if (!old_size)
+ old_size = ((size_t)span->page_size * (size_t)span->page_count) - SPAN_HEADER_SIZE;
+ if ((size < old_size) && (size > LARGE_BLOCK_SIZE_LIMIT)) {
+ // Still fits in block and still huge, never mind trying to save memory,
+ // but preserve data if alignment changed
+ if ((block_start != block) && !(flags & RPMALLOC_NO_PRESERVE))
+ memmove(block_start, block, old_size);
+ return block_start;
+ }
+ }
+ } else {
+ old_size = 0;
+ }
+
+ if (!!(flags & RPMALLOC_GROW_OR_FAIL))
+ return 0;
+
+ // The size is greater than the block size, or shrinking saves enough memory to warrant a resize; allocate a new
+ // block and deallocate the old. Avoid hysteresis by overallocating if the increase is small (below 37.5%)
+ size_t lower_bound = old_size + (old_size >> 2) + (old_size >> 3);
+ size_t new_size = (size > lower_bound) ? size : ((size > old_size) ? lower_bound : size);
+ void* old_block = block;
+ block = heap_allocate_block(heap, new_size, 0);
+ if (block && old_block) {
+ if (!(flags & RPMALLOC_NO_PRESERVE))
+ memcpy(block, old_block, old_size < new_size ? old_size : new_size);
+ block_deallocate(old_block);
+ }
+
+ return block;
+}
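+// Hysteresis example (illustrative only): growing a block with old_size 1000
+// gives lower_bound == 1000 + 250 + 125 == 1375, so a request for 1100 bytes
+// allocates 1375 bytes, while a request for 2000 bytes allocates exactly
+// 2000.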
+
+static void*
+heap_reallocate_block_aligned(heap_t* heap, void* block, size_t alignment, size_t size, size_t old_size,
+ unsigned int flags) {
+ if (alignment <= SMALL_GRANULARITY)
+ return heap_reallocate_block(heap, block, size, old_size, flags);
+
+ int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
+ size_t usable_size = (block ? block_usable_size(block) : 0);
+ if ((usable_size >= size) && !((uintptr_t)block & (alignment - 1))) {
+ if (no_alloc || (size >= (usable_size / 2)))
+ return block;
+ }
+ // Aligned alloc marks span as having aligned blocks
+ void* old_block = block;
+ block = (!no_alloc ? heap_allocate_block_aligned(heap, alignment, size, 0) : 0);
+ if (EXPECTED(block != 0)) {
+ if (!(flags & RPMALLOC_NO_PRESERVE) && old_block) {
+ if (!old_size)
+ old_size = usable_size;
+ memcpy(block, old_block, old_size < size ? old_size : size);
+ }
+ if (EXPECTED(old_block != 0))
+ block_deallocate(old_block);
+ }
+ return block;
+}
+
+static void
+heap_free_all(heap_t* heap) {
+ for (int itype = 0; itype < 3; ++itype) {
+ span_t* span = heap->span_partial[itype];
+ while (span) {
+ span_t* span_next = span->next;
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ span = span_next;
+ }
+ heap->span_partial[itype] = 0;
+ heap->page_free[itype] = 0;
+ heap->page_free_commit_count[itype] = 0;
+ atomic_store_explicit(&heap->thread_free[itype], 0, memory_order_relaxed);
+ }
+ for (int itype = 0; itype < 4; ++itype) {
+ span_t* span = heap->span_used[itype];
+ while (span) {
+ span_t* span_next = span->next;
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ span = span_next;
+ }
+ heap->span_used[itype] = 0;
+ }
+ memset(heap->local_free, 0, sizeof(heap->local_free));
+ memset(heap->page_available, 0, sizeof(heap->page_available));
+
+#if ENABLE_STATISTICS
+ // TODO: Fix
+#endif
+}
+
+////////////
+///
+/// Extern interface
+///
+//////
+
+int
+rpmalloc_is_thread_initialized(void) {
+ return (get_thread_heap() != global_heap_default) ? 1 : 0;
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, size, 0);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpzalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, size, 1);
+}
+
+extern inline void
+rpfree(void* ptr) {
+ if (UNEXPECTED(ptr == 0))
+ return;
+ block_deallocate(ptr);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpcalloc(size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rprealloc(void* ptr, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return ptr;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_reallocate_block(heap, ptr, size, 0, 0);
+}
+
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment < size) || (alignment > global_config.page_size)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_reallocate_block_aligned(heap, ptr, alignment, size, oldsize, flags);
+}
+
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_alloc(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_zalloc(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpaligned_calloc(size_t alignment, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmemalign(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+extern inline int
+rpposix_memalign(void** memptr, size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ if (memptr)
+ *memptr = heap_allocate_block_aligned(heap, alignment, size, 0);
+ else
+ return EINVAL;
+ return *memptr ? 0 : ENOMEM;
+}
+
+extern inline size_t
+rpmalloc_usable_size(void* ptr) {
+ return (ptr ? block_usable_size(ptr) : 0);
+}
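
A minimal usage sketch of the extern entry points above, assuming only the public rpmalloc.h API (error handling elided; rpmalloc_initialize/rpmalloc_finalize are defined in the next section):

    #include <string.h>
    #include "rpmalloc.h"

    int main(void) {
        rpmalloc_initialize(0);                 /* 0 = use the default OS memory interface */

        char* buffer = rpmalloc(256);           /* at least 256 usable bytes */
        memset(buffer, 0, rpmalloc_usable_size(buffer));
        buffer = rprealloc(buffer, 4096);       /* grow, preserving contents */
        rpfree(buffer);

        double* values = rpcalloc(128, sizeof(double)); /* zero-initialized array */
        rpfree(values);

        rpmalloc_finalize();
        return 0;
    }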
+
+////////////
+///
+/// Initialization and finalization
+///
+//////
+
+static void
+rpmalloc_thread_destructor(void* value) {
+ // If this is called on the main thread, assume rpmalloc_finalize has not
+ // been called and shutdown is forced (through _exit) or unclean
+ if (get_thread_id() == global_main_thread_id)
+ return;
+ if (value)
+ rpmalloc_thread_finalize();
+}
+
+extern int
+rpmalloc_initialize_config(rpmalloc_interface_t* memory_interface, rpmalloc_config_t* config) {
+ if (global_rpmalloc_initialized) {
+ rpmalloc_thread_initialize();
+ if (config)
+ *config = global_config;
+ return 0;
+ }
+
+ if (config)
+ global_config = *config;
+
+ int result = rpmalloc_initialize(memory_interface);
+
+ if (config)
+ *config = global_config;
+
+ return result;
+}
+
+extern int
+rpmalloc_initialize(rpmalloc_interface_t* memory_interface) {
+ if (global_rpmalloc_initialized) {
+ rpmalloc_thread_initialize();
+ return 0;
+ }
+
+ global_rpmalloc_initialized = 1;
+
+ global_memory_interface = memory_interface ? memory_interface : &global_memory_interface_default;
+ if (!global_memory_interface->memory_map || !global_memory_interface->memory_unmap) {
+ global_memory_interface->memory_map = os_mmap;
+ global_memory_interface->memory_commit = os_mcommit;
+ global_memory_interface->memory_decommit = os_mdecommit;
+ global_memory_interface->memory_unmap = os_munmap;
+ }
+
+#if PLATFORM_WINDOWS
+ SYSTEM_INFO system_info;
+ memset(&system_info, 0, sizeof(system_info));
+ GetSystemInfo(&system_info);
+ os_map_granularity = system_info.dwAllocationGranularity;
+#else
+ os_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
+#endif
+
+#if PLATFORM_WINDOWS
+ os_page_size = system_info.dwPageSize;
+#else
+ os_page_size = os_map_granularity;
+#endif
+ if (global_config.enable_huge_pages) {
+#if PLATFORM_WINDOWS
+ HANDLE token = 0;
+ size_t large_page_minimum = GetLargePageMinimum();
+ if (large_page_minimum)
+ OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
+ if (token) {
+ LUID luid;
+ if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
+ TOKEN_PRIVILEGES token_privileges;
+ memset(&token_privileges, 0, sizeof(token_privileges));
+ token_privileges.PrivilegeCount = 1;
+ token_privileges.Privileges[0].Luid = luid;
+ token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+ if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
+ if (GetLastError() == ERROR_SUCCESS)
+ os_huge_pages = 1;
+ }
+ }
+ CloseHandle(token);
+ }
+ if (os_huge_pages) {
+ if (large_page_minimum > os_page_size)
+ os_page_size = large_page_minimum;
+ if (large_page_minimum > os_map_granularity)
+ os_map_granularity = large_page_minimum;
+ }
+#elif defined(__linux__)
+ size_t huge_page_size = 0;
+ FILE* meminfo = fopen("/proc/meminfo", "r");
+ if (meminfo) {
+ char line[128];
+ while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
+ line[sizeof(line) - 1] = 0;
+ if (strstr(line, "Hugepagesize:"))
+ huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
+ }
+ fclose(meminfo);
+ }
+ if (huge_page_size) {
+ os_huge_pages = 1;
+ os_page_size = huge_page_size;
+ os_map_granularity = huge_page_size;
+ }
+#elif defined(__FreeBSD__)
+ int rc;
+ size_t sz = sizeof(rc);
+
+ if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
+ os_huge_pages = 1;
+ os_page_size = 2 * 1024 * 1024;
+ os_map_granularity = os_page_size;
+ }
+#elif defined(__APPLE__) || defined(__NetBSD__)
+ os_huge_pages = 1;
+ os_page_size = 2 * 1024 * 1024;
+ os_map_granularity = os_page_size;
+#endif
+ } else {
+ os_huge_pages = 0;
+ }
+
+ global_config.enable_huge_pages = os_huge_pages;
+
+ if (!memory_interface || (global_config.page_size < os_page_size))
+ global_config.page_size = os_page_size;
+
+ if (global_config.enable_huge_pages || global_config.page_size > (256 * 1024))
+ global_config.disable_decommit = 1;
+
+#if defined(__linux__) || defined(__ANDROID__)
+ if (global_config.disable_thp)
+ (void)prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
+#endif
+
+#ifdef _WIN32
+ fls_key = FlsAlloc(&rpmalloc_thread_destructor);
+#else
+ pthread_key_create(&pthread_key, rpmalloc_thread_destructor);
+#endif
+
+ global_main_thread_id = get_thread_id();
+
+ rpmalloc_thread_initialize();
+
+ return 0;
+}
+
+extern const rpmalloc_config_t*
+rpmalloc_config(void) {
+ return &global_config;
+}
+
+extern void
+rpmalloc_finalize(void) {
+ rpmalloc_thread_finalize();
+
+ if (global_config.unmap_on_finalize) {
+ heap_t* heap = global_heap_queue;
+ global_heap_queue = 0;
+ while (heap) {
+ heap_t* heap_next = heap->next;
+ heap_free_all(heap);
+ heap_unmap(heap);
+ heap = heap_next;
+ }
+ heap = global_heap_used;
+ global_heap_used = 0;
+ while (heap) {
+ heap_t* heap_next = heap->next;
+ heap_free_all(heap);
+ heap_unmap(heap);
+ heap = heap_next;
+ }
+#if ENABLE_STATISTICS
+ memset(&global_statistics, 0, sizeof(global_statistics));
+#endif
+ }
+
+#ifdef _WIN32
+ FlsFree(fls_key);
+ fls_key = 0;
+#else
+ pthread_key_delete(pthread_key);
+ pthread_key = 0;
+#endif
+
+ global_main_thread_id = 0;
+ global_rpmalloc_initialized = 0;
+}
+
+extern void
+rpmalloc_thread_initialize(void) {
+ if (get_thread_heap() == global_heap_default)
+ get_thread_heap_allocate();
+}
+
+extern void
+rpmalloc_thread_finalize(void) {
+ heap_t* heap = get_thread_heap();
+ if (heap != global_heap_default) {
+ heap_release(heap);
+ set_thread_heap(global_heap_default);
+ }
+}
+
+extern void
+rpmalloc_thread_collect(void) {
+}
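
A sketch of the per-thread lifecycle with pthreads (the worker function is illustrative): a thread attaches a heap before allocating and releases it on exit; the FLS/pthread-key destructor registered in rpmalloc_initialize is intended to cover threads that exit without finalizing:

    #include <pthread.h>
    #include "rpmalloc.h"

    static void* worker(void* arg) {
        (void)arg;
        rpmalloc_thread_initialize();   /* attach a heap to this thread */
        void* scratch = rpmalloc(1024);
        /* ... thread work ... */
        rpfree(scratch);
        rpmalloc_thread_finalize();     /* release the heap for reuse by other threads */
        return 0;
    }

    int main(void) {
        rpmalloc_initialize(0);
        pthread_t thread;
        pthread_create(&thread, 0, worker, 0);
        pthread_join(thread, 0);
        rpmalloc_finalize();
        return 0;
    }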
+
+void
+rpmalloc_dump_statistics(void* file) {
+#if ENABLE_STATISTICS
+ fprintf(file, "Mapped pages: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_mapped, memory_order_relaxed));
+ fprintf(file, "Mapped pages (peak): %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed));
+ fprintf(file, "Active pages: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_active, memory_order_relaxed));
+ fprintf(file, "Active pages (peak): %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed));
+ fprintf(file, "Pages committed: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_commit, memory_order_relaxed));
+ fprintf(file, "Pages decommitted: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.page_decommit, memory_order_relaxed));
+ fprintf(file, "Heaps created: %llu\n",
+ (unsigned long long)atomic_load_explicit(&global_statistics.heap_count, memory_order_relaxed));
+#else
+ (void)sizeof(file);
+#endif
+}
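
The statistics dump above accepts any stdio stream passed as void*; a sketch (function name illustrative):

    #include <stdio.h>
    #include "rpmalloc.h"

    void dump_stats(void) {
        rpmalloc_dump_statistics(stderr);   /* no-op unless built with ENABLE_STATISTICS=1 */
    }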
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+rpmalloc_heap_t*
+rpmalloc_heap_acquire(void) {
+ // Must be a pristine heap from newly mapped memory pages, or else memory blocks
+ // could already be allocated from the heap, and those blocks would (wrongly) be
+ // released when the heap is cleared with rpmalloc_heap_free_all(). Heaps from the
+ // dedicated orphan list are guaranteed to be pristine and can also be used.
+ heap_t* heap = heap_allocate(1);
+ rpmalloc_assume(heap != 0);
+ heap->owner_thread = 0;
+ return heap;
+}
+
+void
+rpmalloc_heap_release(rpmalloc_heap_t* heap) {
+ if (heap)
+ heap_release(heap);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block(heap, size, 0);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ return heap_allocate_block(heap, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ return heap_allocate_block_aligned(heap, alignment, total, 1);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return ptr;
+ }
+#endif
+ return heap_reallocate_block(heap, ptr, size, 0, flags);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment < size) || (alignment > global_config.page_size)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_reallocate_block_aligned(heap, ptr, alignment, size, 0, flags);
+}
+
+void
+rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
+ (void)sizeof(heap);
+ block_deallocate(ptr);
+}
+
+//! Free all memory allocated by the heap
+void
+rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
+ heap_free_all(heap);
+}
+
+extern inline void
+rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
+ heap_t* prev_heap = get_thread_heap();
+ if (prev_heap != heap) {
+ set_thread_heap(heap);
+ if (prev_heap)
+ heap_release(prev_heap);
+ }
+}
+
+rpmalloc_heap_t*
+rpmalloc_get_heap_for_ptr(void* ptr) {
+ // Grab the span, and then the heap from the span
+ span_t* span = (span_t*)((uintptr_t)ptr & SPAN_MASK);
+ if (span)
+ return span_get_page_from_block(span, ptr)->heap;
+ return 0;
+}
+
+#endif
+
+#include "malloc.c"
diff --git a/thirdparty/rpmalloc/rpmalloc.h b/thirdparty/rpmalloc/rpmalloc.h
new file mode 100644
index 000000000..2e67280f9
--- /dev/null
+++ b/thirdparty/rpmalloc/rpmalloc.h
@@ -0,0 +1,396 @@
+/* rpmalloc.h - Memory allocator - Public Domain - 2016-2024 Mattias Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc
+ * implementation in C11. The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or
+ * modify it without any restrictions.
+ *
+ */
+
+#pragma once
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RPMALLOC_CACHE_LINE_SIZE 64
+#if defined(__clang__) || defined(__GNUC__)
+#define RPMALLOC_EXPORT __attribute__((visibility("default")))
+#define RPMALLOC_RESTRICT __restrict
+#define RPMALLOC_ALLOCATOR
+#define RPMALLOC_CACHE_ALIGNED __attribute__((aligned(RPMALLOC_CACHE_LINE_SIZE)))
+#if (defined(__clang_major__) && (__clang_major__ < 4)) || (!defined(__clang_major__) && defined(__GNUC__))
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#else
+#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
+#endif
+#define RPMALLOC_CDECL
+#elif defined(_MSC_VER)
+#define RPMALLOC_EXPORT
+#define RPMALLOC_RESTRICT __declspec(restrict)
+#define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
+#define RPMALLOC_CACHE_ALIGNED __declspec(align(RPMALLOC_CACHE_LINE_SIZE))
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#define RPMALLOC_CDECL __cdecl
+#else
+#define RPMALLOC_EXPORT
+#define RPMALLOC_ALLOCATOR
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#define RPMALLOC_CDECL
+#endif
+
+#define RPMALLOC_MAX_ALIGNMENT (256 * 1024)
+
+//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* functions).
+#ifndef RPMALLOC_FIRST_CLASS_HEAPS
+#define RPMALLOC_FIRST_CLASS_HEAPS 0
+#endif
+
+//! Flag to rpaligned_realloc to not preserve content in reallocation
+#define RPMALLOC_NO_PRESERVE 1
+//! Flag to rpaligned_realloc to fail and return a null pointer if the grow cannot be done in-place,
+// in which case the original pointer is still valid (just like a call to realloc which fails to allocate
+// a new block).
+#define RPMALLOC_GROW_OR_FAIL 2
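
A sketch of how the two flags interact with rpaligned_realloc (helper names, alignment, and sizes are illustrative): with RPMALLOC_GROW_OR_FAIL a failed in-place grow returns null and leaves the original block valid, so the caller can fall back to a copying reallocation; RPMALLOC_NO_PRESERVE lets the allocator skip copying the old contents:

    #include "rpmalloc.h"

    static void* grow_aligned(void* block, size_t oldsize, size_t newsize) {
        /* Try to grow in place; on failure the original block is untouched */
        void* grown = rpaligned_realloc(block, 64, newsize, oldsize, RPMALLOC_GROW_OR_FAIL);
        if (!grown)
            grown = rpaligned_realloc(block, 64, newsize, oldsize, 0); /* copying fallback */
        return grown;
    }

    static void* recycle_scratch(void* block, size_t newsize) {
        /* Old contents are not needed, so allow the allocator to skip the copy */
        return rpaligned_realloc(block, 64, newsize, 0, RPMALLOC_NO_PRESERVE);
    }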
+
+typedef struct rpmalloc_global_statistics_t {
+ //! Current amount of virtual memory mapped, all of which might not have been committed (only if
+ //! ENABLE_STATISTICS=1)
+ size_t mapped;
+ //! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
+ size_t mapped_peak;
+ //! Current amount of memory in global caches for small and medium sizes (<32KiB)
+ size_t cached;
+ //! Current amount of memory allocated in huge allocations, i.e. larger than LARGE_SIZE_LIMIT which is 2MiB by
+ //! default (only if ENABLE_STATISTICS=1)
+ size_t huge_alloc;
+ //! Peak amount of memory allocated in huge allocations, i.e. larger than LARGE_SIZE_LIMIT which is 2MiB by default
+ //! (only if ENABLE_STATISTICS=1)
+ size_t huge_alloc_peak;
+ //! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
+ size_t mapped_total;
+ //! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
+ size_t unmapped_total;
+} rpmalloc_global_statistics_t;
+
+typedef struct rpmalloc_thread_statistics_t {
+ //! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
+ size_t sizecache;
+ //! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
+ size_t spancache;
+ //! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
+ size_t thread_to_global;
+ //! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
+ size_t global_to_thread;
+ //! Per span count statistics (only if ENABLE_STATISTICS=1)
+ struct {
+ //! Currently used number of spans
+ size_t current;
+ //! High water mark of spans used
+ size_t peak;
+ //! Number of spans transitioned to global cache
+ size_t to_global;
+ //! Number of spans transitioned from global cache
+ size_t from_global;
+ //! Number of spans transitioned to thread cache
+ size_t to_cache;
+ //! Number of spans transitioned from thread cache
+ size_t from_cache;
+ //! Number of spans transitioned to reserved state
+ size_t to_reserved;
+ //! Number of spans transitioned from reserved state
+ size_t from_reserved;
+ //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
+ size_t map_calls;
+ } span_use[64];
+ //! Per size class statistics (only if ENABLE_STATISTICS=1)
+ struct {
+ //! Current number of allocations
+ size_t alloc_current;
+ //! Peak number of allocations
+ size_t alloc_peak;
+ //! Total number of allocations
+ size_t alloc_total;
+ //! Total number of frees
+ size_t free_total;
+ //! Number of spans transitioned to cache
+ size_t spans_to_cache;
+ //! Number of spans transitioned from cache
+ size_t spans_from_cache;
+ //! Number of spans transitioned from reserved state
+ size_t spans_from_reserved;
+ //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
+ size_t map_calls;
+ } size_use[128];
+} rpmalloc_thread_statistics_t;
+
+typedef struct rpmalloc_interface_t {
+ //! Map memory pages for the given number of bytes. The returned address MUST be aligned to the given alignment,
+ //! which will always be either 0 or the span size. The function can store an alignment offset in the offset
+ //! variable in case it performs alignment and the returned pointer is offset from the actual start of the memory
+ //! region due to this alignment. This alignment offset will be passed to the memory unmap function. The mapped size
+ //! can be stored in the mapped_size variable, which will also be passed to the memory unmap function as the release
+ //! parameter once the entire mapped region is ready to be released. If you set a memory_map function, you must also
+ //! set a memory_unmap function or else the default implementation will be used for both. This function must be
+ //! thread safe; it can be called by multiple threads simultaneously.
+ void* (*memory_map)(size_t size, size_t alignment, size_t* offset, size_t* mapped_size);
+ //! Commit a range of memory pages
+ void (*memory_commit)(void* address, size_t size);
+ //! Decommit a range of memory pages
+ void (*memory_decommit)(void* address, size_t size);
+ //! Unmap the memory pages starting at address and spanning the given number of bytes. If you set a memory_unmap
+ //! function, you must also set a memory_map function or else the default implementation will be used for both. This
+ //! function must be thread safe; it can be called by multiple threads simultaneously.
+ void (*memory_unmap)(void* address, size_t offset, size_t mapped_size);
+ //! Called when a call to map memory pages fails (out of memory). If this callback is not set or returns zero the
+ //! library will return a null pointer in the allocation call. If this callback returns non-zero the map call will
+ //! be retried. The argument passed is the number of bytes that was requested in the map call. Only used if the
+ //! default system memory map function is used (memory_map callback is not set).
+ int (*map_fail_callback)(size_t size);
+ //! Called when an assert fails, if asserts are enabled. Will use the standard assert() if this is not set.
+ void (*error_callback)(const char* message);
+} rpmalloc_interface_t;
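
A sketch of a custom interface on POSIX built on mmap/munmap, following the callback contract above: overmap by the requested alignment, and report the alignment offset and mapped size back through the out-parameters so they return to memory_unmap. Names are illustrative; a production implementation would also honor huge-page configuration and trim the overmapped head/tail:

    #include <sys/mman.h>
    #include <stdint.h>
    #include "rpmalloc.h"

    static void* my_map(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
        size_t map_size = size + alignment;   /* overmap so the result can be aligned */
        void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ptr == MAP_FAILED)
            return 0;
        uintptr_t addr = (uintptr_t)ptr;
        uintptr_t aligned = alignment ? ((addr + (alignment - 1)) & ~(uintptr_t)(alignment - 1)) : addr;
        *offset = (size_t)(aligned - addr);   /* handed back to my_unmap */
        *mapped_size = map_size;              /* handed back to my_unmap on release */
        return (void*)aligned;
    }

    static void my_commit(void* address, size_t size) {
        (void)address; (void)size;            /* pages are committed on first touch */
    }

    static void my_decommit(void* address, size_t size) {
        madvise(address, size, MADV_DONTNEED);
    }

    static void my_unmap(void* address, size_t offset, size_t mapped_size) {
        munmap((char*)address - offset, mapped_size);
    }

    static rpmalloc_interface_t my_interface = {
        .memory_map = my_map,
        .memory_commit = my_commit,
        .memory_decommit = my_decommit,
        .memory_unmap = my_unmap,
    };
    /* rpmalloc_initialize(&my_interface); */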
+
+typedef struct rpmalloc_config_t {
+ //! Size of memory pages. The page size MUST be a power of two. All memory mapping
+ // requests to memory_map will be made with size set to a multiple of the page size.
+ // Set to 0 to use the OS default page size.
+ size_t page_size;
+ //! Enable use of large/huge pages. If this flag is set to non-zero and page size is
+ // zero, the allocator will try to enable huge pages and auto detect the configuration.
+ // If this is set to non-zero and page_size is also non-zero, the allocator will
+ // assume huge pages have been configured and enabled prior to initializing the
+ // allocator.
+ // For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
+ // For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
+ int enable_huge_pages;
+ //! Disable decommitting unused pages when allocator determines the memory pressure
+ // is low and there is enough active pages cached. If set to 1, keep all pages committed.
+ int disable_decommit;
+ //! Name applied to allocated pages, on systems that support it, to distinguish among anonymous regions.
+ const char* page_name;
+ //! Name applied to allocated huge pages, on systems that support it, to distinguish among anonymous regions.
+ const char* huge_page_name;
+ //! Unmap all memory on finalize if set to 1. Normally you can let the OS unmap all pages
+ // when the process exits, but if using rpmalloc in a dynamic library you might want to unmap
+ // all pages when the dynamic library unloads to avoid process memory leaks and bloat.
+ int unmap_on_finalize;
+#if defined(__linux__) || defined(__ANDROID__)
+ //! Allows disabling the Transparent Huge Page feature on Linux on a per-process basis,
+ // rather than enabling/disabling it system-wide (done via /sys/kernel/mm/transparent_hugepage/enabled).
+ // This can improve performance and reduce allocation overhead in some contexts, although
+ // THP is usually enabled by default.
+ int disable_thp;
+#endif
+} rpmalloc_config_t;
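
As a configuration sketch (function name illustrative), enabling huge-page auto-detection and unmap-on-finalize through rpmalloc_initialize_config; the struct is written back with the effective values, e.g. the detected page size:

    #include "rpmalloc.h"

    int init_with_huge_pages(void) {
        rpmalloc_config_t config = {0};
        config.enable_huge_pages = 1;   /* page_size left at 0: auto-detect */
        config.unmap_on_finalize = 1;   /* unmap everything, e.g. when used from a dynamic library */
        int result = rpmalloc_initialize_config(0, &config);
        /* config.page_size / config.enable_huge_pages now reflect what was actually enabled */
        return result;
    }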
+
+//! Initialize allocator
+RPMALLOC_EXPORT int
+rpmalloc_initialize(rpmalloc_interface_t* memory_interface);
+
+//! Initialize allocator with an optional configuration; the effective configuration is written back to config
+RPMALLOC_EXPORT int
+rpmalloc_initialize_config(rpmalloc_interface_t* memory_interface, rpmalloc_config_t* config);
+
+//! Get allocator configuration
+RPMALLOC_EXPORT const rpmalloc_config_t*
+rpmalloc_config(void);
+
+//! Finalize allocator
+RPMALLOC_EXPORT void
+rpmalloc_finalize(void);
+
+//! Initialize allocator for calling thread
+RPMALLOC_EXPORT void
+rpmalloc_thread_initialize(void);
+
+//! Finalize allocator for calling thread
+RPMALLOC_EXPORT void
+rpmalloc_thread_finalize(void);
+
+//! Perform deferred deallocations pending for the calling thread heap
+RPMALLOC_EXPORT void
+rpmalloc_thread_collect(void);
+
+//! Query if allocator is initialized for calling thread
+RPMALLOC_EXPORT int
+rpmalloc_is_thread_initialized(void);
+
+//! Get per-thread statistics
+RPMALLOC_EXPORT void
+rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
+
+//! Get global statistics
+RPMALLOC_EXPORT void
+rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
+
+//! Dump all statistics in human readable format to file (should be a FILE*)
+RPMALLOC_EXPORT void
+rpmalloc_dump_statistics(void* file);
+
+//! Allocate a memory block of at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
+
+//! Allocate a zero initialized memory block of at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpzalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
+
+//! Allocate a memory block of at least the given size and zero initialize it
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
+
+//! Reallocate the given block to at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Reallocate the given block to at least the given size and alignment,
+// with optional control flags (see RPMALLOC_NO_PRESERVE).
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a zero initialized memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_zalloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment, and zero initialize it.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpaligned_calloc(size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment.
+// Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB)
+RPMALLOC_EXPORT int
+rpposix_memalign(void** memptr, size_t alignment, size_t size);
+
+//! Free the given memory block
+RPMALLOC_EXPORT void
+rpfree(void* ptr);
+
+//! Query the usable size of the given memory block (from given pointer to the end of block)
+RPMALLOC_EXPORT size_t
+rpmalloc_usable_size(void* ptr);
+
+//! Dummy empty function for forcing linker symbol inclusion
+RPMALLOC_EXPORT void
+rpmalloc_linker_reference(void);
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+//! Heap type
+typedef struct heap_t rpmalloc_heap_t;
+
+//! Acquire a new heap. Will reuse existing released heaps or allocate memory for a new heap
+// if none is available. The heap API is implemented with the strict assumption that only a single
+// thread will call heap functions for a given heap at any given time; no functions are thread safe.
+RPMALLOC_EXPORT rpmalloc_heap_t*
+rpmalloc_heap_acquire(void);
+
+//! Release a heap (does NOT free the memory allocated by the heap, use rpmalloc_heap_free_all before destroying the
+//! heap).
+// Releasing a heap will enable it to be reused by other threads. Safe to pass a null pointer.
+RPMALLOC_EXPORT void
+rpmalloc_heap_release(rpmalloc_heap_t* heap);
+
+//! Allocate a memory block of at least the given size using the given heap.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size using the given heap. The returned
+// block will have the requested alignment. Alignment must be a power of two and a multiple of sizeof(void*),
+// and should ideally be less than memory page size. A caveat of rpmalloc
+// internals is that this must also be strictly less than the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Allocate a memory block of at least the given size using the given heap and zero initialize it.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Allocate a memory block of at least the given size using the given heap and zero initialize it. The returned
+// block will have the requested alignment. Alignment must either be zero, or a power of two and a multiple of
+// sizeof(void*), and should ideally be less than memory page size. A caveat of rpmalloc internals is that this must
+// also be strictly less than the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Reallocate the given block to at least the given size. The memory block MUST be allocated
+// by the same heap given to this function.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) RPMALLOC_ATTRIB_MALLOC
+ RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Reallocate the given block to at least the given size. The memory block MUST be allocated
+// by the same heap given to this function. The returned block will have the requested alignment.
+// Alignment must be either zero, or a power of two and a multiple of sizeof(void*), and should ideally be
+// less than memory page size. A caveat of rpmalloc internals is that this must also be strictly less than
+// the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size,
+ unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4);
+
+//! Free the given memory block from the given heap. The memory block MUST be allocated
+// by the same heap given to this function.
+RPMALLOC_EXPORT void
+rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr);
+
+//! Free all memory allocated by the heap
+RPMALLOC_EXPORT void
+rpmalloc_heap_free_all(rpmalloc_heap_t* heap);
+
+//! Set the given heap as the current heap for the calling thread. A heap MUST only be the current heap
+// for a single thread; a heap can never be shared between multiple threads. The previous
+// current heap for the calling thread is released to be reused by other threads.
+RPMALLOC_EXPORT void
+rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap);
+
+//! Returns which heap the given pointer is allocated on
+RPMALLOC_EXPORT rpmalloc_heap_t*
+rpmalloc_get_heap_for_ptr(void* ptr);
+
+#endif
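
A sketch of the heap API above (requires building with RPMALLOC_FIRST_CLASS_HEAPS=1; function name illustrative): because a heap owns all of its blocks, a transient structure can be torn down with a single rpmalloc_heap_free_all instead of per-block frees:

    #include "rpmalloc.h"

    #if RPMALLOC_FIRST_CLASS_HEAPS
    void build_and_discard(void) {
        rpmalloc_heap_t* heap = rpmalloc_heap_acquire();

        /* All blocks below are owned by this heap (single-threaded use only) */
        void* nodes = rpmalloc_heap_alloc(heap, 64 * 1024);
        void* table = rpmalloc_heap_calloc(heap, 1024, sizeof(void*));
        (void)nodes;
        (void)table;

        rpmalloc_heap_free_all(heap);   /* frees every block allocated from the heap */
        rpmalloc_heap_release(heap);    /* heap can now be reused by other threads */
    }
    #endif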
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/rpmalloc/rpnew.h b/thirdparty/rpmalloc/rpnew.h
new file mode 100644
index 000000000..75d381a3f
--- /dev/null
+++ b/thirdparty/rpmalloc/rpnew.h
@@ -0,0 +1,111 @@
+
+#ifdef __cplusplus
+
+#include <new>
+#include <rpmalloc.h>
+
+#ifndef __CRTDECL
+#define __CRTDECL
+#endif
+
+extern void __CRTDECL
+operator delete(void* p) noexcept {
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p) noexcept {
+ rpfree(p);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size) noexcept(false) {
+ return rpmalloc(size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size) noexcept(false) {
+ return rpmalloc(size);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpmalloc(size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpmalloc(size);
+}
+
+#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
+
+extern void __CRTDECL
+operator delete(void* p, std::size_t size) noexcept {
+ (void)sizeof(size);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p, std::size_t size) noexcept {
+ (void)sizeof(size);
+ rpfree(p);
+}
+
+#endif
+
+#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
+
+extern void __CRTDECL
+operator delete(void* p, std::align_val_t align) noexcept {
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p, std::align_val_t align) noexcept {
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete(void* p, std::size_t size, std::align_val_t align) noexcept {
+ (void)sizeof(size);
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void __CRTDECL
+operator delete[](void* p, std::size_t size, std::align_val_t align) noexcept {
+ (void)sizeof(size);
+ (void)sizeof(align);
+ rpfree(p);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size, std::align_val_t align) noexcept(false) {
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size, std::align_val_t align) noexcept(false) {
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void* __CRTDECL
+operator new(std::size_t size, std::align_val_t align, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void* __CRTDECL
+operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t& tag) noexcept {
+ (void)sizeof(tag);
+ return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+#endif
+
+#endif
diff --git a/xmake.lua b/xmake.lua
index a8256dbd5..b1e0f809d 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -136,6 +136,13 @@ option("zenmimalloc")
option_end()
add_define_by_config("ZEN_USE_MIMALLOC", "zenmimalloc")
+option("zenrpmalloc")
+ set_default(is_os("windows"))
+ set_showmenu(true)
+ set_description("Use rpmalloc for faster memory management")
+option_end()
+add_define_by_config("ZEN_USE_RPMALLOC", "zenrpmalloc")
+
if is_os("windows") then
option("httpsys")
set_default(true)
@@ -147,6 +154,18 @@ else
add_defines("ZEN_WITH_HTTPSYS=0")
end
+if is_os("windows") then
+ add_defines("UE_MEMORY_TRACE_AVAILABLE=1")
+ option("zenmemtrack")
+ set_default(true)
+ set_showmenu(true)
+ set_description("Enable UE's Memory Trace support")
+ option_end()
+ add_define_by_config("ZEN_WITH_MEMTRACK", "zenmemtrack")
+else
+ add_defines("ZEN_WITH_MEMTRACK=0")
+end
+
option("zentrace")
set_default(true)
set_showmenu(true)