aboutsummaryrefslogtreecommitdiff
path: root/thirdparty/rpmalloc/rpmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/rpmalloc/rpmalloc.c')
-rw-r--r--thirdparty/rpmalloc/rpmalloc.c2341
1 files changed, 2341 insertions, 0 deletions
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c
new file mode 100644
index 000000000..7aecfb0f4
--- /dev/null
+++ b/thirdparty/rpmalloc/rpmalloc.c
@@ -0,0 +1,2341 @@
+/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias
+ * Jansson
+ *
+ * This library provides a cross-platform lock free thread caching malloc
+ * implementation in C11. The latest source code is always available at
+ *
+ * https://github.com/mjansson/rpmalloc
+ *
+ * This library is put in the public domain; you can redistribute it and/or
+ * modify it without any restrictions.
+ *
+ */
+
+#include "rpmalloc.h"
+
+#include <errno.h>
+#include <string.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdatomic.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wunused-macros"
+#pragma clang diagnostic ignored "-Wunused-function"
+#if __has_warning("-Wreserved-identifier")
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
+#if __has_warning("-Wstatic-in-inline")
+#pragma clang diagnostic ignored "-Wstatic-in-inline"
+#endif
+#if __has_warning("-Wunsafe-buffer-usage")
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
+#define PLATFORM_WINDOWS 1
+#define PLATFORM_POSIX 0
+#else
+#define PLATFORM_WINDOWS 0
+#define PLATFORM_POSIX 1
+#endif
+
+#if defined(_MSC_VER)
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE __attribute__((noinline))
+#endif
+
+#if PLATFORM_WINDOWS
+#include <windows.h>
+#include <fibersapi.h>
+static DWORD fls_key;
+#endif
+#if PLATFORM_POSIX
+#include <sys/mman.h>
+#include <sched.h>
+#include <unistd.h>
+#include <pthread.h>
+static pthread_key_t pthread_key;
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#define MAP_HUGETLB MAP_ALIGNED_SUPER
+#ifndef PROT_MAX
+#define PROT_MAX(f) 0
+#endif
+#else
+#define PROT_MAX(f) 0
+#endif
+#ifdef __sun
+extern int
+madvise(caddr_t, size_t, int);
+#endif
+#ifndef MAP_UNINITIALIZED
+#define MAP_UNINITIALIZED 0
+#endif
+#endif
+
+#if defined(__linux__) || defined(__ANDROID__)
+#include <sys/prctl.h>
+#if !defined(PR_SET_VMA)
+#define PR_SET_VMA 0x53564d41
+#define PR_SET_VMA_ANON_NAME 0
+#endif
+#endif
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+#include <mach/mach_vm.h>
+#include <mach/vm_statistics.h>
+#endif
+#include <pthread.h>
+#endif
+#if defined(__HAIKU__) || defined(__TINYC__)
+#include <pthread.h>
+#endif
+
+#include <limits.h>
+#if (INTPTR_MAX > INT32_MAX)
+#define ARCH_64BIT 1
+#define ARCH_32BIT 0
+#else
+#define ARCH_64BIT 0
+#define ARCH_32BIT 1
+#endif
+
+#if !defined(__has_builtin)
+#define __has_builtin(b) 0
+#endif
+
+#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
+#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
+
+////////////
+///
+/// Build time configurable limits
+///
+//////
+
+#ifndef ENABLE_VALIDATE_ARGS
+//! Enable validation of args to public entry points
+#define ENABLE_VALIDATE_ARGS 0
+#endif
+#ifndef ENABLE_ASSERTS
+//! Enable asserts
+#define ENABLE_ASSERTS 0
+#endif
+#ifndef ENABLE_UNMAP
+//! Enable unmapping memory pages
+#define ENABLE_UNMAP 1
+#endif
+#ifndef ENABLE_DECOMMIT
+//! Enable decommitting memory pages
+#define ENABLE_DECOMMIT 1
+#endif
+#ifndef ENABLE_DYNAMIC_LINK
+//! Enable building as dynamic library
+#define ENABLE_DYNAMIC_LINK 0
+#endif
+#ifndef ENABLE_OVERRIDE
+//! Enable standard library malloc/free/new/delete overrides
+#define ENABLE_OVERRIDE 1
+#endif
+#ifndef ENABLE_STATISTICS
+//! Enable statistics
+#define ENABLE_STATISTICS 0
+#endif
+
+////////////
+///
+/// Built in size configurations
+///
+//////
+
+#define PAGE_HEADER_SIZE 128
+#define SPAN_HEADER_SIZE PAGE_HEADER_SIZE
+
+#define SMALL_GRANULARITY 16
+
+#define SMALL_BLOCK_SIZE_LIMIT (4 * 1024)
+#define MEDIUM_BLOCK_SIZE_LIMIT (256 * 1024)
+#define LARGE_BLOCK_SIZE_LIMIT (8 * 1024 * 1024)
+
+#define SMALL_SIZE_CLASS_COUNT 73
+#define MEDIUM_SIZE_CLASS_COUNT 24
+#define LARGE_SIZE_CLASS_COUNT 20
+#define SIZE_CLASS_COUNT (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT + LARGE_SIZE_CLASS_COUNT)
+
+#define SMALL_PAGE_SIZE_SHIFT 16
+#define SMALL_PAGE_SIZE (1 << SMALL_PAGE_SIZE_SHIFT)
+#define SMALL_PAGE_MASK (~((uintptr_t)SMALL_PAGE_SIZE - 1))
+#define MEDIUM_PAGE_SIZE_SHIFT 22
+#define MEDIUM_PAGE_SIZE (1 << MEDIUM_PAGE_SIZE_SHIFT)
+#define MEDIUM_PAGE_MASK (~((uintptr_t)MEDIUM_PAGE_SIZE - 1))
+#define LARGE_PAGE_SIZE_SHIFT 26
+#define LARGE_PAGE_SIZE (1 << LARGE_PAGE_SIZE_SHIFT)
+#define LARGE_PAGE_MASK (~((uintptr_t)LARGE_PAGE_SIZE - 1))
+
+#define SPAN_SIZE (256 * 1024 * 1024)
+#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1)))
+
+////////////
+///
+/// Utility macros
+///
+//////
+
+#if ENABLE_ASSERTS
+#undef NDEBUG
+#if defined(_MSC_VER) && !defined(_DEBUG)
+#define _DEBUG
+#endif
+#include <assert.h>
+#define RPMALLOC_TOSTRING_M(x) #x
+#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
+#define rpmalloc_assert(truth, message) \
+ do { \
+ if (!(truth)) { \
+ assert((truth) && message); \
+ } \
+ } while (0)
+#else
+#define rpmalloc_assert(truth, message) \
+ do { \
+ } while (0)
+#endif
+
+#if __has_builtin(__builtin_assume)
+#define rpmalloc_assume(cond) __builtin_assume(cond)
+#elif defined(__GNUC__)
+#define rpmalloc_assume(cond) \
+ do { \
+ if (!__builtin_expect(cond, 0)) \
+ __builtin_unreachable(); \
+ } while (0)
+#elif defined(_MSC_VER)
+#define rpmalloc_assume(cond) __assume(cond)
+#else
+#define rpmalloc_assume(cond) 0
+#endif
+
+////////////
+///
+/// Statistics
+///
+//////
+
+#if ENABLE_STATISTICS
+
//! Global allocator statistics, all counters in units of OS pages
//! (only compiled in when ENABLE_STATISTICS is set)
typedef struct rpmalloc_statistics_t {
	//! Number of pages currently mapped (reserved address space)
	atomic_size_t page_mapped;
	//! High watermark of mapped pages
	atomic_size_t page_mapped_peak;
	//! Total number of page commit operations
	atomic_size_t page_commit;
	//! Total number of page decommit operations
	atomic_size_t page_decommit;
	//! Number of pages currently committed/active
	atomic_size_t page_active;
	//! High watermark of active pages
	atomic_size_t page_active_peak;
	//! Number of heaps in use
	atomic_size_t heap_count;
} rpmalloc_statistics_t;
+
+static rpmalloc_statistics_t global_statistics;
+
+#else
+
+#endif
+
+////////////
+///
+/// Low level abstractions
+///
+//////
+
//! Count leading zero bits of the given value. Result is undefined for
//! x == 0, as per the underlying compiler intrinsics.
static inline size_t
rpmalloc_clz(uintptr_t x) {
#if ARCH_64BIT
#if defined(_MSC_VER) && !defined(__clang__)
	// NOTE(review): _lzcnt_u64 requires LZCNT-capable hardware and <intrin.h>
	// in scope — confirm both hold on the MSVC targets built here
	return (size_t)_lzcnt_u64(x);
#else
	return (size_t)__builtin_clzll(x);
#endif
#else
#if defined(_MSC_VER) && !defined(__clang__)
	return (size_t)_lzcnt_u32(x);
#else
	// uintptr_t is 32-bit here, so clzl (long) covers the full width
	return (size_t)__builtin_clzl(x);
#endif
#endif
}
+
//! CPU relaxation hint for spin-wait loops: issues the architecture's
//! pause/yield instruction to reduce pipeline pressure and power while
//! another thread holds the contended resource.
static inline void
wait_spin(void) {
#if defined(_MSC_VER)
#if defined(_M_ARM64)
	__yield();
#else
	_mm_pause();
#endif
#elif defined(__x86_64__) || defined(__i386__)
	__asm__ volatile("pause" ::: "memory");
#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
	__asm__ volatile("yield" ::: "memory");
#elif defined(__powerpc__) || defined(__powerpc64__)
	// No idea if ever been compiled in such archs but ... as precaution
	__asm__ volatile("or 27,27,27");
#elif defined(__sparc__)
	__asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
#else
	// Fallback: zero-length sleep yields the scheduler slice.
	// NOTE(review): relies on struct timespec/nanosleep being visible via
	// another header (<time.h> is not included directly) — confirm on
	// platforms that reach this branch
	struct timespec ts = {0};
	nanosleep(&ts, 0);
#endif
}
+
+#if defined(__GNUC__) || defined(__clang__)
+
+#define EXPECTED(x) __builtin_expect((x), 1)
+#define UNEXPECTED(x) __builtin_expect((x), 0)
+
+#else
+
+#define EXPECTED(x) x
+#define UNEXPECTED(x) x
+
+#endif
+#if defined(__GNUC__) || defined(__clang__)
+
+#if __has_builtin(__builtin_memcpy_inline)
+#define memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
+#else
+#define memcpy_const(x, y, s) \
+ do { \
+ _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \
+ memcpy(x, y, s); \
+ } while (0)
+#endif
+
+#if __has_builtin(__builtin_memset_inline)
+#define memset_const(x, y, s) __builtin_memset_inline(x, y, s)
+#else
+#define memset_const(x, y, s) \
+ do { \
+ _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \
+ memset(x, y, s); \
+ } while (0)
+#endif
+#else
+#define memcpy_const(x, y, s) memcpy(x, y, s)
+#define memset_const(x, y, s) memset(x, y, s)
+#endif
+
+////////////
+///
+/// Data types
+///
+//////
+
+//! A memory heap, per thread
+typedef struct heap_t heap_t;
+//! Span of memory pages
+typedef struct span_t span_t;
+//! Memory page
+typedef struct page_t page_t;
+//! Memory block
+typedef struct block_t block_t;
+//! Size class for a memory block
+typedef struct size_class_t size_class_t;
+
//! Memory page type; sizes correspond to the *_PAGE_SIZE_SHIFT constants
typedef enum page_type_t {
	PAGE_SMALL,   // 64KiB (1 << SMALL_PAGE_SIZE_SHIFT)
	PAGE_MEDIUM,  // 4MiB (1 << MEDIUM_PAGE_SIZE_SHIFT)
	PAGE_LARGE,   // 64MiB (1 << LARGE_PAGE_SIZE_SHIFT)
	PAGE_HUGE     // Variable size, see span_t page_size
} page_type_t;
+
//! Block size class (entries live in the global_size_class table)
struct size_class_t {
	//! Size of blocks in this class
	uint32_t block_size;
	//! Number of blocks in each chunk (derived from page size minus header)
	uint32_t block_count;
};
+
//! A memory block; while free, the block's first bytes hold the
//! intrusive free-list link
struct block_t {
	//! Next block in list
	block_t* next;
};
+
//! A page contains blocks of a given size. Header must fit in
//! PAGE_HEADER_SIZE bytes (see the _Static_assert below).
struct page_t {
	//! Size class of blocks
	uint32_t size_class;
	//! Block size
	uint32_t block_size;
	//! Block count
	uint32_t block_count;
	//! Block initialized count
	uint32_t block_initialized;
	//! Block used count
	uint32_t block_used;
	//! Page type
	page_type_t page_type;
	//! Flag set if part of heap full list
	uint32_t is_full : 1;
	//! Flag set if part of heap free list
	uint32_t is_free : 1;
	//! Flag set if blocks are zero initialized
	uint32_t is_zero : 1;
	//! Flag set if memory pages have been decommitted
	uint32_t is_decommitted : 1;
	//! Flag set if containing aligned blocks
	uint32_t has_aligned_block : 1;
	//! Fast combination flag for either huge, fully allocated or has aligned blocks
	uint32_t generic_free : 1;
	//! Local free list count
	uint32_t local_free_count;
	//! Local free list (only touched by the owning thread)
	block_t* local_free;
	//! Owning heap
	heap_t* heap;
	//! Next page in list
	page_t* next;
	//! Previous page in list
	page_t* prev;
	//! Multithreaded free list, block index is in low 32 bit, list count is high 32 bit
	//! (see page_block_from/to_thread_free_list for the encoding)
	atomic_ullong thread_free;
};
+
//! A span contains pages of a given type. Spans are SPAN_SIZE aligned, so a
//! page pointer can be masked with SPAN_MASK to locate its owning span.
struct span_t {
	//! Page header (the span's first page shares the header layout)
	page_t page;
	//! Owning heap
	heap_t* heap;
	//! Page address mask
	uintptr_t page_address_mask;
	//! Number of pages initialized
	uint32_t page_initialized;
	//! Number of pages in use
	uint32_t page_count;
	//! Number of bytes per page
	uint32_t page_size;
	//! Page type
	page_type_t page_type;
	//! Offset to start of mapped memory region
	uint32_t offset;
	//! Mapped size
	uint64_t mapped_size;
	//! Next span in list
	span_t* next;
};
+
// Control structure for a heap, either a thread heap or a first class heap if enabled.
// Arrays indexed [3] are per page type (small/medium/large); span_used has a
// fourth slot, presumably for PAGE_HUGE spans — TODO confirm against usage.
struct heap_t {
	//! Owning thread ID
	uintptr_t owner_thread;
	//! Heap local free list for small size classes
	block_t* local_free[SIZE_CLASS_COUNT];
	//! Available non-full pages for each size class
	page_t* page_available[SIZE_CLASS_COUNT];
	//! Free pages for each page type
	page_t* page_free[3];
	//! Free but still committed page count for each page type
	uint32_t page_free_commit_count[3];
	//! Multithreaded free list
	atomic_uintptr_t thread_free[3];
	//! Available partially initialized spans for each page type
	span_t* span_partial[3];
	//! Spans in full use for each page type
	span_t* span_used[4];
	//! Next heap in queue
	heap_t* next;
	//! Previous heap in queue
	heap_t* prev;
	//! Heap ID
	uint32_t id;
	//! Finalization state flag
	uint32_t finalize;
	//! Memory map region offset
	uint32_t offset;
	//! Memory map size
	size_t mapped_size;
};
+
+_Static_assert(sizeof(page_t) <= PAGE_HEADER_SIZE, "Invalid page header size");
+_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "Invalid span header size");
+_Static_assert(sizeof(heap_t) <= 4096, "Invalid heap size");
+
+////////////
+///
+/// Global data
+///
+//////
+
+//! Fallback heap
+static RPMALLOC_CACHE_ALIGNED heap_t global_heap_fallback;
+//! Default heap
+static heap_t* global_heap_default = &global_heap_fallback;
+//! Available heaps
+static heap_t* global_heap_queue;
+//! In use heaps
+static heap_t* global_heap_used;
+//! Lock for heap queue
+static atomic_uintptr_t global_heap_lock;
+//! Heap ID counter
+static atomic_uint global_heap_id = 1;
+//! Initialized flag
+static int global_rpmalloc_initialized;
+//! Memory interface
+static rpmalloc_interface_t* global_memory_interface;
+//! Default memory interface
+static rpmalloc_interface_t global_memory_interface_default;
+//! Current configuration
+static rpmalloc_config_t global_config = {0};
+//! Main thread ID
+static uintptr_t global_main_thread_id;
+
+//! Size classes
+#define SCLASS(n) \
+ { (n * SMALL_GRANULARITY), (SMALL_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+#define MCLASS(n) \
+ { (n * SMALL_GRANULARITY), (MEDIUM_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+#define LCLASS(n) \
+ { (n * SMALL_GRANULARITY), (LARGE_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) }
+static const size_class_t global_size_class[SIZE_CLASS_COUNT] = {
+ SCLASS(1), SCLASS(1), SCLASS(2), SCLASS(3), SCLASS(4), SCLASS(5), SCLASS(6),
+ SCLASS(7), SCLASS(8), SCLASS(9), SCLASS(10), SCLASS(11), SCLASS(12), SCLASS(13),
+ SCLASS(14), SCLASS(15), SCLASS(16), SCLASS(17), SCLASS(18), SCLASS(19), SCLASS(20),
+ SCLASS(21), SCLASS(22), SCLASS(23), SCLASS(24), SCLASS(25), SCLASS(26), SCLASS(27),
+ SCLASS(28), SCLASS(29), SCLASS(30), SCLASS(31), SCLASS(32), SCLASS(33), SCLASS(34),
+ SCLASS(35), SCLASS(36), SCLASS(37), SCLASS(38), SCLASS(39), SCLASS(40), SCLASS(41),
+ SCLASS(42), SCLASS(43), SCLASS(44), SCLASS(45), SCLASS(46), SCLASS(47), SCLASS(48),
+ SCLASS(49), SCLASS(50), SCLASS(51), SCLASS(52), SCLASS(53), SCLASS(54), SCLASS(55),
+ SCLASS(56), SCLASS(57), SCLASS(58), SCLASS(59), SCLASS(60), SCLASS(61), SCLASS(62),
+ SCLASS(63), SCLASS(64), SCLASS(80), SCLASS(96), SCLASS(112), SCLASS(128), SCLASS(160),
+ SCLASS(192), SCLASS(224), SCLASS(256), MCLASS(320), MCLASS(384), MCLASS(448), MCLASS(512),
+ MCLASS(640), MCLASS(768), MCLASS(896), MCLASS(1024), MCLASS(1280), MCLASS(1536), MCLASS(1792),
+ MCLASS(2048), MCLASS(2560), MCLASS(3072), MCLASS(3584), MCLASS(4096), MCLASS(5120), MCLASS(6144),
+ MCLASS(7168), MCLASS(8192), MCLASS(10240), MCLASS(12288), MCLASS(14336), MCLASS(16384), LCLASS(20480),
+ LCLASS(24576), LCLASS(28672), LCLASS(32768), LCLASS(40960), LCLASS(49152), LCLASS(57344), LCLASS(65536),
+ LCLASS(81920), LCLASS(98304), LCLASS(114688), LCLASS(131072), LCLASS(163840), LCLASS(196608), LCLASS(229376),
+ LCLASS(262144), LCLASS(327680), LCLASS(393216), LCLASS(458752), LCLASS(524288)};
+
+//! Threshold number of pages for when free pages are decommitted
+static uint32_t global_page_free_overflow[4] = {16, 8, 2, 0};
+
+//! Number of pages to retain when free page threshold overflows
+static uint32_t global_page_free_retain[4] = {4, 2, 1, 0};
+
+//! OS huge page support
+static int os_huge_pages;
+//! OS memory map granularity
+static size_t os_map_granularity;
+//! OS memory page size
+static size_t os_page_size;
+
+////////////
+///
+/// Thread local heap and ID
+///
+//////
+
+//! Current thread heap
+#if defined(_MSC_VER) && !defined(__clang__)
+#define TLS_MODEL
+#define _Thread_local __declspec(thread)
+#else
+// #define TLS_MODEL __attribute__((tls_model("initial-exec")))
+#define TLS_MODEL
+#endif
+static _Thread_local heap_t* global_thread_heap TLS_MODEL = &global_heap_fallback;
+
+static heap_t*
+heap_allocate(int first_class);
+
+static void
+heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count);
+
//! Fast thread ID: returns a cheap, unique-per-thread value (the TEB pointer
//! on Windows, the TLS thread pointer elsewhere). Only used as an identity
//! token, never dereferenced.
static inline uintptr_t
get_thread_id(void) {
#if defined(_WIN32)
	return (uintptr_t)((void*)NtCurrentTeb());
#else
	// NOTE(review): __builtin_thread_pointer is not available on every
	// GCC/Clang target — the disabled asm block below was the previous
	// per-architecture implementation kept for reference
	void* thp = __builtin_thread_pointer();
	return (uintptr_t)thp;
#endif
	/*
	#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
	    uintptr_t tid;
	#if defined(__i386__)
	    __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
	#elif defined(__x86_64__)
	#if defined(__MACH__)
	    __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
	#else
	    __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
	#endif
	#elif defined(__arm__)
	    __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
	#elif defined(__aarch64__)
	#if defined(__MACH__)
	    // tpidr_el0 likely unused, always return 0 on iOS
	    __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
	#else
	    __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
	#endif
	#else
	#error This platform needs implementation of get_thread_id()
	#endif
	    return tid;
	#else
	#error This platform needs implementation of get_thread_id()
	#endif
	*/
}
+
//! Set the current thread heap, claiming ownership for non-default heaps, and
//! register it in FLS/pthread TLS so the thread-exit destructor fires
static void
set_thread_heap(heap_t* heap) {
	global_thread_heap = heap;
	if (heap && (heap->id != 0)) {
		// NOTE(review): this assert can never fire — the enclosing condition
		// already guarantees heap->id != 0; verify the intended guard
		rpmalloc_assert(heap->id != 0, "Default heap being used");
		heap->owner_thread = get_thread_id();
	}
#if PLATFORM_WINDOWS
	FlsSetValue(fls_key, heap);
#else
	pthread_setspecific(pthread_key, heap);
#endif
}
+
//! Allocate a new heap for the calling thread and install it as the
//! thread-local heap. Returns the new heap (may be null if heap_allocate
//! fails — callers must handle that).
static heap_t*
get_thread_heap_allocate(void) {
	heap_t* heap = heap_allocate(0);
	set_thread_heap(heap);
	return heap;
}
+
//! Get the current thread heap (falls back to the default heap via the
//! thread-local's initializer until a real heap is assigned)
static inline heap_t*
get_thread_heap(void) {
	return global_thread_heap;
}
+
+//! Get the size class from given size in bytes for tiny blocks (below 16 times the minimum granularity)
+static inline uint32_t
+get_size_class_tiny(size_t size) {
+ return (((uint32_t)size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY);
+}
+
//! Get the size class index from given size in bytes. Valid for sizes served
//! by small/medium/large classes; class sizes follow a power-of-two bucket
//! scheme with four sub-buckets per power above 1024 bytes.
static inline uint32_t
get_size_class(size_t size) {
	uintptr_t minblock_count = (size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY;
	// For sizes up to 64 times the minimum granularity (i.e 1024 bytes) the size class is equal to number of such
	// blocks
	if (size <= (SMALL_GRANULARITY * 64)) {
		rpmalloc_assert(global_size_class[minblock_count].block_size >= size, "Size class misconfiguration");
		// size == 0 maps to class 1 (the smallest nonzero class)
		return (uint32_t)(minblock_count ? minblock_count : 1);
	}
	--minblock_count;
	// Calculate position of most significant bit, since minblock_count now guaranteed to be > 64 this position is
	// guaranteed to be >= 6
#if ARCH_64BIT
	const uint32_t most_significant_bit = (uint32_t)(63 - (int)rpmalloc_clz(minblock_count));
#else
	const uint32_t most_significant_bit = (uint32_t)(31 - (int)rpmalloc_clz(minblock_count));
#endif
	// Class sizes are of the bit format [..]000xxx000[..] where we already have the position of the most significant
	// bit, now calculate the subclass from the remaining two bits
	const uint32_t subclass_bits = (minblock_count >> (most_significant_bit - 2)) & 0x03;
	// +41 rebases (msb, subclass) pairs so the first bucket past 1024 bytes
	// lands at index 65, immediately after the 64 linear tiny classes
	const uint32_t class_idx = (uint32_t)((most_significant_bit << 2) + subclass_bits) + 41;
	rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx].block_size >= size),
	                "Size class misconfiguration");
	rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx - 1].block_size < size),
	                "Size class misconfiguration");
	return class_idx;
}
+
+static inline page_type_t
+get_page_type(uint32_t size_class) {
+ if (size_class < SMALL_SIZE_CLASS_COUNT)
+ return PAGE_SMALL;
+ else if (size_class < (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT))
+ return PAGE_MEDIUM;
+ else if (size_class < SIZE_CLASS_COUNT)
+ return PAGE_LARGE;
+ return PAGE_HUGE;
+}
+
+static inline size_t
+get_page_aligned_size(size_t size) {
+ size_t unalign = size % global_config.page_size;
+ if (unalign)
+ size += global_config.page_size - unalign;
+ return size;
+}
+
+////////////
+///
+/// OS entry points
+///
+//////
+
//! Tag an anonymous mapping with a human-readable name for diagnostics
//! (Linux/Android only via prctl; a no-op elsewhere, on a failed mapping,
//! or when no name is configured)
static void
os_set_page_name(void* address, size_t size) {
#if defined(__linux__) || defined(__ANDROID__)
	const char* name = os_huge_pages ? global_config.huge_page_name : global_config.page_name;
	if ((address == MAP_FAILED) || !name)
		return;
	// If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails
	// (e.g. invalid name) it is a no-op basically.
	(void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name);
#else
	(void)sizeof(size);
	(void)sizeof(address);
#endif
}
+
//! Map virtual memory of the given size with the requested alignment.
//! On success returns the aligned pointer, writes the alignment padding to
//! *offset and the total reserved size to *mapped_size. Returns 0 on failure
//! (after optionally retrying via the map_fail_callback).
//! NOTE(review): *offset is only written when alignment != 0 — callers
//! passing alignment 0 must pre-initialize it; confirm at call sites.
static void*
os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
	// Over-reserve by the alignment so a suitably aligned sub-range always exists
	size_t map_size = size + alignment;
#if PLATFORM_WINDOWS
	// Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses
	// are actually accessed". But if we enable decommit it's better to not immediately commit and instead commit per
	// page to avoid saturating the OS commit limit
#if ENABLE_DECOMMIT
	DWORD do_commit = 0;
#else
	DWORD do_commit = MEM_COMMIT;
#endif
	void* ptr =
	    VirtualAlloc(0, map_size, (os_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | do_commit, PAGE_READWRITE);
#else
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
	// Tag the mapping with an application VM tag so it is identifiable in vmmap
	int fd = (int)VM_MAKE_TAG(240U);
	if (os_huge_pages)
		fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
	void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, fd, 0);
#elif defined(MAP_HUGETLB)
	void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
	                 (os_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
#if defined(MADV_HUGEPAGE)
	// In some configurations, huge pages allocations might fail thus
	// we fallback to normal allocations and promote the region as transparent huge page
	if ((ptr == MAP_FAILED || !ptr) && os_huge_pages) {
		ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0);
		if (ptr && ptr != MAP_FAILED) {
			int prm = madvise(ptr, size, MADV_HUGEPAGE);
			(void)prm;
			rpmalloc_assert((prm == 0), "Failed to promote the page to transparent huge page");
		}
	}
#endif
	os_set_page_name(ptr, map_size);
#elif defined(MAP_ALIGNED)
	// NOTE(review): alignment exponent derived from size, not map_size — verify intent
	const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
	void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
#elif defined(MAP_ALIGN)
	caddr_t base = (os_huge_pages ? (caddr_t)(4 << 20) : 0);
	void* ptr = mmap(base, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
#else
	void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0);
#endif
	if (ptr == MAP_FAILED)
		ptr = 0;
#endif
	if (!ptr) {
		// Give the embedder a chance to release memory and retry
		if (global_memory_interface->map_fail_callback) {
			if (global_memory_interface->map_fail_callback(map_size))
				return os_mmap(size, alignment, offset, mapped_size);
		} else {
			rpmalloc_assert(ptr != 0, "Failed to map more virtual memory");
		}
		return 0;
	}
	if (alignment) {
		// Advance to the first aligned address within the reservation and
		// remember the skipped padding so unmap can rebase
		size_t padding = ((uintptr_t)ptr & (uintptr_t)(alignment - 1));
		if (padding)
			padding = alignment - padding;
		rpmalloc_assert(padding <= alignment, "Internal failure in padding");
		rpmalloc_assert(!(padding % 8), "Internal failure in padding");
		ptr = pointer_offset(ptr, padding);
		*offset = padding;
	}
	*mapped_size = map_size;
#if ENABLE_STATISTICS
	size_t page_count = map_size / global_config.page_size;
	size_t page_mapped_current =
	    atomic_fetch_add_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed) + page_count;
	size_t page_mapped_peak = atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed);
	// CAS loop to ratchet the peak counter upward
	while (page_mapped_current > page_mapped_peak) {
		if (atomic_compare_exchange_weak_explicit(&global_statistics.page_mapped_peak, &page_mapped_peak,
		                                          page_mapped_current, memory_order_relaxed, memory_order_relaxed))
			break;
	}
#if ENABLE_DECOMMIT
	size_t page_active_current =
	    atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
	size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
	while (page_active_current > page_active_peak) {
		if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
		                                          page_active_current, memory_order_relaxed, memory_order_relaxed))
			break;
	}
#endif
#endif
	return ptr;
}
+
//! Commit a range of previously reserved memory pages. On Windows this is an
//! explicit VirtualAlloc(MEM_COMMIT); on POSIX the commit is intentionally
//! left to the OS on first touch (the mprotect call is disabled), so only
//! statistics are updated. No-op when decommit is disabled at build or config time.
static void
os_mcommit(void* address, size_t size) {
#if ENABLE_DECOMMIT
	if (global_config.disable_decommit)
		return;
#if PLATFORM_WINDOWS
	if (!VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE)) {
		rpmalloc_assert(0, "Failed to commit virtual memory block");
	}
#else
	/*
	if (mprotect(address, size, PROT_READ | PROT_WRITE)) {
	    rpmalloc_assert(0, "Failed to commit virtual memory block");
	}
	*/
#endif
#if ENABLE_STATISTICS
	size_t page_count = size / global_config.page_size;
	atomic_fetch_add_explicit(&global_statistics.page_commit, page_count, memory_order_relaxed);
	size_t page_active_current =
	    atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
	size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
	// CAS loop to ratchet the peak counter upward
	while (page_active_current > page_active_peak) {
		if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
		                                          page_active_current, memory_order_relaxed, memory_order_relaxed))
			break;
	}
#endif
#endif
	// Silence unused-parameter warnings in configurations that compile out the body
	(void)sizeof(address);
	(void)sizeof(size);
}
+
//! Decommit a range of memory pages, returning physical memory to the OS
//! while keeping the address range reserved. On POSIX this uses the best
//! available madvise flavor; note the #elif chain below shares a single
//! trailing if-body (the rpmalloc_assert), so each branch opens the `if`.
static void
os_mdecommit(void* address, size_t size) {
#if ENABLE_DECOMMIT
	if (global_config.disable_decommit)
		return;
#if PLATFORM_WINDOWS
	if (!VirtualFree(address, size, MEM_DECOMMIT)) {
		rpmalloc_assert(0, "Failed to decommit virtual memory block");
	}
#else
	/*
	if (mprotect(address, size, PROT_NONE)) {
	    rpmalloc_assert(0, "Failed to decommit virtual memory block");
	}
	*/
#if defined(MADV_DONTNEED)
	if (madvise(address, size, MADV_DONTNEED)) {
#elif defined(MADV_FREE_REUSABLE)
	// Darwin: retry while the kernel reports transient EAGAIN
	int ret;
	while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN))
		errno = 0;
	if ((ret == -1) && (errno != 0)) {
#elif defined(MADV_PAGEOUT)
	if (madvise(address, size, MADV_PAGEOUT)) {
#elif defined(MADV_FREE)
	if (madvise(address, size, MADV_FREE)) {
#else
	if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
#endif
		rpmalloc_assert(0, "Failed to decommit virtual memory block");
	}
#endif
#if ENABLE_STATISTICS
	size_t page_count = size / global_config.page_size;
	atomic_fetch_add_explicit(&global_statistics.page_decommit, page_count, memory_order_relaxed);
	size_t page_active_current =
	    atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed);
	rpmalloc_assert(page_active_current >= page_count, "Decommit counter out of sync");
	(void)sizeof(page_active_current);
#endif
#else
	(void)sizeof(address);
	(void)sizeof(size);
#endif
}
+
//! Release a virtual memory range previously mapped by os_mmap. The address
//! is rebased by the alignment offset so the entire original reservation is
//! freed. When ENABLE_UNMAP is disabled the address space is intentionally
//! leaked (kept reserved for the process lifetime).
static void
os_munmap(void* address, size_t offset, size_t mapped_size) {
	(void)sizeof(mapped_size);
	// Step back over the alignment padding to the start of the reservation
	address = pointer_offset(address, -(int32_t)offset);
#if ENABLE_UNMAP
#if PLATFORM_WINDOWS
	// MEM_RELEASE requires size 0 and frees the whole reservation
	if (!VirtualFree(address, 0, MEM_RELEASE)) {
		rpmalloc_assert(0, "Failed to unmap virtual memory block");
	}
#else
	if (munmap(address, mapped_size))
		rpmalloc_assert(0, "Failed to unmap virtual memory block");
#endif
#if ENABLE_STATISTICS
	size_t page_count = mapped_size / global_config.page_size;
	atomic_fetch_sub_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed);
	atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed);
#endif
#endif
}
+
+////////////
+///
+/// Page interface
+///
+//////
+
+static inline span_t*
+page_get_span(page_t* page) {
+ return (span_t*)((uintptr_t)page & SPAN_MASK);
+}
+
+static inline size_t
+page_get_size(page_t* page) {
+ if (page->page_type == PAGE_SMALL)
+ return SMALL_PAGE_SIZE;
+ else if (page->page_type == PAGE_MEDIUM)
+ return MEDIUM_PAGE_SIZE;
+ else if (page->page_type == PAGE_LARGE)
+ return LARGE_PAGE_SIZE;
+ else
+ return page_get_span(page)->page_size;
+}
+
//! Check if the page's owning heap belongs to the calling thread. With first
//! class heaps enabled, a heap with no owner thread also counts as local.
static inline int
page_is_thread_heap(page_t* page) {
#if RPMALLOC_FIRST_CLASS_HEAPS
	return (!page->heap->owner_thread || (page->heap->owner_thread == get_thread_id()));
#else
	return (page->heap->owner_thread == get_thread_id());
#endif
}
+
+static inline block_t*
+page_block_start(page_t* page) {
+ return pointer_offset(page, PAGE_HEADER_SIZE);
+}
+
+static inline block_t*
+page_block(page_t* page, uint32_t block_index) {
+ return pointer_offset(page, PAGE_HEADER_SIZE + (page->block_size * block_index));
+}
+
+static inline uint32_t
+page_block_index(page_t* page, block_t* block) {
+ block_t* block_first = page_block_start(page);
+ return (uint32_t)pointer_diff(block, block_first) / page->block_size;
+}
+
+static inline uint32_t
+page_block_from_thread_free_list(page_t* page, uint64_t token, block_t** block) {
+ uint32_t block_index = (uint32_t)(token & 0xFFFFFFFFULL);
+ uint32_t list_count = (uint32_t)((token >> 32ULL) & 0xFFFFFFFFULL);
+ *block = list_count ? page_block(page, block_index) : 0;
+ return list_count;
+}
+
+static inline uint64_t
+page_block_to_thread_free_list(page_t* page, uint32_t block_index, uint32_t list_count) {
+ (void)sizeof(page);
+ return ((uint64_t)list_count << 32ULL) | (uint64_t)block_index;
+}
+
//! Realign a pointer into a block's interior (e.g. an aligned allocation)
//! back down to the start of the owning block
static inline block_t*
page_block_realign(page_t* page, block_t* block) {
	void* blocks_start = page_block_start(page);
	uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start);
	return pointer_offset(block, -(int32_t)(block_offset % page->block_size));
}
+
//! Pop the head block from the page's local free list and mark it used.
//! NOTE(review): assumes page->local_free is non-null — callers must check
//! before calling; confirm at call sites.
static block_t*
page_get_local_free_block(page_t* page) {
	block_t* block = page->local_free;
	page->local_free = block->next;
	--page->local_free_count;
	++page->block_used;
	return block;
}
+
//! Decommit all OS pages of this page except the first one, which holds the
//! page header and must stay resident. No-op if already decommitted.
static inline void
page_decommit_memory_pages(page_t* page) {
	if (page->is_decommitted)
		return;
	void* extra_page = pointer_offset(page, global_config.page_size);
	size_t extra_page_size = page_get_size(page) - global_config.page_size;
	global_memory_interface->memory_decommit(extra_page, extra_page_size);
	page->is_decommitted = 1;
}
+
//! Recommit the OS pages of a previously decommitted page (all but the first
//! OS page, which stayed resident). No-op if not decommitted.
static inline void
page_commit_memory_pages(page_t* page) {
	if (!page->is_decommitted)
		return;
	void* extra_page = pointer_offset(page, global_config.page_size);
	size_t extra_page_size = page_get_size(page) - global_config.page_size;
	global_memory_interface->memory_commit(extra_page, extra_page_size);
	page->is_decommitted = 0;
#if ENABLE_DECOMMIT
#if !defined(__APPLE__)
	// When page is recommitted, the blocks in the second memory page and forward
	// will be zeroed out by OS - take advantage in zalloc/calloc calls and make sure
	// blocks in first page is zeroed out
	void* first_page = pointer_offset(page, PAGE_HEADER_SIZE);
	memset(first_page, 0, global_config.page_size - PAGE_HEADER_SIZE);
	page->is_zero = 1;
#endif
#endif
}
+
//! Move a page from the heap's available list for its size class to the
//! heap's free list for its page type (all blocks have been freed). If the
//! number of free-but-committed pages overflows the threshold, decommit the
//! excess down to the retain count.
static void
page_available_to_free(page_t* page) {
	rpmalloc_assert(page->is_full == 0, "Page full flag internal failure");
	rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
	heap_t* heap = page->heap;
	// Unlink from the doubly linked available list (head vs interior)
	if (heap->page_available[page->size_class] == page) {
		heap->page_available[page->size_class] = page->next;
	} else {
		page->prev->next = page->next;
		if (page->next)
			page->next->prev = page->prev;
	}
	page->is_free = 1;
	page->is_zero = 0;
	// Push onto the singly linked free list for this page type
	page->next = heap->page_free[page->page_type];
	heap->page_free[page->page_type] = page;
	if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
		heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]);
}
+
//! Move a previously full page back to the heap's available list for its size
//! class (some blocks were freed). Clears the generic_free fast path flag
//! unless aligned blocks still require the generic free path.
static void
page_full_to_available(page_t* page) {
	rpmalloc_assert(page->is_full == 1, "Page full flag internal failure");
	rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
	heap_t* heap = page->heap;
	// Push at the head of the doubly linked available list
	page->next = heap->page_available[page->size_class];
	if (page->next)
		page->next->prev = page;
	heap->page_available[page->size_class] = page;
	page->is_full = 0;
	if (page->has_aligned_block == 0)
		page->generic_free = 0;
}
+
+static void
+page_full_to_free_on_new_heap(page_t* page, heap_t* heap) {
+ rpmalloc_assert(heap->id, "Page full to free on default heap");
+ rpmalloc_assert(page->is_full == 1, "Page full flag internal failure");
+ rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure");
+ page->is_full = 0;
+ page->is_free = 1;
+ page->heap = heap;
+ atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ page->next = heap->page_free[page->page_type];
+ heap->page_free[page->page_type] = page;
+ if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
+ heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]);
+}
+
+static void
+page_available_to_full(page_t* page) {
+ heap_t* heap = page->heap;
+ if (heap->page_available[page->size_class] == page) {
+ heap->page_available[page->size_class] = page->next;
+ } else {
+ page->prev->next = page->next;
+ if (page->next)
+ page->next->prev = page->prev;
+ }
+ page->is_full = 1;
+ page->is_zero = 0;
+ page->generic_free = 1;
+}
+
+static inline void
+page_put_local_free_block(page_t* page, block_t* block) {
+ block->next = page->local_free;
+ page->local_free = block;
+ ++page->local_free_count;
+ if (UNEXPECTED(--page->block_used == 0)) {
+ page_available_to_free(page);
+ } else if (UNEXPECTED(page->is_full != 0)) {
+ page_full_to_available(page);
+ }
+}
+
//! Adopt the deferred cross-thread free list of the page as the local free list.
//  Only done when the local free list is empty; block_used is reduced by the
//  number of adopted blocks.
static NOINLINE void
page_adopt_thread_free_block_list(page_t* page) {
	if (page->local_free)
		return;
	unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
	if (thread_free != 0) {
		// Other threads can only replace with another valid list head, this will never change to 0 in other threads
		while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_relaxed,
		                                              memory_order_relaxed))
			wait_spin();
		// Decode the packed list head into the local free list, returns list length
		page->local_free_count = page_block_from_thread_free_list(page, thread_free, &page->local_free);
		rpmalloc_assert(page->local_free_count <= page->block_used, "Page thread free list count internal failure");
		page->block_used -= page->local_free_count;
	}
}
+
//! Deallocate a block owned by another thread's heap by pushing it onto a
//  deferred free list with lock free CAS loops
static NOINLINE void
page_put_thread_free_block(page_t* page, block_t* block) {
	// Acquire fence before reading is_full, written by the owner thread
	atomic_thread_fence(memory_order_acquire);
	if (page->is_full) {
		// Page is full, put the block in the heap thread free list instead, otherwise
		// the heap will not pick up the free blocks until a thread local free happens
		heap_t* heap = page->heap;
		uintptr_t prev_head = atomic_load_explicit(&heap->thread_free[page->page_type], memory_order_relaxed);
		block->next = (void*)prev_head;
		while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page->page_type], &prev_head, (uintptr_t)block,
		                                              memory_order_relaxed, memory_order_relaxed)) {
			block->next = (void*)prev_head;
			wait_spin();
		}
	} else {
		// Push onto the page deferred free list. Head block index and list size
		// are packed into one 64-bit value (see page_block_to_thread_free_list)
		// so the whole list state can be swapped with a single CAS.
		unsigned long long prev_thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
		uint32_t block_index = page_block_index(page, block);
		rpmalloc_assert(page_block(page, block_index) == block, "Block pointer is not aligned to start of block");
		uint32_t list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
		uint64_t thread_free = page_block_to_thread_free_list(page, block_index, list_size);
		while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &prev_thread_free, thread_free,
		                                              memory_order_relaxed, memory_order_relaxed)) {
			// CAS failed, re-link against the new head and retry
			list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
			thread_free = page_block_to_thread_free_list(page, block_index, list_size);
			wait_spin();
		}
	}
}
+
+static void
+page_push_local_free_to_heap(page_t* page) {
+ // Push the page free list as the fast track list of free blocks for heap
+ page->heap->local_free[page->size_class] = page->local_free;
+ page->block_used += page->local_free_count;
+ page->local_free = 0;
+ page->local_free_count = 0;
+}
+
//! Carve the next uninitialized block out of the page and return it. For small
//  pages, additionally link the following blocks up to the next OS memory page
//  boundary into the page local free list to amortize initialization cost.
static NOINLINE void*
page_initialize_blocks(page_t* page) {
	rpmalloc_assert(page->block_initialized < page->block_count, "Block initialization internal failure");
	block_t* block = page_block(page, page->block_initialized);
	++page->block_initialized;
	++page->block_used;

	if ((page->page_type == PAGE_SMALL) && (page->block_size < (global_config.page_size >> 1))) {
		// Link up until next memory page in free list
		void* memory_page_start = (void*)((uintptr_t)block & ~(uintptr_t)(global_config.page_size - 1));
		void* memory_page_next = pointer_offset(memory_page_start, global_config.page_size);
		block_t* free_block = pointer_offset(block, page->block_size);
		block_t* first_block = free_block;
		block_t* last_block = free_block;
		uint32_t list_count = 0;
		// Never link beyond the number of remaining uninitialized blocks
		uint32_t max_list_count = page->block_count - page->block_initialized;
		while (((void*)free_block < memory_page_next) && (list_count < max_list_count)) {
			last_block = free_block;
			free_block->next = pointer_offset(free_block, page->block_size);
			free_block = free_block->next;
			++list_count;
		}
		if (list_count) {
			// Terminate the list and publish it as the page local free list
			last_block->next = 0;
			page->local_free = first_block;
			page->block_initialized += list_count;
			page->local_free_count = list_count;
		}
	}

	return block;
}
+
//! Pop a block from the given page, falling back to adopting cross-thread
//  frees and then to initializing fresh blocks. Optionally zeroes the block.
static inline RPMALLOC_ALLOCATOR void*
page_allocate_block(page_t* page, unsigned int zero) {
	unsigned int is_zero = 0;
	// Fast path, pop from the page local free list
	block_t* block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
	if (UNEXPECTED(block == 0)) {
		// Try adopting the deferred cross-thread free list
		if (atomic_load_explicit(&page->thread_free, memory_order_relaxed) != 0) {
			page_adopt_thread_free_block_list(page);
			block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
		}
		// Still nothing - carve new blocks from uninitialized page memory
		if (block == 0) {
			block = page_initialize_blocks(page);
			is_zero = page->is_zero;
		}
	}

	rpmalloc_assert(page->block_used <= page->block_count, "Page block use counter out of sync");
	// Promote remaining local free blocks to the heap fast track list if it is empty
	if (page->local_free && !page->heap->local_free[page->size_class])
		page_push_local_free_to_heap(page);

	// The page might be full when free list has been pushed to heap local free list,
	// check if there is a thread free list to adopt
	if (page->block_used == page->block_count)
		page_adopt_thread_free_block_list(page);

	if (page->block_used == page->block_count) {
		// Page is now fully utilized
		rpmalloc_assert(!page->is_full, "Page block use counter out of sync with full flag");
		page_available_to_full(page);
	}

	if (zero) {
		// An is_zero block only has its first word dirtied (free list link)
		if (!is_zero)
			memset(block, 0, page->block_size);
		else
			*(uintptr_t*)block = 0;
	}

	return block;
}
+
+////////////
+///
+/// Span interface
+///
+//////
+
//! Check if the heap owning the span belongs to the calling thread
static inline int
span_is_thread_heap(span_t* span) {
#if RPMALLOC_FIRST_CLASS_HEAPS
	// A first class heap can be detached (no owner thread) and is then
	// treated as thread local for any caller
	return (!span->heap->owner_thread || (span->heap->owner_thread == get_thread_id()));
#else
	return (span->heap->owner_thread == get_thread_id());
#endif
}
+
+static inline page_t*
+span_get_page_from_block(span_t* span, void* block) {
+ return (page_t*)((uintptr_t)block & span->page_address_mask);
+}
+
//! Find or allocate a page from the given span. Commits backing memory for
//  pages past the first one and retires the span from the partial list once
//  all its pages have been handed out.
static inline page_t*
span_allocate_page(span_t* span) {
	// Allocate path, initialize a new chunk of memory for a page in the given span
	rpmalloc_assert(span->page_initialized < span->page_count, "Page initialization internal failure");
	heap_t* heap = span->heap;
	page_t* page = pointer_offset(span, span->page_size * span->page_initialized);

#if ENABLE_DECOMMIT
	// The first page is always committed on initial span map of memory
	if (span->page_initialized)
		global_memory_interface->memory_commit(page, span->page_size);
#endif
	++span->page_initialized;

	page->page_type = span->page_type;
	page->is_zero = 1;  // freshly committed memory is zeroed by the OS
	page->heap = heap;
	rpmalloc_assert(page_is_thread_heap(page), "Page owner thread mismatch");

	if (span->page_initialized == span->page_count) {
		// Span fully utilized, move from partial to used list
		rpmalloc_assert(span == heap->span_partial[span->page_type], "Span partial tracking out of sync");
		heap->span_partial[span->page_type] = 0;

		span->next = heap->span_used[span->page_type];
		heap->span_used[span->page_type] = span;
	}

	return page;
}
+
+static NOINLINE void
+span_deallocate_block(span_t* span, page_t* page, void* block) {
+ if (UNEXPECTED(page->page_type == PAGE_HUGE)) {
+ global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
+ return;
+ }
+
+ if (page->has_aligned_block) {
+ // Realign pointer to block start
+ block = page_block_realign(page, block);
+ }
+
+ int is_thread_local = page_is_thread_heap(page);
+ if (EXPECTED(is_thread_local != 0)) {
+ page_put_local_free_block(page, block);
+ } else {
+ // Multithreaded deallocation, push to deferred deallocation list.
+ page_put_thread_free_block(page, block);
+ }
+}
+
+////////////
+///
+/// Block interface
+///
+//////
+
+static inline span_t*
+block_get_span(block_t* block) {
+ return (span_t*)((uintptr_t)block & SPAN_MASK);
+}
+
//! Deallocate the given block. Uses an inlined fast path for thread local
//  frees on plain pages; everything else funnels through span_deallocate_block.
static inline void
block_deallocate(block_t* block) {
	// Spans are SPAN_SIZE aligned, mask the pointer to locate span and page
	span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK);
	page_t* page = span_get_page_from_block(span, block);
	const int is_thread_local = page_is_thread_heap(page);

	// Optimized path for thread local free with non-huge block in page
	// that has no aligned blocks
	if (EXPECTED(is_thread_local != 0)) {
		if (EXPECTED(page->generic_free == 0)) {
			// Page is not huge, not full and has no aligned block - fast path
			block->next = page->local_free;
			page->local_free = block;
			++page->local_free_count;
			if (UNEXPECTED(--page->block_used == 0))
				page_available_to_free(page);
		} else {
			span_deallocate_block(span, page, block);
		}
	} else {
		// Cross-thread free, take the generic path
		span_deallocate_block(span, page, block);
	}
}
+
+static inline size_t
+block_usable_size(block_t* block) {
+ span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK);
+ if (EXPECTED(span->page_type <= PAGE_LARGE)) {
+ page_t* page = span_get_page_from_block(span, block);
+ void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE);
+ return page->block_size - ((size_t)pointer_diff(block, blocks_start) % page->block_size);
+ } else {
+ return ((size_t)span->page_size * (size_t)span->page_count) - (size_t)pointer_diff(block, span);
+ }
+}
+
+////////////
+///
+/// Heap interface
+///
+//////
+
+static inline void
+heap_lock_acquire(void) {
+ uintptr_t lock = 0;
+ uintptr_t this_lock = get_thread_id();
+ while (!atomic_compare_exchange_strong(&global_heap_lock, &lock, this_lock)) {
+ lock = 0;
+ wait_spin();
+ }
+}
+
//! Release the global heap lock. Must be called by the owning thread;
//  the release store publishes all list mutations done under the lock.
static inline void
heap_lock_release(void) {
	rpmalloc_assert((uintptr_t)atomic_load_explicit(&global_heap_lock, memory_order_relaxed) == get_thread_id(),
	                "Bad heap lock");
	atomic_store_explicit(&global_heap_lock, 0, memory_order_release);
}
+
+static inline heap_t*
+heap_initialize(void* block) {
+ heap_t* heap = block;
+ memset_const(heap, 0, sizeof(heap_t));
+ heap->id = 1 + atomic_fetch_add_explicit(&global_heap_id, 1, memory_order_relaxed);
+ return heap;
+}
+
+static heap_t*
+heap_allocate_new(void) {
+ if (!global_config.page_size)
+ rpmalloc_initialize(0);
+ size_t heap_size = get_page_aligned_size(sizeof(heap_t));
+ size_t offset = 0;
+ size_t mapped_size = 0;
+ block_t* block = global_memory_interface->memory_map(heap_size, 0, &offset, &mapped_size);
+#if ENABLE_DECOMMIT
+ global_memory_interface->memory_commit(block, heap_size);
+#endif
+ heap_t* heap = heap_initialize((void*)block);
+ heap->offset = (uint32_t)offset;
+ heap->mapped_size = mapped_size;
+#if ENABLE_STATISTICS
+ atomic_fetch_add_explicit(&global_statistics.heap_count, 1, memory_order_relaxed);
+#endif
+ return heap;
+}
+
//! Unmap the backing memory of the heap control structure
static void
heap_unmap(heap_t* heap) {
	global_memory_interface->memory_unmap(heap, heap->offset, heap->mapped_size);
}
+
//! Acquire a heap for the calling thread. Reuses an orphaned heap from the
//  global queue unless a first class heap is requested, otherwise maps a
//  new one. The heap is linked into the global used list.
static heap_t*
heap_allocate(int first_class) {
	heap_t* heap = 0;
	if (!first_class) {
		// Try to reuse an orphaned heap from the global queue
		heap_lock_acquire();
		heap = global_heap_queue;
		global_heap_queue = heap ? heap->next : 0;
		heap_lock_release();
	}
	if (!heap)
		heap = heap_allocate_new();
	if (heap) {
		// Link into the global list of heaps in use
		uintptr_t current_thread_id = get_thread_id();
		heap_lock_acquire();
		heap->next = global_heap_used;
		heap->prev = 0;
		if (global_heap_used)
			global_heap_used->prev = heap;
		global_heap_used = heap;
		heap_lock_release();
		// Ownership is claimed outside the lock - only this thread uses the heap
		heap->owner_thread = current_thread_id;
	}
	return heap;
}
+
//! Release the given heap: unlink it from the global used list and push it
//  onto the orphan queue for reuse by other threads
static inline void
heap_release(heap_t* heap) {
	heap_lock_acquire();
	// Unlink from the used list
	if (heap->prev)
		heap->prev->next = heap->next;
	if (heap->next)
		heap->next->prev = heap->prev;
	if (global_heap_used == heap)
		global_heap_used = heap->next;
	// Push onto the orphan queue
	heap->next = global_heap_queue;
	global_heap_queue = heap;
	heap_lock_release();
}
+
+static void
+heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count) {
+ page_t* page = heap->page_free[page_type];
+ while (page && page_retain_count) {
+ page = page->next;
+ --page_retain_count;
+ }
+ while (page && (page->is_decommitted == 0)) {
+ page_decommit_memory_pages(page);
+ --heap->page_free_commit_count[page_type];
+ page = page->next;
+ }
+}
+
+static inline void
+heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) {
+ page->size_class = size_class;
+ page->block_size = global_size_class[size_class].block_size;
+ page->block_count = global_size_class[size_class].block_count;
+ page->block_used = 0;
+ page->block_initialized = 0;
+ page->local_free = 0;
+ page->local_free_count = 0;
+ page->is_full = 0;
+ page->is_free = 0;
+ page->has_aligned_block = 0;
+ page->generic_free = 0;
+ page->heap = heap;
+ page_t* head = heap->page_available[size_class];
+ page->next = head;
+ page->prev = 0;
+ atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ if (head)
+ head->prev = page;
+ heap->page_available[size_class] = page;
+ if (page->is_decommitted)
+ page_commit_memory_pages(page);
+}
+
//! Find or allocate a span for the given page type with the given size class.
//  Returns the partially used span if one exists, otherwise maps a new
//  SPAN_SIZE aligned span and configures its page geometry.
static inline span_t*
heap_get_span(heap_t* heap, page_type_t page_type) {
	// Fast path, available span for given page type
	if (EXPECTED(heap->span_partial[page_type] != 0))
		return heap->span_partial[page_type];

	// Fallback path, map more memory
	size_t offset = 0;
	size_t mapped_size = 0;
	span_t* span = global_memory_interface->memory_map(SPAN_SIZE, SPAN_SIZE, &offset, &mapped_size);
	if (EXPECTED(span != 0)) {
		// Page geometry depends on the page type (small/medium/large)
		uint32_t page_count = 0;
		uint32_t page_size = 0;
		uintptr_t page_address_mask = 0;
		if (page_type == PAGE_SMALL) {
			page_count = SPAN_SIZE / SMALL_PAGE_SIZE;
			page_size = SMALL_PAGE_SIZE;
			page_address_mask = SMALL_PAGE_MASK;
		} else if (page_type == PAGE_MEDIUM) {
			page_count = SPAN_SIZE / MEDIUM_PAGE_SIZE;
			page_size = MEDIUM_PAGE_SIZE;
			page_address_mask = MEDIUM_PAGE_MASK;
		} else {
			page_count = SPAN_SIZE / LARGE_PAGE_SIZE;
			page_size = LARGE_PAGE_SIZE;
			page_address_mask = LARGE_PAGE_MASK;
		}
#if ENABLE_DECOMMIT
		// Only the first page is committed up front, the rest are committed
		// on demand in span_allocate_page
		global_memory_interface->memory_commit(span, page_size);
#endif
		span->heap = heap;
		span->page_type = page_type;
		span->page_count = page_count;
		span->page_size = page_size;
		span->page_address_mask = page_address_mask;
		span->offset = (uint32_t)offset;
		span->mapped_size = mapped_size;

		heap->span_partial[page_type] = span;
	}

	return span;
}
+
// Forward declarations for the mutual recursion between page acquisition
// and deferred block deallocation below
static page_t*
heap_get_page(heap_t* heap, uint32_t size_class);

static void
block_deallocate(block_t* block);
+
//! Slow path of heap_get_page: drain deferred cross-thread frees, reuse a
//  free page, or fall back to allocating a page from a span
static page_t*
heap_get_page_generic(heap_t* heap, uint32_t size_class) {
	page_type_t page_type = get_page_type(size_class);

	// Check if there is a free page from multithreaded deallocations
	uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed);
	if (UNEXPECTED(block_mt != 0)) {
		// Claim the whole deferred list with a CAS swap to zero
		while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_relaxed,
		                                              memory_order_relaxed)) {
			wait_spin();
		}
		// Deallocate each deferred block on this (owning) thread
		block_t* block = (void*)block_mt;
		while (block) {
			block_t* next_block = block->next;
			block_deallocate(block);
			block = next_block;
		}
		// Retry after processing deferred thread frees
		return heap_get_page(heap, size_class);
	}

	// Check if there is a free page
	page_t* page = heap->page_free[page_type];
	if (EXPECTED(page != 0)) {
		heap->page_free[page_type] = page->next;
		if (page->is_decommitted == 0) {
			rpmalloc_assert(heap->page_free_commit_count[page_type] > 0, "Free committed page count out of sync");
			--heap->page_free_commit_count[page_type];
		}
		heap_make_free_page_available(heap, size_class, page);
		return page;
	}
	rpmalloc_assert(heap->page_free_commit_count[page_type] == 0, "Free committed page count out of sync");

	if (heap->id == 0) {
		// Thread has not yet initialized, assign heap and try again
		rpmalloc_initialize(0);
		return heap_get_page(get_thread_heap(), size_class);
	}

	// Fallback path, find or allocate span for given size class
	// If thread was not initialized, the heap for the new span
	// will be different from the local heap variable in this scope
	// (which is the default heap) - so use span page heap instead
	span_t* span = heap_get_span(heap, page_type);
	if (EXPECTED(span != 0)) {
		page = span_allocate_page(span);
		heap_make_free_page_available(page->heap, size_class, page);
	}

	return page;
}
+
+//! Find or allocate a page for the given size class
+static page_t*
+heap_get_page(heap_t* heap, uint32_t size_class) {
+ // Fast path, available page for given size class
+ page_t* page = heap->page_available[size_class];
+ if (EXPECTED(page != 0))
+ return page;
+ return heap_get_page_generic(heap, size_class);
+}
+
+//! Pop a block from the heap local free list
+static inline RPMALLOC_ALLOCATOR void*
+heap_pop_local_free(heap_t* heap, uint32_t size_class) {
+ block_t** free_list = heap->local_free + size_class;
+ block_t* block = *free_list;
+ if (EXPECTED(block != 0))
+ *free_list = block->next;
+ return block;
+}
+
+//! Generic allocation path from heap pages, spans or new mapping
+static NOINLINE RPMALLOC_ALLOCATOR void*
+heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned int zero) {
+ page_t* page = heap_get_page(heap, size_class);
+ if (EXPECTED(page != 0))
+ return page_allocate_block(page, zero);
+ return 0;
+}
+
//! Allocation path for sizes above the largest size class. Each huge block
//  gets its own SPAN_SIZE aligned mapping with the span header in front.
static NOINLINE RPMALLOC_ALLOCATOR void*
heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
	(void)sizeof(heap);
	size_t alloc_size = get_page_aligned_size(size + SPAN_HEADER_SIZE);
	size_t offset = 0;
	size_t mapped_size = 0;
	void* block = global_memory_interface->memory_map(alloc_size, SPAN_SIZE, &offset, &mapped_size);
	if (block) {
		span_t* span = block;
#if ENABLE_DECOMMIT
		global_memory_interface->memory_commit(span, alloc_size);
#endif
		span->heap = heap;
		span->page_type = PAGE_HUGE;
		span->page_size = (uint32_t)global_config.page_size;
		span->page_count = (uint32_t)(alloc_size / global_config.page_size);
		span->page_address_mask = LARGE_PAGE_MASK;
		span->offset = (uint32_t)offset;
		span->mapped_size = mapped_size;
		// The embedded page mirrors span state so the free path can dispatch
		// on page flags without special-casing huge blocks early
		span->page.heap = heap;
		span->page.is_full = 1;
		span->page.generic_free = 1;
		span->page.page_type = PAGE_HUGE;
		// Keep track of span if first class heap
		if (!heap->owner_thread) {
			span->next = heap->span_used[PAGE_HUGE];
			heap->span_used[PAGE_HUGE] = span;
		}
		// User data starts after the span header
		void* ptr = pointer_offset(block, SPAN_HEADER_SIZE);
		if (zero)
			memset(ptr, 0, size);
		return ptr;
	}
	return 0;
}
+
+static RPMALLOC_ALLOCATOR NOINLINE void*
+heap_allocate_block_generic(heap_t* heap, size_t size, unsigned int zero) {
+ uint32_t size_class = get_size_class(size);
+ if (EXPECTED(size_class < SIZE_CLASS_COUNT)) {
+ block_t* block = heap_pop_local_free(heap, size_class);
+ if (EXPECTED(block != 0)) {
+ // Fast track with small block available in heap level local free list
+ if (zero)
+ memset(block, 0, global_size_class[size_class].block_size);
+ return block;
+ }
+
+ return heap_allocate_block_small_to_large(heap, size_class, zero);
+ }
+
+ return heap_allocate_block_huge(heap, size, zero);
+}
+
+//! Find or allocate a block of the given size
+static inline RPMALLOC_ALLOCATOR void*
+heap_allocate_block(heap_t* heap, size_t size, unsigned int zero) {
+ if (size <= (SMALL_GRANULARITY * 64)) {
+ uint32_t size_class = get_size_class_tiny(size);
+ block_t* block = heap_pop_local_free(heap, size_class);
+ if (EXPECTED(block != 0)) {
+ // Fast track with small block available in heap level local free list
+ if (zero)
+ memset(block, 0, global_size_class[size_class].block_size);
+ return block;
+ }
+ }
+ return heap_allocate_block_generic(heap, size, zero);
+}
+
//! Allocate a block with the given alignment by over-allocating and offsetting
//  into the block. Pages containing such offset pointers are flagged so the
//  free path can realign them back to the block start.
static RPMALLOC_ALLOCATOR void*
heap_allocate_block_aligned(heap_t* heap, size_t alignment, size_t size, unsigned int zero) {
	// Blocks are naturally SMALL_GRANULARITY aligned
	if (alignment <= SMALL_GRANULARITY)
		return heap_allocate_block(heap, size, zero);

#if ENABLE_VALIDATE_ARGS
	if ((size + alignment) < size) {
		errno = EINVAL;
		return 0;
	}
	if (alignment & (alignment - 1)) {
		errno = EINVAL;
		return 0;
	}
#endif
	if (alignment >= RPMALLOC_MAX_ALIGNMENT) {
		errno = EINVAL;
		return 0;
	}

	// Over-allocate so an aligned pointer always fits inside the block
	size_t align_mask = alignment - 1;
	block_t* block = heap_allocate_block(heap, size + alignment, zero);
	if ((uintptr_t)block & align_mask) {
		block = (void*)(((uintptr_t)block & ~(uintptr_t)align_mask) + alignment);
		// Mark as having aligned blocks
		span_t* span = block_get_span(block);
		page_t* page = span_get_page_from_block(span, block);
		page->has_aligned_block = 1;
		page->generic_free = 1;
	}
	return block;
}
+
//! Reallocate the given block to at least the given size. Reuses the existing
//  block when it still fits, otherwise allocates a new block and copies.
//  old_size of zero means unknown and is derived from block metadata.
static void*
heap_reallocate_block(heap_t* heap, void* block, size_t size, size_t old_size, unsigned int flags) {
	if (block) {
		// Grab the span using guaranteed span alignment
		span_t* span = block_get_span(block);
		if (EXPECTED(span->page_type <= PAGE_LARGE)) {
			// Normal sized block - locate the start of the containing block,
			// the pointer may be offset into it by an aligned allocation
			page_t* page = span_get_page_from_block(span, block);
			void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE);
			uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start);
			uint32_t block_idx = block_offset / page->block_size;
			void* block_origin = pointer_offset(blocks_start, (size_t)block_idx * page->block_size);
			if (!old_size)
				old_size = (size_t)((ptrdiff_t)page->block_size - pointer_diff(block, block_origin));
			if ((size_t)page->block_size >= size) {
				// Still fits in block, never mind trying to save memory, but preserve data if alignment changed
				if ((block != block_origin) && !(flags & RPMALLOC_NO_PRESERVE))
					memmove(block_origin, block, old_size);
				return block_origin;
			}
		} else {
			// Huge block
			void* block_start = pointer_offset(span, SPAN_HEADER_SIZE);
			if (!old_size)
				old_size = ((size_t)span->page_size * (size_t)span->page_count) - SPAN_HEADER_SIZE;
			if ((size < old_size) && (size > LARGE_BLOCK_SIZE_LIMIT)) {
				// Still fits in block and still huge, never mind trying to save memory,
				// but preserve data if alignment changed
				if ((block_start != block) && !(flags & RPMALLOC_NO_PRESERVE))
					memmove(block_start, block, old_size);
				return block_start;
			}
		}
	} else {
		old_size = 0;
	}

	// In-place reuse failed; caller asked to never move the block
	if (!!(flags & RPMALLOC_GROW_OR_FAIL))
		return 0;

	// Size is greater than block size or saves enough memory to resize, need to allocate a new block
	// and deallocate the old. Avoid hysteresis by overallocating if increase is small (below 37%)
	size_t lower_bound = old_size + (old_size >> 2) + (old_size >> 3);
	size_t new_size = (size > lower_bound) ? size : ((size > old_size) ? lower_bound : size);
	void* old_block = block;
	block = heap_allocate_block(heap, new_size, 0);
	if (block && old_block) {
		if (!(flags & RPMALLOC_NO_PRESERVE))
			memcpy(block, old_block, old_size < new_size ? old_size : new_size);
		block_deallocate(old_block);
	}

	return block;
}
+
//! Reallocate the given block to at least the given size with the given
//  alignment. Reuses the block when it is already sufficiently aligned and
//  sized, otherwise allocates a new aligned block and copies.
static void*
heap_reallocate_block_aligned(heap_t* heap, void* block, size_t alignment, size_t size, size_t old_size,
                              unsigned int flags) {
	if (alignment <= SMALL_GRANULARITY)
		return heap_reallocate_block(heap, block, size, old_size, flags);

	int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
	size_t usable_size = (block ? block_usable_size(block) : 0);
	if ((usable_size >= size) && !((uintptr_t)block & (alignment - 1))) {
		// Reuse in place unless shrinking would reclaim more than half
		// the block (and a new allocation is permitted)
		if (no_alloc || (size >= (usable_size / 2)))
			return block;
	}
	// Aligned alloc marks span as having aligned blocks
	void* old_block = block;
	block = (!no_alloc ? heap_allocate_block_aligned(heap, alignment, size, 0) : 0);
	if (EXPECTED(block != 0)) {
		if (!(flags & RPMALLOC_NO_PRESERVE) && old_block) {
			if (!old_size)
				old_size = usable_size;
			memcpy(block, old_block, old_size < size ? old_size : size);
		}
		if (EXPECTED(old_block != 0))
			block_deallocate(old_block);
	}
	return block;
}
+
//! Unmap all spans tracked by the given heap and reset its page/span lists.
//  NOTE(review): the loop bounds 3 and 4 presumably correspond to the number
//  of small/medium/large page types and that count plus PAGE_HUGE - confirm
//  against the page_type_t definition.
static void
heap_free_all(heap_t* heap) {
	for (int itype = 0; itype < 3; ++itype) {
		// Unmap partially used spans and clear per-type page state
		span_t* span = heap->span_partial[itype];
		while (span) {
			span_t* span_next = span->next;
			global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
			span = span_next;
		}
		heap->span_partial[itype] = 0;
		heap->page_free[itype] = 0;
		heap->page_free_commit_count[itype] = 0;
		atomic_store_explicit(&heap->thread_free[itype], 0, memory_order_relaxed);
	}
	for (int itype = 0; itype < 4; ++itype) {
		// Unmap fully used spans (including huge spans)
		span_t* span = heap->span_used[itype];
		while (span) {
			span_t* span_next = span->next;
			global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
			span = span_next;
		}
		heap->span_used[itype] = 0;
	}
	memset(heap->local_free, 0, sizeof(heap->local_free));
	memset(heap->page_available, 0, sizeof(heap->page_available));

#if ENABLE_STATISTICS
	// TODO: Fix
#endif
}
+
+////////////
+///
+/// Extern interface
+///
+//////
+
+int
+rpmalloc_is_thread_initialized(void) {
+ return (get_thread_heap() != global_heap_default) ? 1 : 0;
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, size, 0);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpzalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, size, 1);
+}
+
//! Free the given block. Null pointers are ignored.
extern inline void
rpfree(void* ptr) {
	if (EXPECTED(ptr != 0))
		block_deallocate(ptr);
}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpcalloc(size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block(heap, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rprealloc(void* ptr, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return ptr;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_reallocate_block(heap, ptr, size, 0, 0);
+}
+
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment < size) || (alignment > _memory_page_size)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_reallocate_block_aligned(heap, ptr, alignment, size, oldsize, flags);
+}
+
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_alloc(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+extern RPMALLOC_ALLOCATOR void*
+rpaligned_zalloc(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpaligned_calloc(size_t alignment, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmemalign(size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+extern inline int
+rpposix_memalign(void** memptr, size_t alignment, size_t size) {
+ heap_t* heap = get_thread_heap();
+ if (memptr)
+ *memptr = heap_allocate_block_aligned(heap, alignment, size, 0);
+ else
+ return EINVAL;
+ return *memptr ? 0 : ENOMEM;
+}
+
//! Query the usable size of the given allocation. Null yields zero.
extern inline size_t
rpmalloc_usable_size(void* ptr) {
	if (!ptr)
		return 0;
	return block_usable_size(ptr);
}
+
+////////////
+///
+/// Initialization and finalization
+///
+//////
+
//! Thread-local storage destructor, finalizes the thread's heap on thread exit
static void
rpmalloc_thread_destructor(void* value) {
	// If this is called on main thread assume it means rpmalloc_finalize
	// has not been called and shutdown is forced (through _exit) or unclean
	if (get_thread_id() == global_main_thread_id)
		return;
	// value is the TLS slot content; only finalize if a heap was attached
	if (value)
		rpmalloc_thread_finalize();
}
+
+extern int
+rpmalloc_initialize_config(rpmalloc_interface_t* memory_interface, rpmalloc_config_t* config) {
+ if (global_rpmalloc_initialized) {
+ rpmalloc_thread_initialize();
+ if (config)
+ *config = global_config;
+ return 0;
+ }
+
+ if (config)
+ global_config = *config;
+
+ int result = rpmalloc_initialize(memory_interface);
+
+ if (config)
+ *config = global_config;
+
+ return result;
+}
+
//! Initialize the allocator: install the memory interface, query OS page
//  geometry, optionally enable huge pages, and register the per-thread
//  destructor. Safe to call multiple times; repeat calls only attach the
//  calling thread.
extern int
rpmalloc_initialize(rpmalloc_interface_t* memory_interface) {
	if (global_rpmalloc_initialized) {
		rpmalloc_thread_initialize();
		return 0;
	}

	global_rpmalloc_initialized = 1;

	// Install the memory interface, falling back to the OS implementation
	// when map/unmap are not both provided
	global_memory_interface = memory_interface ? memory_interface : &global_memory_interface_default;
	if (!global_memory_interface->memory_map || !global_memory_interface->memory_unmap) {
		global_memory_interface->memory_map = os_mmap;
		global_memory_interface->memory_commit = os_mcommit;
		global_memory_interface->memory_decommit = os_mdecommit;
		global_memory_interface->memory_unmap = os_munmap;
	}

	// Query OS mapping granularity and page size
#if PLATFORM_WINDOWS
	SYSTEM_INFO system_info;
	memset(&system_info, 0, sizeof(system_info));
	GetSystemInfo(&system_info);
	os_map_granularity = system_info.dwAllocationGranularity;
#else
	os_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
#endif

#if PLATFORM_WINDOWS
	os_page_size = system_info.dwPageSize;
#else
	os_page_size = os_map_granularity;
#endif
	if (global_config.enable_huge_pages) {
		// Probe OS support for huge/large pages and widen page size and
		// mapping granularity accordingly
#if PLATFORM_WINDOWS
		// Large pages on Windows require the SeLockMemoryPrivilege
		HANDLE token = 0;
		size_t large_page_minimum = GetLargePageMinimum();
		if (large_page_minimum)
			OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
		if (token) {
			LUID luid;
			if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
				TOKEN_PRIVILEGES token_privileges;
				memset(&token_privileges, 0, sizeof(token_privileges));
				token_privileges.PrivilegeCount = 1;
				token_privileges.Privileges[0].Luid = luid;
				token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
				if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
					if (GetLastError() == ERROR_SUCCESS)
						os_huge_pages = 1;
				}
			}
			CloseHandle(token);
		}
		if (os_huge_pages) {
			if (large_page_minimum > os_page_size)
				os_page_size = large_page_minimum;
			if (large_page_minimum > os_map_granularity)
				os_map_granularity = large_page_minimum;
		}
#elif defined(__linux__)
		// Read the configured huge page size from /proc/meminfo
		size_t huge_page_size = 0;
		FILE* meminfo = fopen("/proc/meminfo", "r");
		if (meminfo) {
			char line[128];
			while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
				line[sizeof(line) - 1] = 0;
				if (strstr(line, "Hugepagesize:"))
					huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
			}
			fclose(meminfo);
		}
		if (huge_page_size) {
			os_huge_pages = 1;
			os_page_size = huge_page_size;
			os_map_granularity = huge_page_size;
		}
#elif defined(__FreeBSD__)
		int rc;
		size_t sz = sizeof(rc);

		// Superpages are used transparently when pg_ps_enabled is set
		if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
			os_huge_pages = 1;
			os_page_size = 2 * 1024 * 1024;
			os_map_granularity = os_page_size;
		}
#elif defined(__APPLE__) || defined(__NetBSD__)
		os_huge_pages = 1;
		os_page_size = 2 * 1024 * 1024;
		os_map_granularity = os_page_size;
#endif
	} else {
		os_huge_pages = 0;
	}

	// Report back whether huge pages are actually in effect
	global_config.enable_huge_pages = os_huge_pages;

	// Never run with a configured page size smaller than the OS page size
	if (!memory_interface || (global_config.page_size < os_page_size))
		global_config.page_size = os_page_size;

	// Decommit is not worthwhile with huge pages or very large page sizes
	if (global_config.enable_huge_pages || global_config.page_size > (256 * 1024))
		global_config.disable_decommit = 1;

#if defined(__linux__) || defined(__ANDROID__)
	if (global_config.disable_thp)
		(void)prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
#endif

	// Register a destructor so exiting threads release their heap
#ifdef _WIN32
	fls_key = FlsAlloc(&rpmalloc_thread_destructor);
#else
	pthread_key_create(&pthread_key, rpmalloc_thread_destructor);
#endif

	global_main_thread_id = get_thread_id();

	rpmalloc_thread_initialize();

	return 0;
}
+
+extern const rpmalloc_config_t*
+rpmalloc_config(void) {
+ return &global_config;
+}
+
+extern void
+rpmalloc_finalize(void) {
+ rpmalloc_thread_finalize();
+
+ if (global_config.unmap_on_finalize) {
+ heap_t* heap = global_heap_queue;
+ global_heap_queue = 0;
+ while (heap) {
+ heap_t* heap_next = heap->next;
+ heap_free_all(heap);
+ heap_unmap(heap);
+ heap = heap_next;
+ }
+ heap = global_heap_used;
+ global_heap_used = 0;
+ while (heap) {
+ heap_t* heap_next = heap->next;
+ heap_free_all(heap);
+ heap_unmap(heap);
+ heap = heap_next;
+ }
+#if ENABLE_STATISTICS
+ memset(&global_statistics, 0, sizeof(global_statistics));
+#endif
+ }
+
+#ifdef _WIN32
+ FlsFree(fls_key);
+ fls_key = 0;
+#else
+ pthread_key_delete(pthread_key);
+ pthread_key = 0;
+#endif
+
+ global_main_thread_id = 0;
+ global_rpmalloc_initialized = 0;
+}
+
+extern void
+rpmalloc_thread_initialize(void) {
+ if (get_thread_heap() == global_heap_default)
+ get_thread_heap_allocate();
+}
+
+extern void
+rpmalloc_thread_finalize(void) {
+ heap_t* heap = get_thread_heap();
+ if (heap != global_heap_default) {
+ heap_release(heap);
+ set_thread_heap(global_heap_default);
+ }
+}
+
//! Collect deferred thread-local work. Intentionally a no-op in this
//! implementation; the symbol is kept for API compatibility with callers.
extern void
rpmalloc_thread_collect(void) {
}
+
//! Print global allocator statistics to the given FILE* (passed as void* to keep
//! stdio out of the public header). No-op unless built with ENABLE_STATISTICS.
void
rpmalloc_dump_statistics(void* file) {
#if ENABLE_STATISTICS
	FILE* out = (FILE*)file;
	// One line per counter; relaxed loads suffice for a diagnostic snapshot
#define RPMALLOC_DUMP_COUNTER(desc, counter)                                                                 \
	fprintf(out, desc ": %llu\n",                                                                            \
	        (unsigned long long)atomic_load_explicit(&global_statistics.counter, memory_order_relaxed))
	RPMALLOC_DUMP_COUNTER("Mapped pages", page_mapped);
	RPMALLOC_DUMP_COUNTER("Mapped pages (peak)", page_mapped_peak);
	RPMALLOC_DUMP_COUNTER("Active pages", page_active);
	RPMALLOC_DUMP_COUNTER("Active pages (peak)", page_active_peak);
	RPMALLOC_DUMP_COUNTER("Pages committed", page_commit);
	RPMALLOC_DUMP_COUNTER("Pages decommitted", page_decommit);
	RPMALLOC_DUMP_COUNTER("Heaps created", heap_count);
#undef RPMALLOC_DUMP_COUNTER
#else
	(void)sizeof(file);
#endif
}
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+rpmalloc_heap_t*
+rpmalloc_heap_acquire(void) {
+ // Must be a pristine heap from newly mapped memory pages, or else memory blocks
+ // could already be allocated from the heap which would (wrongly) be released when
+ // heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be
+ // pristine from the dedicated orphan list can be used.
+ heap_t* heap = heap_allocate(1);
+ rpmalloc_assume(heap != 0);
+ heap->owner_thread = 0;
+ return heap;
+}
+
+void
+rpmalloc_heap_release(rpmalloc_heap_t* heap) {
+ if (heap)
+ heap_release(heap);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block(heap, size, 0);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block_aligned(heap, alignment, size, 0);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ return heap_allocate_block(heap, total, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
+ size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+ int err = SizeTMult(num, size, &total);
+ if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#else
+ int err = __builtin_umull_overflow(num, size, &total);
+ if (err || (total >= MAX_ALLOC_SIZE)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+#else
+ total = num * size;
+#endif
+ return heap_allocate_block_aligned(heap, alignment, total, 1);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return ptr;
+ }
+#endif
+ return heap_reallocate_block(heap, ptr, size, 0, flags);
+}
+
+RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+ if ((size + alignment < size) || (alignment > _memory_page_size)) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_reallocate_block_aligned(heap, ptr, alignment, size, 0, flags);
+}
+
+void
+rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
+ (void)sizeof(heap);
+ block_deallocate(ptr);
+}
+
//! Free all memory allocated by the heap. Every block allocated from this heap
//! becomes invalid after this call; the heap itself remains usable.
void
rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
	heap_free_all(heap);
}
+
+extern inline void
+rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
+ heap_t* prev_heap = get_thread_heap();
+ if (prev_heap != heap) {
+ set_thread_heap(heap);
+ if (prev_heap)
+ heap_release(prev_heap);
+ }
+}
+
+rpmalloc_heap_t*
+rpmalloc_get_heap_for_ptr(void* ptr) {
+ // Grab the span, and then the heap from the span
+ span_t* span = (span_t*)((uintptr_t)ptr & SPAN_MASK);
+ if (span)
+ return span_get_page_from_block(span, ptr)->heap;
+ return 0;
+}
+
+#endif
+
+#include "malloc.c"