Diffstat (limited to 'thirdparty/rpmalloc/rpmalloc.c')
| -rw-r--r-- | thirdparty/rpmalloc/rpmalloc.c | 2341 |
1 file changed, 2341 insertions, 0 deletions
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c new file mode 100644 index 000000000..7aecfb0f4 --- /dev/null +++ b/thirdparty/rpmalloc/rpmalloc.c @@ -0,0 +1,2341 @@ +/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias + * Jansson + * + * This library provides a cross-platform lock free thread caching malloc + * implementation in C11. The latest source code is always available at + * + * https://github.com/mjansson/rpmalloc + * + * This library is put in the public domain; you can redistribute it and/or + * modify it without any restrictions. + * + */ + +#include "rpmalloc.h" + +#include <errno.h> +#include <string.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdatomic.h> + +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wunused-macros" +#pragma clang diagnostic ignored "-Wunused-function" +#if __has_warning("-Wreserved-identifier") +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif +#if __has_warning("-Wstatic-in-inline") +#pragma clang diagnostic ignored "-Wstatic-in-inline" +#endif +#if __has_warning("-Wunsafe-buffer-usage") +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-macros" +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) +#define PLATFORM_WINDOWS 1 +#define PLATFORM_POSIX 0 +#else +#define PLATFORM_WINDOWS 0 +#define PLATFORM_POSIX 1 +#endif + +#if defined(_MSC_VER) +#define NOINLINE __declspec(noinline) +#else +#define NOINLINE __attribute__((noinline)) +#endif + +#if PLATFORM_WINDOWS +#include <windows.h> +#include <fibersapi.h> +static DWORD fls_key; +#endif +#if PLATFORM_POSIX +#include <sys/mman.h> +#include <sched.h> +#include <unistd.h> +#include <pthread.h> +static pthread_key_t pthread_key; +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#define MAP_HUGETLB MAP_ALIGNED_SUPER +#ifndef PROT_MAX +#define PROT_MAX(f) 0 +#endif +#else +#define PROT_MAX(f) 0 +#endif +#ifdef __sun +extern int +madvise(caddr_t, size_t, int); +#endif +#ifndef MAP_UNINITIALIZED +#define MAP_UNINITIALIZED 0 +#endif +#endif + +#if defined(__linux__) || defined(__ANDROID__) +#include <sys/prctl.h> +#if !defined(PR_SET_VMA) +#define PR_SET_VMA 0x53564d41 +#define PR_SET_VMA_ANON_NAME 0 +#endif +#endif +#if defined(__APPLE__) +#include <TargetConditionals.h> +#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR +#include <mach/mach_vm.h> +#include <mach/vm_statistics.h> +#endif +#include <pthread.h> +#endif +#if defined(__HAIKU__) || defined(__TINYC__) +#include <pthread.h> +#endif + +#include <limits.h> +#if (INTPTR_MAX > INT32_MAX) +#define ARCH_64BIT 1 +#define ARCH_32BIT 0 +#else +#define ARCH_64BIT 0 +#define ARCH_32BIT 1 +#endif + +#if !defined(__has_builtin) +#define __has_builtin(b) 0 +#endif + +#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs)) +#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second)) + +//////////// +/// +/// Build time configurable limits +/// +////// + +#ifndef ENABLE_VALIDATE_ARGS +//! Enable validation of args to public entry points +#define ENABLE_VALIDATE_ARGS 0 +#endif +#ifndef ENABLE_ASSERTS +//! Enable asserts +#define ENABLE_ASSERTS 0 +#endif +#ifndef ENABLE_UNMAP +//! Enable unmapping memory pages +#define ENABLE_UNMAP 1 +#endif +#ifndef ENABLE_DECOMMIT +//! 
Enable decommitting memory pages +#define ENABLE_DECOMMIT 1 +#endif +#ifndef ENABLE_DYNAMIC_LINK +//! Enable building as dynamic library +#define ENABLE_DYNAMIC_LINK 0 +#endif +#ifndef ENABLE_OVERRIDE +//! Enable standard library malloc/free/new/delete overrides +#define ENABLE_OVERRIDE 1 +#endif +#ifndef ENABLE_STATISTICS +//! Enable statistics +#define ENABLE_STATISTICS 0 +#endif + +//////////// +/// +/// Built in size configurations +/// +////// + +#define PAGE_HEADER_SIZE 128 +#define SPAN_HEADER_SIZE PAGE_HEADER_SIZE + +#define SMALL_GRANULARITY 16 + +#define SMALL_BLOCK_SIZE_LIMIT (4 * 1024) +#define MEDIUM_BLOCK_SIZE_LIMIT (256 * 1024) +#define LARGE_BLOCK_SIZE_LIMIT (8 * 1024 * 1024) + +#define SMALL_SIZE_CLASS_COUNT 73 +#define MEDIUM_SIZE_CLASS_COUNT 24 +#define LARGE_SIZE_CLASS_COUNT 20 +#define SIZE_CLASS_COUNT (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT + LARGE_SIZE_CLASS_COUNT) + +#define SMALL_PAGE_SIZE_SHIFT 16 +#define SMALL_PAGE_SIZE (1 << SMALL_PAGE_SIZE_SHIFT) +#define SMALL_PAGE_MASK (~((uintptr_t)SMALL_PAGE_SIZE - 1)) +#define MEDIUM_PAGE_SIZE_SHIFT 22 +#define MEDIUM_PAGE_SIZE (1 << MEDIUM_PAGE_SIZE_SHIFT) +#define MEDIUM_PAGE_MASK (~((uintptr_t)MEDIUM_PAGE_SIZE - 1)) +#define LARGE_PAGE_SIZE_SHIFT 26 +#define LARGE_PAGE_SIZE (1 << LARGE_PAGE_SIZE_SHIFT) +#define LARGE_PAGE_MASK (~((uintptr_t)LARGE_PAGE_SIZE - 1)) + +#define SPAN_SIZE (256 * 1024 * 1024) +#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1))) + +//////////// +/// +/// Utility macros +/// +////// + +#if ENABLE_ASSERTS +#undef NDEBUG +#if defined(_MSC_VER) && !defined(_DEBUG) +#define _DEBUG +#endif +#include <assert.h> +#define RPMALLOC_TOSTRING_M(x) #x +#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) +#define rpmalloc_assert(truth, message) \ + do { \ + if (!(truth)) { \ + assert((truth) && message); \ + } \ + } while (0) +#else +#define rpmalloc_assert(truth, message) \ + do { \ + } while (0) +#endif + +#if __has_builtin(__builtin_assume) +#define rpmalloc_assume(cond) __builtin_assume(cond) +#elif defined(__GNUC__) +#define rpmalloc_assume(cond) \ + do { \ + if (!__builtin_expect(cond, 0)) \ + __builtin_unreachable(); \ + } while (0) +#elif defined(_MSC_VER) +#define rpmalloc_assume(cond) __assume(cond) +#else +#define rpmalloc_assume(cond) 0 +#endif + +//////////// +/// +/// Statistics +/// +////// + +#if ENABLE_STATISTICS + +typedef struct rpmalloc_statistics_t { + atomic_size_t page_mapped; + atomic_size_t page_mapped_peak; + atomic_size_t page_commit; + atomic_size_t page_decommit; + atomic_size_t page_active; + atomic_size_t page_active_peak; + atomic_size_t heap_count; +} rpmalloc_statistics_t; + +static rpmalloc_statistics_t global_statistics; + +#else + +#endif + +//////////// +/// +/// Low level abstractions +/// +////// + +static inline size_t +rpmalloc_clz(uintptr_t x) { +#if ARCH_64BIT +#if defined(_MSC_VER) && !defined(__clang__) + return (size_t)_lzcnt_u64(x); +#else + return (size_t)__builtin_clzll(x); +#endif +#else +#if defined(_MSC_VER) && !defined(__clang__) + return (size_t)_lzcnt_u32(x); +#else + return (size_t)__builtin_clzl(x); +#endif +#endif +} + +static inline void +wait_spin(void) { +#if defined(_MSC_VER) +#if defined(_M_ARM64) + __yield(); +#else + _mm_pause(); +#endif +#elif defined(__x86_64__) || defined(__i386__) + __asm__ volatile("pause" ::: "memory"); +#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) + __asm__ volatile("yield" ::: "memory"); +#elif defined(__powerpc__) || defined(__powerpc64__) + // No idea if ever been compiled in such 
archs but ... as precaution + __asm__ volatile("or 27,27,27"); +#elif defined(__sparc__) + __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); +#else + struct timespec ts = {0}; + nanosleep(&ts, 0); +#endif +} + +#if defined(__GNUC__) || defined(__clang__) + +#define EXPECTED(x) __builtin_expect((x), 1) +#define UNEXPECTED(x) __builtin_expect((x), 0) + +#else + +#define EXPECTED(x) x +#define UNEXPECTED(x) x + +#endif +#if defined(__GNUC__) || defined(__clang__) + +#if __has_builtin(__builtin_memcpy_inline) +#define memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s) +#else +#define memcpy_const(x, y, s) \ + do { \ + _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \ + memcpy(x, y, s); \ + } while (0) +#endif + +#if __has_builtin(__builtin_memset_inline) +#define memset_const(x, y, s) __builtin_memset_inline(x, y, s) +#else +#define memset_const(x, y, s) \ + do { \ + _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), "len must be a constant integer"); \ + memset(x, y, s); \ + } while (0) +#endif +#else +#define memcpy_const(x, y, s) memcpy(x, y, s) +#define memset_const(x, y, s) memset(x, y, s) +#endif + +//////////// +/// +/// Data types +/// +////// + +//! A memory heap, per thread +typedef struct heap_t heap_t; +//! Span of memory pages +typedef struct span_t span_t; +//! Memory page +typedef struct page_t page_t; +//! Memory block +typedef struct block_t block_t; +//! Size class for a memory block +typedef struct size_class_t size_class_t; + +//! Memory page type +typedef enum page_type_t { + PAGE_SMALL, // 64KiB + PAGE_MEDIUM, // 4MiB + PAGE_LARGE, // 64MiB + PAGE_HUGE +} page_type_t; + +//! Block size class +struct size_class_t { + //! Size of blocks in this class + uint32_t block_size; + //! Number of blocks in each chunk + uint32_t block_count; +}; + +//! A memory block +struct block_t { + //! Next block in list + block_t* next; +}; + +//! A page contains blocks of a given size +struct page_t { + //! Size class of blocks + uint32_t size_class; + //! Block size + uint32_t block_size; + //! Block count + uint32_t block_count; + //! Block initialized count + uint32_t block_initialized; + //! Block used count + uint32_t block_used; + //! Page type + page_type_t page_type; + //! Flag set if part of heap full list + uint32_t is_full : 1; + //! Flag set if part of heap free list + uint32_t is_free : 1; + //! Flag set if blocks are zero initialied + uint32_t is_zero : 1; + //! Flag set if memory pages have been decommitted + uint32_t is_decommitted : 1; + //! Flag set if containing aligned blocks + uint32_t has_aligned_block : 1; + //! Fast combination flag for either huge, fully allocated or has aligned blocks + uint32_t generic_free : 1; + //! Local free list count + uint32_t local_free_count; + //! Local free list + block_t* local_free; + //! Owning heap + heap_t* heap; + //! Next page in list + page_t* next; + //! Previous page in list + page_t* prev; + //! Multithreaded free list, block index is in low 32 bit, list count is high 32 bit + atomic_ullong thread_free; +}; + +//! A span contains pages of a given type +struct span_t { + //! Page header + page_t page; + //! Owning heap + heap_t* heap; + //! Page address mask + uintptr_t page_address_mask; + //! Number of pages initialized + uint32_t page_initialized; + //! Number of pages in use + uint32_t page_count; + //! Number of bytes per page + uint32_t page_size; + //! Page type + page_type_t page_type; + //! 
Offset to start of mapped memory region + uint32_t offset; + //! Mapped size + uint64_t mapped_size; + //! Next span in list + span_t* next; +}; + +// Control structure for a heap, either a thread heap or a first class heap if enabled +struct heap_t { + //! Owning thread ID + uintptr_t owner_thread; + //! Heap local free list for small size classes + block_t* local_free[SIZE_CLASS_COUNT]; + //! Available non-full pages for each size class + page_t* page_available[SIZE_CLASS_COUNT]; + //! Free pages for each page type + page_t* page_free[3]; + //! Free but still committed page count for each page tyoe + uint32_t page_free_commit_count[3]; + //! Multithreaded free list + atomic_uintptr_t thread_free[3]; + //! Available partially initialized spans for each page type + span_t* span_partial[3]; + //! Spans in full use for each page type + span_t* span_used[4]; + //! Next heap in queue + heap_t* next; + //! Previous heap in queue + heap_t* prev; + //! Heap ID + uint32_t id; + //! Finalization state flag + uint32_t finalize; + //! Memory map region offset + uint32_t offset; + //! Memory map size + size_t mapped_size; +}; + +_Static_assert(sizeof(page_t) <= PAGE_HEADER_SIZE, "Invalid page header size"); +_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "Invalid span header size"); +_Static_assert(sizeof(heap_t) <= 4096, "Invalid heap size"); + +//////////// +/// +/// Global data +/// +////// + +//! Fallback heap +static RPMALLOC_CACHE_ALIGNED heap_t global_heap_fallback; +//! Default heap +static heap_t* global_heap_default = &global_heap_fallback; +//! Available heaps +static heap_t* global_heap_queue; +//! In use heaps +static heap_t* global_heap_used; +//! Lock for heap queue +static atomic_uintptr_t global_heap_lock; +//! Heap ID counter +static atomic_uint global_heap_id = 1; +//! Initialized flag +static int global_rpmalloc_initialized; +//! Memory interface +static rpmalloc_interface_t* global_memory_interface; +//! Default memory interface +static rpmalloc_interface_t global_memory_interface_default; +//! Current configuration +static rpmalloc_config_t global_config = {0}; +//! Main thread ID +static uintptr_t global_main_thread_id; + +//! 
Size classes +#define SCLASS(n) \ + { (n * SMALL_GRANULARITY), (SMALL_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) } +#define MCLASS(n) \ + { (n * SMALL_GRANULARITY), (MEDIUM_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) } +#define LCLASS(n) \ + { (n * SMALL_GRANULARITY), (LARGE_PAGE_SIZE - PAGE_HEADER_SIZE) / (n * SMALL_GRANULARITY) } +static const size_class_t global_size_class[SIZE_CLASS_COUNT] = { + SCLASS(1), SCLASS(1), SCLASS(2), SCLASS(3), SCLASS(4), SCLASS(5), SCLASS(6), + SCLASS(7), SCLASS(8), SCLASS(9), SCLASS(10), SCLASS(11), SCLASS(12), SCLASS(13), + SCLASS(14), SCLASS(15), SCLASS(16), SCLASS(17), SCLASS(18), SCLASS(19), SCLASS(20), + SCLASS(21), SCLASS(22), SCLASS(23), SCLASS(24), SCLASS(25), SCLASS(26), SCLASS(27), + SCLASS(28), SCLASS(29), SCLASS(30), SCLASS(31), SCLASS(32), SCLASS(33), SCLASS(34), + SCLASS(35), SCLASS(36), SCLASS(37), SCLASS(38), SCLASS(39), SCLASS(40), SCLASS(41), + SCLASS(42), SCLASS(43), SCLASS(44), SCLASS(45), SCLASS(46), SCLASS(47), SCLASS(48), + SCLASS(49), SCLASS(50), SCLASS(51), SCLASS(52), SCLASS(53), SCLASS(54), SCLASS(55), + SCLASS(56), SCLASS(57), SCLASS(58), SCLASS(59), SCLASS(60), SCLASS(61), SCLASS(62), + SCLASS(63), SCLASS(64), SCLASS(80), SCLASS(96), SCLASS(112), SCLASS(128), SCLASS(160), + SCLASS(192), SCLASS(224), SCLASS(256), MCLASS(320), MCLASS(384), MCLASS(448), MCLASS(512), + MCLASS(640), MCLASS(768), MCLASS(896), MCLASS(1024), MCLASS(1280), MCLASS(1536), MCLASS(1792), + MCLASS(2048), MCLASS(2560), MCLASS(3072), MCLASS(3584), MCLASS(4096), MCLASS(5120), MCLASS(6144), + MCLASS(7168), MCLASS(8192), MCLASS(10240), MCLASS(12288), MCLASS(14336), MCLASS(16384), LCLASS(20480), + LCLASS(24576), LCLASS(28672), LCLASS(32768), LCLASS(40960), LCLASS(49152), LCLASS(57344), LCLASS(65536), + LCLASS(81920), LCLASS(98304), LCLASS(114688), LCLASS(131072), LCLASS(163840), LCLASS(196608), LCLASS(229376), + LCLASS(262144), LCLASS(327680), LCLASS(393216), LCLASS(458752), LCLASS(524288)}; + +//! Threshold number of pages for when free pages are decommitted +static uint32_t global_page_free_overflow[4] = {16, 8, 2, 0}; + +//! Number of pages to retain when free page threshold overflows +static uint32_t global_page_free_retain[4] = {4, 2, 1, 0}; + +//! OS huge page support +static int os_huge_pages; +//! OS memory map granularity +static size_t os_map_granularity; +//! OS memory page size +static size_t os_page_size; + +//////////// +/// +/// Thread local heap and ID +/// +////// + +//! Current thread heap +#if defined(_MSC_VER) && !defined(__clang__) +#define TLS_MODEL +#define _Thread_local __declspec(thread) +#else +// #define TLS_MODEL __attribute__((tls_model("initial-exec"))) +#define TLS_MODEL +#endif +static _Thread_local heap_t* global_thread_heap TLS_MODEL = &global_heap_fallback; + +static heap_t* +heap_allocate(int first_class); + +static void +heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count); + +//! 
Fast thread ID +static inline uintptr_t +get_thread_id(void) { +#if defined(_WIN32) + return (uintptr_t)((void*)NtCurrentTeb()); +#else + void* thp = __builtin_thread_pointer(); + return (uintptr_t)thp; +#endif + /* + #elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) + uintptr_t tid; + #if defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r"(tid) : :); + #elif defined(__x86_64__) + #if defined(__MACH__) + __asm__("movq %%gs:0, %0" : "=r"(tid) : :); + #else + __asm__("movq %%fs:0, %0" : "=r"(tid) : :); + #endif + #elif defined(__arm__) + __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid)); + #elif defined(__aarch64__) + #if defined(__MACH__) + // tpidr_el0 likely unused, always return 0 on iOS + __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid)); + #else + __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid)); + #endif + #else + #error This platform needs implementation of get_thread_id() + #endif + return tid; + #else + #error This platform needs implementation of get_thread_id() + #endif + */ +} + +//! Set the current thread heap +static void +set_thread_heap(heap_t* heap) { + global_thread_heap = heap; + if (heap && (heap->id != 0)) { + rpmalloc_assert(heap->id != 0, "Default heap being used"); + heap->owner_thread = get_thread_id(); + } +#if PLATFORM_WINDOWS + FlsSetValue(fls_key, heap); +#else + pthread_setspecific(pthread_key, heap); +#endif +} + +static heap_t* +get_thread_heap_allocate(void) { + heap_t* heap = heap_allocate(0); + set_thread_heap(heap); + return heap; +} + +//! Get the current thread heap +static inline heap_t* +get_thread_heap(void) { + return global_thread_heap; +} + +//! Get the size class from given size in bytes for tiny blocks (below 16 times the minimum granularity) +static inline uint32_t +get_size_class_tiny(size_t size) { + return (((uint32_t)size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY); +} + +//! Get the size class from given size in bytes +static inline uint32_t +get_size_class(size_t size) { + uintptr_t minblock_count = (size + (SMALL_GRANULARITY - 1)) / SMALL_GRANULARITY; + // For sizes up to 64 times the minimum granularity (i.e 1024 bytes) the size class is equal to number of such + // blocks + if (size <= (SMALL_GRANULARITY * 64)) { + rpmalloc_assert(global_size_class[minblock_count].block_size >= size, "Size class misconfiguration"); + return (uint32_t)(minblock_count ? minblock_count : 1); + } + --minblock_count; + // Calculate position of most significant bit, since minblock_count now guaranteed to be > 64 this position is + // guaranteed to be >= 6 +#if ARCH_64BIT + const uint32_t most_significant_bit = (uint32_t)(63 - (int)rpmalloc_clz(minblock_count)); +#else + const uint32_t most_significant_bit = (uint32_t)(31 - (int)rpmalloc_clz(minblock_count)); +#endif + // Class sizes are of the bit format [..]000xxx000[..] 
where we already have the position of the most significant + // bit, now calculate the subclass from the remaining two bits + const uint32_t subclass_bits = (minblock_count >> (most_significant_bit - 2)) & 0x03; + const uint32_t class_idx = (uint32_t)((most_significant_bit << 2) + subclass_bits) + 41; + rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx].block_size >= size), + "Size class misconfiguration"); + rpmalloc_assert((class_idx >= SIZE_CLASS_COUNT) || (global_size_class[class_idx - 1].block_size < size), + "Size class misconfiguration"); + return class_idx; +} + +static inline page_type_t +get_page_type(uint32_t size_class) { + if (size_class < SMALL_SIZE_CLASS_COUNT) + return PAGE_SMALL; + else if (size_class < (SMALL_SIZE_CLASS_COUNT + MEDIUM_SIZE_CLASS_COUNT)) + return PAGE_MEDIUM; + else if (size_class < SIZE_CLASS_COUNT) + return PAGE_LARGE; + return PAGE_HUGE; +} + +static inline size_t +get_page_aligned_size(size_t size) { + size_t unalign = size % global_config.page_size; + if (unalign) + size += global_config.page_size - unalign; + return size; +} + +//////////// +/// +/// OS entry points +/// +////// + +static void +os_set_page_name(void* address, size_t size) { +#if defined(__linux__) || defined(__ANDROID__) + const char* name = os_huge_pages ? global_config.huge_page_name : global_config.page_name; + if ((address == MAP_FAILED) || !name) + return; + // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails + // (e.g. invalid name) it is a no-op basically. + (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name); +#else + (void)sizeof(size); + (void)sizeof(address); +#endif +} + +static void* +os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) { + size_t map_size = size + alignment; +#if PLATFORM_WINDOWS + // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses + // are actually accessed". But if we enable decommit it's better to not immediately commit and instead commit per + // page to avoid saturating the OS commit limit +#if ENABLE_DECOMMIT + DWORD do_commit = 0; +#else + DWORD do_commit = MEM_COMMIT; +#endif + void* ptr = + VirtualAlloc(0, map_size, (os_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | do_commit, PAGE_READWRITE); +#else + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; +#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR + int fd = (int)VM_MAKE_TAG(240U); + if (os_huge_pages) + fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, fd, 0); +#elif defined(MAP_HUGETLB) + void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), + (os_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0); +#if defined(MADV_HUGEPAGE) + // In some configurations, huge pages allocations might fail thus + // we fallback to normal allocations and promote the region as transparent huge page + if ((ptr == MAP_FAILED || !ptr) && os_huge_pages) { + ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (ptr && ptr != MAP_FAILED) { + int prm = madvise(ptr, size, MADV_HUGEPAGE); + (void)prm; + rpmalloc_assert((prm == 0), "Failed to promote the page to transparent huge page"); + } + } +#endif + os_set_page_name(ptr, map_size); +#elif defined(MAP_ALIGNED) + const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); + void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? 
MAP_ALIGNED(align) : 0) | flags, -1, 0); +#elif defined(MAP_ALIGN) + caddr_t base = (os_huge_pages ? (caddr_t)(4 << 20) : 0); + void* ptr = mmap(base, map_size, PROT_READ | PROT_WRITE, (os_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); +#else + void* ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, flags, -1, 0); +#endif + if (ptr == MAP_FAILED) + ptr = 0; +#endif + if (!ptr) { + if (global_memory_interface->map_fail_callback) { + if (global_memory_interface->map_fail_callback(map_size)) + return os_mmap(size, alignment, offset, mapped_size); + } else { + rpmalloc_assert(ptr != 0, "Failed to map more virtual memory"); + } + return 0; + } + if (alignment) { + size_t padding = ((uintptr_t)ptr & (uintptr_t)(alignment - 1)); + if (padding) + padding = alignment - padding; + rpmalloc_assert(padding <= alignment, "Internal failure in padding"); + rpmalloc_assert(!(padding % 8), "Internal failure in padding"); + ptr = pointer_offset(ptr, padding); + *offset = padding; + } + *mapped_size = map_size; +#if ENABLE_STATISTICS + size_t page_count = map_size / global_config.page_size; + size_t page_mapped_current = + atomic_fetch_add_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed) + page_count; + size_t page_mapped_peak = atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed); + while (page_mapped_current > page_mapped_peak) { + if (atomic_compare_exchange_weak_explicit(&global_statistics.page_mapped_peak, &page_mapped_peak, + page_mapped_current, memory_order_relaxed, memory_order_relaxed)) + break; + } +#if ENABLE_DECOMMIT + size_t page_active_current = + atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count; + size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed); + while (page_active_current > page_active_peak) { + if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak, + page_active_current, memory_order_relaxed, memory_order_relaxed)) + break; + } +#endif +#endif + return ptr; +} + +static void +os_mcommit(void* address, size_t size) { +#if ENABLE_DECOMMIT + if (global_config.disable_decommit) + return; +#if PLATFORM_WINDOWS + if (!VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE)) { + rpmalloc_assert(0, "Failed to commit virtual memory block"); + } +#else + /* + if (mprotect(address, size, PROT_READ | PROT_WRITE)) { + rpmalloc_assert(0, "Failed to commit virtual memory block"); + } + */ +#endif +#if ENABLE_STATISTICS + size_t page_count = size / global_config.page_size; + atomic_fetch_add_explicit(&global_statistics.page_commit, page_count, memory_order_relaxed); + size_t page_active_current = + atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count; + size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed); + while (page_active_current > page_active_peak) { + if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak, + page_active_current, memory_order_relaxed, memory_order_relaxed)) + break; + } +#endif +#endif + (void)sizeof(address); + (void)sizeof(size); +} + +static void +os_mdecommit(void* address, size_t size) { +#if ENABLE_DECOMMIT + if (global_config.disable_decommit) + return; +#if PLATFORM_WINDOWS + if (!VirtualFree(address, size, MEM_DECOMMIT)) { + rpmalloc_assert(0, "Failed to decommit virtual memory block"); + } +#else + /* + if (mprotect(address, 
size, PROT_NONE)) { + rpmalloc_assert(0, "Failed to decommit virtual memory block"); + } + */ +#if defined(MADV_DONTNEED) + if (madvise(address, size, MADV_DONTNEED)) { +#elif defined(MADV_FREE_REUSABLE) + int ret; + while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN)) + errno = 0; + if ((ret == -1) && (errno != 0)) { +#elif defined(MADV_PAGEOUT) + if (madvise(address, size, MADV_PAGEOUT)) { +#elif defined(MADV_FREE) + if (madvise(address, size, MADV_FREE)) { +#else + if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { +#endif + rpmalloc_assert(0, "Failed to decommit virtual memory block"); + } +#endif +#if ENABLE_STATISTICS + size_t page_count = size / global_config.page_size; + atomic_fetch_add_explicit(&global_statistics.page_decommit, page_count, memory_order_relaxed); + size_t page_active_current = + atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed); + rpmalloc_assert(page_active_current >= page_count, "Decommit counter out of sync"); + (void)sizeof(page_active_current); +#endif +#else + (void)sizeof(address); + (void)sizeof(size); +#endif +} + +static void +os_munmap(void* address, size_t offset, size_t mapped_size) { + (void)sizeof(mapped_size); + address = pointer_offset(address, -(int32_t)offset); +#if ENABLE_UNMAP +#if PLATFORM_WINDOWS + if (!VirtualFree(address, 0, MEM_RELEASE)) { + rpmalloc_assert(0, "Failed to unmap virtual memory block"); + } +#else + if (munmap(address, mapped_size)) + rpmalloc_assert(0, "Failed to unmap virtual memory block"); +#endif +#if ENABLE_STATISTICS + size_t page_count = mapped_size / global_config.page_size; + atomic_fetch_sub_explicit(&global_statistics.page_mapped, page_count, memory_order_relaxed); + atomic_fetch_sub_explicit(&global_statistics.page_active, page_count, memory_order_relaxed); +#endif +#endif +} + +//////////// +/// +/// Page interface +/// +////// + +static inline span_t* +page_get_span(page_t* page) { + return (span_t*)((uintptr_t)page & SPAN_MASK); +} + +static inline size_t +page_get_size(page_t* page) { + if (page->page_type == PAGE_SMALL) + return SMALL_PAGE_SIZE; + else if (page->page_type == PAGE_MEDIUM) + return MEDIUM_PAGE_SIZE; + else if (page->page_type == PAGE_LARGE) + return LARGE_PAGE_SIZE; + else + return page_get_span(page)->page_size; +} + +static inline int +page_is_thread_heap(page_t* page) { +#if RPMALLOC_FIRST_CLASS_HEAPS + return (!page->heap->owner_thread || (page->heap->owner_thread == get_thread_id())); +#else + return (page->heap->owner_thread == get_thread_id()); +#endif +} + +static inline block_t* +page_block_start(page_t* page) { + return pointer_offset(page, PAGE_HEADER_SIZE); +} + +static inline block_t* +page_block(page_t* page, uint32_t block_index) { + return pointer_offset(page, PAGE_HEADER_SIZE + (page->block_size * block_index)); +} + +static inline uint32_t +page_block_index(page_t* page, block_t* block) { + block_t* block_first = page_block_start(page); + return (uint32_t)pointer_diff(block, block_first) / page->block_size; +} + +static inline uint32_t +page_block_from_thread_free_list(page_t* page, uint64_t token, block_t** block) { + uint32_t block_index = (uint32_t)(token & 0xFFFFFFFFULL); + uint32_t list_count = (uint32_t)((token >> 32ULL) & 0xFFFFFFFFULL); + *block = list_count ? 
page_block(page, block_index) : 0; + return list_count; +} + +static inline uint64_t +page_block_to_thread_free_list(page_t* page, uint32_t block_index, uint32_t list_count) { + (void)sizeof(page); + return ((uint64_t)list_count << 32ULL) | (uint64_t)block_index; +} + +static inline block_t* +page_block_realign(page_t* page, block_t* block) { + void* blocks_start = page_block_start(page); + uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start); + return pointer_offset(block, -(int32_t)(block_offset % page->block_size)); +} + +static block_t* +page_get_local_free_block(page_t* page) { + block_t* block = page->local_free; + page->local_free = block->next; + --page->local_free_count; + ++page->block_used; + return block; +} + +static inline void +page_decommit_memory_pages(page_t* page) { + if (page->is_decommitted) + return; + void* extra_page = pointer_offset(page, global_config.page_size); + size_t extra_page_size = page_get_size(page) - global_config.page_size; + global_memory_interface->memory_decommit(extra_page, extra_page_size); + page->is_decommitted = 1; +} + +static inline void +page_commit_memory_pages(page_t* page) { + if (!page->is_decommitted) + return; + void* extra_page = pointer_offset(page, global_config.page_size); + size_t extra_page_size = page_get_size(page) - global_config.page_size; + global_memory_interface->memory_commit(extra_page, extra_page_size); + page->is_decommitted = 0; +#if ENABLE_DECOMMIT +#if !defined(__APPLE__) + // When page is recommitted, the blocks in the second memory page and forward + // will be zeroed out by OS - take advantage in zalloc/calloc calls and make sure + // blocks in first page is zeroed out + void* first_page = pointer_offset(page, PAGE_HEADER_SIZE); + memset(first_page, 0, global_config.page_size - PAGE_HEADER_SIZE); + page->is_zero = 1; +#endif +#endif +} + +static void +page_available_to_free(page_t* page) { + rpmalloc_assert(page->is_full == 0, "Page full flag internal failure"); + rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure"); + heap_t* heap = page->heap; + if (heap->page_available[page->size_class] == page) { + heap->page_available[page->size_class] = page->next; + } else { + page->prev->next = page->next; + if (page->next) + page->next->prev = page->prev; + } + page->is_free = 1; + page->is_zero = 0; + page->next = heap->page_free[page->page_type]; + heap->page_free[page->page_type] = page; + if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type]) + heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]); +} + +static void +page_full_to_available(page_t* page) { + rpmalloc_assert(page->is_full == 1, "Page full flag internal failure"); + rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure"); + heap_t* heap = page->heap; + page->next = heap->page_available[page->size_class]; + if (page->next) + page->next->prev = page; + heap->page_available[page->size_class] = page; + page->is_full = 0; + if (page->has_aligned_block == 0) + page->generic_free = 0; +} + +static void +page_full_to_free_on_new_heap(page_t* page, heap_t* heap) { + rpmalloc_assert(heap->id, "Page full to free on default heap"); + rpmalloc_assert(page->is_full == 1, "Page full flag internal failure"); + rpmalloc_assert(page->is_decommitted == 0, "Page decommitted flag internal failure"); + page->is_full = 0; + page->is_free = 1; + page->heap = heap; + atomic_store_explicit(&page->thread_free, 0, 
memory_order_relaxed); + page->next = heap->page_free[page->page_type]; + heap->page_free[page->page_type] = page; + if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type]) + heap_page_free_decommit(heap, page->page_type, global_page_free_retain[page->page_type]); +} + +static void +page_available_to_full(page_t* page) { + heap_t* heap = page->heap; + if (heap->page_available[page->size_class] == page) { + heap->page_available[page->size_class] = page->next; + } else { + page->prev->next = page->next; + if (page->next) + page->next->prev = page->prev; + } + page->is_full = 1; + page->is_zero = 0; + page->generic_free = 1; +} + +static inline void +page_put_local_free_block(page_t* page, block_t* block) { + block->next = page->local_free; + page->local_free = block; + ++page->local_free_count; + if (UNEXPECTED(--page->block_used == 0)) { + page_available_to_free(page); + } else if (UNEXPECTED(page->is_full != 0)) { + page_full_to_available(page); + } +} + +static NOINLINE void +page_adopt_thread_free_block_list(page_t* page) { + if (page->local_free) + return; + unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed); + if (thread_free != 0) { + // Other threads can only replace with another valid list head, this will never change to 0 in other threads + while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_relaxed, + memory_order_relaxed)) + wait_spin(); + page->local_free_count = page_block_from_thread_free_list(page, thread_free, &page->local_free); + rpmalloc_assert(page->local_free_count <= page->block_used, "Page thread free list count internal failure"); + page->block_used -= page->local_free_count; + } +} + +static NOINLINE void +page_put_thread_free_block(page_t* page, block_t* block) { + atomic_thread_fence(memory_order_acquire); + if (page->is_full) { + // Page is full, put the block in the heap thread free list instead, otherwise + // the heap will not pick up the free blocks until a thread local free happens + heap_t* heap = page->heap; + uintptr_t prev_head = atomic_load_explicit(&heap->thread_free[page->page_type], memory_order_relaxed); + block->next = (void*)prev_head; + while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page->page_type], &prev_head, (uintptr_t)block, + memory_order_relaxed, memory_order_relaxed)) { + block->next = (void*)prev_head; + wait_spin(); + } + } else { + unsigned long long prev_thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed); + uint32_t block_index = page_block_index(page, block); + rpmalloc_assert(page_block(page, block_index) == block, "Block pointer is not aligned to start of block"); + uint32_t list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1; + uint64_t thread_free = page_block_to_thread_free_list(page, block_index, list_size); + while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &prev_thread_free, thread_free, + memory_order_relaxed, memory_order_relaxed)) { + list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1; + thread_free = page_block_to_thread_free_list(page, block_index, list_size); + wait_spin(); + } + } +} + +static void +page_push_local_free_to_heap(page_t* page) { + // Push the page free list as the fast track list of free blocks for heap + page->heap->local_free[page->size_class] = page->local_free; + page->block_used += page->local_free_count; + page->local_free = 0; + 
page->local_free_count = 0; +} + +static NOINLINE void* +page_initialize_blocks(page_t* page) { + rpmalloc_assert(page->block_initialized < page->block_count, "Block initialization internal failure"); + block_t* block = page_block(page, page->block_initialized); + ++page->block_initialized; + ++page->block_used; + + if ((page->page_type == PAGE_SMALL) && (page->block_size < (global_config.page_size >> 1))) { + // Link up until next memory page in free list + void* memory_page_start = (void*)((uintptr_t)block & ~(uintptr_t)(global_config.page_size - 1)); + void* memory_page_next = pointer_offset(memory_page_start, global_config.page_size); + block_t* free_block = pointer_offset(block, page->block_size); + block_t* first_block = free_block; + block_t* last_block = free_block; + uint32_t list_count = 0; + uint32_t max_list_count = page->block_count - page->block_initialized; + while (((void*)free_block < memory_page_next) && (list_count < max_list_count)) { + last_block = free_block; + free_block->next = pointer_offset(free_block, page->block_size); + free_block = free_block->next; + ++list_count; + } + if (list_count) { + last_block->next = 0; + page->local_free = first_block; + page->block_initialized += list_count; + page->local_free_count = list_count; + } + } + + return block; +} + +static inline RPMALLOC_ALLOCATOR void* +page_allocate_block(page_t* page, unsigned int zero) { + unsigned int is_zero = 0; + block_t* block = (page->local_free != 0) ? page_get_local_free_block(page) : 0; + if (UNEXPECTED(block == 0)) { + if (atomic_load_explicit(&page->thread_free, memory_order_relaxed) != 0) { + page_adopt_thread_free_block_list(page); + block = (page->local_free != 0) ? page_get_local_free_block(page) : 0; + } + if (block == 0) { + block = page_initialize_blocks(page); + is_zero = page->is_zero; + } + } + + rpmalloc_assert(page->block_used <= page->block_count, "Page block use counter out of sync"); + if (page->local_free && !page->heap->local_free[page->size_class]) + page_push_local_free_to_heap(page); + + // The page might be full when free list has been pushed to heap local free list, + // check if there is a thread free list to adopt + if (page->block_used == page->block_count) + page_adopt_thread_free_block_list(page); + + if (page->block_used == page->block_count) { + // Page is now fully utilized + rpmalloc_assert(!page->is_full, "Page block use counter out of sync with full flag"); + page_available_to_full(page); + } + + if (zero) { + if (!is_zero) + memset(block, 0, page->block_size); + else + *(uintptr_t*)block = 0; + } + + return block; +} + +//////////// +/// +/// Span interface +/// +////// + +static inline int +span_is_thread_heap(span_t* span) { +#if RPMALLOC_FIRST_CLASS_HEAPS + return (!span->heap->owner_thread || (span->heap->owner_thread == get_thread_id())); +#else + return (span->heap->owner_thread == get_thread_id()); +#endif +} + +static inline page_t* +span_get_page_from_block(span_t* span, void* block) { + return (page_t*)((uintptr_t)block & span->page_address_mask); +} + +//! 
Find or allocate a page from the given span +static inline page_t* +span_allocate_page(span_t* span) { + // Allocate path, initialize a new chunk of memory for a page in the given span + rpmalloc_assert(span->page_initialized < span->page_count, "Page initialization internal failure"); + heap_t* heap = span->heap; + page_t* page = pointer_offset(span, span->page_size * span->page_initialized); + +#if ENABLE_DECOMMIT + // The first page is always committed on initial span map of memory + if (span->page_initialized) + global_memory_interface->memory_commit(page, span->page_size); +#endif + ++span->page_initialized; + + page->page_type = span->page_type; + page->is_zero = 1; + page->heap = heap; + rpmalloc_assert(page_is_thread_heap(page), "Page owner thread mismatch"); + + if (span->page_initialized == span->page_count) { + // Span fully utilized + rpmalloc_assert(span == heap->span_partial[span->page_type], "Span partial tracking out of sync"); + heap->span_partial[span->page_type] = 0; + + span->next = heap->span_used[span->page_type]; + heap->span_used[span->page_type] = span; + } + + return page; +} + +static NOINLINE void +span_deallocate_block(span_t* span, page_t* page, void* block) { + if (UNEXPECTED(page->page_type == PAGE_HUGE)) { + global_memory_interface->memory_unmap(span, span->offset, span->mapped_size); + return; + } + + if (page->has_aligned_block) { + // Realign pointer to block start + block = page_block_realign(page, block); + } + + int is_thread_local = page_is_thread_heap(page); + if (EXPECTED(is_thread_local != 0)) { + page_put_local_free_block(page, block); + } else { + // Multithreaded deallocation, push to deferred deallocation list. + page_put_thread_free_block(page, block); + } +} + +//////////// +/// +/// Block interface +/// +////// + +static inline span_t* +block_get_span(block_t* block) { + return (span_t*)((uintptr_t)block & SPAN_MASK); +} + +static inline void +block_deallocate(block_t* block) { + span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK); + page_t* page = span_get_page_from_block(span, block); + const int is_thread_local = page_is_thread_heap(page); + + // Optimized path for thread local free with non-huge block in page + // that has no aligned blocks + if (EXPECTED(is_thread_local != 0)) { + if (EXPECTED(page->generic_free == 0)) { + // Page is not huge, not full and has no aligned block - fast path + block->next = page->local_free; + page->local_free = block; + ++page->local_free_count; + if (UNEXPECTED(--page->block_used == 0)) + page_available_to_free(page); + } else { + span_deallocate_block(span, page, block); + } + } else { + span_deallocate_block(span, page, block); + } +} + +static inline size_t +block_usable_size(block_t* block) { + span_t* span = (span_t*)((uintptr_t)block & SPAN_MASK); + if (EXPECTED(span->page_type <= PAGE_LARGE)) { + page_t* page = span_get_page_from_block(span, block); + void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE); + return page->block_size - ((size_t)pointer_diff(block, blocks_start) % page->block_size); + } else { + return ((size_t)span->page_size * (size_t)span->page_count) - (size_t)pointer_diff(block, span); + } +} + +//////////// +/// +/// Heap interface +/// +////// + +static inline void +heap_lock_acquire(void) { + uintptr_t lock = 0; + uintptr_t this_lock = get_thread_id(); + while (!atomic_compare_exchange_strong(&global_heap_lock, &lock, this_lock)) { + lock = 0; + wait_spin(); + } +} + +static inline void +heap_lock_release(void) { + 
rpmalloc_assert((uintptr_t)atomic_load_explicit(&global_heap_lock, memory_order_relaxed) == get_thread_id(), + "Bad heap lock"); + atomic_store_explicit(&global_heap_lock, 0, memory_order_release); +} + +static inline heap_t* +heap_initialize(void* block) { + heap_t* heap = block; + memset_const(heap, 0, sizeof(heap_t)); + heap->id = 1 + atomic_fetch_add_explicit(&global_heap_id, 1, memory_order_relaxed); + return heap; +} + +static heap_t* +heap_allocate_new(void) { + if (!global_config.page_size) + rpmalloc_initialize(0); + size_t heap_size = get_page_aligned_size(sizeof(heap_t)); + size_t offset = 0; + size_t mapped_size = 0; + block_t* block = global_memory_interface->memory_map(heap_size, 0, &offset, &mapped_size); +#if ENABLE_DECOMMIT + global_memory_interface->memory_commit(block, heap_size); +#endif + heap_t* heap = heap_initialize((void*)block); + heap->offset = (uint32_t)offset; + heap->mapped_size = mapped_size; +#if ENABLE_STATISTICS + atomic_fetch_add_explicit(&global_statistics.heap_count, 1, memory_order_relaxed); +#endif + return heap; +} + +static void +heap_unmap(heap_t* heap) { + global_memory_interface->memory_unmap(heap, heap->offset, heap->mapped_size); +} + +static heap_t* +heap_allocate(int first_class) { + heap_t* heap = 0; + if (!first_class) { + heap_lock_acquire(); + heap = global_heap_queue; + global_heap_queue = heap ? heap->next : 0; + heap_lock_release(); + } + if (!heap) + heap = heap_allocate_new(); + if (heap) { + uintptr_t current_thread_id = get_thread_id(); + heap_lock_acquire(); + heap->next = global_heap_used; + heap->prev = 0; + if (global_heap_used) + global_heap_used->prev = heap; + global_heap_used = heap; + heap_lock_release(); + heap->owner_thread = current_thread_id; + } + return heap; +} + +static inline void +heap_release(heap_t* heap) { + heap_lock_acquire(); + if (heap->prev) + heap->prev->next = heap->next; + if (heap->next) + heap->next->prev = heap->prev; + if (global_heap_used == heap) + global_heap_used = heap->next; + heap->next = global_heap_queue; + global_heap_queue = heap; + heap_lock_release(); +} + +static void +heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_count) { + page_t* page = heap->page_free[page_type]; + while (page && page_retain_count) { + page = page->next; + --page_retain_count; + } + while (page && (page->is_decommitted == 0)) { + page_decommit_memory_pages(page); + --heap->page_free_commit_count[page_type]; + page = page->next; + } +} + +static inline void +heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) { + page->size_class = size_class; + page->block_size = global_size_class[size_class].block_size; + page->block_count = global_size_class[size_class].block_count; + page->block_used = 0; + page->block_initialized = 0; + page->local_free = 0; + page->local_free_count = 0; + page->is_full = 0; + page->is_free = 0; + page->has_aligned_block = 0; + page->generic_free = 0; + page->heap = heap; + page_t* head = heap->page_available[size_class]; + page->next = head; + page->prev = 0; + atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed); + if (head) + head->prev = page; + heap->page_available[size_class] = page; + if (page->is_decommitted) + page_commit_memory_pages(page); +} + +//! 
Find or allocate a span for the given page type with the given size class +static inline span_t* +heap_get_span(heap_t* heap, page_type_t page_type) { + // Fast path, available span for given page type + if (EXPECTED(heap->span_partial[page_type] != 0)) + return heap->span_partial[page_type]; + + // Fallback path, map more memory + size_t offset = 0; + size_t mapped_size = 0; + span_t* span = global_memory_interface->memory_map(SPAN_SIZE, SPAN_SIZE, &offset, &mapped_size); + if (EXPECTED(span != 0)) { + uint32_t page_count = 0; + uint32_t page_size = 0; + uintptr_t page_address_mask = 0; + if (page_type == PAGE_SMALL) { + page_count = SPAN_SIZE / SMALL_PAGE_SIZE; + page_size = SMALL_PAGE_SIZE; + page_address_mask = SMALL_PAGE_MASK; + } else if (page_type == PAGE_MEDIUM) { + page_count = SPAN_SIZE / MEDIUM_PAGE_SIZE; + page_size = MEDIUM_PAGE_SIZE; + page_address_mask = MEDIUM_PAGE_MASK; + } else { + page_count = SPAN_SIZE / LARGE_PAGE_SIZE; + page_size = LARGE_PAGE_SIZE; + page_address_mask = LARGE_PAGE_MASK; + } +#if ENABLE_DECOMMIT + global_memory_interface->memory_commit(span, page_size); +#endif + span->heap = heap; + span->page_type = page_type; + span->page_count = page_count; + span->page_size = page_size; + span->page_address_mask = page_address_mask; + span->offset = (uint32_t)offset; + span->mapped_size = mapped_size; + + heap->span_partial[page_type] = span; + } + + return span; +} + +static page_t* +heap_get_page(heap_t* heap, uint32_t size_class); + +static void +block_deallocate(block_t* block); + +static page_t* +heap_get_page_generic(heap_t* heap, uint32_t size_class) { + page_type_t page_type = get_page_type(size_class); + + // Check if there is a free page from multithreaded deallocations + uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed); + if (UNEXPECTED(block_mt != 0)) { + while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_relaxed, + memory_order_relaxed)) { + wait_spin(); + } + block_t* block = (void*)block_mt; + while (block) { + block_t* next_block = block->next; + block_deallocate(block); + block = next_block; + } + // Retry after processing deferred thread frees + return heap_get_page(heap, size_class); + } + + // Check if there is a free page + page_t* page = heap->page_free[page_type]; + if (EXPECTED(page != 0)) { + heap->page_free[page_type] = page->next; + if (page->is_decommitted == 0) { + rpmalloc_assert(heap->page_free_commit_count[page_type] > 0, "Free committed page count out of sync"); + --heap->page_free_commit_count[page_type]; + } + heap_make_free_page_available(heap, size_class, page); + return page; + } + rpmalloc_assert(heap->page_free_commit_count[page_type] == 0, "Free committed page count out of sync"); + + if (heap->id == 0) { + // Thread has not yet initialized, assign heap and try again + rpmalloc_initialize(0); + return heap_get_page(get_thread_heap(), size_class); + } + + // Fallback path, find or allocate span for given size class + // If thread was not initialized, the heap for the new span + // will be different from the local heap variable in this scope + // (which is the default heap) - so use span page heap instead + span_t* span = heap_get_span(heap, page_type); + if (EXPECTED(span != 0)) { + page = span_allocate_page(span); + heap_make_free_page_available(page->heap, size_class, page); + } + + return page; +} + +//! 
Find or allocate a page for the given size class +static page_t* +heap_get_page(heap_t* heap, uint32_t size_class) { + // Fast path, available page for given size class + page_t* page = heap->page_available[size_class]; + if (EXPECTED(page != 0)) + return page; + return heap_get_page_generic(heap, size_class); +} + +//! Pop a block from the heap local free list +static inline RPMALLOC_ALLOCATOR void* +heap_pop_local_free(heap_t* heap, uint32_t size_class) { + block_t** free_list = heap->local_free + size_class; + block_t* block = *free_list; + if (EXPECTED(block != 0)) + *free_list = block->next; + return block; +} + +//! Generic allocation path from heap pages, spans or new mapping +static NOINLINE RPMALLOC_ALLOCATOR void* +heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned int zero) { + page_t* page = heap_get_page(heap, size_class); + if (EXPECTED(page != 0)) + return page_allocate_block(page, zero); + return 0; +} + +//! Generic allocation path from heap pages, spans or new mapping +static NOINLINE RPMALLOC_ALLOCATOR void* +heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) { + (void)sizeof(heap); + size_t alloc_size = get_page_aligned_size(size + SPAN_HEADER_SIZE); + size_t offset = 0; + size_t mapped_size = 0; + void* block = global_memory_interface->memory_map(alloc_size, SPAN_SIZE, &offset, &mapped_size); + if (block) { + span_t* span = block; +#if ENABLE_DECOMMIT + global_memory_interface->memory_commit(span, alloc_size); +#endif + span->heap = heap; + span->page_type = PAGE_HUGE; + span->page_size = (uint32_t)global_config.page_size; + span->page_count = (uint32_t)(alloc_size / global_config.page_size); + span->page_address_mask = LARGE_PAGE_MASK; + span->offset = (uint32_t)offset; + span->mapped_size = mapped_size; + span->page.heap = heap; + span->page.is_full = 1; + span->page.generic_free = 1; + span->page.page_type = PAGE_HUGE; + // Keep track of span if first class heap + if (!heap->owner_thread) { + span->next = heap->span_used[PAGE_HUGE]; + heap->span_used[PAGE_HUGE] = span; + } + void* ptr = pointer_offset(block, SPAN_HEADER_SIZE); + if (zero) + memset(ptr, 0, size); + return ptr; + } + return 0; +} + +static RPMALLOC_ALLOCATOR NOINLINE void* +heap_allocate_block_generic(heap_t* heap, size_t size, unsigned int zero) { + uint32_t size_class = get_size_class(size); + if (EXPECTED(size_class < SIZE_CLASS_COUNT)) { + block_t* block = heap_pop_local_free(heap, size_class); + if (EXPECTED(block != 0)) { + // Fast track with small block available in heap level local free list + if (zero) + memset(block, 0, global_size_class[size_class].block_size); + return block; + } + + return heap_allocate_block_small_to_large(heap, size_class, zero); + } + + return heap_allocate_block_huge(heap, size, zero); +} + +//! 
Find or allocate a block of the given size +static inline RPMALLOC_ALLOCATOR void* +heap_allocate_block(heap_t* heap, size_t size, unsigned int zero) { + if (size <= (SMALL_GRANULARITY * 64)) { + uint32_t size_class = get_size_class_tiny(size); + block_t* block = heap_pop_local_free(heap, size_class); + if (EXPECTED(block != 0)) { + // Fast track with small block available in heap level local free list + if (zero) + memset(block, 0, global_size_class[size_class].block_size); + return block; + } + } + return heap_allocate_block_generic(heap, size, zero); +} + +static RPMALLOC_ALLOCATOR void* +heap_allocate_block_aligned(heap_t* heap, size_t alignment, size_t size, unsigned int zero) { + if (alignment <= SMALL_GRANULARITY) + return heap_allocate_block(heap, size, zero); + +#if ENABLE_VALIDATE_ARGS + if ((size + alignment) < size) { + errno = EINVAL; + return 0; + } + if (alignment & (alignment - 1)) { + errno = EINVAL; + return 0; + } +#endif + if (alignment >= RPMALLOC_MAX_ALIGNMENT) { + errno = EINVAL; + return 0; + } + + size_t align_mask = alignment - 1; + block_t* block = heap_allocate_block(heap, size + alignment, zero); + if ((uintptr_t)block & align_mask) { + block = (void*)(((uintptr_t)block & ~(uintptr_t)align_mask) + alignment); + // Mark as having aligned blocks + span_t* span = block_get_span(block); + page_t* page = span_get_page_from_block(span, block); + page->has_aligned_block = 1; + page->generic_free = 1; + } + return block; +} + +static void* +heap_reallocate_block(heap_t* heap, void* block, size_t size, size_t old_size, unsigned int flags) { + if (block) { + // Grab the span using guaranteed span alignment + span_t* span = block_get_span(block); + if (EXPECTED(span->page_type <= PAGE_LARGE)) { + // Normal sized block + page_t* page = span_get_page_from_block(span, block); + void* blocks_start = pointer_offset(page, PAGE_HEADER_SIZE); + uint32_t block_offset = (uint32_t)pointer_diff(block, blocks_start); + uint32_t block_idx = block_offset / page->block_size; + void* block_origin = pointer_offset(blocks_start, (size_t)block_idx * page->block_size); + if (!old_size) + old_size = (size_t)((ptrdiff_t)page->block_size - pointer_diff(block, block_origin)); + if ((size_t)page->block_size >= size) { + // Still fits in block, never mind trying to save memory, but preserve data if alignment changed + if ((block != block_origin) && !(flags & RPMALLOC_NO_PRESERVE)) + memmove(block_origin, block, old_size); + return block_origin; + } + } else { + // Huge block + void* block_start = pointer_offset(span, SPAN_HEADER_SIZE); + if (!old_size) + old_size = ((size_t)span->page_size * (size_t)span->page_count) - SPAN_HEADER_SIZE; + if ((size < old_size) && (size > LARGE_BLOCK_SIZE_LIMIT)) { + // Still fits in block and still huge, never mind trying to save memory, + // but preserve data if alignment changed + if ((block_start != block) && !(flags & RPMALLOC_NO_PRESERVE)) + memmove(block_start, block, old_size); + return block_start; + } + } + } else { + old_size = 0; + } + + if (!!(flags & RPMALLOC_GROW_OR_FAIL)) + return 0; + + // Size is greater than block size or saves enough memory to resize, need to allocate a new block + // and deallocate the old. Avoid hysteresis by overallocating if increase is small (below 37%) + size_t lower_bound = old_size + (old_size >> 2) + (old_size >> 3); + size_t new_size = (size > lower_bound) ? size : ((size > old_size) ? 
lower_bound : size); + void* old_block = block; + block = heap_allocate_block(heap, new_size, 0); + if (block && old_block) { + if (!(flags & RPMALLOC_NO_PRESERVE)) + memcpy(block, old_block, old_size < new_size ? old_size : new_size); + block_deallocate(old_block); + } + + return block; +} + +static void* +heap_reallocate_block_aligned(heap_t* heap, void* block, size_t alignment, size_t size, size_t old_size, + unsigned int flags) { + if (alignment <= SMALL_GRANULARITY) + return heap_reallocate_block(heap, block, size, old_size, flags); + + int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); + size_t usable_size = (block ? block_usable_size(block) : 0); + if ((usable_size >= size) && !((uintptr_t)block & (alignment - 1))) { + if (no_alloc || (size >= (usable_size / 2))) + return block; + } + // Aligned alloc marks span as having aligned blocks + void* old_block = block; + block = (!no_alloc ? heap_allocate_block_aligned(heap, alignment, size, 0) : 0); + if (EXPECTED(block != 0)) { + if (!(flags & RPMALLOC_NO_PRESERVE) && old_block) { + if (!old_size) + old_size = usable_size; + memcpy(block, old_block, old_size < size ? old_size : size); + } + if (EXPECTED(old_block != 0)) + block_deallocate(old_block); + } + return block; +} + +static void +heap_free_all(heap_t* heap) { + for (int itype = 0; itype < 3; ++itype) { + span_t* span = heap->span_partial[itype]; + while (span) { + span_t* span_next = span->next; + global_memory_interface->memory_unmap(span, span->offset, span->mapped_size); + span = span_next; + } + heap->span_partial[itype] = 0; + heap->page_free[itype] = 0; + heap->page_free_commit_count[itype] = 0; + atomic_store_explicit(&heap->thread_free[itype], 0, memory_order_relaxed); + } + for (int itype = 0; itype < 4; ++itype) { + span_t* span = heap->span_used[itype]; + while (span) { + span_t* span_next = span->next; + global_memory_interface->memory_unmap(span, span->offset, span->mapped_size); + span = span_next; + } + heap->span_used[itype] = 0; + } + memset(heap->local_free, 0, sizeof(heap->local_free)); + memset(heap->page_available, 0, sizeof(heap->page_available)); + +#if ENABLE_STATISTICS + // TODO: Fix +#endif +} + +//////////// +/// +/// Extern interface +/// +////// + +int +rpmalloc_is_thread_initialized(void) { + return (get_thread_heap() != global_heap_default) ? 
1 : 0; +} + +extern inline RPMALLOC_ALLOCATOR void* +rpmalloc(size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + heap_t* heap = get_thread_heap(); + return heap_allocate_block(heap, size, 0); +} + +extern inline RPMALLOC_ALLOCATOR void* +rpzalloc(size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + heap_t* heap = get_thread_heap(); + return heap_allocate_block(heap, size, 1); +} + +extern inline void +rpfree(void* ptr) { + if (UNEXPECTED(ptr == 0)) + return; + block_deallocate(ptr); +} + +extern inline RPMALLOC_ALLOCATOR void* +rpcalloc(size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + heap_t* heap = get_thread_heap(); + return heap_allocate_block(heap, total, 1); +} + +extern inline RPMALLOC_ALLOCATOR void* +rprealloc(void* ptr, size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return ptr; + } +#endif + heap_t* heap = get_thread_heap(); + return heap_reallocate_block(heap, ptr, size, 0, 0); +} + +extern RPMALLOC_ALLOCATOR void* +rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) { +#if ENABLE_VALIDATE_ARGS + if ((size + alignment < size) || (alignment > _memory_page_size)) { + errno = EINVAL; + return 0; + } +#endif + heap_t* heap = get_thread_heap(); + return heap_reallocate_block_aligned(heap, ptr, alignment, size, oldsize, flags); +} + +extern RPMALLOC_ALLOCATOR void* +rpaligned_alloc(size_t alignment, size_t size) { + heap_t* heap = get_thread_heap(); + return heap_allocate_block_aligned(heap, alignment, size, 0); +} + +extern RPMALLOC_ALLOCATOR void* +rpaligned_zalloc(size_t alignment, size_t size) { + heap_t* heap = get_thread_heap(); + return heap_allocate_block_aligned(heap, alignment, size, 1); +} + +extern inline RPMALLOC_ALLOCATOR void* +rpaligned_calloc(size_t alignment, size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + heap_t* heap = get_thread_heap(); + return heap_allocate_block_aligned(heap, alignment, total, 1); +} + +extern inline RPMALLOC_ALLOCATOR void* +rpmemalign(size_t alignment, size_t size) { + heap_t* heap = get_thread_heap(); + return heap_allocate_block_aligned(heap, alignment, size, 0); +} + +extern inline int +rpposix_memalign(void** memptr, size_t alignment, size_t size) { + heap_t* heap = get_thread_heap(); + if (memptr) + *memptr = heap_allocate_block_aligned(heap, alignment, size, 0); + else + return EINVAL; + return *memptr ? 0 : ENOMEM; +} + +extern inline size_t +rpmalloc_usable_size(void* ptr) { + return (ptr ? 
block_usable_size(ptr) : 0); +} + +//////////// +/// +/// Initialization and finalization +/// +////// + +static void +rpmalloc_thread_destructor(void* value) { + // If this is called on main thread assume it means rpmalloc_finalize + // has not been called and shutdown is forced (through _exit) or unclean + if (get_thread_id() == global_main_thread_id) + return; + if (value) + rpmalloc_thread_finalize(); +} + +extern int +rpmalloc_initialize_config(rpmalloc_interface_t* memory_interface, rpmalloc_config_t* config) { + if (global_rpmalloc_initialized) { + rpmalloc_thread_initialize(); + if (config) + *config = global_config; + return 0; + } + + if (config) + global_config = *config; + + int result = rpmalloc_initialize(memory_interface); + + if (config) + *config = global_config; + + return result; +} + +extern int +rpmalloc_initialize(rpmalloc_interface_t* memory_interface) { + if (global_rpmalloc_initialized) { + rpmalloc_thread_initialize(); + return 0; + } + + global_rpmalloc_initialized = 1; + + global_memory_interface = memory_interface ? memory_interface : &global_memory_interface_default; + if (!global_memory_interface->memory_map || !global_memory_interface->memory_unmap) { + global_memory_interface->memory_map = os_mmap; + global_memory_interface->memory_commit = os_mcommit; + global_memory_interface->memory_decommit = os_mdecommit; + global_memory_interface->memory_unmap = os_munmap; + } + +#if PLATFORM_WINDOWS + SYSTEM_INFO system_info; + memset(&system_info, 0, sizeof(system_info)); + GetSystemInfo(&system_info); + os_map_granularity = system_info.dwAllocationGranularity; +#else + os_map_granularity = (size_t)sysconf(_SC_PAGESIZE); +#endif + +#if PLATFORM_WINDOWS + os_page_size = system_info.dwPageSize; +#else + os_page_size = os_map_granularity; +#endif + if (global_config.enable_huge_pages) { +#if PLATFORM_WINDOWS + HANDLE token = 0; + size_t large_page_minimum = GetLargePageMinimum(); + if (large_page_minimum) + OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (token) { + LUID luid; + if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { + TOKEN_PRIVILEGES token_privileges; + memset(&token_privileges, 0, sizeof(token_privileges)); + token_privileges.PrivilegeCount = 1; + token_privileges.Privileges[0].Luid = luid; + token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { + if (GetLastError() == ERROR_SUCCESS) + os_huge_pages = 1; + } + } + CloseHandle(token); + } + if (os_huge_pages) { + if (large_page_minimum > os_page_size) + os_page_size = large_page_minimum; + if (large_page_minimum > os_map_granularity) + os_map_granularity = large_page_minimum; + } +#elif defined(__linux__) + size_t huge_page_size = 0; + FILE* meminfo = fopen("/proc/meminfo", "r"); + if (meminfo) { + char line[128]; + while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { + line[sizeof(line) - 1] = 0; + if (strstr(line, "Hugepagesize:")) + huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; + } + fclose(meminfo); + } + if (huge_page_size) { + os_huge_pages = 1; + os_page_size = huge_page_size; + os_map_granularity = huge_page_size; + } +#elif defined(__FreeBSD__) + int rc; + size_t sz = sizeof(rc); + + if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) { + os_huge_pages = 1; + os_page_size = 2 * 1024 * 1024; + os_map_granularity = os_page_size; + } +#elif defined(__APPLE__) || defined(__NetBSD__) + os_huge_pages = 1; + 
os_page_size = 2 * 1024 * 1024; + os_map_granularity = os_page_size; +#endif + } else { + os_huge_pages = 0; + } + + global_config.enable_huge_pages = os_huge_pages; + + if (!memory_interface || (global_config.page_size < os_page_size)) + global_config.page_size = os_page_size; + + if (global_config.enable_huge_pages || global_config.page_size > (256 * 1024)) + global_config.disable_decommit = 1; + +#if defined(__linux__) || defined(__ANDROID__) + if (global_config.disable_thp) + (void)prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0); +#endif + +#ifdef _WIN32 + fls_key = FlsAlloc(&rpmalloc_thread_destructor); +#else + pthread_key_create(&pthread_key, rpmalloc_thread_destructor); +#endif + + global_main_thread_id = get_thread_id(); + + rpmalloc_thread_initialize(); + + return 0; +} + +extern const rpmalloc_config_t* +rpmalloc_config(void) { + return &global_config; +} + +extern void +rpmalloc_finalize(void) { + rpmalloc_thread_finalize(); + + if (global_config.unmap_on_finalize) { + heap_t* heap = global_heap_queue; + global_heap_queue = 0; + while (heap) { + heap_t* heap_next = heap->next; + heap_free_all(heap); + heap_unmap(heap); + heap = heap_next; + } + heap = global_heap_used; + global_heap_used = 0; + while (heap) { + heap_t* heap_next = heap->next; + heap_free_all(heap); + heap_unmap(heap); + heap = heap_next; + } +#if ENABLE_STATISTICS + memset(&global_statistics, 0, sizeof(global_statistics)); +#endif + } + +#ifdef _WIN32 + FlsFree(fls_key); + fls_key = 0; +#else + pthread_key_delete(pthread_key); + pthread_key = 0; +#endif + + global_main_thread_id = 0; + global_rpmalloc_initialized = 0; +} + +extern void +rpmalloc_thread_initialize(void) { + if (get_thread_heap() == global_heap_default) + get_thread_heap_allocate(); +} + +extern void +rpmalloc_thread_finalize(void) { + heap_t* heap = get_thread_heap(); + if (heap != global_heap_default) { + heap_release(heap); + set_thread_heap(global_heap_default); + } +} + +extern void +rpmalloc_thread_collect(void) { +} + +void +rpmalloc_dump_statistics(void* file) { +#if ENABLE_STATISTICS + fprintf(file, "Mapped pages: %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.page_mapped, memory_order_relaxed)); + fprintf(file, "Mapped pages (peak): %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed)); + fprintf(file, "Active pages: %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.page_active, memory_order_relaxed)); + fprintf(file, "Active pages (peak): %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed)); + fprintf(file, "Pages committed: %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.page_commit, memory_order_relaxed)); + fprintf(file, "Pages decommitted: %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.page_decommit, memory_order_relaxed)); + fprintf(file, "Heaps created: %llu\n", + (unsigned long long)atomic_load_explicit(&global_statistics.heap_count, memory_order_relaxed)); +#else + (void)sizeof(file); +#endif +} + +#if RPMALLOC_FIRST_CLASS_HEAPS + +rpmalloc_heap_t* +rpmalloc_heap_acquire(void) { + // Must be a pristine heap from newly mapped memory pages, or else memory blocks + // could already be allocated from the heap which would (wrongly) be released when + // heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be + // pristine from the dedicated orphan list can be used. 
+ heap_t* heap = heap_allocate(1); + rpmalloc_assume(heap != 0); + heap->owner_thread = 0; + return heap; +} + +void +rpmalloc_heap_release(rpmalloc_heap_t* heap) { + if (heap) + heap_release(heap); +} + +RPMALLOC_ALLOCATOR void* +rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + return heap_allocate_block(heap, size, 0); +} + +RPMALLOC_ALLOCATOR void* +rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + return heap_allocate_block_aligned(heap, alignment, size, 0); +} + +RPMALLOC_ALLOCATOR void* +rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + return heap_allocate_block(heap, total, 1); +} + +extern inline RPMALLOC_ALLOCATOR void* +rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + return heap_allocate_block_aligned(heap, alignment, total, 1); +} + +RPMALLOC_ALLOCATOR void* +rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return ptr; + } +#endif + return heap_reallocate_block(heap, ptr, size, 0, flags); +} + +RPMALLOC_ALLOCATOR void* +rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) { +#if ENABLE_VALIDATE_ARGS + if ((size + alignment < size) || (alignment > _memory_page_size)) { + errno = EINVAL; + return 0; + } +#endif + return heap_reallocate_block_aligned(heap, ptr, alignment, size, 0, flags); +} + +void +rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) { + (void)sizeof(heap); + block_deallocate(ptr); +} + +//! Free all memory allocated by the heap +void +rpmalloc_heap_free_all(rpmalloc_heap_t* heap) { + heap_free_all(heap); +} + +extern inline void +rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) { + heap_t* prev_heap = get_thread_heap(); + if (prev_heap != heap) { + set_thread_heap(heap); + if (prev_heap) + heap_release(prev_heap); + } +} + +rpmalloc_heap_t* +rpmalloc_get_heap_for_ptr(void* ptr) { + // Grab the span, and then the heap from the span + span_t* span = (span_t*)((uintptr_t)ptr & SPAN_MASK); + if (span) + return span_get_page_from_block(span, ptr)->heap; + return 0; +} + +#endif + +#include "malloc.c" |
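One detail of heap_reallocate_block() above is worth spelling out with numbers (illustrative values only): when a grown block no longer fits its current allocation, the new allocation is padded up to lower_bound = old_size + old_size/4 + old_size/8, i.e. roughly 1.375x the old size. With old_size = 1024 that is 1024 + 256 + 128 = 1408 bytes, so growth requests that no longer fit but stay at or below 1408 bytes all result in a 1408-byte allocation, while larger requests are served exactly. This is the "below 37%" hysteresis guard in the comment: a sequence of slightly larger reallocations does not trigger a new allocation and copy on every call.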
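For reference, a minimal sketch of how the extern interface above is typically driven. It is not part of the upstream sources: it assumes rpmalloc.h is on the include path, runs on the thread that performs initialization, and keeps error handling to a bare minimum, using only the rp* entry points defined in this file.

#include "rpmalloc.h"

#include <stdio.h>
#include <string.h>

int
main(void) {
	// Process-wide setup; passing 0 selects the default OS memory interface.
	// The initializing thread is registered implicitly; other threads call
	// rpmalloc_thread_initialize() before their first allocation.
	if (rpmalloc_initialize(0) != 0)
		return 1;

	void* block = rpmalloc(128);
	if (block) {
		memset(block, 0, 128);
		// Usable size is at least the requested size (rounded up to the size class)
		printf("usable size: %zu\n", rpmalloc_usable_size(block));
		// Reuses the current block when the new size still fits, otherwise
		// allocates a new block and copies the old contents over
		block = rprealloc(block, 4096);
		rpfree(block);
	}

	// Per-thread teardown, then process-wide teardown
	rpmalloc_thread_finalize();
	rpmalloc_finalize();
	return 0;
}

rpzalloc/rpcalloc and the rpaligned_* variants follow the same pattern, with zeroing and alignment handled inside heap_allocate_block() and heap_allocate_block_aligned().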
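The RPMALLOC_FIRST_CLASS_HEAPS block above exposes explicitly managed heaps. A hedged sketch of the intended pattern, assuming the allocator has already been initialized as in the previous example; the transient-node loop and the helper name are purely illustrative.

#include "rpmalloc.h"

// Allocate a batch of short-lived blocks from a dedicated heap so they can
// all be released in a single sweep instead of being freed one by one
static void
build_and_discard(void) {
	rpmalloc_heap_t* heap = rpmalloc_heap_acquire();
	for (int i = 0; i < 64; ++i) {
		void* node = rpmalloc_heap_alloc(heap, 256);
		(void)node;  // ...link into some transient structure...
	}
	// Free every block still owned by the heap, then hand the heap back
	rpmalloc_heap_free_all(heap);
	rpmalloc_heap_release(heap);
}

Alternatively, rpmalloc_heap_thread_set_current() installs such a heap as the calling thread's implicit heap, after which the plain rpmalloc() entry point allocates from it.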