diff options
| author | Stefan Boberg <[email protected]> | 2026-04-11 12:46:01 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-04-11 12:46:01 +0200 |
| commit | dc742b88d908d23e0c5c5d1d95994637658db2b2 (patch) | |
| tree | 6fb25b88b64c92c503c239cf3cef497ed18ee172 /thirdparty/rpmalloc/rpmalloc.c | |
| parent | Reduce short-lived heap allocations in zenhttp (diff) | |
| parent | hub deprovision all (#938) (diff) | |
| download | zen-sb/reduce-allocs.tar.xz zen-sb/reduce-allocs.zip | |
Merge branch 'main' into sb/reduce-allocs (branch: sb/reduce-allocs)
Diffstat (limited to 'thirdparty/rpmalloc/rpmalloc.c')
| -rw-r--r-- | thirdparty/rpmalloc/rpmalloc.c | 221 |
1 files changed, 171 insertions, 50 deletions
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c index 08cefe6dd..b8fe16a0a 100644 --- a/thirdparty/rpmalloc/rpmalloc.c +++ b/thirdparty/rpmalloc/rpmalloc.c @@ -57,6 +57,9 @@ #endif #if PLATFORM_WINDOWS +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif #include <windows.h> #include <fibersapi.h> static DWORD fls_key; @@ -184,6 +187,12 @@ madvise(caddr_t, size_t, int); #define SPAN_SIZE (256 * 1024 * 1024) #define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1))) +#if ENABLE_VALIDATE_ARGS +//! Maximum allocation size to avoid integer overflow +#undef MAX_ALLOC_SIZE +#define MAX_ALLOC_SIZE (((size_t)-1) - SPAN_SIZE) +#endif + //////////// /// /// Utility macros @@ -258,13 +267,13 @@ static inline size_t rpmalloc_clz(uintptr_t x) { #if ARCH_64BIT #if defined(_MSC_VER) && !defined(__clang__) - return (size_t)_lzcnt_u64(x); + return (size_t)__lzcnt64(x); #else return (size_t)__builtin_clzll(x); #endif #else #if defined(_MSC_VER) && !defined(__clang__) - return (size_t)_lzcnt_u32(x); + return (size_t)__lzcnt32(x); #else return (size_t)__builtin_clzl(x); #endif @@ -279,9 +288,9 @@ wait_spin(void) { #else _mm_pause(); #endif -#elif defined(__x86_64__) || defined(__i386__) +#elif (defined(__x86_64__) || defined(__i386__)) && !defined(_M_ARM64EC) __asm__ volatile("pause" ::: "memory"); -#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) +#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(_M_ARM64EC) __asm__ volatile("yield" ::: "memory"); #elif defined(__powerpc__) || defined(__powerpc64__) // No idea if ever been compiled in such archs but ... as precaution @@ -468,6 +477,9 @@ struct heap_t { uint32_t offset; //! 
Memory map size size_t mapped_size; +#if RPMALLOC_HEAP_STATISTICS + struct rpmalloc_heap_statistics_t stats; +#endif }; _Static_assert(sizeof(page_t) <= PAGE_HEADER_SIZE, "Invalid page header size"); @@ -530,10 +542,10 @@ static const size_class_t global_size_class[SIZE_CLASS_COUNT] = { LCLASS(262144), LCLASS(327680), LCLASS(393216), LCLASS(458752), LCLASS(524288)}; //! Threshold number of pages for when free pages are decommitted -static uint32_t global_page_free_overflow[4] = {16, 8, 2, 0}; +static uint32_t global_page_free_overflow[4] = {64, 16, 4, 0}; //! Number of pages to retain when free page threshold overflows -static uint32_t global_page_free_retain[4] = {4, 2, 1, 0}; +static uint32_t global_page_free_retain[4] = {16, 4, 2, 0}; //! OS huge page support static int os_huge_pages; @@ -719,6 +731,8 @@ os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) { // page to avoid saturating the OS commit limit #if ENABLE_DECOMMIT DWORD do_commit = 0; + if (global_config.disable_decommit) + do_commit = MEM_COMMIT; #else DWORD do_commit = MEM_COMMIT; #endif @@ -788,35 +802,29 @@ os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) { page_mapped_current, memory_order_relaxed, memory_order_relaxed)) break; } -#if ENABLE_DECOMMIT - size_t page_active_current = - atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count; - size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed); - while (page_active_current > page_active_peak) { - if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak, - page_active_current, memory_order_relaxed, memory_order_relaxed)) - break; - } -#endif #endif return ptr; } -static void +static int os_mcommit(void* address, size_t size) { #if ENABLE_DECOMMIT - if (global_config.disable_decommit) - return; + if (global_config.disable_decommit) { + return 0; + } #if 
PLATFORM_WINDOWS if (!VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE)) { + if (global_memory_interface->map_fail_callback && global_memory_interface->map_fail_callback(size)) + return os_mcommit(address, size); rpmalloc_assert(0, "Failed to commit virtual memory block"); + return 1; } #else - /* - if (mprotect(address, size, PROT_READ | PROT_WRITE)) { - rpmalloc_assert(0, "Failed to commit virtual memory block"); - } - */ + /* + if (mprotect(address, size, PROT_READ | PROT_WRITE)) { + rpmalloc_assert(0, "Failed to commit virtual memory block"); + } + */ #endif #if ENABLE_STATISTICS size_t page_count = size / global_config.page_size; @@ -833,23 +841,25 @@ os_mcommit(void* address, size_t size) { #endif (void)sizeof(address); (void)sizeof(size); + return 0; } -static void +static int os_mdecommit(void* address, size_t size) { #if ENABLE_DECOMMIT if (global_config.disable_decommit) - return; + return 1; #if PLATFORM_WINDOWS if (!VirtualFree(address, size, MEM_DECOMMIT)) { rpmalloc_assert(0, "Failed to decommit virtual memory block"); + return 1; } #else - /* - if (mprotect(address, size, PROT_NONE)) { - rpmalloc_assert(0, "Failed to decommit virtual memory block"); - } - */ + /* + if (mprotect(address, size, PROT_NONE)) { + rpmalloc_assert(0, "Failed to decommit virtual memory block"); + } + */ #if defined(MADV_DONTNEED) if (madvise(address, size, MADV_DONTNEED)) { #elif defined(MADV_FREE_REUSABLE) @@ -865,6 +875,7 @@ os_mdecommit(void* address, size_t size) { if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { #endif rpmalloc_assert(0, "Failed to decommit virtual memory block"); + return 1; } #endif #if ENABLE_STATISTICS @@ -879,6 +890,7 @@ os_mdecommit(void* address, size_t size) { (void)sizeof(address); (void)sizeof(size); #endif + return 0; } static void @@ -986,19 +998,29 @@ page_decommit_memory_pages(page_t* page) { return; void* extra_page = pointer_offset(page, global_config.page_size); size_t extra_page_size = page_get_size(page) - 
global_config.page_size; - global_memory_interface->memory_decommit(extra_page, extra_page_size); + if (global_memory_interface->memory_decommit(extra_page, extra_page_size) != 0) + return; +#if RPMALLOC_HEAP_STATISTICS && ENABLE_DECOMMIT + if (page->heap) + page->heap->stats.committed_size -= extra_page_size; +#endif page->is_decommitted = 1; } -static inline void +static inline int page_commit_memory_pages(page_t* page) { if (!page->is_decommitted) - return; + return 0; void* extra_page = pointer_offset(page, global_config.page_size); size_t extra_page_size = page_get_size(page) - global_config.page_size; - global_memory_interface->memory_commit(extra_page, extra_page_size); + if (global_memory_interface->memory_commit(extra_page, extra_page_size) != 0) + return 1; page->is_decommitted = 0; #if ENABLE_DECOMMIT +#if RPMALLOC_HEAP_STATISTICS + if (page->heap) + page->heap->stats.committed_size += extra_page_size; +#endif #if !defined(__APPLE__) // When page is recommitted, the blocks in the second memory page and forward // will be zeroed out by OS - take advantage in zalloc/calloc calls and make sure @@ -1008,6 +1030,7 @@ page_commit_memory_pages(page_t* page) { page->is_zero = 1; #endif #endif + return 0; } static void @@ -1090,7 +1113,7 @@ static NOINLINE void page_adopt_thread_free_block_list(page_t* page) { if (page->local_free) return; - unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_acquire); + unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed); if (thread_free != 0) { // Other threads can only replace with another valid list head, this will never change to 0 in other threads while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_acquire, @@ -1243,8 +1266,13 @@ span_allocate_page(span_t* span) { #if ENABLE_DECOMMIT // The first page is always committed on initial span map of memory - if (span->page_initialized) - 
global_memory_interface->memory_commit(page, span->page_size); + if (span->page_initialized) { + if (global_memory_interface->memory_commit(page, span->page_size) != 0) + return 0; +#if RPMALLOC_HEAP_STATISTICS + heap->stats.committed_size += span->page_size; +#endif + } #endif ++span->page_initialized; @@ -1268,6 +1296,16 @@ span_allocate_page(span_t* span) { static NOINLINE void span_deallocate_block(span_t* span, page_t* page, void* block) { if (UNEXPECTED(page->page_type == PAGE_HUGE)) { +#if RPMALLOC_HEAP_STATISTICS + if (span->heap) { + span->heap->stats.mapped_size -= span->mapped_size; +#if ENABLE_DECOMMIT + span->heap->stats.committed_size -= span->page_count * span->page_size; +#else + span->heap->stats.committed_size -= mapped_size; +#endif + } +#endif global_memory_interface->memory_unmap(span, span->offset, span->mapped_size); return; } @@ -1303,6 +1341,16 @@ block_deallocate(block_t* block) { page_t* page = span_get_page_from_block(span, block); const int is_thread_local = page_is_thread_heap(page); +#if RPMALLOC_HEAP_STATISTICS + heap_t* heap = span->heap; + if (heap) { + if (span->page_type <= PAGE_LARGE) + heap->stats.allocated_size -= page->block_size; + else + heap->stats.allocated_size -= ((size_t)span->page_size * (size_t)span->page_count); + } +#endif + // Optimized path for thread local free with non-huge block in page // that has no aligned blocks if (EXPECTED(is_thread_local != 0)) { @@ -1373,7 +1421,8 @@ heap_allocate_new(void) { size_t mapped_size = 0; block_t* block = global_memory_interface->memory_map(heap_size, 0, &offset, &mapped_size); #if ENABLE_DECOMMIT - global_memory_interface->memory_commit(block, heap_size); + if (global_memory_interface->memory_commit(block, heap_size) != 0) + return 0; #endif heap_t* heap = heap_initialize((void*)block); heap->offset = (uint32_t)offset; @@ -1442,7 +1491,7 @@ heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_c } } -static inline void +static inline int 
heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) { page->size_class = size_class; page->block_size = global_size_class[size_class].block_size; @@ -1463,8 +1512,9 @@ heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) { if (head) head->prev = page; heap->page_available[size_class] = page; - if (page->is_decommitted) - page_commit_memory_pages(page); + if (page->is_decommitted != 0) + return page_commit_memory_pages(page); + return 0; } //! Find or allocate a span for the given page type with the given size class @@ -1478,6 +1528,9 @@ heap_get_span(heap_t* heap, page_type_t page_type) { size_t offset = 0; size_t mapped_size = 0; span_t* span = global_memory_interface->memory_map(SPAN_SIZE, SPAN_SIZE, &offset, &mapped_size); +#if RPMALLOC_HEAP_STATISTICS + heap->stats.mapped_size += mapped_size; +#endif if (EXPECTED(span != 0)) { uint32_t page_count = 0; uint32_t page_size = 0; @@ -1496,7 +1549,15 @@ heap_get_span(heap_t* heap, page_type_t page_type) { page_address_mask = LARGE_PAGE_MASK; } #if ENABLE_DECOMMIT - global_memory_interface->memory_commit(span, page_size); + if (global_memory_interface->memory_commit(span, page_size) != 0) + return 0; +#endif +#if RPMALLOC_HEAP_STATISTICS +#if ENABLE_DECOMMIT + heap->stats.committed_size += page_size; +#else + heap->stats.committed_size += mapped_size; +#endif #endif span->heap = heap; span->page_type = page_type; @@ -1523,9 +1584,9 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) { page_type_t page_type = get_page_type(size_class); // Check if there is a free page from multithreaded deallocations - uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_acquire); + uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed); if (UNEXPECTED(block_mt != 0)) { - while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_release, + while 
(!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_acquire, memory_order_relaxed)) { wait_spin(); } @@ -1547,7 +1608,8 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) { rpmalloc_assert(heap->page_free_commit_count[page_type] > 0, "Free committed page count out of sync"); --heap->page_free_commit_count[page_type]; } - heap_make_free_page_available(heap, size_class, page); + if (heap_make_free_page_available(heap, size_class, page) != 0) + return 0; return page; } rpmalloc_assert(heap->page_free_commit_count[page_type] == 0, "Free committed page count out of sync"); @@ -1565,7 +1627,8 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) { span_t* span = heap_get_span(heap, page_type); if (EXPECTED(span != 0)) { page = span_allocate_page(span); - heap_make_free_page_available(page->heap, size_class, page); + if (heap_make_free_page_available(page->heap, size_class, page) != 0) + return 0; } return page; @@ -1604,6 +1667,7 @@ heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned i static NOINLINE RPMALLOC_ALLOCATOR void* heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) { if (heap->id == 0) { + // Thread has not yet initialized, assign heap and try again rpmalloc_initialize(0); heap = get_thread_heap(); } @@ -1614,7 +1678,16 @@ heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) { if (block) { span_t* span = block; #if ENABLE_DECOMMIT - global_memory_interface->memory_commit(span, alloc_size); + if (global_memory_interface->memory_commit(span, alloc_size) != 0) + return 0; +#endif +#if RPMALLOC_HEAP_STATISTICS + heap->stats.mapped_size += mapped_size; +#if ENABLE_DECOMMIT + heap->stats.committed_size += alloc_size; +#else + heap->stats.committed_size += mapped_size; +#endif #endif span->heap = heap; span->page_type = PAGE_HUGE; @@ -1635,6 +1708,9 @@ heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) { void* ptr = 
pointer_offset(block, SPAN_HEADER_SIZE); if (zero) memset(ptr, 0, size); +#if RPMALLOC_HEAP_STATISTICS + heap->stats.allocated_size += size; +#endif return ptr; } return 0; @@ -1644,6 +1720,10 @@ static RPMALLOC_ALLOCATOR NOINLINE void* heap_allocate_block_generic(heap_t* heap, size_t size, unsigned int zero) { uint32_t size_class = get_size_class(size); if (EXPECTED(size_class < SIZE_CLASS_COUNT)) { +#if RPMALLOC_HEAP_STATISTICS + heap->stats.allocated_size += global_size_class[size_class].block_size; +#endif + block_t* block = heap_pop_local_free(heap, size_class); if (EXPECTED(block != 0)) { // Fast track with small block available in heap level local free list @@ -1668,6 +1748,9 @@ heap_allocate_block(heap_t* heap, size_t size, unsigned int zero) { // Fast track with small block available in heap level local free list if (zero) memset(block, 0, global_size_class[size_class].block_size); +#if RPMALLOC_HEAP_STATISTICS + heap->stats.allocated_size += global_size_class[size_class].block_size; +#endif return block; } } @@ -1901,7 +1984,7 @@ rprealloc(void* ptr, size_t size) { extern RPMALLOC_ALLOCATOR void* rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) { #if ENABLE_VALIDATE_ARGS - if ((size + alignment < size) || (alignment > _memory_page_size)) { + if ((size + alignment < size) || (alignment > SMALL_PAGE_SIZE)) { errno = EINVAL; return 0; } @@ -2210,6 +2293,21 @@ rpmalloc_dump_statistics(void* file) { #endif } +void +rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) { +#if ENABLE_STATISTICS + stats->mapped = global_config.page_size * atomic_load_explicit(&global_statistics.page_mapped, memory_order_relaxed); + stats->mapped_peak = global_config.page_size * atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed); + stats->committed = global_config.page_size * atomic_load_explicit(&global_statistics.page_commit, memory_order_relaxed); + stats->decommitted = global_config.page_size 
* atomic_load_explicit(&global_statistics.page_decommit, memory_order_relaxed); + stats->active = global_config.page_size * atomic_load_explicit(&global_statistics.page_active, memory_order_relaxed); + stats->active_peak = global_config.page_size * atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed); + stats->heap_count = atomic_load_explicit(&global_statistics.heap_count, memory_order_relaxed); +#else + memset(stats, 0, sizeof(rpmalloc_global_statistics_t)); +#endif +} + #if RPMALLOC_FIRST_CLASS_HEAPS rpmalloc_heap_t* @@ -2253,6 +2351,17 @@ rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size } RPMALLOC_ALLOCATOR void* +rpmalloc_heap_aligned_zalloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + return heap_allocate_block_aligned(heap, alignment, size, 1); +} + +RPMALLOC_ALLOCATOR void* rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) { size_t total; #if ENABLE_VALIDATE_ARGS @@ -2312,7 +2421,7 @@ rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned in RPMALLOC_ALLOCATOR void* rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) { #if ENABLE_VALIDATE_ARGS - if ((size + alignment < size) || (alignment > _memory_page_size)) { + if ((size + alignment < size) || (alignment > SMALL_PAGE_SIZE)) { errno = EINVAL; return 0; } @@ -2332,6 +2441,18 @@ rpmalloc_heap_free_all(rpmalloc_heap_t* heap) { heap_free_all(heap); } +struct rpmalloc_heap_statistics_t +rpmalloc_heap_statistics(rpmalloc_heap_t* heap) { +#if RPMALLOC_HEAP_STATISTICS + if (heap) { + return heap->stats; + } +#endif + (void)sizeof(heap); + struct rpmalloc_heap_statistics_t stats = {0}; + return stats; +} + extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) { heap_t* prev_heap = get_thread_heap(); |