aboutsummaryrefslogtreecommitdiff
path: root/thirdparty/rpmalloc/rpmalloc.c
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2026-04-11 12:46:01 +0200
committerGitHub Enterprise <[email protected]>2026-04-11 12:46:01 +0200
commitdc742b88d908d23e0c5c5d1d95994637658db2b2 (patch)
tree6fb25b88b64c92c503c239cf3cef497ed18ee172 /thirdparty/rpmalloc/rpmalloc.c
parentReduce short-lived heap allocations in zenhttp (diff)
parenthub deprovision all (#938) (diff)
downloadzen-sb/reduce-allocs.tar.xz
zen-sb/reduce-allocs.zip
Merge branch 'main' into sb/reduce-allocssb/reduce-allocs
Diffstat (limited to 'thirdparty/rpmalloc/rpmalloc.c')
-rw-r--r--thirdparty/rpmalloc/rpmalloc.c221
1 files changed, 171 insertions, 50 deletions
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c
index 08cefe6dd..b8fe16a0a 100644
--- a/thirdparty/rpmalloc/rpmalloc.c
+++ b/thirdparty/rpmalloc/rpmalloc.c
@@ -57,6 +57,9 @@
#endif
#if PLATFORM_WINDOWS
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
#include <windows.h>
#include <fibersapi.h>
static DWORD fls_key;
@@ -184,6 +187,12 @@ madvise(caddr_t, size_t, int);
#define SPAN_SIZE (256 * 1024 * 1024)
#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1)))
+#if ENABLE_VALIDATE_ARGS
+//! Maximum allocation size to avoid integer overflow
+#undef MAX_ALLOC_SIZE
+#define MAX_ALLOC_SIZE (((size_t)-1) - SPAN_SIZE)
+#endif
+
////////////
///
/// Utility macros
@@ -258,13 +267,13 @@ static inline size_t
rpmalloc_clz(uintptr_t x) {
#if ARCH_64BIT
#if defined(_MSC_VER) && !defined(__clang__)
- return (size_t)_lzcnt_u64(x);
+ return (size_t)__lzcnt64(x);
#else
return (size_t)__builtin_clzll(x);
#endif
#else
#if defined(_MSC_VER) && !defined(__clang__)
- return (size_t)_lzcnt_u32(x);
+ return (size_t)__lzcnt32(x);
#else
return (size_t)__builtin_clzl(x);
#endif
@@ -279,9 +288,9 @@ wait_spin(void) {
#else
_mm_pause();
#endif
-#elif defined(__x86_64__) || defined(__i386__)
+#elif (defined(__x86_64__) || defined(__i386__)) && !defined(_M_ARM64EC)
__asm__ volatile("pause" ::: "memory");
-#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
+#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(_M_ARM64EC)
__asm__ volatile("yield" ::: "memory");
#elif defined(__powerpc__) || defined(__powerpc64__)
// No idea if ever been compiled in such archs but ... as precaution
@@ -468,6 +477,9 @@ struct heap_t {
uint32_t offset;
//! Memory map size
size_t mapped_size;
+#if RPMALLOC_HEAP_STATISTICS
+ struct rpmalloc_heap_statistics_t stats;
+#endif
};
_Static_assert(sizeof(page_t) <= PAGE_HEADER_SIZE, "Invalid page header size");
@@ -530,10 +542,10 @@ static const size_class_t global_size_class[SIZE_CLASS_COUNT] = {
LCLASS(262144), LCLASS(327680), LCLASS(393216), LCLASS(458752), LCLASS(524288)};
//! Threshold number of pages for when free pages are decommitted
-static uint32_t global_page_free_overflow[4] = {16, 8, 2, 0};
+static uint32_t global_page_free_overflow[4] = {64, 16, 4, 0};
//! Number of pages to retain when free page threshold overflows
-static uint32_t global_page_free_retain[4] = {4, 2, 1, 0};
+static uint32_t global_page_free_retain[4] = {16, 4, 2, 0};
//! OS huge page support
static int os_huge_pages;
@@ -719,6 +731,8 @@ os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
// page to avoid saturating the OS commit limit
#if ENABLE_DECOMMIT
DWORD do_commit = 0;
+ if (global_config.disable_decommit)
+ do_commit = MEM_COMMIT;
#else
DWORD do_commit = MEM_COMMIT;
#endif
@@ -788,35 +802,29 @@ os_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
page_mapped_current, memory_order_relaxed, memory_order_relaxed))
break;
}
-#if ENABLE_DECOMMIT
- size_t page_active_current =
- atomic_fetch_add_explicit(&global_statistics.page_active, page_count, memory_order_relaxed) + page_count;
- size_t page_active_peak = atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
- while (page_active_current > page_active_peak) {
- if (atomic_compare_exchange_weak_explicit(&global_statistics.page_active_peak, &page_active_peak,
- page_active_current, memory_order_relaxed, memory_order_relaxed))
- break;
- }
-#endif
#endif
return ptr;
}
-static void
+static int
os_mcommit(void* address, size_t size) {
#if ENABLE_DECOMMIT
- if (global_config.disable_decommit)
- return;
+ if (global_config.disable_decommit) {
+ return 0;
+ }
#if PLATFORM_WINDOWS
if (!VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE)) {
+ if (global_memory_interface->map_fail_callback && global_memory_interface->map_fail_callback(size))
+ return os_mcommit(address, size);
rpmalloc_assert(0, "Failed to commit virtual memory block");
+ return 1;
}
#else
- /*
- if (mprotect(address, size, PROT_READ | PROT_WRITE)) {
- rpmalloc_assert(0, "Failed to commit virtual memory block");
- }
- */
+ /*
+ if (mprotect(address, size, PROT_READ | PROT_WRITE)) {
+ rpmalloc_assert(0, "Failed to commit virtual memory block");
+ }
+ */
#endif
#if ENABLE_STATISTICS
size_t page_count = size / global_config.page_size;
@@ -833,23 +841,25 @@ os_mcommit(void* address, size_t size) {
#endif
(void)sizeof(address);
(void)sizeof(size);
+ return 0;
}
-static void
+static int
os_mdecommit(void* address, size_t size) {
#if ENABLE_DECOMMIT
if (global_config.disable_decommit)
- return;
+ return 1;
#if PLATFORM_WINDOWS
if (!VirtualFree(address, size, MEM_DECOMMIT)) {
rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ return 1;
}
#else
- /*
- if (mprotect(address, size, PROT_NONE)) {
- rpmalloc_assert(0, "Failed to decommit virtual memory block");
- }
- */
+ /*
+ if (mprotect(address, size, PROT_NONE)) {
+ rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ }
+ */
#if defined(MADV_DONTNEED)
if (madvise(address, size, MADV_DONTNEED)) {
#elif defined(MADV_FREE_REUSABLE)
@@ -865,6 +875,7 @@ os_mdecommit(void* address, size_t size) {
if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
#endif
rpmalloc_assert(0, "Failed to decommit virtual memory block");
+ return 1;
}
#endif
#if ENABLE_STATISTICS
@@ -879,6 +890,7 @@ os_mdecommit(void* address, size_t size) {
(void)sizeof(address);
(void)sizeof(size);
#endif
+ return 0;
}
static void
@@ -986,19 +998,29 @@ page_decommit_memory_pages(page_t* page) {
return;
void* extra_page = pointer_offset(page, global_config.page_size);
size_t extra_page_size = page_get_size(page) - global_config.page_size;
- global_memory_interface->memory_decommit(extra_page, extra_page_size);
+ if (global_memory_interface->memory_decommit(extra_page, extra_page_size) != 0)
+ return;
+#if RPMALLOC_HEAP_STATISTICS && ENABLE_DECOMMIT
+ if (page->heap)
+ page->heap->stats.committed_size -= extra_page_size;
+#endif
page->is_decommitted = 1;
}
-static inline void
+static inline int
page_commit_memory_pages(page_t* page) {
if (!page->is_decommitted)
- return;
+ return 0;
void* extra_page = pointer_offset(page, global_config.page_size);
size_t extra_page_size = page_get_size(page) - global_config.page_size;
- global_memory_interface->memory_commit(extra_page, extra_page_size);
+ if (global_memory_interface->memory_commit(extra_page, extra_page_size) != 0)
+ return 1;
page->is_decommitted = 0;
#if ENABLE_DECOMMIT
+#if RPMALLOC_HEAP_STATISTICS
+ if (page->heap)
+ page->heap->stats.committed_size += extra_page_size;
+#endif
#if !defined(__APPLE__)
// When page is recommitted, the blocks in the second memory page and forward
// will be zeroed out by OS - take advantage in zalloc/calloc calls and make sure
@@ -1008,6 +1030,7 @@ page_commit_memory_pages(page_t* page) {
page->is_zero = 1;
#endif
#endif
+ return 0;
}
static void
@@ -1090,7 +1113,7 @@ static NOINLINE void
page_adopt_thread_free_block_list(page_t* page) {
if (page->local_free)
return;
- unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_acquire);
+ unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
if (thread_free != 0) {
// Other threads can only replace with another valid list head, this will never change to 0 in other threads
while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_acquire,
@@ -1243,8 +1266,13 @@ span_allocate_page(span_t* span) {
#if ENABLE_DECOMMIT
// The first page is always committed on initial span map of memory
- if (span->page_initialized)
- global_memory_interface->memory_commit(page, span->page_size);
+ if (span->page_initialized) {
+ if (global_memory_interface->memory_commit(page, span->page_size) != 0)
+ return 0;
+#if RPMALLOC_HEAP_STATISTICS
+ heap->stats.committed_size += span->page_size;
+#endif
+ }
#endif
++span->page_initialized;
@@ -1268,6 +1296,16 @@ span_allocate_page(span_t* span) {
static NOINLINE void
span_deallocate_block(span_t* span, page_t* page, void* block) {
if (UNEXPECTED(page->page_type == PAGE_HUGE)) {
+#if RPMALLOC_HEAP_STATISTICS
+ if (span->heap) {
+ span->heap->stats.mapped_size -= span->mapped_size;
+#if ENABLE_DECOMMIT
+ span->heap->stats.committed_size -= span->page_count * span->page_size;
+#else
+ span->heap->stats.committed_size -= span->mapped_size;
+#endif
+ }
+#endif
global_memory_interface->memory_unmap(span, span->offset, span->mapped_size);
return;
}
@@ -1303,6 +1341,16 @@ block_deallocate(block_t* block) {
page_t* page = span_get_page_from_block(span, block);
const int is_thread_local = page_is_thread_heap(page);
+#if RPMALLOC_HEAP_STATISTICS
+ heap_t* heap = span->heap;
+ if (heap) {
+ if (span->page_type <= PAGE_LARGE)
+ heap->stats.allocated_size -= page->block_size;
+ else
+ heap->stats.allocated_size -= ((size_t)span->page_size * (size_t)span->page_count);
+ }
+#endif
+
// Optimized path for thread local free with non-huge block in page
// that has no aligned blocks
if (EXPECTED(is_thread_local != 0)) {
@@ -1373,7 +1421,8 @@ heap_allocate_new(void) {
size_t mapped_size = 0;
block_t* block = global_memory_interface->memory_map(heap_size, 0, &offset, &mapped_size);
#if ENABLE_DECOMMIT
- global_memory_interface->memory_commit(block, heap_size);
+ if (global_memory_interface->memory_commit(block, heap_size) != 0)
+ return 0;
#endif
heap_t* heap = heap_initialize((void*)block);
heap->offset = (uint32_t)offset;
@@ -1442,7 +1491,7 @@ heap_page_free_decommit(heap_t* heap, uint32_t page_type, uint32_t page_retain_c
}
}
-static inline void
+static inline int
heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) {
page->size_class = size_class;
page->block_size = global_size_class[size_class].block_size;
@@ -1463,8 +1512,9 @@ heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) {
if (head)
head->prev = page;
heap->page_available[size_class] = page;
- if (page->is_decommitted)
- page_commit_memory_pages(page);
+ if (page->is_decommitted != 0)
+ return page_commit_memory_pages(page);
+ return 0;
}
//! Find or allocate a span for the given page type with the given size class
@@ -1478,6 +1528,9 @@ heap_get_span(heap_t* heap, page_type_t page_type) {
size_t offset = 0;
size_t mapped_size = 0;
span_t* span = global_memory_interface->memory_map(SPAN_SIZE, SPAN_SIZE, &offset, &mapped_size);
+#if RPMALLOC_HEAP_STATISTICS
+ heap->stats.mapped_size += mapped_size;
+#endif
if (EXPECTED(span != 0)) {
uint32_t page_count = 0;
uint32_t page_size = 0;
@@ -1496,7 +1549,15 @@ heap_get_span(heap_t* heap, page_type_t page_type) {
page_address_mask = LARGE_PAGE_MASK;
}
#if ENABLE_DECOMMIT
- global_memory_interface->memory_commit(span, page_size);
+ if (global_memory_interface->memory_commit(span, page_size) != 0)
+ return 0;
+#endif
+#if RPMALLOC_HEAP_STATISTICS
+#if ENABLE_DECOMMIT
+ heap->stats.committed_size += page_size;
+#else
+ heap->stats.committed_size += mapped_size;
+#endif
#endif
span->heap = heap;
span->page_type = page_type;
@@ -1523,9 +1584,9 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) {
page_type_t page_type = get_page_type(size_class);
// Check if there is a free page from multithreaded deallocations
- uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_acquire);
+ uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed);
if (UNEXPECTED(block_mt != 0)) {
- while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_release,
+ while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_acquire,
memory_order_relaxed)) {
wait_spin();
}
@@ -1547,7 +1608,8 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) {
rpmalloc_assert(heap->page_free_commit_count[page_type] > 0, "Free committed page count out of sync");
--heap->page_free_commit_count[page_type];
}
- heap_make_free_page_available(heap, size_class, page);
+ if (heap_make_free_page_available(heap, size_class, page) != 0)
+ return 0;
return page;
}
rpmalloc_assert(heap->page_free_commit_count[page_type] == 0, "Free committed page count out of sync");
@@ -1565,7 +1627,8 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) {
span_t* span = heap_get_span(heap, page_type);
if (EXPECTED(span != 0)) {
page = span_allocate_page(span);
- heap_make_free_page_available(page->heap, size_class, page);
+ if (heap_make_free_page_available(page->heap, size_class, page) != 0)
+ return 0;
}
return page;
@@ -1604,6 +1667,7 @@ heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned i
static NOINLINE RPMALLOC_ALLOCATOR void*
heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
if (heap->id == 0) {
+ // Thread has not yet initialized, assign heap and try again
rpmalloc_initialize(0);
heap = get_thread_heap();
}
@@ -1614,7 +1678,16 @@ heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
if (block) {
span_t* span = block;
#if ENABLE_DECOMMIT
- global_memory_interface->memory_commit(span, alloc_size);
+ if (global_memory_interface->memory_commit(span, alloc_size) != 0)
+ return 0;
+#endif
+#if RPMALLOC_HEAP_STATISTICS
+ heap->stats.mapped_size += mapped_size;
+#if ENABLE_DECOMMIT
+ heap->stats.committed_size += alloc_size;
+#else
+ heap->stats.committed_size += mapped_size;
+#endif
#endif
span->heap = heap;
span->page_type = PAGE_HUGE;
@@ -1635,6 +1708,9 @@ heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
void* ptr = pointer_offset(block, SPAN_HEADER_SIZE);
if (zero)
memset(ptr, 0, size);
+#if RPMALLOC_HEAP_STATISTICS
+ heap->stats.allocated_size += size;
+#endif
return ptr;
}
return 0;
@@ -1644,6 +1720,10 @@ static RPMALLOC_ALLOCATOR NOINLINE void*
heap_allocate_block_generic(heap_t* heap, size_t size, unsigned int zero) {
uint32_t size_class = get_size_class(size);
if (EXPECTED(size_class < SIZE_CLASS_COUNT)) {
+#if RPMALLOC_HEAP_STATISTICS
+ heap->stats.allocated_size += global_size_class[size_class].block_size;
+#endif
+
block_t* block = heap_pop_local_free(heap, size_class);
if (EXPECTED(block != 0)) {
// Fast track with small block available in heap level local free list
@@ -1668,6 +1748,9 @@ heap_allocate_block(heap_t* heap, size_t size, unsigned int zero) {
// Fast track with small block available in heap level local free list
if (zero)
memset(block, 0, global_size_class[size_class].block_size);
+#if RPMALLOC_HEAP_STATISTICS
+ heap->stats.allocated_size += global_size_class[size_class].block_size;
+#endif
return block;
}
}
@@ -1901,7 +1984,7 @@ rprealloc(void* ptr, size_t size) {
extern RPMALLOC_ALLOCATOR void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
- if ((size + alignment < size) || (alignment > _memory_page_size)) {
+ if ((size + alignment < size) || (alignment > SMALL_PAGE_SIZE)) {
errno = EINVAL;
return 0;
}
@@ -2210,6 +2293,21 @@ rpmalloc_dump_statistics(void* file) {
#endif
}
+void
+rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
+#if ENABLE_STATISTICS
+ stats->mapped = global_config.page_size * atomic_load_explicit(&global_statistics.page_mapped, memory_order_relaxed);
+ stats->mapped_peak = global_config.page_size * atomic_load_explicit(&global_statistics.page_mapped_peak, memory_order_relaxed);
+ stats->committed = global_config.page_size * atomic_load_explicit(&global_statistics.page_commit, memory_order_relaxed);
+ stats->decommitted = global_config.page_size * atomic_load_explicit(&global_statistics.page_decommit, memory_order_relaxed);
+ stats->active = global_config.page_size * atomic_load_explicit(&global_statistics.page_active, memory_order_relaxed);
+ stats->active_peak = global_config.page_size * atomic_load_explicit(&global_statistics.page_active_peak, memory_order_relaxed);
+ stats->heap_count = atomic_load_explicit(&global_statistics.heap_count, memory_order_relaxed);
+#else
+ memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
+#endif
+}
+
#if RPMALLOC_FIRST_CLASS_HEAPS
rpmalloc_heap_t*
@@ -2253,6 +2351,17 @@ rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size
}
RPMALLOC_ALLOCATOR void*
+rpmalloc_heap_aligned_zalloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+ if (size >= MAX_ALLOC_SIZE) {
+ errno = EINVAL;
+ return 0;
+ }
+#endif
+ return heap_allocate_block_aligned(heap, alignment, size, 1);
+}
+
+RPMALLOC_ALLOCATOR void*
rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
size_t total;
#if ENABLE_VALIDATE_ARGS
@@ -2312,7 +2421,7 @@ rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned in
RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
- if ((size + alignment < size) || (alignment > _memory_page_size)) {
+ if ((size + alignment < size) || (alignment > SMALL_PAGE_SIZE)) {
errno = EINVAL;
return 0;
}
@@ -2332,6 +2441,18 @@ rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
heap_free_all(heap);
}
+struct rpmalloc_heap_statistics_t
+rpmalloc_heap_statistics(rpmalloc_heap_t* heap) {
+#if RPMALLOC_HEAP_STATISTICS
+ if (heap) {
+ return heap->stats;
+ }
+#endif
+ (void)sizeof(heap);
+ struct rpmalloc_heap_statistics_t stats = {0};
+ return stats;
+}
+
extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
heap_t* prev_heap = get_thread_heap();