aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2025-09-17 12:48:38 +0200
committerGitHub Enterprise <[email protected]>2025-09-17 12:48:38 +0200
commit324d7ebca12909a91eb98c41ee73304ad7ee7ea6 (patch)
tree9d256567ed94da881713c2b252b8fc4a64b1134b
parentSorting oplog tree view by size would raise an error. (#497) (diff)
downloadzen-324d7ebca12909a91eb98c41ee73304ad7ee7ea6.tar.xz
zen-324d7ebca12909a91eb98c41ee73304ad7ee7ea6.zip
rpmalloc fixes (#499)
* fixed rpmalloc build on Linux and Mac * updated rpmalloc from develop branch on the advice of mjansson * enabled rpmalloc on all platforms. Note that this does not change any behaviour unless `--malloc=rpmalloc` is passed in on the command line. The default is still `mimalloc`.
-rw-r--r--CHANGELOG.md1
-rw-r--r--thirdparty/rpmalloc/rpmalloc.c115
-rw-r--r--thirdparty/rpmalloc/rpmalloc.h2
-rw-r--r--xmake.lua3
4 files changed, 69 insertions, 52 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8b51c4bc6..39786b3b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
- Improvement: Make exceptions handled in http request processing to warnings
- Improvement: Revised project oplog in-memory representation which reduces load times and memory usage
- Improvement: Updated README.md to state the required version vcpkg
+- Improvement: Updated rpmalloc to latest from 'develop' stream, now also compiles on Linux/Mac
- Bugfix: Self-hosted dashboard; sorting oplog by size in the tree view would raise an error
## 5.7.1
diff --git a/thirdparty/rpmalloc/rpmalloc.c b/thirdparty/rpmalloc/rpmalloc.c
index 7aecfb0f4..08cefe6dd 100644
--- a/thirdparty/rpmalloc/rpmalloc.c
+++ b/thirdparty/rpmalloc/rpmalloc.c
@@ -21,6 +21,10 @@
#include <stdint.h>
#include <stdatomic.h>
+#if !defined(__has_builtin)
+#define __has_builtin(b) 0
+#endif
+
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wunused-macros"
#pragma clang diagnostic ignored "-Wunused-function"
@@ -109,10 +113,6 @@ madvise(caddr_t, size_t, int);
#define ARCH_32BIT 1
#endif
-#if !defined(__has_builtin)
-#define __has_builtin(b) 0
-#endif
-
#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
@@ -210,7 +210,9 @@ madvise(caddr_t, size_t, int);
} while (0)
#endif
-#if __has_builtin(__builtin_assume)
+#if defined(_MSC_VER)
+#define rpmalloc_assume(cond) __assume(cond)
+#elif defined(__clang__) && __has_builtin(__builtin_assume)
#define rpmalloc_assume(cond) __builtin_assume(cond)
#elif defined(__GNUC__)
#define rpmalloc_assume(cond) \
@@ -218,8 +220,6 @@ madvise(caddr_t, size_t, int);
if (!__builtin_expect(cond, 0)) \
__builtin_unreachable(); \
} while (0)
-#elif defined(_MSC_VER)
-#define rpmalloc_assume(cond) __assume(cond)
#else
#define rpmalloc_assume(cond) 0
#endif
@@ -305,8 +305,9 @@ wait_spin(void) {
#define UNEXPECTED(x) x
#endif
-#if defined(__GNUC__) || defined(__clang__)
+#if defined(__GNUC__) || defined(__clang__)
+#ifdef __has_builtin
#if __has_builtin(__builtin_memcpy_inline)
#define memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
#else
@@ -326,7 +327,10 @@ wait_spin(void) {
memset(x, y, s); \
} while (0)
#endif
-#else
+#endif
+#endif
+
+#ifndef memcpy_const
#define memcpy_const(x, y, s) memcpy(x, y, s)
#define memset_const(x, y, s) memset(x, y, s)
#endif
@@ -548,10 +552,17 @@ static size_t os_page_size;
#if defined(_MSC_VER) && !defined(__clang__)
#define TLS_MODEL
#define _Thread_local __declspec(thread)
+#elif defined(__ANDROID__)
+#if __ANDROID_API__ >= 29 && \
+ ((defined(__clang__) && (__clang_major__ >= 17)) || (defined(__NDK_MAJOR__) && (__NDK_MAJOR__ >= 26)))
+#define TLS_MODEL __attribute__((tls_model("local-dynamic")))
#else
-// #define TLS_MODEL __attribute__((tls_model("initial-exec")))
#define TLS_MODEL
#endif
+#else
+#define TLS_MODEL __attribute__((tls_model("initial-exec")))
+// #define TLS_MODEL
+#endif
static _Thread_local heap_t* global_thread_heap TLS_MODEL = &global_heap_fallback;
static heap_t*
@@ -565,38 +576,35 @@ static inline uintptr_t
get_thread_id(void) {
#if defined(_WIN32)
return (uintptr_t)((void*)NtCurrentTeb());
-#else
+#elif !defined(__APPLE__) && !defined(__CYGWIN__) && \
+ ((defined(__clang__) && (__clang_major__ >= 7)) || ((defined(__GNUC__) && (__GNUC__ >= 5)))) && \
+ (defined(__aarch64__) || defined(__x86_64__) || defined(__loongarch__)) // Unsure of other archs, needs testing
void* thp = __builtin_thread_pointer();
return (uintptr_t)thp;
+#else
+ uintptr_t tid;
+#if defined(__i386__)
+ __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
+#elif defined(__x86_64__)
+#if defined(__MACH__)
+ __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
+#else
+ __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
+#endif
+#elif defined(__arm__)
+ __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
+#elif defined(__aarch64__)
+#if defined(__MACH__)
+ // tpidr_el0 likely unused, always return 0 on iOS
+ __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
+#else
+ __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
+#endif
+#else
+ tid = (uintptr_t)&global_thread_heap;
+#endif
+ return tid;
#endif
- /*
- #elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
- uintptr_t tid;
- #if defined(__i386__)
- __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
- #elif defined(__x86_64__)
- #if defined(__MACH__)
- __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
- #else
- __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
- #endif
- #elif defined(__arm__)
- __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
- #elif defined(__aarch64__)
- #if defined(__MACH__)
- // tpidr_el0 likely unused, always return 0 on iOS
- __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
- #else
- __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
- #endif
- #else
- #error This platform needs implementation of get_thread_id()
- #endif
- return tid;
- #else
- #error This platform needs implementation of get_thread_id()
- #endif
- */
}
//! Set the current thread heap
@@ -1044,7 +1052,7 @@ page_full_to_free_on_new_heap(page_t* page, heap_t* heap) {
page->is_full = 0;
page->is_free = 1;
page->heap = heap;
- atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ atomic_store_explicit(&page->thread_free, 0, memory_order_release);
page->next = heap->page_free[page->page_type];
heap->page_free[page->page_type] = page;
if (++heap->page_free_commit_count[page->page_type] >= global_page_free_overflow[page->page_type])
@@ -1082,10 +1090,10 @@ static NOINLINE void
page_adopt_thread_free_block_list(page_t* page) {
if (page->local_free)
return;
- unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
+ unsigned long long thread_free = atomic_load_explicit(&page->thread_free, memory_order_acquire);
if (thread_free != 0) {
// Other threads can only replace with another valid list head, this will never change to 0 in other threads
- while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_relaxed,
+ while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &thread_free, 0, memory_order_acquire,
memory_order_relaxed))
wait_spin();
page->local_free_count = page_block_from_thread_free_list(page, thread_free, &page->local_free);
@@ -1104,7 +1112,7 @@ page_put_thread_free_block(page_t* page, block_t* block) {
uintptr_t prev_head = atomic_load_explicit(&heap->thread_free[page->page_type], memory_order_relaxed);
block->next = (void*)prev_head;
while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page->page_type], &prev_head, (uintptr_t)block,
- memory_order_relaxed, memory_order_relaxed)) {
+ memory_order_release, memory_order_relaxed)) {
block->next = (void*)prev_head;
wait_spin();
}
@@ -1115,7 +1123,7 @@ page_put_thread_free_block(page_t* page, block_t* block) {
uint32_t list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
uint64_t thread_free = page_block_to_thread_free_list(page, block_index, list_size);
while (!atomic_compare_exchange_weak_explicit(&page->thread_free, &prev_thread_free, thread_free,
- memory_order_relaxed, memory_order_relaxed)) {
+ memory_order_release, memory_order_relaxed)) {
list_size = page_block_from_thread_free_list(page, prev_thread_free, &block->next) + 1;
thread_free = page_block_to_thread_free_list(page, block_index, list_size);
wait_spin();
@@ -1170,7 +1178,7 @@ page_allocate_block(page_t* page, unsigned int zero) {
unsigned int is_zero = 0;
block_t* block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
if (UNEXPECTED(block == 0)) {
- if (atomic_load_explicit(&page->thread_free, memory_order_relaxed) != 0) {
+ if (atomic_load_explicit(&page->thread_free, memory_order_acquire) != 0) {
page_adopt_thread_free_block_list(page);
block = (page->local_free != 0) ? page_get_local_free_block(page) : 0;
}
@@ -1451,7 +1459,7 @@ heap_make_free_page_available(heap_t* heap, uint32_t size_class, page_t* page) {
page_t* head = heap->page_available[size_class];
page->next = head;
page->prev = 0;
- atomic_store_explicit(&page->thread_free, 0, memory_order_relaxed);
+ atomic_store_explicit(&page->thread_free, 0, memory_order_release);
if (head)
head->prev = page;
heap->page_available[size_class] = page;
@@ -1515,9 +1523,9 @@ heap_get_page_generic(heap_t* heap, uint32_t size_class) {
page_type_t page_type = get_page_type(size_class);
// Check if there is a free page from multithreaded deallocations
- uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_relaxed);
+ uintptr_t block_mt = atomic_load_explicit(&heap->thread_free[page_type], memory_order_acquire);
if (UNEXPECTED(block_mt != 0)) {
- while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_relaxed,
+ while (!atomic_compare_exchange_weak_explicit(&heap->thread_free[page_type], &block_mt, 0, memory_order_release,
memory_order_relaxed)) {
wait_spin();
}
@@ -1595,7 +1603,10 @@ heap_allocate_block_small_to_large(heap_t* heap, uint32_t size_class, unsigned i
//! Generic allocation path from heap pages, spans or new mapping
static NOINLINE RPMALLOC_ALLOCATOR void*
heap_allocate_block_huge(heap_t* heap, size_t size, unsigned int zero) {
- (void)sizeof(heap);
+ if (heap->id == 0) {
+ rpmalloc_initialize(0);
+ heap = get_thread_heap();
+ }
size_t alloc_size = get_page_aligned_size(size + SPAN_HEADER_SIZE);
size_t offset = 0;
size_t mapped_size = 0;
@@ -1790,7 +1801,7 @@ heap_free_all(heap_t* heap) {
heap->span_partial[itype] = 0;
heap->page_free[itype] = 0;
heap->page_free_commit_count[itype] = 0;
- atomic_store_explicit(&heap->thread_free[itype], 0, memory_order_relaxed);
+ atomic_store_explicit(&heap->thread_free[itype], 0, memory_order_release);
}
for (int itype = 0; itype < 4; ++itype) {
span_t* span = heap->span_used[itype];
@@ -2026,7 +2037,11 @@ rpmalloc_initialize(rpmalloc_interface_t* memory_interface) {
if (global_config.enable_huge_pages) {
#if PLATFORM_WINDOWS
HANDLE token = 0;
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM)
size_t large_page_minimum = GetLargePageMinimum();
+#else
+ size_t large_page_minimum = 2 * 1024 * 1024;
+#endif
if (large_page_minimum)
OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
if (token) {
diff --git a/thirdparty/rpmalloc/rpmalloc.h b/thirdparty/rpmalloc/rpmalloc.h
index 2e67280f9..d11292fb1 100644
--- a/thirdparty/rpmalloc/rpmalloc.h
+++ b/thirdparty/rpmalloc/rpmalloc.h
@@ -353,7 +353,7 @@ rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) RPMALLOC_AT
// also be strictly less than the span size (default 64KiB).
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
- RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+ RPMALLOC_ATTRIB_ALLOC_SIZE2(3, 4);
//! Reallocate the given block to at least the given size. The memory block MUST be allocated
// by the same heap given to this function.
diff --git a/xmake.lua b/xmake.lua
index 46c403c11..7a54bd236 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -121,6 +121,7 @@ end
if is_os("linux") then
add_cxxflags("-Wno-vla-cxx-extension")
+ add_defines("_GNU_SOURCE")
end
-- Turn use of undefined cpp macros into errors
@@ -150,7 +151,7 @@ option_end()
add_define_by_config("ZEN_USE_MIMALLOC", "zenmimalloc")
option("zenrpmalloc")
- set_default(is_os("windows"))
+ set_default(true)
set_showmenu(true)
set_description("Use rpmalloc for faster memory management")
option_end()