From e9699e2b995d3ded2efd5580ae6dadf555181d62 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 05:11:56 +0200 Subject: Adding upstream version 0.0~git20230621.7018e24. Signed-off-by: Daniel Baumann --- libc-top-half/musl/arch/wasm32/atomic_arch.h | 1 - libc-top-half/musl/include/pthread.h | 6 --- libc-top-half/musl/src/env/__init_tls.c | 7 ++- libc-top-half/musl/src/thread/pthread_create.c | 53 ++++++++++++++++------ .../musl/src/thread/wasm32/wasi_thread_start.s | 17 +++++++ 5 files changed, 58 insertions(+), 26 deletions(-) (limited to 'libc-top-half/musl') diff --git a/libc-top-half/musl/arch/wasm32/atomic_arch.h b/libc-top-half/musl/arch/wasm32/atomic_arch.h index dd9428c..c24ce2d 100644 --- a/libc-top-half/musl/arch/wasm32/atomic_arch.h +++ b/libc-top-half/musl/arch/wasm32/atomic_arch.h @@ -1,4 +1,3 @@ -#define a_barrier() (__sync_synchronize()) #define a_cas(p, t, s) (__sync_val_compare_and_swap((p), (t), (s))) #define a_crash() (__builtin_trap()) #define a_clz_32 __builtin_clz diff --git a/libc-top-half/musl/include/pthread.h b/libc-top-half/musl/include/pthread.h index b14fe82..05101e8 100644 --- a/libc-top-half/musl/include/pthread.h +++ b/libc-top-half/musl/include/pthread.h @@ -55,15 +55,9 @@ extern "C" { #define PTHREAD_PROCESS_SHARED 1 -#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) #define PTHREAD_MUTEX_INITIALIZER {{{0}}} #define PTHREAD_RWLOCK_INITIALIZER {{{0}}} #define PTHREAD_COND_INITIALIZER {{{0}}} -#else -#define PTHREAD_MUTEX_INITIALIZER 0 -#define PTHREAD_RWLOCK_INITIALIZER 0 -#define PTHREAD_COND_INITIALIZER 0 -#endif #define PTHREAD_ONCE_INIT 0 diff --git a/libc-top-half/musl/src/env/__init_tls.c b/libc-top-half/musl/src/env/__init_tls.c index ece8d24..c3e407c 100644 --- a/libc-top-half/musl/src/env/__init_tls.c +++ b/libc-top-half/musl/src/env/__init_tls.c @@ -47,10 +47,9 @@ static inline void setup_default_stack_size() stack_size = sp > &__global_base ? 
&__heap_base - &__data_end : (ptrdiff_t)&__global_base; } - if (stack_size > __default_stacksize) - __default_stacksize = - stack_size < DEFAULT_STACK_MAX ? - stack_size : DEFAULT_STACK_MAX; + __default_stacksize = + stack_size < DEFAULT_STACK_MAX ? + stack_size : DEFAULT_STACK_MAX; } void __wasi_init_tp() { diff --git a/libc-top-half/musl/src/thread/pthread_create.c b/libc-top-half/musl/src/thread/pthread_create.c index 676e2cc..5de9f5a 100644 --- a/libc-top-half/musl/src/thread/pthread_create.c +++ b/libc-top-half/musl/src/thread/pthread_create.c @@ -60,6 +60,17 @@ void __tl_sync(pthread_t td) if (tl_lock_waiters) __wake(&__thread_list_lock, 1, 0); } +#ifndef __wasilibc_unmodified_upstream +static void *map_base_deferred_free; + +static void process_map_base_deferred_free() +{ + /* called with __tl_lock held */ + free(map_base_deferred_free); + map_base_deferred_free = NULL; +} +#endif + #ifdef __wasilibc_unmodified_upstream _Noreturn void __pthread_exit(void *result) #else @@ -164,14 +175,6 @@ static void __pthread_exit(void *result) self->prev->next = self->next; self->prev = self->next = self; -#ifndef __wasilibc_unmodified_upstream - /* On Linux, the thread is created with CLONE_CHILD_CLEARTID, - * and this lock will unlock by kernel when this thread terminates. - * So we should unlock it here in WebAssembly. - * See also set_tid_address(2) */ - __tl_unlock(); -#endif - #ifdef __wasilibc_unmodified_upstream if (state==DT_DETACHED && self->map_base) { /* Detached threads must block even implementation-internal @@ -190,10 +193,17 @@ static void __pthread_exit(void *result) } #else if (state==DT_DETACHED && self->map_base) { - // __syscall(SYS_exit) would unlock the thread, list - // do it manually here - __tl_unlock(); - free(self->map_base); + /* As we use malloc/free which is considerably more complex + * than mmap/munmap to call and can even require a valid + * thread context, it's difficult to implement __unmapself. 
+ * + * Here we take an alternative approach which simply defers + * the deallocation. An obvious downside of this approach is + * that it keeps the stack longer. (possibly forever.) + * To avoid wasting too much memory, we only defer a single + * item at most. */ + process_map_base_deferred_free(); + map_base_deferred_free = self->map_base; // Can't use `exit()` here, because it is too high level return; } @@ -212,10 +222,15 @@ static void __pthread_exit(void *result) #ifdef __wasilibc_unmodified_upstream for (;;) __syscall(SYS_exit, 0); #else - // __syscall(SYS_exit) would unlock the thread, list - // do it manually here - __tl_unlock(); // Can't use `exit()` here, because it is too high level + + /* On Linux, the thread is created with CLONE_CHILD_CLEARTID, + * and the lock (__thread_list_lock) will be unlocked by kernel when + * this thread terminates. + * See also set_tid_address(2) + * + * In WebAssembly, we leave it to wasi_thread_start instead. + */ #endif } @@ -430,6 +445,14 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att if (map == MAP_FAILED) goto fail; } #else + /* Process the deferred free request if any before + * allocating a new one. Hopefully it enables a reuse of the memory. + * + * Note: We can't perform a simple "handoff" because allocation + * sizes might be different. (e.g. the stack size might differ) */ + __tl_lock(); + process_map_base_deferred_free(); + __tl_unlock(); map = malloc(size); if (!map) goto fail; #endif diff --git a/libc-top-half/musl/src/thread/wasm32/wasi_thread_start.s b/libc-top-half/musl/src/thread/wasm32/wasi_thread_start.s index 0fe9854..7a480b8 100644 --- a/libc-top-half/musl/src/thread/wasm32/wasi_thread_start.s +++ b/libc-top-half/musl/src/thread/wasm32/wasi_thread_start.s @@ -28,4 +28,21 @@ wasi_thread_start: local.get 1 # start_arg call __wasi_thread_start_C + # Unlock thread list. 
(as CLONE_CHILD_CLEARTID would do for Linux) + # + # Note: once we unlock the thread list, our "map_base" can be freed + # by a joining thread. It's safe as we are in ASM and no longer use + # our C stack or pthread_t. It's impossible to do this safely in C + # because there is no way to tell the C compiler not to use C stack. + i32.const __thread_list_lock + i32.const 0 + i32.atomic.store 0 + # As an optimization, we can check tl_lock_waiters here. + # But for now, simply wake up unconditionally as + # CLONE_CHILD_CLEARTID does. + i32.const __thread_list_lock + i32.const 1 + memory.atomic.notify 0 + drop + end_function -- cgit v1.2.3