From a00bf321eeeca836ee2a0d2d25aeb8524107b8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E6=9C=B4=E5=AE=87?= Date: Fri, 11 Nov 2022 06:12:53 +0800 Subject: [PATCH] threads: implement init of TLS and stack pointer (#342) * threads: implement init of TLS and stack pointer * fix: rename wasi_snapshot_preview2_thread_spawn to wasi_thread_spawn Signed-off-by: Harald Hoyer * fix: change signature of wasi_thread_start Signed-off-by: Harald Hoyer * fix: pthread_exit for WASI Can't use `exit()` because it is too high level. Have to unlock the thread list. Signed-off-by: Harald Hoyer * fix: initialize struct pthread for the main thread Signed-off-by: Harald Hoyer * fix: store the aligned stack minus `struct start_args` Signed-off-by: Harald Hoyer Signed-off-by: Harald Hoyer Co-authored-by: Harald Hoyer --- Makefile | 8 ++ .../wasm32-wasi/posix/defined-symbols.txt | 15 +++ .../wasm32-wasi/posix/undefined-symbols.txt | 12 +-- libc-bottom-half/crt/crt1-command.c | 5 + libc-bottom-half/sources/__wasilibc_real.c | 6 +- libc-top-half/musl/arch/wasm32/pthread_arch.h | 14 +-- libc-top-half/musl/src/env/__init_tls.c | 33 +++++++ libc-top-half/musl/src/internal/libc.h | 6 +- .../musl/src/internal/pthread_impl.h | 2 + .../musl/src/thread/pthread_create.c | 98 ++++++++++++++++--- libc-top-half/musl/src/thread/pthread_self.c | 4 +- 11 files changed, 163 insertions(+), 40 deletions(-) diff --git a/Makefile b/Makefile index 11fa91ae..59598d46 100644 --- a/Makefile +++ b/Makefile @@ -192,9 +192,16 @@ LIBC_TOP_HALF_MUSL_SOURCES = \ ifeq ($(THREAD_MODEL), posix) LIBC_TOP_HALF_MUSL_SOURCES += \ $(addprefix $(LIBC_TOP_HALF_MUSL_SRC_DIR)/, \ + env/__init_tls.c \ + stdio/__lockfile.c \ thread/__lock.c \ thread/__wait.c \ thread/__timedwait.c \ + thread/default_attr.c \ + thread/pthread_attr_destroy.c \ + thread/pthread_attr_init.c \ + thread/pthread_attr_setstack.c \ + thread/pthread_attr_setstacksize.c \ thread/pthread_cleanup_push.c \ thread/pthread_cond_broadcast.c \ 
thread/pthread_cond_destroy.c \ @@ -235,6 +242,7 @@ LIBC_TOP_HALF_MUSL_SOURCES += \ thread/pthread_rwlockattr_init.c \ thread/pthread_rwlockattr_setpshared.c \ thread/pthread_setcancelstate.c \ + thread/pthread_self.c \ thread/pthread_testcancel.c \ thread/sem_destroy.c \ thread/sem_getvalue.c \ diff --git a/expected/wasm32-wasi/posix/defined-symbols.txt b/expected/wasm32-wasi/posix/defined-symbols.txt index fe29f30b..d92bcbd8 100644 --- a/expected/wasm32-wasi/posix/defined-symbols.txt +++ b/expected/wasm32-wasi/posix/defined-symbols.txt @@ -23,6 +23,7 @@ __c_locale __clock __clock_gettime __clock_nanosleep +__copy_tls __cos __cosdf __cosl @@ -38,6 +39,8 @@ __ctype_tolower_loc __ctype_toupper_loc __cxa_atexit __cxa_finalize +__default_guardsize +__default_stacksize __des_setkey __do_cleanup_pop __do_cleanup_push @@ -87,6 +90,7 @@ __getopt_msg __gmtime_r __hwcap __inet_aton +__init_tp __intscan __invtrigl_R __isalnum_l @@ -144,6 +148,7 @@ __locale_lock __locale_lockptr __localtime_r __lock +__lockfile __log2_data __log2f_data __log_data @@ -265,6 +270,7 @@ __tan __tandf __tanl __testcancel +__thread_list_lock __timedwait __timedwait_cp __tl_lock @@ -288,6 +294,7 @@ __tsearch_balance __uflow __unlist_locked_file __unlock +__unlockfile __uselocale __utc __wait @@ -318,6 +325,7 @@ __wasi_fd_seek __wasi_fd_sync __wasi_fd_tell __wasi_fd_write +__wasi_init_tp __wasi_path_create_directory __wasi_path_filestat_get __wasi_path_filestat_set_times @@ -371,6 +379,7 @@ __wasilibc_nocwd_scandirat __wasilibc_nocwd_symlinkat __wasilibc_nocwd_utimensat __wasilibc_open_nomode +__wasilibc_pthread_self __wasilibc_register_preopened_fd __wasilibc_rename_newat __wasilibc_rename_oldat @@ -953,6 +962,10 @@ program_invocation_name program_invocation_short_name pselect psignal +pthread_attr_destroy +pthread_attr_init +pthread_attr_setstack +pthread_attr_setstacksize pthread_cond_broadcast pthread_cond_destroy pthread_cond_init @@ -992,6 +1005,7 @@ pthread_rwlock_wrlock 
pthread_rwlockattr_destroy pthread_rwlockattr_init pthread_rwlockattr_setpshared +pthread_self pthread_setcancelstate pthread_testcancel pthread_timedjoin_np @@ -1182,6 +1196,7 @@ tfind tgamma tgammaf tgammal +thrd_current thrd_sleep time timegm diff --git a/expected/wasm32-wasi/posix/undefined-symbols.txt b/expected/wasm32-wasi/posix/undefined-symbols.txt index 407a6b71..7def0a9f 100644 --- a/expected/wasm32-wasi/posix/undefined-symbols.txt +++ b/expected/wasm32-wasi/posix/undefined-symbols.txt @@ -1,7 +1,4 @@ __addtf3 -__copy_tls -__default_guardsize -__default_stacksize __divtf3 __eqtf2 __extenddftf2 @@ -59,19 +56,18 @@ __imported_wasi_snapshot_preview1_sock_accept __imported_wasi_snapshot_preview1_sock_recv __imported_wasi_snapshot_preview1_sock_send __imported_wasi_snapshot_preview1_sock_shutdown -__imported_wasi_snapshot_preview2_thread_spawn +__imported_wasi_thread_spawn __letf2 -__lockfile __lttf2 __main_argc_argv __netf2 __stack_pointer __subtf3 -__thread_list_lock +__tls_align __tls_base +__tls_size __trunctfdf2 __trunctfsf2 -__unlockfile __unordtf2 -__wasilibc_pthread_self __wasm_call_ctors +__wasm_init_tls diff --git a/libc-bottom-half/crt/crt1-command.c b/libc-bottom-half/crt/crt1-command.c index 48be79f7..fb9ee71f 100644 --- a/libc-bottom-half/crt/crt1-command.c +++ b/libc-bottom-half/crt/crt1-command.c @@ -1,5 +1,6 @@ #ifdef _REENTRANT #include +extern void __wasi_init_tp(void); #endif #include extern void __wasm_call_ctors(void); @@ -29,6 +30,10 @@ void _start(void) { started = 1; #endif +#ifdef _REENTRANT + __wasi_init_tp(); +#endif + // The linker synthesizes this to call constructors. 
__wasm_call_ctors(); diff --git a/libc-bottom-half/sources/__wasilibc_real.c b/libc-bottom-half/sources/__wasilibc_real.c index 2648ac9f..855a2c6d 100644 --- a/libc-bottom-half/sources/__wasilibc_real.c +++ b/libc-bottom-half/sources/__wasilibc_real.c @@ -660,13 +660,13 @@ __wasi_errno_t __wasi_sock_shutdown( } #ifdef _REENTRANT -int32_t __imported_wasi_snapshot_preview2_thread_spawn(int32_t arg0) __attribute__(( - __import_module__("wasi_snapshot_preview2"), +int32_t __imported_wasi_thread_spawn(int32_t arg0) __attribute__(( + __import_module__("wasi"), __import_name__("thread_spawn") )); __wasi_errno_t __wasi_thread_spawn(void* start_arg) { - int32_t ret = __imported_wasi_snapshot_preview2_thread_spawn((int32_t) start_arg); + int32_t ret = __imported_wasi_thread_spawn((int32_t) start_arg); return (uint16_t) ret; } #endif diff --git a/libc-top-half/musl/arch/wasm32/pthread_arch.h b/libc-top-half/musl/arch/wasm32/pthread_arch.h index e23eaf8f..58e76ab0 100644 --- a/libc-top-half/musl/arch/wasm32/pthread_arch.h +++ b/libc-top-half/musl/arch/wasm32/pthread_arch.h @@ -1,11 +1,5 @@ -static inline uintptr_t __get_tp(void) { -#if _REENTRANT - int val; - __asm__("global.get __wasilibc_pthread_self\n" - "local.set %0" - : "=r"(val)); - return val; -#else - return 0; -#endif +extern _Thread_local struct __pthread __wasilibc_pthread_self; + +static inline uintptr_t __get_tp() { + return (uintptr_t)&__wasilibc_pthread_self; } diff --git a/libc-top-half/musl/src/env/__init_tls.c b/libc-top-half/musl/src/env/__init_tls.c index a93141ed..ee785bc1 100644 --- a/libc-top-half/musl/src/env/__init_tls.c +++ b/libc-top-half/musl/src/env/__init_tls.c @@ -1,7 +1,11 @@ +#ifdef __wasilibc_unmodified_upstream #define SYSCALL_NO_TLS 1 #include +#endif #include +#ifdef __wasilibc_unmodified_upstream #include +#endif #include #include #include "pthread_impl.h" @@ -11,15 +15,23 @@ volatile int __thread_list_lock; +#ifndef __wasilibc_unmodified_upstream +void __wasi_init_tp() { + 
__init_tp((void *)__get_tp()); +} +#endif + int __init_tp(void *p) { pthread_t td = p; td->self = td; +#ifdef __wasilibc_unmodified_upstream int r = __set_thread_area(TP_ADJ(p)); if (r < 0) return -1; if (!r) libc.can_do_threads = 1; td->detach_state = DT_JOINABLE; td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock); +#endif td->locale = &libc.global_locale; td->robust_list.head = &td->robust_list.head; td->sysinfo = __sysinfo; @@ -27,6 +39,8 @@ int __init_tp(void *p) return 0; } +#ifdef __wasilibc_unmodified_upstream + static struct builtin_tls { char c; struct pthread pt; @@ -35,9 +49,15 @@ static struct builtin_tls { #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt) static struct tls_module main_tls; +#endif + +#ifndef __wasilibc_unmodified_upstream +extern void __wasm_init_tls(void*); +#endif void *__copy_tls(unsigned char *mem) { +#ifdef __wasilibc_unmodified_upstream pthread_t td; struct tls_module *p; size_t i; @@ -69,8 +89,20 @@ void *__copy_tls(unsigned char *mem) dtv[0] = libc.tls_cnt; td->dtv = dtv; return td; +#else + size_t tls_align = __builtin_wasm_tls_align(); + volatile void* tls_base = __builtin_wasm_tls_base(); + mem += tls_align; + mem -= (uintptr_t)mem & (tls_align-1); + __wasm_init_tls(mem); + __asm__("local.get %0\n" + "global.set __tls_base\n" + :: "r"(tls_base)); + return mem; +#endif } +#ifdef __wasilibc_unmodified_upstream #if ULONG_MAX == 0xffffffff typedef Elf32_Phdr Phdr; #else @@ -151,3 +183,4 @@ static void static_init_tls(size_t *aux) } weak_alias(static_init_tls, __init_tls); +#endif diff --git a/libc-top-half/musl/src/internal/libc.h b/libc-top-half/musl/src/internal/libc.h index 9b398474..355c3a4e 100644 --- a/libc-top-half/musl/src/internal/libc.h +++ b/libc-top-half/musl/src/internal/libc.h @@ -18,8 +18,10 @@ struct tls_module { }; struct __libc { -#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) +#ifdef __wasilibc_unmodified_upstream char can_do_threads; +#endif +#if 
defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) char threaded; #endif #ifdef __wasilibc_unmodified_upstream // WASI doesn't currently use any code that needs "secure" mode @@ -32,7 +34,7 @@ struct __libc { #ifdef __wasilibc_unmodified_upstream // WASI has no auxv size_t *auxv; #endif -#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) +#ifdef __wasilibc_unmodified_upstream // WASI uses a different TLS implementation struct tls_module *tls_head; size_t tls_size, tls_align, tls_cnt; #endif diff --git a/libc-top-half/musl/src/internal/pthread_impl.h b/libc-top-half/musl/src/internal/pthread_impl.h index 22e557d5..a6d188bb 100644 --- a/libc-top-half/musl/src/internal/pthread_impl.h +++ b/libc-top-half/musl/src/internal/pthread_impl.h @@ -25,8 +25,10 @@ struct pthread { /* Part 1 -- these fields may be external or * internal (accessed via asm) ABI. Do not change. */ struct pthread *self; +#ifdef __wasilibc_unmodified_upstream #ifndef TLS_ABOVE_TP uintptr_t *dtv; +#endif #endif struct pthread *prev, *next; /* non-ABI */ uintptr_t sysinfo; diff --git a/libc-top-half/musl/src/thread/pthread_create.c b/libc-top-half/musl/src/thread/pthread_create.c index d0168987..1aa7be71 100644 --- a/libc-top-half/musl/src/thread/pthread_create.c +++ b/libc-top-half/musl/src/thread/pthread_create.c @@ -12,6 +12,8 @@ #include #endif +#include + static void dummy_0() { } @@ -158,6 +160,14 @@ _Noreturn void __pthread_exit(void *result) self->prev->next = self->next; self->prev = self->next = self; +#ifndef __wasilibc_unmodified_upstream + /* On Linux, the thread is created with CLONE_CHILD_CLEARTID, + * and this lock will be unlocked by the kernel when this thread terminates. + * So we should unlock it here in WebAssembly. 
+ * See also set_tid_address(2) */ + __tl_unlock(); +#endif + #ifdef __wasilibc_unmodified_upstream if (state==DT_DETACHED && self->map_base) { /* Detached threads must block even implementation-internal @@ -174,6 +184,15 @@ _Noreturn void __pthread_exit(void *result) * and then exits without touching the stack. */ __unmapself(self->map_base, self->map_size); } +#else + if (state==DT_DETACHED && self->map_base) { + // __syscall(SYS_exit) would unlock the thread list; + // do it manually here + __tl_unlock(); + free(self->map_base); + // Can't use `exit()` here, because it is too high level + for (;;) __wasi_proc_exit(0); + } #endif /* Wake any joiner. */ @@ -189,7 +208,11 @@ _Noreturn void __pthread_exit(void *result) #ifdef __wasilibc_unmodified_upstream for (;;) __syscall(SYS_exit, 0); #else - for (;;) exit(0); + // __syscall(SYS_exit) would unlock the thread list; + // do it manually here + __tl_unlock(); + // Can't use `exit()` here, because it is too high level + for (;;) __wasi_proc_exit(0); #endif } @@ -214,7 +237,7 @@ struct start_args { #else void *(*start_func)(void *); void *start_arg; - struct pthread *thread; + void *tls_base; #endif }; @@ -249,22 +272,27 @@ static int start_c11(void *p) } #else __attribute__((export_name("wasi_thread_start"))) -int wasi_thread_start(int tid, void *p) +_Noreturn void wasi_thread_start(int tid, void *p) { struct start_args *args = p; + __asm__(".globaltype __tls_base, i32\n" + "local.get %0\n" + "global.set __tls_base\n" + :: "r"(args->tls_base)); + pthread_t self = __pthread_self(); // Set the thread ID (TID) on the pthread structure. The TID is stored // atomically since it is also stored by the parent thread; this way, // whichever thread (parent or child) reaches this point first can proceed // without waiting. - atomic_store((atomic_int *) &(args->thread->tid), tid); - // Save the pointer to the pthread structure as the global `pthread_self`. 
- __asm__("local.set %0\n" - "global.set __wasilibc_pthread_self\n" - : "=r"(args->thread)); + atomic_store((atomic_int *) &(self->tid), tid); + // Set the stack pointer. + __asm__(".globaltype __stack_pointer, i32\n" + "local.get %0\n" + "global.set __stack_pointer\n" + :: "r"(self->stack)); // Execute the user's start function. int (*start)(void*) = (int(*)(void*)) args->start_func; __pthread_exit((void *)(uintptr_t)start(args->start_arg)); - return 0; } #endif @@ -299,8 +327,18 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att #endif pthread_attr_t attr = { 0 }; sigset_t set; +#ifndef __wasilibc_unmodified_upstream + size_t tls_size = __builtin_wasm_tls_size(); + size_t tls_align = __builtin_wasm_tls_align(); + void* tls_base = __builtin_wasm_tls_base(); + void* new_tls_base; + size_t tls_offset; + tls_size += tls_align; +#endif +#ifdef __wasilibc_unmodified_upstream if (!libc.can_do_threads) return ENOSYS; +#endif self = __pthread_self(); if (!libc.threaded) { for (FILE *f=*__ofl_lock(); f; f=f->next) @@ -327,7 +365,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att } if (attr._a_stackaddr) { +#ifdef __wasilibc_unmodified_upstream size_t need = libc.tls_size + __pthread_tsd_size; +#else + size_t need = tls_size + __pthread_tsd_size; +#endif size = attr._a_stacksize; stack = (void *)(attr._a_stackaddr & -16); stack_limit = (void *)(attr._a_stackaddr - size); @@ -336,7 +378,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att * application's stack space. 
*/ if (need < size/8 && need < 2048) { tsd = stack - __pthread_tsd_size; +#ifdef __wasilibc_unmodified_upstream stack = tsd - libc.tls_size; +#else + stack = tsd - tls_size; +#endif memset(stack, 0, need); } else { size = ROUND(need); @@ -345,7 +391,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att } else { guard = ROUND(attr._a_guardsize); size = guard + ROUND(attr._a_stacksize +#ifdef __wasilibc_unmodified_upstream + libc.tls_size + __pthread_tsd_size); +#else + + tls_size + __pthread_tsd_size); +#endif } if (!tsd) { @@ -368,12 +418,22 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att #endif tsd = map + size - __pthread_tsd_size; if (!stack) { +#ifdef __wasilibc_unmodified_upstream stack = tsd - libc.tls_size; +#else + stack = tsd - tls_size; +#endif stack_limit = map + guard; } } +#ifdef __wasilibc_unmodified_upstream new = __copy_tls(tsd - libc.tls_size); +#else + new_tls_base = __copy_tls(tsd - tls_size); + tls_offset = new_tls_base - tls_base; + new = (void*)((uintptr_t)self + tls_offset); +#endif new->map_base = map; new->map_size = size; new->stack = stack; @@ -394,12 +454,12 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att /* Setup argument structure for the new thread on its stack. * It's safe to access from the caller only until the thread * list is unlocked. */ +#ifdef __wasilibc_unmodified_upstream stack -= (uintptr_t)stack % sizeof(uintptr_t); stack -= sizeof(struct start_args); struct start_args *args = (void *)stack; args->start_func = entry; args->start_arg = arg; -#ifdef __wasilibc_unmodified_upstream args->control = attr._a_sched ? 
1 : 0; /* Application signals (but not the synccall signal) must be @@ -414,9 +474,19 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &= ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long)))); #else - /* The new thread needs a pointer to the pthread struct so that it can set - * up its `wasilibc_pthread_self` global. */ - args->thread = new; + /* Align the stack to struct start_args */ + stack -= sizeof(struct start_args); + stack -= (uintptr_t)stack % alignof(struct start_args); + struct start_args *args = (void *)stack; + + /* Align the stack to 16 and store it */ + new->stack = (void *)((uintptr_t) stack & -16); + /* Correct the stack size */ + new->stack_size = stack - stack_limit; + + args->start_func = entry; + args->start_arg = arg; + args->tls_base = (void*)new_tls_base; #endif __tl_lock(); @@ -457,7 +527,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att if (ret < 0) { ret = -EAGAIN; } else { - atomic_store((atomic_int *) &(args->thread->tid), ret); + atomic_store((atomic_int *) &(new->tid), ret); } #endif diff --git a/libc-top-half/musl/src/thread/pthread_self.c b/libc-top-half/musl/src/thread/pthread_self.c index 197c6830..1f3eee1d 100644 --- a/libc-top-half/musl/src/thread/pthread_self.c +++ b/libc-top-half/musl/src/thread/pthread_self.c @@ -3,9 +3,7 @@ #if !defined(__wasilibc_unmodified_upstream) && defined(__wasm__) && \ defined(_REENTRANT) -// We need some place to store the thread ID. This WebAssembly global fits the -// bill and is used by `__get_tp` elsewhere. -__asm__(".globaltype __wasilibc_pthread_self, i32\n"); +_Thread_local struct pthread __wasilibc_pthread_self; #endif static pthread_t __pthread_self_internal()