summaryrefslogtreecommitdiffstats
path: root/libc-top-half/musl/src/thread/pthread_create.c
blob: 676e2ccf95d2a2b54398da61f83c1716df20de85 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
#define _GNU_SOURCE
#include "pthread_impl.h"
#include "stdio_impl.h"
#include "libc.h"
#include "lock.h"
#ifdef __wasilibc_unmodified_upstream
#include <sys/mman.h>
#endif
#include <string.h>
#include <stddef.h>
#ifndef __wasilibc_unmodified_upstream
#include <stdatomic.h>
#endif

#include <stdalign.h>

/* Empty function that serves as the default target for the optional hooks
 * aliased below. The aliases are declared through weak_alias, so presumably
 * a real definition in another translation unit overrides the no-op when
 * that feature is linked in. */
static void dummy_0()
{
}
weak_alias(dummy_0, __acquire_ptc);
weak_alias(dummy_0, __release_ptc);
weak_alias(dummy_0, __pthread_tsd_run_dtors);
weak_alias(dummy_0, __do_orphaned_stdio_locks);
#ifdef __wasilibc_unmodified_upstream
/* These two hooks are only referenced by the upstream (non-WASI) code paths
 * in this file. */
weak_alias(dummy_0, __dl_thread_cleanup);
weak_alias(dummy_0, __membarrier_init);
#endif

/* Extra nesting levels held by the current owner of __thread_list_lock. */
static int tl_lock_count;
/* Waiter counter passed to __wait() for __thread_list_lock. */
static int tl_lock_waiters;

/*
 * Acquire the global thread-list lock.
 *
 * The lock word holds the tid of its owner (0 when free). A thread that
 * already owns the lock only bumps the nesting counter, so the lock may be
 * taken recursively as long as every acquisition is paired with one
 * __tl_unlock().
 */
void __tl_lock(void)
{
	const int self_tid = __pthread_self()->tid;

	/* Recursive acquisition by the current owner. */
	if (__thread_list_lock == self_tid) {
		tl_lock_count++;
		return;
	}

	/* Try to swing the lock word from 0 to our tid. A nonzero result is
	 * the value we lost against; sleep on it and retry. */
	for (;;) {
		int holder = a_cas(&__thread_list_lock, 0, self_tid);
		if (!holder)
			break;
		__wait(&__thread_list_lock, &tl_lock_waiters, holder, 0);
	}
}

/*
 * Release one level of the thread-list lock.
 *
 * While nested acquisitions are outstanding only the nesting counter is
 * decremented; the outermost release clears the lock word and wakes one
 * waiter if any are registered.
 */
void __tl_unlock(void)
{
	if (!tl_lock_count) {
		/* Outermost release: actually free the lock. */
		a_store(&__thread_list_lock, 0);
		if (tl_lock_waiters)
			__wake(&__thread_list_lock, 1, 0);
		return;
	}

	/* Inner release of a recursive acquisition. */
	tl_lock_count--;
}

/*
 * Wait for the thread-list lock to change from the value observed on entry,
 * without acquiring it.
 *
 * If the lock is free when sampled this returns immediately. Otherwise the
 * caller sleeps on the lock word and, once woken, passes the wakeup on to
 * another waiter if any remain. The `td` argument is not used here.
 */
void __tl_sync(pthread_t td)
{
	a_barrier();

	int holder = __thread_list_lock;
	if (holder) {
		__wait(&__thread_list_lock, &tl_lock_waiters, holder, 0);
		if (tl_lock_waiters)
			__wake(&__thread_list_lock, 1, 0);
	}
}

/*
 * Terminate the calling thread and publish `result` for a joiner.
 *
 * Steps, in order:
 *   1. disable cancellation and record `result`;
 *   2. run pending cancellation cleanup handlers and the TSD destructors;
 *   3. take killlock and the thread-list lock;
 *   4. if this is the only thread left, undo step 3 and call exit(0);
 *   5. process the thread's robust mutex list and orphaned stdio locks;
 *   6. unlink the thread from the global thread list;
 *   7. for a detached thread release its stack, otherwise mark the thread
 *      DT_EXITED and wake a joiner.
 *
 * Upstream (Linux) builds never return: the thread ends in SYS_exit and the
 * kernel clears the thread-list lock. WASI builds have no such syscall, so
 * this function returns to its caller and has to release the thread-list
 * lock itself. It does so exactly once, at the last possible point on each
 * return path.
 */
#ifdef __wasilibc_unmodified_upstream
_Noreturn void __pthread_exit(void *result)
#else
static void __pthread_exit(void *result)
#endif
{
	pthread_t self = __pthread_self();
	sigset_t set;

	self->canceldisable = 1;
	self->cancelasync = 0;
	self->result = result;

	/* Pop and run cleanup handlers, most recently pushed first. The
	 * handler is unlinked before it is called. */
	while (self->cancelbuf) {
		void (*f)(void *) = self->cancelbuf->__f;
		void *x = self->cancelbuf->__x;
		self->cancelbuf = self->cancelbuf->__next;
		f(x);
	}

	__pthread_tsd_run_dtors();

#ifdef __wasilibc_unmodified_upstream
	__block_app_sigs(&set);
#endif

	/* This atomic potentially competes with a concurrent pthread_detach
	 * call; the loser is responsible for freeing thread resources. */
	int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING);

	if (state==DT_DETACHED && self->map_base) {
		/* Since __unmapself bypasses the normal munmap code path,
		 * explicitly wait for vmlock holders first. This must be
		 * done before any locks are taken, to avoid lock ordering
		 * issues that could lead to deadlock. */
#ifdef __wasilibc_unmodified_upstream
		__vm_wait();
#endif
	}

	/* Access to target the exiting thread with syscalls that use
	 * its kernel tid is controlled by killlock. For detached threads,
	 * any use past this point would have undefined behavior, but for
	 * joinable threads it's a valid usage that must be handled.
	 * Signals must be blocked since pthread_kill must be AS-safe. */
	LOCK(self->killlock);

	/* The thread list lock must be AS-safe, and thus depends on
	 * application signals being blocked above. */
	__tl_lock();

	/* If this is the only thread in the list, don't proceed with
	 * termination of the thread, but restore the previous lock and
	 * signal state to prepare for exit to call atexit handlers. */
	if (self->next == self) {
		__tl_unlock();
		UNLOCK(self->killlock);
		self->detach_state = state;
#ifdef __wasilibc_unmodified_upstream
		__restore_sigs(&set);
#endif
		exit(0);
	}

	/* At this point we are committed to thread termination. */

#ifdef __wasilibc_unmodified_upstream
	/* Process robust list in userspace to handle non-pshared mutexes
	 * and the detached thread case where the robust list head will
	 * be invalid when the kernel would process it. */
	__vm_lock();
#endif
	volatile void *volatile *rp;
	while ((rp=self->robust_list.head) && rp != &self->robust_list.head) {
		pthread_mutex_t *m = (void *)((char *)rp
			- offsetof(pthread_mutex_t, _m_next));
		int waiters = m->_m_waiters;
		int priv = (m->_m_type & 128) ^ 128;
		self->robust_list.pending = rp;
		self->robust_list.head = *rp;
		int cont = a_swap(&m->_m_lock, 0x40000000);
		self->robust_list.pending = 0;
		if (cont < 0 || waiters)
			__wake(&m->_m_lock, 1, priv);
	}
#ifdef __wasilibc_unmodified_upstream
	__vm_unlock();
#endif

	__do_orphaned_stdio_locks();
#ifdef __wasilibc_unmodified_upstream
	__dl_thread_cleanup();
#endif

	/* Last, unlink thread from the list. This change will not be visible
	 * until the lock is released, which only happens after SYS_exit
	 * has been called, via the exit futex address pointing at the lock.
	 * This needs to happen after any possible calls to LOCK() that might
	 * skip locking if process appears single-threaded. */
	if (!--libc.threads_minus_1) libc.need_locks = -1;
	self->next->prev = self->prev;
	self->prev->next = self->next;
	self->prev = self->next = self;

	/* On Linux the thread is created with CLONE_CHILD_CLEARTID and the
	 * kernel releases the thread list lock when the thread terminates
	 * (see set_tid_address(2)). WebAssembly has no equivalent, so the
	 * lock is released by hand on each return path below.
	 *
	 * It must be released only once: __tl_lock() was taken once above,
	 * so a second __tl_unlock() would find the nesting count at zero and
	 * store 0 into a lock that another thread may already have acquired.
	 * Releasing it late rather than here also keeps the lock held while
	 * this thread is still writing to its own descriptor below. */

#ifdef __wasilibc_unmodified_upstream
	if (state==DT_DETACHED && self->map_base) {
		/* Detached threads must block even implementation-internal
		 * signals, since they will not have a stack in their last
		 * moments of existence. */
		__block_all_sigs(&set);

		/* Robust list will no longer be valid, and was already
		 * processed above, so unregister it with the kernel. */
		if (self->robust_list.off)
			__syscall(SYS_set_robust_list, 0, 3*sizeof(long));

		/* The following call unmaps the thread's stack mapping
		 * and then exits without touching the stack. */
		__unmapself(self->map_base, self->map_size);
	}
#else
	if (state==DT_DETACHED && self->map_base) {
		/* __syscall(SYS_exit) would unlock the thread list;
		 * do it manually here. This is the single release for the
		 * detached path. */
		__tl_unlock();
		/* NOTE(review): map_base is the malloc()ed block that
		 * __pthread_create set up to hold this thread's stack and
		 * descriptor, so the thread appears to still be running on
		 * memory it has just freed while it returns -- confirm and
		 * consider deferring the free. */
		free(self->map_base);
		/* Can't use `exit()` here, because it is too high level */
		return;
	}
#endif

	/* Wake any joiner. */
	a_store(&self->detach_state, DT_EXITED);
	__wake(&self->detach_state, 1, 1);

	/* After the kernel thread exits, its tid may be reused. Clear it
	 * to prevent inadvertent use and inform functions that would use
	 * it that it's no longer available. */
	self->tid = 0;
	UNLOCK(self->killlock);

#ifdef __wasilibc_unmodified_upstream
	for (;;) __syscall(SYS_exit, 0);
#else
	/* __syscall(SYS_exit) would unlock the thread list; do it manually
	 * here. This is the single release for the joinable path, done
	 * after the last write to `self`. */
	__tl_unlock();
	/* Can't use `exit()` here, because it is too high level */
#endif
}

void __do_cleanup_push(struct __ptcb *cb)
{
	struct pthread *self = __pthread_self();
	cb->__next = self->cancelbuf;
	self->cancelbuf = cb;
}

void __do_cleanup_pop(struct __ptcb *cb)
{
	__pthread_self()->cancelbuf = cb->__next;
}

/*
 * Arguments handed from __pthread_create to the new thread's entry point.
 * __pthread_create places this structure on the new thread's own stack.
 */
struct start_args {
#ifdef __wasilibc_unmodified_upstream
	/* User entry point and its argument. */
	void *(*start_func)(void *);
	void *start_arg;
	/* Handshake word shared with the creator: set to 1 when explicit
	 * scheduling was requested, 0 otherwise; see start(). */
	volatile int control;
	/* Signal mask the new thread installs before running start_func. */
	unsigned long sig_mask[_NSIG/8/sizeof(long)];
#else
	/*
	 * Note: the offset of the "stack" and "tls_base" members
	 * in this structure is hardcoded in wasi_thread_start.
	 * Do not reorder or insert fields before them.
	 */
	/* Initial (16-byte aligned) stack pointer for the new thread. */
	void *stack;
	/* Base of the new thread's TLS block. */
	void *tls_base;
	/* User entry point and its argument. */
	void *(*start_func)(void *);
	void *start_arg;
#endif
};

#ifdef __wasilibc_unmodified_upstream
/*
 * Entry point passed to __clone() for POSIX threads (upstream builds only).
 *
 * `p` points at the struct start_args prepared by __pthread_create. When
 * args->control is nonzero the creator is going to apply explicit
 * scheduling parameters, and the new thread waits for the outcome before
 * running user code. Otherwise it installs its signal mask, runs
 * start_func and exits with its return value.
 */
static int start(void *p)
{
	struct start_args *args = p;
	int state = args->control;
	if (state) {
		/* Advertise that we are waiting (1 -> 2) and sleep until the
		 * creator replaces the 2. If the creator already changed the
		 * value, the CAS fails and no wait is needed. */
		if (a_cas(&args->control, 1, 2)==1)
			__wait(&args->control, 0, 2, 1);
		/* A nonzero value here means the creator reported failure
		 * (it stores 3 in that case). Exit without running user code;
		 * the clear-tid address is pointed at control, presumably so
		 * the creator's wait on it is released at exit. */
		if (args->control) {
#ifdef __wasilibc_unmodified_upstream
			__syscall(SYS_set_tid_address, &args->control);
			for (;;) __syscall(SYS_exit, 0);
#endif
		}
	}
#ifdef __wasilibc_unmodified_upstream
	/* Install the mask prepared by the creator. */
	__syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->sig_mask, 0, _NSIG/8);
#endif
	__pthread_exit(args->start_func(args->start_arg));
	return 0;
}

/*
 * Entry point passed to __clone() for C11 threads (upstream builds only).
 *
 * A C11 thread function returns int rather than void *, so the stored
 * start_func is cast back to that signature and its integer result is
 * widened into the pointer-sized exit value.
 */
static int start_c11(void *p)
{
	struct start_args *args = p;
	int (*c11_entry)(void *) = (int (*)(void *)) args->start_func;
	int rc = c11_entry(args->start_arg);

	__pthread_exit((void *)(uintptr_t)rc);
	return 0;
}
#else

/*
 * wasi_thread_start is only reached indirectly, through a wasm export, so
 * the linker cannot see that pthread_create depends on it. Taking its
 * address here creates an explicit reference, ensuring it is linked in
 * whenever pthread_create is used.
 */
void wasi_thread_start(int tid, void *p);
hidden void *__dummy_reference = wasi_thread_start;

/*
 * C-level entry for a newly spawned WASI thread.
 *
 * `tid` is the thread ID for this thread and `p` is the struct start_args
 * that __pthread_create prepared for it.
 */
hidden void __wasi_thread_start_C(int tid, void *p)
{
	struct start_args *const args = p;
	atomic_int *tid_slot = (atomic_int *) &(__pthread_self()->tid);

	/* Both this thread and its creator store the TID into the pthread
	 * structure. Doing it atomically on both sides means whichever
	 * gets here first can carry on without waiting for the other. */
	atomic_store(tid_slot, tid);

	/* Run the user's start function and exit with its result. */
	void *result = args->start_func(args->start_arg);
	__pthread_exit(result);
}
#endif

/* ROUND(x): round x up to the allocation granularity used for thread
 * stacks. The mask trick requires the granularity to be a power of two. */
#ifdef __wasilibc_unmodified_upstream
#define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
#else
/*
 * As we allocate stack with malloc() instead of mmap/mprotect,
 * there is no point in rounding it up to PAGE_SIZE.
 * Instead, round up to a 16-byte alignment.
 * Note: PAGE_SIZE is rather big on WASM. (65536)
 */
#define ROUND(x) (((x)+16-1)&-16)
#endif

/* Default (empty) thread-specific-data size and main-thread TSD array.
 * pthread_key_create.c overrides this */
static volatile size_t dummy = 0;
weak_alias(dummy, __pthread_tsd_size);
static void *dummy_tsd[1] = { 0 };
weak_alias(dummy_tsd, __pthread_tsd_main);

/* Null defaults for the "stream in use" pointers consulted by
 * __pthread_create; presumably the stdio sources provide the real
 * definitions when the corresponding stream is linked in. */
static FILE *volatile dummy_file = 0;
weak_alias(dummy_file, __stdin_used);
weak_alias(dummy_file, __stdout_used);
weak_alias(dummy_file, __stderr_used);

/*
 * Reset the lock field of stream `f` to 0 if it is currently negative.
 * A null `f` is ignored. Called for every open stream when the process
 * creates its first thread (a negative value presumably marks a stream
 * that skipped locking while single-threaded).
 */
static void init_file_lock(FILE *f)
{
	if (!f)
		return;
	if (f->lock >= 0)
		return;
	f->lock = 0;
}

/*
 * Create a new thread that runs entry(arg).
 *
 * res    receives the handle of the new thread on success.
 * attrp  thread attributes; NULL or __ATTRP_C11_THREAD selects the default
 *        stack and guard sizes.
 * entry  start routine of the new thread.
 * arg    argument passed to entry.
 *
 * Returns 0 on success. Returns EAGAIN when the stack cannot be allocated
 * or the thread cannot be spawned. Upstream builds also return ENOSYS when
 * threads are unavailable, or the scheduler error when explicit scheduling
 * fails.
 *
 * Layout of the per-thread allocation (when this function allocates it),
 * from low to high addresses: guard, stack, TLS, TSD. If the caller
 * supplies a stack, TLS and TSD are carved out of it only when they are
 * small relative to the stack; otherwise a separate block is allocated
 * for them.
 */
int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp, void *(*entry)(void *), void *restrict arg)
{
	int ret, c11 = (attrp == __ATTRP_C11_THREAD);
	size_t size, guard;
	struct pthread *self, *new;
	unsigned char *map = 0, *stack = 0, *tsd = 0, *stack_limit;
#ifdef __wasilibc_unmodified_upstream
	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
#endif
	pthread_attr_t attr = { 0 };
	sigset_t set;
#ifndef __wasilibc_unmodified_upstream
	size_t tls_size = __builtin_wasm_tls_size();
	size_t tls_align = __builtin_wasm_tls_align();
	void* tls_base = __builtin_wasm_tls_base();
	void* new_tls_base;
	size_t tls_offset;
	/* Reserve extra room so the TLS block can be aligned inside it. */
	tls_size += tls_align;
#endif

#ifdef __wasilibc_unmodified_upstream
	if (!libc.can_do_threads) return ENOSYS;
#endif
	self = __pthread_self();
	/* One-time switch of the process into threaded mode: reset the lock
	 * state of all open stdio streams and give the calling thread its
	 * TSD array. */
	if (!libc.threaded) {
		for (FILE *f=*__ofl_lock(); f; f=f->next)
			init_file_lock(f);
		__ofl_unlock();
		init_file_lock(__stdin_used);
		init_file_lock(__stdout_used);
		init_file_lock(__stderr_used);
#ifdef __wasilibc_unmodified_upstream
		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
#endif
		self->tsd = (void **)__pthread_tsd_main;
#ifdef __wasilibc_unmodified_upstream
		__membarrier_init();
#endif
		libc.threaded = 1;
	}
	if (attrp && !c11) attr = *attrp;

	__acquire_ptc();
	if (!attrp || c11) {
		attr._a_stacksize = __default_stacksize;
		attr._a_guardsize = __default_guardsize;
	}

	if (attr._a_stackaddr) {
		/* Caller-provided stack; _a_stackaddr is its high end. */
#ifdef __wasilibc_unmodified_upstream
		size_t need = libc.tls_size + __pthread_tsd_size;
#else
		size_t need = tls_size + __pthread_tsd_size;
#endif
		size = attr._a_stacksize;
		stack = (void *)(attr._a_stackaddr & -16);
		stack_limit = (void *)(attr._a_stackaddr - size);
		/* Use application-provided stack for TLS only when
		 * it does not take more than ~12% or 2k of the
		 * application's stack space. */
		if (need < size/8 && need < 2048) {
			tsd = stack - __pthread_tsd_size;
#ifdef __wasilibc_unmodified_upstream
			stack = tsd - libc.tls_size;
#else
			stack = tsd - tls_size;
#endif
			memset(stack, 0, need);
		} else {
			/* TLS/TSD go into a separately allocated block. */
			size = ROUND(need);
		}
		guard = 0;
	} else {
		guard = ROUND(attr._a_guardsize);
		size = guard + ROUND(attr._a_stacksize
#ifdef __wasilibc_unmodified_upstream
			+ libc.tls_size +  __pthread_tsd_size);
#else
			+ tls_size +  __pthread_tsd_size);
#endif
	}

	/* tsd is still null unless TLS/TSD were placed on a caller-provided
	 * stack above, so an allocation is needed. */
	if (!tsd) {
#ifdef __wasilibc_unmodified_upstream
		if (guard) {
			map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
			if (map == MAP_FAILED) goto fail;
			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)
			    && errno != ENOSYS) {
				__munmap(map, size);
				goto fail;
			}
		} else {
			map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
			if (map == MAP_FAILED) goto fail;
		}
#else
		map = malloc(size);
		if (!map) goto fail;
#endif
		tsd = map + size - __pthread_tsd_size;
		if (!stack) {
#ifdef __wasilibc_unmodified_upstream
			stack = tsd - libc.tls_size;
#else
			stack = tsd - tls_size;
#endif
			stack_limit = map + guard;
		}
	}

#ifdef __wasilibc_unmodified_upstream
	new = __copy_tls(tsd - libc.tls_size);
#else
	/* The new thread descriptor sits at the same offset from the new TLS
	 * base as the calling thread's descriptor does from the current one. */
	new_tls_base = __copy_tls(tsd - tls_size);
	tls_offset = new_tls_base - tls_base;
	new = (void*)((uintptr_t)self + tls_offset);
#endif
	new->map_base = map;
	new->map_size = size;
	new->stack = stack;
	new->stack_size = stack - stack_limit;
	new->guard_size = guard;
	new->self = new;
	new->tsd = (void *)tsd;
	new->locale = &libc.global_locale;
	if (attr._a_detach) {
		new->detach_state = DT_DETACHED;
	} else {
		new->detach_state = DT_JOINABLE;
	}
	/* An empty robust list points at itself. */
	new->robust_list.head = &new->robust_list.head;
	new->canary = self->canary;
	new->sysinfo = self->sysinfo;

	/* Setup argument structure for the new thread on its stack.
	 * It's safe to access from the caller only until the thread
	 * list is unlocked. */
#ifdef __wasilibc_unmodified_upstream
	stack -= (uintptr_t)stack % sizeof(uintptr_t);
	stack -= sizeof(struct start_args);
	struct start_args *args = (void *)stack;
	args->start_func = entry;
	args->start_arg = arg;
	args->control = attr._a_sched ? 1 : 0;

	/* Application signals (but not the synccall signal) must be
	 * blocked before the thread list lock can be taken, to ensure
	 * that the lock is AS-safe. */
	__block_app_sigs(&set);

	/* Ensure SIGCANCEL is unblocked in new thread. This requires
	 * working with a copy of the set so we can restore the
	 * original mask in the calling thread. */
	memcpy(&args->sig_mask, &set, sizeof args->sig_mask);
	args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &=
		~(1UL<<((SIGCANCEL-1)%(8*sizeof(long))));
#else
	/* Align the stack to struct start_args */
	stack -= sizeof(struct start_args);
	stack -= (uintptr_t)stack % alignof(struct start_args);
	struct start_args *args = (void *)stack;

	/* Align the stack to 16 and store it */
	new->stack = (void *)((uintptr_t) stack & -16);
	/* Correct the stack size */
	new->stack_size = stack - stack_limit;

	args->stack = new->stack; /* just for convenience of asm trampoline */
	args->start_func = entry;
	args->start_arg = arg;
	args->tls_base = (void*)new_tls_base;
#endif

	__tl_lock();
	if (!libc.threads_minus_1++) libc.need_locks = 1;
#ifdef __wasilibc_unmodified_upstream
	ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock);
#else
	/* Instead of `__clone`, WASI uses a host API to instantiate a new version
	 * of the current module and start executing the entry function. The
	 * wasi-threads specification requires the module to export a
	 * `wasi_thread_start` function, which is invoked with `args`. */
	ret = __wasi_thread_spawn((void *) args);
#endif

#ifdef __wasilibc_unmodified_upstream
	/* All clone failures translate to EAGAIN. If explicit scheduling
	 * was requested, attempt it before unlocking the thread list so
	 * that the failed thread is never exposed and so that we can
	 * clean up all transient resource usage before returning. */
	if (ret < 0) {
		ret = -EAGAIN;
	} else if (attr._a_sched) {
		ret = __syscall(SYS_sched_setscheduler,
			new->tid, attr._a_policy, &attr._a_prio);
		if (a_swap(&args->control, ret ? 3 : 0)==2)
			__wake(&args->control, 1, 1);
		if (ret)
			__wait(&args->control, 0, 3, 0);
	}
#else
	/* `wasi_thread_spawn` will either return a host-provided thread ID (TID)
	 * (`>= 0`) or an error code (`< 0`). As in the unmodified version, all
	 * spawn failures translate to EAGAIN; unlike the unmodified version,
	 * there is no need to "start up" the child thread--the host does this.
	 * If the spawn did succeed, then we store the TID atomically, since this
	 * parent thread is racing with the child thread to set this field; this
	 * way, whichever thread reaches this point first can continue without
	 * waiting. */
	if (ret < 0) {
		ret = -EAGAIN;
	} else {
		atomic_store((atomic_int *) &(new->tid), ret);
	}
#endif

	if (ret >= 0) {
		/* Link the new thread into the list right after the caller. */
		new->next = self->next;
		new->prev = self;
		new->next->prev = new;
		new->prev->next = new;
	} else {
		/* Undo the thread count increment made before spawning. */
		if (!--libc.threads_minus_1) libc.need_locks = 0;
	}
	__tl_unlock();
#ifdef __wasilibc_unmodified_upstream
	__restore_sigs(&set);
#endif
	__release_ptc();

	if (ret < 0) {
		/* ret holds a negated error code; release the allocation
		 * (if any) and return the positive error number. */
#ifdef __wasilibc_unmodified_upstream
		if (map) __munmap(map, size);
#else
		free(map);
#endif
		return -ret;
	}

	*res = new;
	return 0;
fail:
	/* Stack allocation failed before any thread state was published. */
	__release_ptc();
	return EAGAIN;
}

/* Public names. pthread_exit is aliased here only for upstream builds;
 * in the WASI build __pthread_exit is a static function in this file, so
 * no alias to it is created here. */
#ifdef __wasilibc_unmodified_upstream
weak_alias(__pthread_exit, pthread_exit);
#endif
weak_alias(__pthread_create, pthread_create);