summaryrefslogtreecommitdiffstats
path: root/lib/isc/trampoline.c
blob: be451a98c1ba6d5c088a319435fff03214ed9565 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

#include <inttypes.h>
#include <stdlib.h>
#include <uv.h>

#include <isc/mem.h>
#include <isc/once.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "trampoline_p.h"

/*
 * Value held in a trampoline's 'self' field from creation until a thread
 * attaches to it.
 */
#define ISC__TRAMPOLINE_UNUSED 0

/*
 * Per-thread bookkeeping record.  One record occupies one slot of the
 * 'trampolines' table; 'tid' is the index of that slot.
 */
struct isc__trampoline {
	/* Slot index in the 'trampolines' table; set once at creation. */
	int tid; /* const */
	/*
	 * Value of isc_thread_self() for the attached thread, or
	 * ISC__TRAMPOLINE_UNUSED while no thread has attached yet.
	 */
	uintptr_t self;
	/* Thread entry function invoked by isc__trampoline_run(). */
	isc_threadfunc_t start;
	/* Argument passed to 'start'. */
	isc_threadarg_t arg;
	/*
	 * Dummy allocation made in isc__trampoline_attach() and released in
	 * isc__trampoline_detach(); never used for anything else.
	 */
	void *jemalloc_enforce_init;
};

/*
 * We can't use the isc_mem API here, because this code is called too
 * early: if the isc_mem_debugging flags are changed later and the
 * ISC_MEM_DEBUGSIZE or ISC_MEM_DEBUGCTX flags are added, neither
 * isc_mem_put() nor isc_mem_free() can be used to free the memory
 * allocated here, because those flags were not set when
 * isc_mem_get() or isc_mem_allocate() was called here.
 *
 * Since this is a single allocation at library load and deallocation at library
 * unload, using the standard allocator without the tracking is fine for this
 * single purpose.
 *
 * We can't use the isc_mutex API either, because we track whether the mutexes
 * get properly destroyed, and we intentionally leak the static mutex here
 * without destroying it to prevent a data race between the library destructor
 * running and a thread that is still being created.
 */

/*
 * Held by isc__trampoline_get(), isc__trampoline_attach() and
 * isc__trampoline_detach() while they access the trampoline table.
 */
static uv_mutex_t isc__trampoline_lock;

/*
 * Table of trampoline pointers indexed by tid.  A NULL entry is a free slot;
 * slot 0 is filled in by do_init().
 */
static isc__trampoline_t **trampolines;

/*
 * Per-thread tid.  It stays at SIZE_MAX until do_init() or
 * isc__trampoline_attach() assigns the trampoline's tid on that thread.
 *
 * All three feature tests use defined() so that an unset macro is never
 * evaluated in an #elif expression.
 */
#if defined(HAVE_THREAD_LOCAL)
#include <threads.h>
thread_local size_t isc_tid_v = SIZE_MAX;
#elif defined(HAVE___THREAD)
__thread size_t isc_tid_v = SIZE_MAX;
#elif defined(HAVE___DECLSPEC_THREAD)
__declspec(thread) size_t isc_tid_v = SIZE_MAX;
#endif /* if defined(HAVE_THREAD_LOCAL) */

/* Index at which isc__trampoline_get() starts looking for a free slot. */
static size_t isc__trampoline_min = 1;
/* Current number of slots in 'trampolines'; doubled when the table is full. */
static size_t isc__trampoline_max = 65;

/* Guards for do_init() and do_shutdown() respectively. */
static isc_once_t start_once = ISC_ONCE_INIT;
static isc_once_t stop_once = ISC_ONCE_INIT;

/*
 * Allocate a trampoline record for slot 'tid' that will run 'start(arg)'.
 *
 * The record is created in the unattached state ('self' is
 * ISC__TRAMPOLINE_UNUSED).  Allocation failure is fatal, so the returned
 * pointer is never NULL.
 */
static isc__trampoline_t *
isc__trampoline_new(int tid, isc_threadfunc_t start, isc_threadarg_t arg) {
	isc__trampoline_t *trampoline = calloc(1, sizeof(*trampoline));
	RUNTIME_CHECK(trampoline != NULL);

	/* Populate every field explicitly. */
	trampoline->tid = tid;
	trampoline->self = ISC__TRAMPOLINE_UNUSED;
	trampoline->start = start;
	trampoline->arg = arg;
	trampoline->jemalloc_enforce_init = NULL;

	return (trampoline);
}

/*
 * One-time set-up of the trampoline machinery: initialize the lock,
 * allocate the slot table, and claim slot 0 for the calling thread
 * (expected to be the main thread).
 *
 * Any failure here is fatal.
 */
static void
do_init(void) {
	/*
	 * uv_mutex_init() reports failure through its return value; using an
	 * uninitialized mutex later would be undefined, so fail loudly now.
	 */
	int r = uv_mutex_init(&isc__trampoline_lock);
	RUNTIME_CHECK(r == 0);

	trampolines = calloc(isc__trampoline_max, sizeof(trampolines[0]));
	RUNTIME_CHECK(trampolines != NULL);

	/* Get the trampoline slot 0 for the main thread */
	trampolines[0] = isc__trampoline_new(0, NULL, NULL);
	isc_tid_v = trampolines[0]->tid;
	trampolines[0]->self = isc_thread_self();

	/* Initialize the other trampolines */
	for (size_t i = 1; i < isc__trampoline_max; i++) {
		trampolines[i] = NULL;
	}

	/* Slot 0 is taken, so the search for free slots starts at 1. */
	isc__trampoline_min = 1;
}

/*
 * Set up the trampoline machinery by running do_init() through
 * isc_once_do(), guarded by 'start_once'.
 */
void
isc__trampoline_initialize(void) {
	isc_once_do(&start_once, do_init);
}

static void
do_shutdown(void) {
	/*
	 * When the program using the library exits abruptly and the library
	 * gets unloaded, there might be some existing trampolines from unjoined
	 * threads.  We intentionally ignore those and don't check whether all
	 * trampolines have been cleared before exiting, so we leak a little bit
	 * of resources here, including the lock.
	 */
	free(trampolines[0]);
}

/*
 * Tear down the trampoline machinery by running do_shutdown() through
 * isc_once_do(), guarded by 'stop_once'.
 */
void
isc__trampoline_shutdown(void) {
	isc_once_do(&stop_once, do_shutdown);
}

/*
 * Reserve a free slot in the trampoline table and return a new, unattached
 * trampoline that will run 'start(arg)'.
 *
 * The search starts at 'isc__trampoline_min'.  When no free slot is found,
 * the table is replaced by one twice as large and the search is repeated.
 * The whole operation runs with 'isc__trampoline_lock' held.  Allocation
 * failure is fatal, so the result is never NULL.
 */
isc__trampoline_t *
isc__trampoline_get(isc_threadfunc_t start, isc_threadarg_t arg) {
	isc__trampoline_t *trampoline = NULL;

	uv_mutex_lock(&isc__trampoline_lock);

	while (trampoline == NULL) {
		size_t slot = isc__trampoline_min;

		/* Advance to the first free slot, if there is one. */
		while (slot < isc__trampoline_max && trampolines[slot] != NULL)
		{
			slot++;
		}

		if (slot < isc__trampoline_max) {
			trampoline = isc__trampoline_new(slot, start, arg);
			trampolines[slot] = trampoline;
			isc__trampoline_min = slot + 1;
		} else {
			/* Table is full: move everything to one twice the size. */
			size_t doubled = 2 * isc__trampoline_max;
			isc__trampoline_t **tmp =
				calloc(doubled, sizeof(trampolines[0]));
			RUNTIME_CHECK(tmp != NULL);

			for (size_t i = 0; i < doubled; i++) {
				tmp[i] = (i < isc__trampoline_max)
						 ? trampolines[i]
						 : NULL;
			}

			free(trampolines);
			trampolines = tmp;
			isc__trampoline_max = doubled;
		}
	}

	INSIST(trampoline != NULL);
	uv_mutex_unlock(&isc__trampoline_lock);

	return (trampoline);
}

/*
 * Release 'trampoline' and return its slot to the free pool.
 *
 * Must be called by the thread recorded in 'trampoline->self', on a
 * trampoline that occupies a slot other than 0.  The trampoline and its
 * dummy allocation are freed; the pointer must not be used afterwards.
 */
void
isc__trampoline_detach(isc__trampoline_t *trampoline) {
	uv_mutex_lock(&isc__trampoline_lock);

	REQUIRE(trampoline->self == isc_thread_self());
	REQUIRE(trampoline->tid > 0);
	REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
	REQUIRE(trampolines[trampoline->tid] == trampoline);

	/* tid was checked to be positive above, so the conversion is exact. */
	const size_t slot = (size_t)trampoline->tid;

	trampolines[slot] = NULL;

	/* Let the next isc__trampoline_get() start no later than this slot. */
	if (slot < isc__trampoline_min) {
		isc__trampoline_min = slot;
	}

	free(trampoline->jemalloc_enforce_init);
	free(trampoline);

	uv_mutex_unlock(&isc__trampoline_lock);
}

/*
 * Bind 'trampoline' to the calling thread.
 *
 * The trampoline must still be unattached ('self' equal to
 * ISC__TRAMPOLINE_UNUSED) and must occupy its own slot, other than slot 0,
 * in the table.  On return the calling thread's 'isc_tid_v' holds the
 * trampoline's tid and 'self' holds isc_thread_self().  The work is done
 * with 'isc__trampoline_lock' held.
 */
void
isc__trampoline_attach(isc__trampoline_t *trampoline) {
	uv_mutex_lock(&isc__trampoline_lock);
	REQUIRE(trampoline->self == ISC__TRAMPOLINE_UNUSED);
	REQUIRE(trampoline->tid > 0);
	REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
	REQUIRE(trampolines[trampoline->tid] == trampoline);

	/* Initialize the trampoline */
	isc_tid_v = trampoline->tid;
	trampoline->self = isc_thread_self();

	/*
	 * Ensure every thread starts with a malloc() call to prevent memory
	 * bloat caused by a jemalloc quirk.  While this dummy allocation is
	 * not used for anything, free() must not be immediately called for it
	 * so that an optimizing compiler does not strip away such a pair of
	 * malloc() + free() calls altogether, as it would foil the fix.
	 * The allocation is released in isc__trampoline_detach().
	 */
	trampoline->jemalloc_enforce_init = malloc(8);
	uv_mutex_unlock(&isc__trampoline_lock);
}

/*
 * Thread entry wrapper.  'arg' is the trampoline for the new thread: attach
 * to it, call its start function with its argument, detach, and pass the
 * start function's result back to the caller.
 */
isc_threadresult_t
isc__trampoline_run(isc_threadarg_t arg) {
	isc__trampoline_t *t = (isc__trampoline_t *)arg;

	isc__trampoline_attach(t);

	/* Run the thread's real entry point. */
	isc_threadresult_t rv = (t->start)(t->arg);

	/* Detach frees the trampoline, so 't' must not be used afterwards. */
	isc__trampoline_detach(t);

	return (rv);
}