diff options
Diffstat (limited to 'lib/isc/trampoline.c')
-rw-r--r-- | lib/isc/trampoline.c | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/lib/isc/trampoline.c b/lib/isc/trampoline.c new file mode 100644 index 0000000..be451a9 --- /dev/null +++ b/lib/isc/trampoline.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*! \file */ + +#include <inttypes.h> +#include <stdlib.h> +#include <uv.h> + +#include <isc/mem.h> +#include <isc/once.h> +#include <isc/thread.h> +#include <isc/util.h> + +#include "trampoline_p.h" + +#define ISC__TRAMPOLINE_UNUSED 0 + +struct isc__trampoline { + int tid; /* const */ + uintptr_t self; + isc_threadfunc_t start; + isc_threadarg_t arg; + void *jemalloc_enforce_init; +}; + +/* + * We can't use isc_mem API here, because it's called too + * early and when the isc_mem_debugging flags are changed + * later and ISC_MEM_DEBUGSIZE or ISC_MEM_DEBUGCTX flags are + * added, neither isc_mem_put() nor isc_mem_free() can be used + * to free up the memory allocated here because the flags were + * not set when calling isc_mem_get() or isc_mem_allocate() + * here. + * + * Since this is a single allocation at library load and deallocation at library + * unload, using the standard allocator without the tracking is fine for this + * single purpose. + * + * We can't use isc_mutex API either, because we track whether the mutexes get + * properly destroyed, and we intentionally leak the static mutex here without + * destroying it to prevent data race between library destructor running while + * thread is being still created. + */ + +static uv_mutex_t isc__trampoline_lock; +static isc__trampoline_t **trampolines; +#if defined(HAVE_THREAD_LOCAL) +#include <threads.h> +thread_local size_t isc_tid_v = SIZE_MAX; +#elif defined(HAVE___THREAD) +__thread size_t isc_tid_v = SIZE_MAX; +#elif HAVE___DECLSPEC_THREAD +__declspec(thread) size_t isc_tid_v = SIZE_MAX; +#endif /* if defined(HAVE_THREAD_LOCAL) */ +static size_t isc__trampoline_min = 1; +static size_t isc__trampoline_max = 65; + +static isc_once_t start_once = ISC_ONCE_INIT; +static isc_once_t stop_once = ISC_ONCE_INIT; + +static isc__trampoline_t * +isc__trampoline_new(int tid, isc_threadfunc_t start, isc_threadarg_t arg) { + isc__trampoline_t *trampoline = calloc(1, sizeof(*trampoline)); + RUNTIME_CHECK(trampoline != NULL); + + *trampoline = (isc__trampoline_t){ + .tid = tid, + .start = start, + .arg = arg, + .self = ISC__TRAMPOLINE_UNUSED, + }; + + return (trampoline); +} + +static void +do_init(void) { + uv_mutex_init(&isc__trampoline_lock); + + trampolines = calloc(isc__trampoline_max, sizeof(trampolines[0])); + RUNTIME_CHECK(trampolines != NULL); + + /* Get the trampoline slot 0 for the main thread */ + trampolines[0] = isc__trampoline_new(0, NULL, NULL); + isc_tid_v = trampolines[0]->tid; + trampolines[0]->self = isc_thread_self(); + + /* Initialize the other trampolines */ + for (size_t i = 1; i < isc__trampoline_max; i++) { + trampolines[i] = NULL; + } + isc__trampoline_min = 1; +} + +void +isc__trampoline_initialize(void) { + isc_once_do(&start_once, do_init); +} + +static void +do_shutdown(void) { + /* + * When the program using the library exits abruptly and the library + * gets unloaded, there might be some existing trampolines from unjoined + * threads. We intentionally ignore those and don't check whether all + * trampolines have been cleared before exiting, so we leak a little bit + * of resources here, including the lock. + */ + free(trampolines[0]); +} + +void +isc__trampoline_shutdown(void) { + isc_once_do(&stop_once, do_shutdown); +} + +isc__trampoline_t * +isc__trampoline_get(isc_threadfunc_t start, isc_threadarg_t arg) { + isc__trampoline_t **tmp = NULL; + isc__trampoline_t *trampoline = NULL; + uv_mutex_lock(&isc__trampoline_lock); +again: + for (size_t i = isc__trampoline_min; i < isc__trampoline_max; i++) { + if (trampolines[i] == NULL) { + trampoline = isc__trampoline_new(i, start, arg); + trampolines[i] = trampoline; + isc__trampoline_min = i + 1; + goto done; + } + } + tmp = calloc(2 * isc__trampoline_max, sizeof(trampolines[0])); + RUNTIME_CHECK(tmp != NULL); + for (size_t i = 0; i < isc__trampoline_max; i++) { + tmp[i] = trampolines[i]; + } + for (size_t i = isc__trampoline_max; i < 2 * isc__trampoline_max; i++) { + tmp[i] = NULL; + } + free(trampolines); + trampolines = tmp; + isc__trampoline_max = isc__trampoline_max * 2; + goto again; +done: + INSIST(trampoline != NULL); + uv_mutex_unlock(&isc__trampoline_lock); + + return (trampoline); +} + +void +isc__trampoline_detach(isc__trampoline_t *trampoline) { + uv_mutex_lock(&isc__trampoline_lock); + REQUIRE(trampoline->self == isc_thread_self()); + REQUIRE(trampoline->tid > 0); + REQUIRE((size_t)trampoline->tid < isc__trampoline_max); + REQUIRE(trampolines[trampoline->tid] == trampoline); + + trampolines[trampoline->tid] = NULL; + + if (isc__trampoline_min > (size_t)trampoline->tid) { + isc__trampoline_min = trampoline->tid; + } + + free(trampoline->jemalloc_enforce_init); + free(trampoline); + + uv_mutex_unlock(&isc__trampoline_lock); + return; +} + +void +isc__trampoline_attach(isc__trampoline_t *trampoline) { + uv_mutex_lock(&isc__trampoline_lock); + REQUIRE(trampoline->self == ISC__TRAMPOLINE_UNUSED); + REQUIRE(trampoline->tid > 0); + REQUIRE((size_t)trampoline->tid < isc__trampoline_max); + REQUIRE(trampolines[trampoline->tid] == trampoline); + + /* Initialize the trampoline */ + isc_tid_v = trampoline->tid; + trampoline->self = isc_thread_self(); + + /* + * Ensure every thread starts with a malloc() call to prevent memory + * bloat caused by a jemalloc quirk. While this dummy allocation is + * not used for anything, free() must not be immediately called for it + * so that an optimizing compiler does not strip away such a pair of + * malloc() + free() calls altogether, as it would foil the fix. + */ + trampoline->jemalloc_enforce_init = malloc(8); + uv_mutex_unlock(&isc__trampoline_lock); +} + +isc_threadresult_t +isc__trampoline_run(isc_threadarg_t arg) { + isc__trampoline_t *trampoline = (isc__trampoline_t *)arg; + isc_threadresult_t result; + + isc__trampoline_attach(trampoline); + + /* Run the main function */ + result = (trampoline->start)(trampoline->arg); + + isc__trampoline_detach(trampoline); + + return (result); +} |