#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

/*
 * Exactly one of the four branches below is compiled; each one defines
 * tsd_booted plus the storage objects specific to that configuration.
 */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd. So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
/* Guards tsd_nominal_tsds; initialized in malloc_tsd_boot0(). */
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

/*
 * Returns true iff tsd is currently linked on tsd_nominal_tsds. The list is
 * walked under tsd_nominal_tsds_lock and only pointer identity is compared.
 * In this file it is only called from assert() expressions.
 */
static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

/*
 * Appends tsd to the tail of the nominal list, under tsd_nominal_tsds_lock.
 * Asserts that tsd is in a nominal state and not already on the list.
 */
static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

/*
 * Unlinks tsd from the nominal list, under tsd_nominal_tsds_lock. Asserts
 * that tsd is on the list and that its state is still a nominal one, i.e. the
 * caller must remove before storing the new non-nominal state.
 */
static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

/*
 * Stores tsd_state_nominal_recompute into the state of every tsd on the
 * nominal list and calls te_next_event_fast_set_non_nominal() on each of
 * them, all while holding tsd_nominal_tsds_lock.
 */
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

/*
 * Increments the global slow-feature counter and forces every nominal tsd to
 * recompute its state.
 */
void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive. If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

/*
 * Decrements the global slow-feature counter and forces every nominal tsd to
 * recompute its state.
 */
void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}

/*
 * Returns true when this thread's own settings require the slow path: its
 * tcache is disabled or its reentrancy level is greater than zero.
 */
static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

/*
 * Returns true when the global slow-feature counter is positive. The counter
 * is read with relaxed ordering.
 */
bool
tsd_global_slow(void) {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

/*
 * Computes the state tsd should be in. A non-nominal tsd keeps its current
 * state. A nominal tsd maps to tsd_state_nominal_slow if malloc_slow is set
 * or either the local or the global slow predicate holds, and to
 * tsd_state_nominal otherwise.
 */
static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}

/*
 * Recomputes tsd's state and installs it with an acquire exchange. The
 * computation is repeated for as long as the value displaced by the exchange
 * is tsd_state_nominal_recompute, so a recompute request that lands between
 * the compute and the exchange is not lost. Finishes by calling
 * te_recompute_fast_threshold().
 */
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

/*
 * Moves tsd to new_state while keeping its nominal-list membership in step
 * with the state:
 *   - non-nominal -> anything: store the state, then join the list if the new
 *     state is nominal;
 *   - nominal -> non-nominal: leave the list, then store the state;
 *   - nominal -> nominal: delegate to tsd_slow_update(), which recomputes the
 *     state rather than storing new_state directly.
 * new_state must not be tsd_state_nominal_recompute (asserted).
 */
void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal. If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case. We're transitioning from
			 * one nominal state to another. The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

/*
 * Seeds tsd's PRNG state: 0 when config_debug is set, otherwise the address
 * of tsd.
 */
static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability. For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

/*
 * Initializes the per-thread data of a tsd: rtree context, PRNG state, thread
 * events, san data and finally the tcache-enabled data. Returns whatever
 * tsd_tcache_enabled_data_init() returns.
 */
static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

/*
 * Asserts that tsd is non-nominal, is off the nominal list, and has NULL
 * arena, iarena and prof_tdata pointers and a false tcache_enabled flag.
 */
static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

/*
 * Initializes a reincarnated or minimal-initialized tsd with the tcache
 * disabled and the reentrancy level set to 1. Always returns false.
 */
static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}

/*
 * Slow path of tsd fetching; must only be entered when tsd is not in the fast
 * state (asserted). Dispatches on the current state and performs the
 * transition that state calls for; `minimal` selects minimal initialization
 * instead of full initialization for a tsd that is still uninitialized or
 * minimal-initialized. An uninitialized tsd fetched with minimal == false is
 * left untouched while tsd_booted is false. Always returns the tsd passed in.
 */
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed. Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

/*
 * Allocates via a0malloc(), with the requested size passed through
 * CACHELINE_CEILING() first.
 */
void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

/* Releases, via a0dalloc(), memory obtained from malloc_tsd_malloc(). */
void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
/* Registered cleanup callbacks; the first ncleanups entries are valid. */
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

/*
 * Runs the registered cleanup callbacks. A callback that returns true stays
 * pending and is called again on the next pass; passes repeat until no
 * callback returns true.
 */
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

/*
 * Appends f to the cleanup callback table. The table holds at most
 * MALLOC_TSD_CLEANUPS_MAX entries; the bound is checked by assert() only.
 */
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

#endif

/*
 * Runs the prof_tdata, iarena, arena, tcache and witness cleanup routines for
 * tsd, in that order, then sets the reentrancy level to 1.
 */
static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}

/*
 * Cleanup entry point for a tsd; arg is the tsd_t to clean up. Uninitialized
 * and purgatory tsds are left alone. Every other valid state runs the data
 * cleanup, moves the tsd to tsd_state_purgatory and calls tsd_set(). Under
 * JEMALLOC_JET, the tsd's test callback (if non-NULL) is invoked afterwards
 * with a pointer to its test data.
 */
void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its life time. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after the destructor was called. Cleanup isn't required but
		 * is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd. This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}

/*
 * First boot stage: resets the cleanup callback count (where that table
 * exists), initializes tsd_nominal_tsds_lock and calls tsd_boot0(). Returns
 * NULL if the mutex initialization or tsd_boot0() reports failure, otherwise
 * the result of tsd_fetch().
 */
tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
	ncleanups = 0;
#endif
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	return tsd;
}

/*
 * Second boot stage: calls tsd_boot1(), then refreshes the calling thread's
 * tsd state.
 */
void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly. Update tsd_slow. */
	tsd_slow_update(tsd);
}

#ifdef _WIN32
/*
 * TLS callback: runs _malloc_thread_cleanup() on DLL_THREAD_DETACH and, under
 * JEMALLOC_LAZY_LOCK, sets isthreaded on DLL_THREAD_ATTACH. All other reasons
 * are ignored. Always returns true.
 */
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
/* Pointer to _tls_callback, placed in the .CRT$XLY section. */
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
/*
 * Looks for a block already registered on head by the calling thread. If one
 * exists, returns its data pointer. Otherwise links block onto the list,
 * tagged with the calling thread, and returns NULL. The list is accessed
 * under head->lock.
 */
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

/* Unlinks block from head's list, under head->lock. */
void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

/* Fork hook: forwards tsd_nominal_tsds_lock to malloc_mutex_prefork(). */
void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

/*
 * Fork hook: forwards tsd_nominal_tsds_lock to
 * malloc_mutex_postfork_parent().
 */
void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

/*
 * Fork hook: forwards tsd_nominal_tsds_lock to malloc_mutex_postfork_child(),
 * resets the nominal list to empty, and re-adds tsd to it if tsd is in a
 * nominal state.
 */
void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}