Diffstat (limited to 'deps/jemalloc/include/jemalloc/internal/tsd.h')
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/tsd.h  518
1 file changed, 518 insertions, 0 deletions
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h
new file mode 100644
index 0000000..66d6882
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd.h
@@ -0,0 +1,518 @@
+#ifndef JEMALLOC_INTERNAL_TSD_H
+#define JEMALLOC_INTERNAL_TSD_H
+
+#include "jemalloc/internal/activity_callback.h"
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/bin_types.h"
+#include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/peak.h"
+#include "jemalloc/internal/prof_types.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/rtree_tsd.h"
+#include "jemalloc/internal/tcache_types.h"
+#include "jemalloc/internal/tcache_structs.h"
+#include "jemalloc/internal/util.h"
+#include "jemalloc/internal/witness.h"
+
+/*
+ * Thread-Specific-Data layout
+ *
+ * At least some thread-local data gets touched on the fast-path of almost all
+ * malloc operations. But much of it is only necessary down slow paths or for
+ * testing. We want to colocate the fast-path data so that it can live on the
+ * same cacheline if possible. So we define three tiers of hotness:
+ * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths.
+ * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general;
+ * there are "semi-slow" paths like "not a sized deallocation, but can still
+ * live in the tcache". We'll want to keep these closer to the fast-path
+ * data.
+ * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all.
+ *
+ * An additional concern is that the larger tcache bins won't be used (we have a
+ * bin per size class, but by default only cache relatively small objects). So
+ * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the
+ * TSD_DATA_SLOWER tier.
+ *
+ * As a result of all this, we put the slow data first, then the fast data, then
+ * the slower data, while keeping the tcache as the last element of the fast
+ * data (so that the fast -> slower transition happens midway through the
+ * tcache). While we don't yet play alignment tricks to guarantee it, this
+ * increases our odds of getting some cache/page locality on fast paths.
+ */
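+
+/*
+ * Illustrative sketch (not generated code) of the resulting member order; see
+ * struct tsd_s below for the real definition:
+ *
+ *     struct tsd_s {
+ *         <TSD_DATA_SLOW fields>     // slow-path data
+ *         tsd_state_t state;         // fast/slow/lifecycle state
+ *         <TSD_DATA_FAST fields>     // fast-path data, tcache last
+ *         <TSD_DATA_SLOWER fields>   // test/debug-only data
+ *     };
+ */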
+
+#ifdef JEMALLOC_JET
+typedef void (*test_callback_t)(int *);
+# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
+# define MALLOC_TEST_TSD \
+ O(test_data, int, int) \
+ O(test_callback, test_callback_t, int)
+# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
+#else
+# define MALLOC_TEST_TSD
+# define MALLOC_TEST_TSD_INITIALIZER
+#endif
+
+typedef ql_elm(tsd_t) tsd_link_t;
+
+/* O(name, type, nullable type) */
+#define TSD_DATA_SLOW \
+ O(tcache_enabled, bool, bool) \
+ O(reentrancy_level, int8_t, int8_t) \
+ O(thread_allocated_last_event, uint64_t, uint64_t) \
+ O(thread_allocated_next_event, uint64_t, uint64_t) \
+ O(thread_deallocated_last_event, uint64_t, uint64_t) \
+ O(thread_deallocated_next_event, uint64_t, uint64_t) \
+ O(tcache_gc_event_wait, uint64_t, uint64_t) \
+ O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \
+ O(prof_sample_event_wait, uint64_t, uint64_t) \
+ O(prof_sample_last_event, uint64_t, uint64_t) \
+ O(stats_interval_event_wait, uint64_t, uint64_t) \
+ O(stats_interval_last_event, uint64_t, uint64_t) \
+ O(peak_alloc_event_wait, uint64_t, uint64_t) \
+ O(peak_dalloc_event_wait, uint64_t, uint64_t) \
+ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
+ O(prng_state, uint64_t, uint64_t) \
+ O(san_extents_until_guard_small, uint64_t, uint64_t) \
+ O(san_extents_until_guard_large, uint64_t, uint64_t) \
+ O(iarena, arena_t *, arena_t *) \
+ O(arena, arena_t *, arena_t *) \
+ O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \
+ O(sec_shard, uint8_t, uint8_t) \
+ O(binshards, tsd_binshards_t, tsd_binshards_t)\
+ O(tsd_link, tsd_link_t, tsd_link_t) \
+ O(in_hook, bool, bool) \
+ O(peak, peak_t, peak_t) \
+ O(activity_callback_thunk, activity_callback_thunk_t, \
+ activity_callback_thunk_t) \
+ O(tcache_slow, tcache_slow_t, tcache_slow_t) \
+ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t)
+
+#define TSD_DATA_SLOW_INITIALIZER \
+ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \
+ /* reentrancy_level */ 0, \
+ /* thread_allocated_last_event */ 0, \
+ /* thread_allocated_next_event */ 0, \
+ /* thread_deallocated_last_event */ 0, \
+ /* thread_deallocated_next_event */ 0, \
+ /* tcache_gc_event_wait */ 0, \
+ /* tcache_gc_dalloc_event_wait */ 0, \
+ /* prof_sample_event_wait */ 0, \
+ /* prof_sample_last_event */ 0, \
+ /* stats_interval_event_wait */ 0, \
+ /* stats_interval_last_event */ 0, \
+ /* peak_alloc_event_wait */ 0, \
+ /* peak_dalloc_event_wait */ 0, \
+ /* prof_tdata */ NULL, \
+ /* prng_state */ 0, \
+ /* san_extents_until_guard_small */ 0, \
+ /* san_extents_until_guard_large */ 0, \
+ /* iarena */ NULL, \
+ /* arena */ NULL, \
+ /* arena_decay_ticker */ \
+ TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \
+ /* sec_shard */ (uint8_t)-1, \
+ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \
+ /* tsd_link */ {NULL}, \
+ /* in_hook */ false, \
+ /* peak */ PEAK_INITIALIZER, \
+ /* activity_callback_thunk */ \
+ ACTIVITY_CALLBACK_THUNK_INITIALIZER, \
+ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \
+ /* rtree_ctx */ RTREE_CTX_INITIALIZER,
+
+/* O(name, type, nullable type) */
+#define TSD_DATA_FAST \
+ O(thread_allocated, uint64_t, uint64_t) \
+ O(thread_allocated_next_event_fast, uint64_t, uint64_t) \
+ O(thread_deallocated, uint64_t, uint64_t) \
+ O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \
+ O(tcache, tcache_t, tcache_t)
+
+#define TSD_DATA_FAST_INITIALIZER \
+ /* thread_allocated */ 0, \
+ /* thread_allocated_next_event_fast */ 0, \
+ /* thread_deallocated */ 0, \
+ /* thread_deallocated_next_event_fast */ 0, \
+ /* tcache */ TCACHE_ZERO_INITIALIZER,
+
+/* O(name, type, nullable type) */
+#define TSD_DATA_SLOWER \
+ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \
+ MALLOC_TEST_TSD
+
+#define TSD_DATA_SLOWER_INITIALIZER \
+ /* witness */ WITNESS_TSD_INITIALIZER \
+ /* test data */ MALLOC_TEST_TSD_INITIALIZER
+
+
+#define TSD_INITIALIZER { \
+ TSD_DATA_SLOW_INITIALIZER \
+ /* state */ ATOMIC_INIT(tsd_state_uninitialized), \
+ TSD_DATA_FAST_INITIALIZER \
+ TSD_DATA_SLOWER_INITIALIZER \
+}
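+
+/*
+ * Illustrative use (sketch only): the TLS-based backends define the
+ * per-thread tsd object and statically initialize it roughly along the
+ * lines of
+ *
+ *     JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
+ */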
+
+#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
+void _malloc_tsd_cleanup_register(bool (*f)(void));
+#endif
+
+void *malloc_tsd_malloc(size_t size);
+void malloc_tsd_dalloc(void *wrapper);
+tsd_t *malloc_tsd_boot0(void);
+void malloc_tsd_boot1(void);
+void tsd_cleanup(void *arg);
+tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
+void tsd_state_set(tsd_t *tsd, uint8_t new_state);
+void tsd_slow_update(tsd_t *tsd);
+void tsd_prefork(tsd_t *tsd);
+void tsd_postfork_parent(tsd_t *tsd);
+void tsd_postfork_child(tsd_t *tsd);
+
+/*
+ * Call ..._inc when your module wants to take all threads down the slow paths,
+ * and ..._dec when it no longer needs to.
+ */
+void tsd_global_slow_inc(tsdn_t *tsdn);
+void tsd_global_slow_dec(tsdn_t *tsdn);
+bool tsd_global_slow(void);
+
+enum {
+ /* Common case --> jnz. */
+ tsd_state_nominal = 0,
+ /* Initialized but on slow path. */
+ tsd_state_nominal_slow = 1,
+ /*
+ * Some thread has changed global state in such a way that all nominal
+ * threads need to recompute their fast / slow status the next time they
+ * get a chance.
+ *
+	 * Any thread can change another thread's status *to* recompute, but
+	 * only the thread itself can change its status *from* recompute.
+ */
+ tsd_state_nominal_recompute = 2,
+ /*
+ * The above nominal states should be lower values. We use
+ * tsd_nominal_max to separate nominal states from threads in the
+ * process of being born / dying.
+ */
+ tsd_state_nominal_max = 2,
+
+ /*
+ * A thread might free() during its death as its only allocator action;
+ * in such scenarios, we need tsd, but set up in such a way that no
+ * cleanup is necessary.
+ */
+ tsd_state_minimal_initialized = 3,
+ /* States during which we know we're in thread death. */
+ tsd_state_purgatory = 4,
+ tsd_state_reincarnated = 5,
+ /*
+ * What it says on the tin; tsd that hasn't been initialized. Note
+	 * that even when the tsd struct lives in TLS, we still need to keep track
+ * of stuff like whether or not our pthread destructors have been
+ * scheduled, so this really truly is different than the nominal state.
+ */
+ tsd_state_uninitialized = 6
+};
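+
+/*
+ * For illustration: tsd_nominal() below relies on the nominal states having
+ * the lowest values, so "is this thread in a nominal state?" reduces to a
+ * single comparison:
+ *
+ *     tsd_state_get(tsd) <= tsd_state_nominal_max
+ */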
+
+/*
+ * Some TSD accesses can only be done in a nominal state. To enforce this, we
+ * wrap TSD member access in a function that asserts on TSD state, and mangle
+ * field names to prevent touching them accidentally.
+ */
+#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n
+
+#ifdef JEMALLOC_U8_ATOMICS
+# define tsd_state_t atomic_u8_t
+# define tsd_atomic_load atomic_load_u8
+# define tsd_atomic_store atomic_store_u8
+# define tsd_atomic_exchange atomic_exchange_u8
+#else
+# define tsd_state_t atomic_u32_t
+# define tsd_atomic_load atomic_load_u32
+# define tsd_atomic_store atomic_store_u32
+# define tsd_atomic_exchange atomic_exchange_u32
+#endif
+
+/* The actual tsd. */
+struct tsd_s {
+ /*
+ * The contents should be treated as totally opaque outside the tsd
+ * module. Access any thread-local state through the getters and
+ * setters below.
+ */
+
+#define O(n, t, nt) \
+ t TSD_MANGLE(n);
+
+ TSD_DATA_SLOW
+ /*
+	 * We manually limit the state to just a single byte, unless 8-bit
+	 * atomics are unavailable (which is rare).
+ */
+ tsd_state_t state;
+ TSD_DATA_FAST
+ TSD_DATA_SLOWER
+#undef O
+};
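+
+/*
+ * Illustrative expansion (sketch, not generated code): with the O() definition
+ * above, the TSD_DATA_SLOW entry O(tcache_enabled, bool, bool) contributes the
+ * member
+ *
+ *     bool cant_access_tsd_items_directly_use_a_getter_or_setter_tcache_enabled;
+ *
+ * which is only meant to be reached through the generated accessors below.
+ */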
+
+JEMALLOC_ALWAYS_INLINE uint8_t
+tsd_state_get(tsd_t *tsd) {
+ /*
+ * This should be atomic. Unfortunately, compilers right now can't tell
+	 * that this can be done as a memory comparison, and force a load into
+ * a register that hurts fast-path performance.
+ */
+ /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */
+ return *(uint8_t *)&tsd->state;
+}
+
+/*
+ * Wrapper around tsd_t that makes it possible to avoid implicit conversion
+ * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
+ * explicitly converted to tsd_t, which is non-nullable.
+ */
+struct tsdn_s {
+ tsd_t tsd;
+};
+#define TSDN_NULL ((tsdn_t *)0)
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsd_tsdn(tsd_t *tsd) {
+ return (tsdn_t *)tsd;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsdn_null(const tsdn_t *tsdn) {
+ return tsdn == NULL;
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsdn_tsd(tsdn_t *tsdn) {
+ assert(!tsdn_null(tsdn));
+
+ return &tsdn->tsd;
+}
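+
+/*
+ * Sketch of the intended pattern: code that may run with or without tsd takes
+ * a tsdn_t * and only converts back after a null check, e.g.
+ *
+ *     if (!tsdn_null(tsdn)) {
+ *         tsd_t *tsd = tsdn_tsd(tsdn);
+ *         ...
+ *     }
+ */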
+
+/*
+ * We put the platform-specific data declarations and inlines into their own
+ * header files to avoid cluttering this file. They define tsd_boot0,
+ * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set.
+ */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
+#elif (defined(JEMALLOC_TLS))
+#include "jemalloc/internal/tsd_tls.h"
+#elif (defined(_WIN32))
+#include "jemalloc/internal/tsd_win.h"
+#else
+#include "jemalloc/internal/tsd_generic.h"
+#endif
+
+/*
+ * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of
+ * foo. This omits some safety checks, and so can be used during tsd
+ * initialization and cleanup.
+ */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE t * \
+tsd_##n##p_get_unsafe(tsd_t *tsd) { \
+ return &tsd->TSD_MANGLE(n); \
+}
+TSD_DATA_SLOW
+TSD_DATA_FAST
+TSD_DATA_SLOWER
+#undef O
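+
+/*
+ * Illustrative expansion (sketch): for the rtree_ctx member, the block above
+ * generates
+ *
+ *     JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
+ *     tsd_rtree_ctxp_get_unsafe(tsd_t *tsd) {
+ *         return &tsd->TSD_MANGLE(rtree_ctx);
+ *     }
+ *
+ * with TSD_MANGLE(rtree_ctx) further expanding to the mangled member name.
+ */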
+
+/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE t * \
+tsd_##n##p_get(tsd_t *tsd) { \
+ /* \
+ * Because the state might change asynchronously if it's \
+ * nominal, we need to make sure that we only read it once. \
+ */ \
+ uint8_t state = tsd_state_get(tsd); \
+ assert(state == tsd_state_nominal || \
+ state == tsd_state_nominal_slow || \
+ state == tsd_state_nominal_recompute || \
+ state == tsd_state_reincarnated || \
+ state == tsd_state_minimal_initialized); \
+ return tsd_##n##p_get_unsafe(tsd); \
+}
+TSD_DATA_SLOW
+TSD_DATA_FAST
+TSD_DATA_SLOWER
+#undef O
+
+/*
+ * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
+ * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
+ */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE nt * \
+tsdn_##n##p_get(tsdn_t *tsdn) { \
+ if (tsdn_null(tsdn)) { \
+ return NULL; \
+ } \
+ tsd_t *tsd = tsdn_tsd(tsdn); \
+ return (nt *)tsd_##n##p_get(tsd); \
+}
+TSD_DATA_SLOW
+TSD_DATA_FAST
+TSD_DATA_SLOWER
+#undef O
+
+/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE t \
+tsd_##n##_get(tsd_t *tsd) { \
+ return *tsd_##n##p_get(tsd); \
+}
+TSD_DATA_SLOW
+TSD_DATA_FAST
+TSD_DATA_SLOWER
+#undef O
+
+/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE void \
+tsd_##n##_set(tsd_t *tsd, t val) { \
+ assert(tsd_state_get(tsd) != tsd_state_reincarnated && \
+ tsd_state_get(tsd) != tsd_state_minimal_initialized); \
+ *tsd_##n##p_get(tsd) = val; \
+}
+TSD_DATA_SLOW
+TSD_DATA_FAST
+TSD_DATA_SLOWER
+#undef O
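+
+/*
+ * Taken together, each O(foo, t, nt) entry generates tsd_foop_get_unsafe(),
+ * tsd_foop_get(), tsdn_foop_get(), tsd_foo_get(), and tsd_foo_set().  For
+ * example, the tcache_enabled flag is read via tsd_tcache_enabled_get(tsd)
+ * (as used in tsd_assert_fast() below) and written via
+ * tsd_tcache_enabled_set(tsd, val).
+ */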
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_assert_fast(tsd_t *tsd) {
+ /*
+ * Note that our fastness assertion does *not* include global slowness
+ * counters; it's not in general possible to ensure that they won't
+ * change asynchronously from underneath us.
+ */
+ assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
+ tsd_reentrancy_level_get(tsd) == 0);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_fast(tsd_t *tsd) {
+ bool fast = (tsd_state_get(tsd) == tsd_state_nominal);
+ if (fast) {
+ tsd_assert_fast(tsd);
+ }
+
+ return fast;
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch_impl(bool init, bool minimal) {
+ tsd_t *tsd = tsd_get(init);
+
+ if (!init && tsd_get_allocates() && tsd == NULL) {
+ return NULL;
+ }
+ assert(tsd != NULL);
+
+ if (unlikely(tsd_state_get(tsd) != tsd_state_nominal)) {
+ return tsd_fetch_slow(tsd, minimal);
+ }
+ assert(tsd_fast(tsd));
+ tsd_assert_fast(tsd);
+
+ return tsd;
+}
+
+/* Get a minimal TSD that requires no cleanup. See comments in free(). */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch_min(void) {
+ return tsd_fetch_impl(true, true);
+}
+
+/* For internal background threads use only. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_internal_fetch(void) {
+ tsd_t *tsd = tsd_fetch_min();
+ /* Use reincarnated state to prevent full initialization. */
+ tsd_state_set(tsd, tsd_state_reincarnated);
+
+ return tsd;
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch(void) {
+ return tsd_fetch_impl(true, false);
+}
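+
+/*
+ * Typical fast-path usage (sketch only):
+ *
+ *     tsd_t *tsd = tsd_fetch();
+ *     if (tsd_fast(tsd)) {
+ *         tcache_t *tcache = tsd_tcachep_get(tsd);
+ *         ...
+ *     }
+ */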
+
+static inline bool
+tsd_nominal(tsd_t *tsd) {
+ bool nominal = tsd_state_get(tsd) <= tsd_state_nominal_max;
+ assert(nominal || tsd_reentrancy_level_get(tsd) > 0);
+
+ return nominal;
+}
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsdn_fetch(void) {
+ if (!tsd_booted_get()) {
+ return NULL;
+ }
+
+ return tsd_tsdn(tsd_fetch_impl(false, false));
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
+tsd_rtree_ctx(tsd_t *tsd) {
+ return tsd_rtree_ctxp_get(tsd);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
+tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
+ /*
+ * If tsd cannot be accessed, initialize the fallback rtree_ctx and
+ * return a pointer to it.
+ */
+ if (unlikely(tsdn_null(tsdn))) {
+ rtree_ctx_data_init(fallback);
+ return fallback;
+ }
+ return tsd_rtree_ctx(tsdn_tsd(tsdn));
+}
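+
+/*
+ * Usage sketch: callers that may run without tsd pass a stack-allocated
+ * fallback context:
+ *
+ *     rtree_ctx_t fallback;
+ *     rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &fallback);
+ */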
+
+static inline bool
+tsd_state_nocleanup(tsd_t *tsd) {
+ return tsd_state_get(tsd) == tsd_state_reincarnated ||
+ tsd_state_get(tsd) == tsd_state_minimal_initialized;
+}
+
+/*
+ * These "raw" tsd reentrancy functions don't have any debug checking to make
+ * sure that we're not touching arena 0. Prefer calling pre_reentrancy and
+ * post_reentrancy when possible.
+ */
+static inline void
+tsd_pre_reentrancy_raw(tsd_t *tsd) {
+ bool fast = tsd_fast(tsd);
+ assert(tsd_reentrancy_level_get(tsd) < INT8_MAX);
+ ++*tsd_reentrancy_levelp_get(tsd);
+ if (fast) {
+ /* Prepare slow path for reentrancy. */
+ tsd_slow_update(tsd);
+ assert(tsd_state_get(tsd) == tsd_state_nominal_slow);
+ }
+}
+
+static inline void
+tsd_post_reentrancy_raw(tsd_t *tsd) {
+ int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd);
+ assert(*reentrancy_level > 0);
+ if (--*reentrancy_level == 0) {
+ tsd_slow_update(tsd);
+ }
+}
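+
+/*
+ * Sketch of the intended pairing: bracket any call that may re-enter the
+ * allocator, e.g.
+ *
+ *     tsd_pre_reentrancy_raw(tsd);
+ *     ... code that may itself call into malloc/free ...
+ *     tsd_post_reentrancy_raw(tsd);
+ */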
+
+#endif /* JEMALLOC_INTERNAL_TSD_H */