Diffstat (limited to 'deps/jemalloc/src/sec.c')
-rw-r--r--  deps/jemalloc/src/sec.c  422
1 file changed, 422 insertions, 0 deletions
diff --git a/deps/jemalloc/src/sec.c b/deps/jemalloc/src/sec.c
new file mode 100644
index 0000000..df67559
--- /dev/null
+++ b/deps/jemalloc/src/sec.c
@@ -0,0 +1,422 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/sec.h"
+
+static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
+ size_t alignment, bool zero, bool guarded, bool frequent_reuse,
+ bool *deferred_work_generated);
+static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
+static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size, bool *deferred_work_generated);
+static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ bool *deferred_work_generated);
+
+static void
+sec_bin_init(sec_bin_t *bin) {
+ bin->being_batch_filled = false;
+ bin->bytes_cur = 0;
+ edata_list_active_init(&bin->freelist);
+}
+
+bool
+sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
+ const sec_opts_t *opts) {
+ assert(opts->max_alloc >= PAGE);
+
+ size_t max_alloc = PAGE_FLOOR(opts->max_alloc);
+ pszind_t npsizes = sz_psz2ind(max_alloc) + 1;
+
+ size_t sz_shards = opts->nshards * sizeof(sec_shard_t);
+ size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t);
+ size_t sz_alloc = sz_shards + sz_bins;
+ void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE);
+ if (dynalloc == NULL) {
+ return true;
+ }
+ sec_shard_t *shard_cur = (sec_shard_t *)dynalloc;
+ sec->shards = shard_cur;
+ sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards];
+ /* Just for asserts, below. */
+ sec_bin_t *bin_start = bin_cur;
+
+ for (size_t i = 0; i < opts->nshards; i++) {
+ sec_shard_t *shard = shard_cur;
+ shard_cur++;
+ bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
+ WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
+ if (err) {
+ return true;
+ }
+ shard->enabled = true;
+ shard->bins = bin_cur;
+ for (pszind_t j = 0; j < npsizes; j++) {
+ sec_bin_init(&shard->bins[j]);
+ bin_cur++;
+ }
+ shard->bytes_cur = 0;
+ shard->to_flush_next = 0;
+ }
+ /*
+ * Should have exactly matched the bin_start to the first unused byte
+ * after the shards.
+ */
+ assert((void *)shard_cur == (void *)bin_start);
+ /* And the last bin to use up the last bytes of the allocation. */
+ assert((char *)bin_cur == ((char *)dynalloc + sz_alloc));
+ sec->fallback = fallback;
+
+ sec->opts = *opts;
+ sec->npsizes = npsizes;
+
+ /*
+ * Initialize these last so that an improper use of an SEC whose
+ * initialization failed will segfault in an easy-to-spot way.
+ */
+ sec->pai.alloc = &sec_alloc;
+ sec->pai.alloc_batch = &pai_alloc_batch_default;
+ sec->pai.expand = &sec_expand;
+ sec->pai.shrink = &sec_shrink;
+ sec->pai.dalloc = &sec_dalloc;
+ sec->pai.dalloc_batch = &pai_dalloc_batch_default;
+
+ return false;
+}
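+
+/*
+ * Illustrative sketch, not part of this file: one way a caller might fill in
+ * the sec_opts_t fields that sec_init and the functions below consume. The
+ * wrapper name and the particular values are hypothetical; only the field
+ * names and the true-on-error convention come from the code in this file.
+ */
+static bool
+example_sec_setup(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback) {
+	sec_opts_t opts = {0};
+	opts.nshards = 4;			/* independent shards, each with its own mutex */
+	opts.max_alloc = 32 * PAGE;		/* largest extent size the SEC will cache */
+	opts.max_bytes = 256 * PAGE;		/* per-shard cap that triggers flushing */
+	opts.bytes_after_flush = 128 * PAGE;	/* level a partial flush drains down to */
+	opts.batch_fill_extra = 4;		/* extra extents requested on a cache miss */
+	/* sec_init returns true on failure, false on success. */
+	return sec_init(tsdn, sec, base, fallback, &opts);
+}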
+
+static sec_shard_t *
+sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
+ /*
+ * Eventually, we should implement affinity, tracking source shard using
+ * the edata_t's newly freed up fields. For now, just randomly
+ * distribute across all shards.
+ */
+ if (tsdn_null(tsdn)) {
+ return &sec->shards[0];
+ }
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ uint8_t *idxp = tsd_sec_shardp_get(tsd);
+ if (*idxp == (uint8_t)-1) {
+ /*
+ * First use; initialize using the trick from Daniel Lemire's
+ * "A fast alternative to the modulo reduction". Use a 64-bit
+ * number to store 32 bits, since we'll deliberately overflow
+ * when we multiply by the number of shards.
+ */
+ uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32);
+ uint32_t idx =
+ (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32);
+ assert(idx < (uint32_t)sec->opts.nshards);
+ *idxp = (uint8_t)idx;
+ }
+ return &sec->shards[*idxp];
+}
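+
+/*
+ * Illustrative sketch, not part of this file: the multiply-shift trick used
+ * in sec_shard_pick, shown in isolation. It maps a uniform 32-bit value r
+ * into [0, n) without a modulo: widen to 64 bits, multiply by n, and keep
+ * the high 32 bits. The helper name is hypothetical.
+ */
+static inline uint32_t
+example_lemire_reduce(uint32_t r, uint32_t n) {
+	/* The product fits in 64 bits; its top 32 bits land in [0, n). */
+	return (uint32_t)(((uint64_t)r * (uint64_t)n) >> 32);
+}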
+
+/*
+ * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
+ * empty cache, we'll try to fill it, which can push the shard over its limit.
+ */
+static void
+sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ edata_list_active_t to_flush;
+ edata_list_active_init(&to_flush);
+ while (shard->bytes_cur > sec->opts.bytes_after_flush) {
+ /* Pick a victim. */
+ sec_bin_t *bin = &shard->bins[shard->to_flush_next];
+
+ /* Update our victim-picking state. */
+ shard->to_flush_next++;
+ if (shard->to_flush_next == sec->npsizes) {
+ shard->to_flush_next = 0;
+ }
+
+ assert(shard->bytes_cur >= bin->bytes_cur);
+ if (bin->bytes_cur != 0) {
+ shard->bytes_cur -= bin->bytes_cur;
+ bin->bytes_cur = 0;
+ edata_list_active_concat(&to_flush, &bin->freelist);
+ }
+ /*
+ * Either bin->bytes_cur was 0, in which case we didn't touch
+ * the bin list but it should be empty anyways (or else we
+ * missed a bytes_cur update on a list modification), or it
+ * *wasn't* 0 and we emptied it ourselves. Either way, it should
+ * be empty now.
+ */
+ assert(edata_list_active_empty(&bin->freelist));
+ }
+
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ bool deferred_work_generated = false;
+ pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
+ &deferred_work_generated);
+}
+
+static edata_t *
+sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+ sec_bin_t *bin) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ if (!shard->enabled) {
+ return NULL;
+ }
+ edata_t *edata = edata_list_active_first(&bin->freelist);
+ if (edata != NULL) {
+ edata_list_active_remove(&bin->freelist, edata);
+ assert(edata_size_get(edata) <= bin->bytes_cur);
+ bin->bytes_cur -= edata_size_get(edata);
+ assert(edata_size_get(edata) <= shard->bytes_cur);
+ shard->bytes_cur -= edata_size_get(edata);
+ }
+ return edata;
+}
+
+static edata_t *
+sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+ sec_bin_t *bin, size_t size) {
+ malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
+
+ edata_list_active_t result;
+ edata_list_active_init(&result);
+ bool deferred_work_generated = false;
+ size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
+ 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);
+
+ edata_t *ret = edata_list_active_first(&result);
+ if (ret != NULL) {
+ edata_list_active_remove(&result, ret);
+ }
+
+ malloc_mutex_lock(tsdn, &shard->mtx);
+ bin->being_batch_filled = false;
+ /*
+ * Handle the easy case first: nothing to cache. Note that this can
+ * only happen in case of OOM, since sec_alloc checks the expected
+ * number of allocs, and doesn't bother going down the batch_fill
+ * pathway if there won't be anything left to cache. So to be in this
+ * code path, we must have asked for > 1 alloc, but only gotten 1 back.
+ */
+ if (nalloc <= 1) {
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ return ret;
+ }
+
+ size_t new_cached_bytes = (nalloc - 1) * size;
+
+ edata_list_active_concat(&bin->freelist, &result);
+ bin->bytes_cur += new_cached_bytes;
+ shard->bytes_cur += new_cached_bytes;
+
+ if (shard->bytes_cur > sec->opts.max_bytes) {
+ sec_flush_some_and_unlock(tsdn, sec, shard);
+ } else {
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ }
+
+ return ret;
+}
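+
+/*
+ * Worked example of the accounting above, with hypothetical numbers: if
+ * batch_fill_extra == 3 and size is 4 pages, the batch fill asks the fallback
+ * for 4 extents. One is handed to the caller; the remaining nalloc - 1 == 3
+ * are cached, so bin->bytes_cur and shard->bytes_cur both grow by
+ * 3 * 4 == 12 pages, which may in turn trigger sec_flush_some_and_unlock.
+ */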
+
+static edata_t *
+sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
+ bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
+ assert((size & PAGE_MASK) == 0);
+ assert(!guarded);
+
+ sec_t *sec = (sec_t *)self;
+
+ if (zero || alignment > PAGE || sec->opts.nshards == 0
+ || size > sec->opts.max_alloc) {
+ return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
+ /* guarded */ false, frequent_reuse,
+ deferred_work_generated);
+ }
+ pszind_t pszind = sz_psz2ind(size);
+ assert(pszind < sec->npsizes);
+
+ sec_shard_t *shard = sec_shard_pick(tsdn, sec);
+ sec_bin_t *bin = &shard->bins[pszind];
+ bool do_batch_fill = false;
+
+ malloc_mutex_lock(tsdn, &shard->mtx);
+ edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
+ if (edata == NULL) {
+ if (!bin->being_batch_filled
+ && sec->opts.batch_fill_extra > 0) {
+ bin->being_batch_filled = true;
+ do_batch_fill = true;
+ }
+ }
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ if (edata == NULL) {
+ if (do_batch_fill) {
+ edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
+ size);
+ } else {
+ edata = pai_alloc(tsdn, sec->fallback, size, alignment,
+ zero, /* guarded */ false, frequent_reuse,
+ deferred_work_generated);
+ }
+ }
+ return edata;
+}
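+
+/*
+ * Illustrative sketch, not part of this file: callers reach sec_alloc through
+ * the pai_t vtable installed by sec_init rather than by calling the static
+ * function directly. The wrapper below and its fixed arguments are
+ * hypothetical.
+ */
+static edata_t *
+example_alloc_one_page(tsdn_t *tsdn, sec_t *sec) {
+	bool deferred = false;
+	return pai_alloc(tsdn, &sec->pai, PAGE, PAGE, /* zero */ false,
+	    /* guarded */ false, /* frequent_reuse */ false, &deferred);
+}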
+
+static bool
+sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+ size_t new_size, bool zero, bool *deferred_work_generated) {
+ sec_t *sec = (sec_t *)self;
+ return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
+ deferred_work_generated);
+}
+
+static bool
+sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+ size_t new_size, bool *deferred_work_generated) {
+ sec_t *sec = (sec_t *)self;
+ return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
+ deferred_work_generated);
+}
+
+static void
+sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ shard->bytes_cur = 0;
+ edata_list_active_t to_flush;
+ edata_list_active_init(&to_flush);
+ for (pszind_t i = 0; i < sec->npsizes; i++) {
+ sec_bin_t *bin = &shard->bins[i];
+ bin->bytes_cur = 0;
+ edata_list_active_concat(&to_flush, &bin->freelist);
+ }
+
+ /*
+ * Ordinarily we would try to avoid doing the batch deallocation while
+ * holding the shard mutex, but flush_all is only reached when we're
+ * disabling the HPA or resetting the arena, both of which are rare
+ * paths.
+ */
+ bool deferred_work_generated = false;
+ pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
+ &deferred_work_generated);
+}
+
+static void
+sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+ edata_t *edata) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ assert(shard->bytes_cur <= sec->opts.max_bytes);
+ size_t size = edata_size_get(edata);
+ pszind_t pszind = sz_psz2ind(size);
+ assert(pszind < sec->npsizes);
+ /*
+ * Prepending here results in LIFO allocation per bin, which seems
+ * reasonable.
+ */
+ sec_bin_t *bin = &shard->bins[pszind];
+ edata_list_active_prepend(&bin->freelist, edata);
+ bin->bytes_cur += size;
+ shard->bytes_cur += size;
+ if (shard->bytes_cur > sec->opts.max_bytes) {
+ /*
+ * We've exceeded the shard limit. We make a nod in the
+ * direction of fragmentation avoidance: rather than flushing
+ * only the bin we just added to, we flush bins round-robin
+ * until the shard drops to bytes_after_flush, spreading the
+ * flushing across size classes. The extents are handed back
+ * to the fallback allocator only after the shard mutex is
+ * released, and this simple policy avoids the need for
+ * advanced cache balancing strategies.
+ */
+ sec_flush_some_and_unlock(tsdn, sec, shard);
+ malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
+ } else {
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ }
+}
+
+static void
+sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ bool *deferred_work_generated) {
+ sec_t *sec = (sec_t *)self;
+ if (sec->opts.nshards == 0
+ || edata_size_get(edata) > sec->opts.max_alloc) {
+ pai_dalloc(tsdn, sec->fallback, edata,
+ deferred_work_generated);
+ return;
+ }
+ sec_shard_t *shard = sec_shard_pick(tsdn, sec);
+ malloc_mutex_lock(tsdn, &shard->mtx);
+ if (shard->enabled) {
+ sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
+ } else {
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ pai_dalloc(tsdn, sec->fallback, edata,
+ deferred_work_generated);
+ }
+}
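+
+/*
+ * Illustrative sketch, not part of this file: the matching deallocation also
+ * goes through the pai_t vtable, so the SEC can either cache the extent in a
+ * shard bin or forward it to the fallback. The wrapper is hypothetical.
+ */
+static void
+example_dalloc(tsdn_t *tsdn, sec_t *sec, edata_t *edata) {
+	bool deferred = false;
+	pai_dalloc(tsdn, &sec->pai, edata, &deferred);
+}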
+
+void
+sec_flush(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_disable(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ sec->shards[i].enabled = false;
+ sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
+ size_t sum = 0;
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ /*
+ * We could save these lock acquisitions by making bytes_cur
+ * atomic, but stats collection is rare anyways and we expect
+ * the number and type of stats to get more interesting.
+ */
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ sum += sec->shards[i].bytes_cur;
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+ stats->bytes += sum;
+}
+
+void
+sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
+ mutex_prof_data_t *mutex_prof_data) {
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ malloc_mutex_prof_accum(tsdn, mutex_prof_data,
+ &sec->shards[i].mtx);
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->opts.nshards; i++) {
+ malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
+ }
+}