diff options
Diffstat (limited to 'deps/jemalloc/src/sec.c')
-rw-r--r-- | deps/jemalloc/src/sec.c | 422 |
1 files changed, 422 insertions, 0 deletions
diff --git a/deps/jemalloc/src/sec.c b/deps/jemalloc/src/sec.c new file mode 100644 index 0000000..df67559 --- /dev/null +++ b/deps/jemalloc/src/sec.c @@ -0,0 +1,422 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/sec.h" + +static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); +static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); + +static void +sec_bin_init(sec_bin_t *bin) { + bin->being_batch_filled = false; + bin->bytes_cur = 0; + edata_list_active_init(&bin->freelist); +} + +bool +sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, + const sec_opts_t *opts) { + assert(opts->max_alloc >= PAGE); + + size_t max_alloc = PAGE_FLOOR(opts->max_alloc); + pszind_t npsizes = sz_psz2ind(max_alloc) + 1; + + size_t sz_shards = opts->nshards * sizeof(sec_shard_t); + size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); + size_t sz_alloc = sz_shards + sz_bins; + void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + if (dynalloc == NULL) { + return true; + } + sec_shard_t *shard_cur = (sec_shard_t *)dynalloc; + sec->shards = shard_cur; + sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards]; + /* Just for asserts, below. */ + sec_bin_t *bin_start = bin_cur; + + for (size_t i = 0; i < opts->nshards; i++) { + sec_shard_t *shard = shard_cur; + shard_cur++; + bool err = malloc_mutex_init(&shard->mtx, "sec_shard", + WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + shard->enabled = true; + shard->bins = bin_cur; + for (pszind_t j = 0; j < npsizes; j++) { + sec_bin_init(&shard->bins[j]); + bin_cur++; + } + shard->bytes_cur = 0; + shard->to_flush_next = 0; + } + /* + * Should have exactly matched the bin_start to the first unused byte + * after the shards. + */ + assert((void *)shard_cur == (void *)bin_start); + /* And the last bin to use up the last bytes of the allocation. */ + assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); + sec->fallback = fallback; + + + sec->opts = *opts; + sec->npsizes = npsizes; + + /* + * Initialize these last so that an improper use of an SEC whose + * initialization failed will segfault in an easy-to-spot way. + */ + sec->pai.alloc = &sec_alloc; + sec->pai.alloc_batch = &pai_alloc_batch_default; + sec->pai.expand = &sec_expand; + sec->pai.shrink = &sec_shrink; + sec->pai.dalloc = &sec_dalloc; + sec->pai.dalloc_batch = &pai_dalloc_batch_default; + + return false; +} + +static sec_shard_t * +sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { + /* + * Eventually, we should implement affinity, tracking source shard using + * the edata_t's newly freed up fields. For now, just randomly + * distribute across all shards. + */ + if (tsdn_null(tsdn)) { + return &sec->shards[0]; + } + tsd_t *tsd = tsdn_tsd(tsdn); + uint8_t *idxp = tsd_sec_shardp_get(tsd); + if (*idxp == (uint8_t)-1) { + /* + * First use; initialize using the trick from Daniel Lemire's + * "A fast alternative to the modulo reduction. Use a 64 bit + * number to store 32 bits, since we'll deliberately overflow + * when we multiply by the number of shards. + */ + uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); + uint32_t idx = + (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32); + assert(idx < (uint32_t)sec->opts.nshards); + *idxp = (uint8_t)idx; + } + return &sec->shards[*idxp]; +} + +/* + * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an + * empty cache, we'll try to fill it, which can push the shard over it's limit. + */ +static void +sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + while (shard->bytes_cur > sec->opts.bytes_after_flush) { + /* Pick a victim. */ + sec_bin_t *bin = &shard->bins[shard->to_flush_next]; + + /* Update our victim-picking state. */ + shard->to_flush_next++; + if (shard->to_flush_next == sec->npsizes) { + shard->to_flush_next = 0; + } + + assert(shard->bytes_cur >= bin->bytes_cur); + if (bin->bytes_cur != 0) { + shard->bytes_cur -= bin->bytes_cur; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + /* + * Either bin->bytes_cur was 0, in which case we didn't touch + * the bin list but it should be empty anyways (or else we + * missed a bytes_cur update on a list modification), or it + * *was* 0 and we emptied it ourselves. Either way, it should + * be empty now. + */ + assert(edata_list_active_empty(&bin->freelist)); + } + + malloc_mutex_unlock(tsdn, &shard->mtx); + bool deferred_work_generated = false; + pai_dalloc_batch(tsdn, sec->fallback, &to_flush, + &deferred_work_generated); +} + +static edata_t * +sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + sec_bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!shard->enabled) { + return NULL; + } + edata_t *edata = edata_list_active_first(&bin->freelist); + if (edata != NULL) { + edata_list_active_remove(&bin->freelist, edata); + assert(edata_size_get(edata) <= bin->bytes_cur); + bin->bytes_cur -= edata_size_get(edata); + assert(edata_size_get(edata) <= shard->bytes_cur); + shard->bytes_cur -= edata_size_get(edata); + } + return edata; +} + +static edata_t * +sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + sec_bin_t *bin, size_t size) { + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + + edata_list_active_t result; + edata_list_active_init(&result); + bool deferred_work_generated = false; + size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, + 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); + + edata_t *ret = edata_list_active_first(&result); + if (ret != NULL) { + edata_list_active_remove(&result, ret); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + bin->being_batch_filled = false; + /* + * Handle the easy case first: nothing to cache. Note that this can + * only happen in case of OOM, since sec_alloc checks the expected + * number of allocs, and doesn't bother going down the batch_fill + * pathway if there won't be anything left to cache. So to be in this + * code path, we must have asked for > 1 alloc, but only gotten 1 back. + */ + if (nalloc <= 1) { + malloc_mutex_unlock(tsdn, &shard->mtx); + return ret; + } + + size_t new_cached_bytes = (nalloc - 1) * size; + + edata_list_active_concat(&bin->freelist, &result); + bin->bytes_cur += new_cached_bytes; + shard->bytes_cur += new_cached_bytes; + + if (shard->bytes_cur > sec->opts.max_bytes) { + sec_flush_some_and_unlock(tsdn, sec, shard); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + } + + return ret; +} + +static edata_t * +sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { + assert((size & PAGE_MASK) == 0); + assert(!guarded); + + sec_t *sec = (sec_t *)self; + + if (zero || alignment > PAGE || sec->opts.nshards == 0 + || size > sec->opts.max_alloc) { + return pai_alloc(tsdn, sec->fallback, size, alignment, zero, + /* guarded */ false, frequent_reuse, + deferred_work_generated); + } + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + sec_bin_t *bin = &shard->bins[pszind]; + bool do_batch_fill = false; + + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); + if (edata == NULL) { + if (!bin->being_batch_filled + && sec->opts.batch_fill_extra > 0) { + bin->being_batch_filled = true; + do_batch_fill = true; + } + } + malloc_mutex_unlock(tsdn, &shard->mtx); + if (edata == NULL) { + if (do_batch_fill) { + edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin, + size); + } else { + edata = pai_alloc(tsdn, sec->fallback, size, alignment, + zero, /* guarded */ false, frequent_reuse, + deferred_work_generated); + } + } + return edata; +} + +static bool +sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero, bool *deferred_work_generated) { + sec_t *sec = (sec_t *)self; + return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, + deferred_work_generated); +} + +static bool +sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool *deferred_work_generated) { + sec_t *sec = (sec_t *)self; + return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, + deferred_work_generated); +} + +static void +sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + shard->bytes_cur = 0; + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + for (pszind_t i = 0; i < sec->npsizes; i++) { + sec_bin_t *bin = &shard->bins[i]; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + + /* + * Ordinarily we would try to avoid doing the batch deallocation while + * holding the shard mutex, but the flush_all pathways only happen when + * we're disabling the HPA or resetting the arena, both of which are + * rare pathways. + */ + bool deferred_work_generated = false; + pai_dalloc_batch(tsdn, sec->fallback, &to_flush, + &deferred_work_generated); +} + +static void +sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + assert(shard->bytes_cur <= sec->opts.max_bytes); + size_t size = edata_size_get(edata); + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + /* + * Prepending here results in LIFO allocation per bin, which seems + * reasonable. + */ + sec_bin_t *bin = &shard->bins[pszind]; + edata_list_active_prepend(&bin->freelist, edata); + bin->bytes_cur += size; + shard->bytes_cur += size; + if (shard->bytes_cur > sec->opts.max_bytes) { + /* + * We've exceeded the shard limit. We make two nods in the + * direction of fragmentation avoidance: we flush everything in + * the shard, rather than one particular bin, and we hold the + * lock while flushing (in case one of the extents we flush is + * highly preferred from a fragmentation-avoidance perspective + * in the backing allocator). This has the extra advantage of + * not requiring advanced cache balancing strategies. + */ + sec_flush_some_and_unlock(tsdn, sec, shard); + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + } +} + +static void +sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { + sec_t *sec = (sec_t *)self; + if (sec->opts.nshards == 0 + || edata_size_get(edata) > sec->opts.max_alloc) { + pai_dalloc(tsdn, sec->fallback, edata, + deferred_work_generated); + return; + } + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + if (shard->enabled) { + sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc(tsdn, sec->fallback, edata, + deferred_work_generated); + } +} + +void +sec_flush(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->opts.nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec_flush_all_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_disable(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->opts.nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec->shards[i].enabled = false; + sec_flush_all_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + size_t sum = 0; + for (size_t i = 0; i < sec->opts.nshards; i++) { + /* + * We could save these lock acquisitions by making bytes_cur + * atomic, but stats collection is rare anyways and we expect + * the number and type of stats to get more interesting. + */ + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sum += sec->shards[i].bytes_cur; + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } + stats->bytes += sum; +} + +void +sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data) { + for (size_t i = 0; i < sec->opts.nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, + &sec->shards[i].mtx); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_prefork2(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->opts.nshards; i++) { + malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->opts.nshards; i++) { + malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->opts.nshards; i++) { + malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + } +} |