author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 13:40:54 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 13:40:54 +0000
commit | 317c0644ccf108aa23ef3fd8358bd66c2840bfc0 (patch)
tree | c417b3d25c86b775989cb5ac042f37611b626c8a /deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
parent | Initial commit. (diff)
download | redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.tar.xz, redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.zip
Adding upstream version 5:7.2.4.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h')

-rw-r--r-- | deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h | 391
1 file changed, 391 insertions, 0 deletions
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
new file mode 100644
index 0000000..2cd7e7c
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -0,0 +1,391 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_C_H
+#define JEMALLOC_INTERNAL_INLINES_C_H
+
+#include "jemalloc/internal/hook.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/log.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/thread_event.h"
+#include "jemalloc/internal/witness.h"
+
+/*
+ * Translating the names of the 'i' functions:
+ *   Abbreviations used in the first part of the function name (before
+ *   alloc/dalloc) describe what that function accomplishes:
+ *     a: arena (query)
+ *     s: size (query, or sized deallocation)
+ *     e: extent (query)
+ *     p: aligned (allocates)
+ *     vs: size (query, without knowing that the pointer is into the heap)
+ *     r: rallocx implementation
+ *     x: xallocx implementation
+ *   Abbreviations used in the second part of the function name (after
+ *   alloc/dalloc) describe the arguments it takes
+ *     z: whether to return zeroed memory
+ *     t: accepts a tcache_t * parameter
+ *     m: accepts an arena_t * parameter
+ */
+
+JEMALLOC_ALWAYS_INLINE arena_t *
+iaalloc(tsdn_t *tsdn, const void *ptr) {
+    assert(ptr != NULL);
+
+    return arena_aalloc(tsdn, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+isalloc(tsdn_t *tsdn, const void *ptr) {
+    assert(ptr != NULL);
+
+    return arena_salloc(tsdn, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+    bool is_internal, arena_t *arena, bool slow_path) {
+    void *ret;
+
+    assert(!is_internal || tcache == NULL);
+    assert(!is_internal || arena == NULL || arena_is_auto(arena));
+    if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
+        witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+            WITNESS_RANK_CORE, 0);
+    }
+
+    ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
+    if (config_stats && is_internal && likely(ret != NULL)) {
+        arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
+    }
+    return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
+    return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false,
+        NULL, slow_path);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, bool is_internal, arena_t *arena) {
+    void *ret;
+
+    assert(usize != 0);
+    assert(usize == sz_sa2u(usize, alignment));
+    assert(!is_internal || tcache == NULL);
+    assert(!is_internal || arena == NULL || arena_is_auto(arena));
+    witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+        WITNESS_RANK_CORE, 0);
+
+    ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+    assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
+    if (config_stats && is_internal && likely(ret != NULL)) {
+        arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
+    }
+    return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, arena_t *arena) {
+    return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
+    return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
+        tcache_get(tsd), false, NULL);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+ivsalloc(tsdn_t *tsdn, const void *ptr) {
+    return arena_vsalloc(tsdn, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+    emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) {
+    assert(ptr != NULL);
+    assert(!is_internal || tcache == NULL);
+    assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
+    witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+        WITNESS_RANK_CORE, 0);
+    if (config_stats && is_internal) {
+        arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
+    }
+    if (!is_internal && !tsdn_null(tsdn) &&
+        tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+        assert(tcache == NULL);
+    }
+    arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+idalloc(tsd_t *tsd, void *ptr) {
+    idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    emap_alloc_ctx_t *alloc_ctx, bool slow_path) {
+    witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+        WITNESS_RANK_CORE, 0);
+    arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+    size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
+    hook_ralloc_args_t *hook_args) {
+    witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+        WITNESS_RANK_CORE, 0);
+    void *p;
+    size_t usize, copysize;
+
+    usize = sz_sa2u(size, alignment);
+    if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
+        return NULL;
+    }
+    p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+    if (p == NULL) {
+        return NULL;
+    }
+    /*
+     * Copy at most size bytes (not size+extra), since the caller has no
+     * expectation that the extra bytes will be reliably preserved.
+     */
+    copysize = (size < oldsize) ? size : oldsize;
+    memcpy(p, ptr, copysize);
+    hook_invoke_alloc(hook_args->is_realloc
+        ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p,
+        hook_args->args);
+    hook_invoke_dalloc(hook_args->is_realloc
+        ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
+    isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
+    return p;
+}
+
+/*
+ * is_realloc threads through the knowledge of whether or not this call comes
+ * from je_realloc (as opposed to je_rallocx); this ensures that we pass the
+ * correct entry point into any hooks.
+ * Note that these functions are all force-inlined, so no actual bool gets
+ * passed-around anywhere.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
+    bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
+{
+    assert(ptr != NULL);
+    assert(size != 0);
+    witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+        WITNESS_RANK_CORE, 0);
+
+    if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
+        != 0) {
+        /*
+         * Existing object alignment is inadequate; allocate new space
+         * and copy.
+         */
+        return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
+            zero, tcache, arena, hook_args);
+    }
+
+    return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
+        tcache, hook_args);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+    bool zero, hook_ralloc_args_t *hook_args) {
+    return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
+        tcache_get(tsd), NULL, hook_args);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero, size_t *newsize) {
+    assert(ptr != NULL);
+    assert(size != 0);
+    witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+        WITNESS_RANK_CORE, 0);
+
+    if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
+        != 0) {
+        /* Existing object alignment is inadequate. */
+        *newsize = oldsize;
+        return true;
+    }
+
+    return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero,
+        newsize);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,
+    cache_bin_t *bin, void *ret) {
+    thread_allocated_set(tsd, allocated_after);
+    if (config_stats) {
+        bin->tstats.nrequests++;
+    }
+
+    LOG("core.malloc.exit", "result: %p", ret);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+malloc_initialized(void) {
+    return (malloc_init_state == malloc_init_initialized);
+}
+
+/*
+ * malloc() fastpath. Included here so that we can inline it into operator new;
+ * function call overhead there is non-negligible as a fraction of total CPU in
+ * allocation-heavy C++ programs. We take the fallback alloc to allow malloc
+ * (which can return NULL) to differ in its behavior from operator new (which
+ * can't). It matches the signature of malloc / operator new so that we can
+ * tail-call the fallback allocator, allowing us to avoid setting up the call
+ * frame in the common case.
+ *
+ * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit
+ * tcache. If either of these is false, we tail-call to the slowpath,
+ * malloc_default(). Tail-calling is used to avoid any caller-saved
+ * registers.
+ *
+ * fastpath supports ticker and profiling, both of which will also
+ * tail-call to the slowpath if they fire.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
+    LOG("core.malloc.entry", "size: %zu", size);
+    if (tsd_get_allocates() && unlikely(!malloc_initialized())) {
+        return fallback_alloc(size);
+    }
+
+    tsd_t *tsd = tsd_get(false);
+    if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) {
+        return fallback_alloc(size);
+    }
+    /*
+     * The code below till the branch checking the next_event threshold may
+     * execute before malloc_init(), in which case the threshold is 0 to
+     * trigger slow path and initialization.
+     *
+     * Note that when uninitialized, only the fast-path variants of the sz /
+     * tsd facilities may be called.
+     */
+    szind_t ind;
+    /*
+     * The thread_allocated counter in tsd serves as a general purpose
+     * accumulator for bytes of allocation to trigger different types of
+     * events. usize is always needed to advance thread_allocated, though
+     * it's not always needed in the core allocation logic.
+     */
+    size_t usize;
+    sz_size2index_usize_fastpath(size, &ind, &usize);
+    /* Fast path relies on size being a bin. */
+    assert(ind < SC_NBINS);
+    assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&
+        (size <= SC_SMALL_MAXCLASS));
+
+    uint64_t allocated, threshold;
+    te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
+    uint64_t allocated_after = allocated + usize;
+    /*
+     * The ind and usize might be uninitialized (or partially) before
+     * malloc_init(). The assertions check for: 1) full correctness (usize
+     * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0)
+     * when !initialized.
+     */
+    if (!malloc_initialized()) {
+        assert(threshold == 0);
+    } else {
+        assert(ind == sz_size2index(size));
+        assert(usize > 0 && usize == sz_index2size(ind));
+    }
+    /*
+     * Check for events and tsd non-nominal (fast_threshold will be set to
+     * 0) in a single branch.
+     */
+    if (unlikely(allocated_after >= threshold)) {
+        return fallback_alloc(size);
+    }
+    assert(tsd_fast(tsd));
+
+    tcache_t *tcache = tsd_tcachep_get(tsd);
+    assert(tcache == tcache_get(tsd));
+    cache_bin_t *bin = &tcache->bins[ind];
+    bool tcache_success;
+    void *ret;
+
+    /*
+     * We split up the code this way so that redundant low-water
+     * computation doesn't happen on the (more common) case in which we
+     * don't touch the low water mark. The compiler won't do this
+     * duplication on its own.
+     */
+    ret = cache_bin_alloc_easy(bin, &tcache_success);
+    if (tcache_success) {
+        fastpath_success_finish(tsd, allocated_after, bin, ret);
+        return ret;
+    }
+    ret = cache_bin_alloc(bin, &tcache_success);
+    if (tcache_success) {
+        fastpath_success_finish(tsd, allocated_after, bin, ret);
+        return ret;
+    }
+
+    return fallback_alloc(size);
+}
+
+JEMALLOC_ALWAYS_INLINE int
+iget_defrag_hint(tsdn_t *tsdn, void* ptr) {
+    int defrag = 0;
+    emap_alloc_ctx_t alloc_ctx;
+    emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
+    if (likely(alloc_ctx.slab)) {
+        /* Small allocation. */
+        edata_t *slab = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
+        arena_t *arena = arena_get_from_edata(slab);
+        szind_t binind = edata_szind_get(slab);
+        unsigned binshard = edata_binshard_get(slab);
+        bin_t *bin = arena_get_bin(arena, binind, binshard);
+        malloc_mutex_lock(tsdn, &bin->lock);
+        arena_dalloc_bin_locked_info_t info;
+        arena_dalloc_bin_locked_begin(&info, binind);
+        /* Don't bother moving allocations from the slab currently used for new allocations */
+        if (slab != bin->slabcur) {
+            int free_in_slab = edata_nfree_get(slab);
+            if (free_in_slab) {
+                const bin_info_t *bin_info = &bin_infos[binind];
+                /* Find number of non-full slabs and the number of regs in them */
+                unsigned long curslabs = 0;
+                size_t curregs = 0;
+                /* Run on all bin shards (usually just one) */
+                for (uint32_t i = 0; i < bin_info->n_shards; i++) {
+                    bin_t *bb = arena_get_bin(arena, binind, i);
+                    curslabs += bb->stats.nonfull_slabs;
+                    /* Deduct the regs in full slabs (they're not part of the game) */
+                    unsigned long full_slabs = bb->stats.curslabs - bb->stats.nonfull_slabs;
+                    curregs += bb->stats.curregs - full_slabs * bin_info->nregs;
+                    if (bb->slabcur) {
+                        /* Remove slabcur from the overall utilization (not a candidate to move from) */
+                        curregs -= bin_info->nregs - edata_nfree_get(bb->slabcur);
+                        curslabs -= 1;
+                    }
+                }
+                /* Compare the utilization ratio of the slab in question to the total average
+                 * among non-full slabs. To avoid precision loss in division, we do that by
+                 * extrapolating the usage of the slab as if all slabs have the same usage.
+                 * If this slab is less used than the average, we'll prefer to move the data
+                 * to hopefully more used ones. To avoid stagnation when all slabs have the same
+                 * utilization, we give additional 12.5% weight to the decision to defrag. */
+                defrag = (bin_info->nregs - free_in_slab) * curslabs <= curregs + curregs / 8;
+            }
+        }
+        arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info);
+        malloc_mutex_unlock(tsdn, &bin->lock);
+    }
+    return defrag;
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_C_H */
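
Notes on the code above follow.

As a reading aid for the naming scheme documented at the top of the header, here are two of the names decoded letter by letter (annotation only, not new API):

/*
 * ipallocztm = i + p + alloc + z + t + m
 *   p -> aligned allocation
 *   z -> takes a bool zero parameter
 *   t -> takes a tcache_t *tcache parameter
 *   m -> takes an arena_t *arena parameter
 *
 * isdalloct  = i + s + dalloc + t
 *   s -> sized deallocation (the caller supplies the size)
 *   t -> takes a tcache_t *tcache parameter
 */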
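iralloct() and ixalloc() both test alignment with ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0. The mask trick works because a pointer is alignment-aligned exactly when its low log2(alignment) bits are zero, which requires alignment to be a power of two (jemalloc's aligned entry points require power-of-two alignment). A small self-contained check, with is_aligned() a hypothetical helper introduced only for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Power-of-two alignment check, as used in iralloct()/ixalloc(). */
static int is_aligned(uintptr_t addr, uintptr_t alignment) {
    return (addr & (alignment - 1)) == 0;
}

int main(void) {
    assert(is_aligned(0x1000, 16));   /* 0x1000 % 16 == 0: low four bits clear */
    assert(!is_aligned(0x1008, 16));  /* low bits 0x8 set: not 16-byte aligned */
    assert(is_aligned(0x1008, 8));    /* but it is 8-byte aligned */
    printf("alignment checks passed\n");
    return 0;
}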
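The comment on imalloc_fastpath() describes a tail-call fastpath pattern: the always-inlined fast path handles the common case, and every bail-out is a tail call to a fallback with the same signature (malloc_default() in jemalloc, per the comment), so the hot path never sets up a call frame for the slow case. A minimal standalone sketch of that pattern, under stated assumptions: toy_malloc, toy_fastpath, and toy_slowpath are made-up names, and a trivial bump allocator stands in for the real cache-bin lookup.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Cold path: stand-in for malloc_default() (initialization, big sizes). */
static void *toy_slowpath(size_t size) {
    return malloc(size);
}

static char toy_buf[4096];  /* toy stand-in for a tcache bin */
static size_t toy_used;

/* Hot path: same shape as imalloc_fastpath(size, fallback). */
static inline void *toy_fastpath(size_t size, void *(fallback)(size_t)) {
    size_t usize = (size + 15) & ~(size_t)15;  /* round up to 16 bytes */
    if (usize == 0 || usize < size /* overflow */ ||
        usize > sizeof(toy_buf) - toy_used) {
        /* Tail call: same signature, so the compiler can jump straight
         * to the fallback without building a new frame. */
        return fallback(size);
    }
    void *ret = toy_buf + toy_used;
    toy_used += usize;
    return ret;
}

void *toy_malloc(size_t size) {
    return toy_fastpath(size, toy_slowpath);
}

int main(void) {
    void *a = toy_malloc(32);       /* served from the toy buffer */
    void *b = toy_malloc(1 << 20);  /* too big: falls back to malloc() */
    printf("%p %p\n", a, b);
    free(b);  /* only the fallback allocation came from malloc() */
    return 0;
}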
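The division-free comparison at the heart of iget_defrag_hint(), used_in_slab * curslabs <= curregs + curregs / 8, is equivalent to used_in_slab <= (curregs / curslabs) * 1.125: the slab is a defrag candidate when its utilization is at or below the average of the non-full slabs plus the 12.5% margin. A runnable check with made-up numbers (all values hypothetical, chosen only to exercise the formula):

#include <stdio.h>

int main(void) {
    unsigned nregs = 64;          /* regions per slab (hypothetical) */
    unsigned free_in_slab = 40;   /* free regions in the candidate slab */
    unsigned long curslabs = 10;  /* non-full slabs, excluding slabcur */
    unsigned long curregs = 300;  /* used regions across those slabs */

    unsigned used = nregs - free_in_slab;  /* 24 used regions */
    /* 24 * 10 = 240 <= 300 + 37 = 337, so the hint fires: this slab is
     * below the (weighted) average utilization of 30 regions per slab. */
    int defrag = used * curslabs <= curregs + curregs / 8;

    printf("defrag hint: %d\n", defrag);  /* prints 1 */
    return 0;
}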