diff options
Diffstat (limited to 'mm/slab.h')
-rw-r--r-- | mm/slab.h | 638 |
1 files changed, 638 insertions, 0 deletions
diff --git a/mm/slab.h b/mm/slab.h new file mode 100644 index 000000000..6952e10cf --- /dev/null +++ b/mm/slab.h @@ -0,0 +1,638 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef MM_SLAB_H +#define MM_SLAB_H +/* + * Internal slab definitions + */ + +#ifdef CONFIG_SLOB +/* + * Common fields provided in kmem_cache by all slab allocators + * This struct is either used directly by the allocator (SLOB) + * or the allocator must include definitions for all fields + * provided in kmem_cache_common in their definition of kmem_cache. + * + * Once we can do anonymous structs (C11 standard) we could put a + * anonymous struct definition in these allocators so that the + * separate allocations in the kmem_cache structure of SLAB and + * SLUB is no longer needed. + */ +struct kmem_cache { + unsigned int object_size;/* The original size of the object */ + unsigned int size; /* The aligned/padded/added on size */ + unsigned int align; /* Alignment as calculated */ + slab_flags_t flags; /* Active flags on the slab */ + unsigned int useroffset;/* Usercopy region offset */ + unsigned int usersize; /* Usercopy region size */ + const char *name; /* Slab name for sysfs */ + int refcount; /* Use counter */ + void (*ctor)(void *); /* Called on object slot creation */ + struct list_head list; /* List of all slab caches on the system */ +}; + +#endif /* CONFIG_SLOB */ + +#ifdef CONFIG_SLAB +#include <linux/slab_def.h> +#endif + +#ifdef CONFIG_SLUB +#include <linux/slub_def.h> +#endif + +#include <linux/memcontrol.h> +#include <linux/fault-inject.h> +#include <linux/kasan.h> +#include <linux/kmemleak.h> +#include <linux/random.h> +#include <linux/sched/mm.h> + +/* + * State of the slab allocator. + * + * This is used to describe the states of the allocator during bootup. + * Allocators use this to gradually bootstrap themselves. Most allocators + * have the problem that the structures used for managing slab caches are + * allocated from slab caches themselves. + */ +enum slab_state { + DOWN, /* No slab functionality yet */ + PARTIAL, /* SLUB: kmem_cache_node available */ + PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */ + UP, /* Slab caches usable but not all extras yet */ + FULL /* Everything is working */ +}; + +extern enum slab_state slab_state; + +/* The slab cache mutex protects the management structures during changes */ +extern struct mutex slab_mutex; + +/* The list of all slab caches on the system */ +extern struct list_head slab_caches; + +/* The slab cache that manages slab cache information */ +extern struct kmem_cache *kmem_cache; + +/* A table of kmalloc cache names and sizes */ +extern const struct kmalloc_info_struct { + const char *name[NR_KMALLOC_TYPES]; + unsigned int size; +} kmalloc_info[]; + +#ifndef CONFIG_SLOB +/* Kmalloc array related functions */ +void setup_kmalloc_cache_index_table(void); +void create_kmalloc_caches(slab_flags_t); + +/* Find the kmalloc slab corresponding for a certain size */ +struct kmem_cache *kmalloc_slab(size_t, gfp_t); +#endif + +gfp_t kmalloc_fix_flags(gfp_t flags); + +/* Functions provided by the slab allocators */ +int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); + +struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size, + slab_flags_t flags, unsigned int useroffset, + unsigned int usersize); +extern void create_boot_cache(struct kmem_cache *, const char *name, + unsigned int size, slab_flags_t flags, + unsigned int useroffset, unsigned int usersize); + +int slab_unmergeable(struct kmem_cache *s); +struct kmem_cache *find_mergeable(unsigned size, unsigned align, + slab_flags_t flags, const char *name, void (*ctor)(void *)); +#ifndef CONFIG_SLOB +struct kmem_cache * +__kmem_cache_alias(const char *name, unsigned int size, unsigned int align, + slab_flags_t flags, void (*ctor)(void *)); + +slab_flags_t kmem_cache_flags(unsigned int object_size, + slab_flags_t flags, const char *name); +#else +static inline struct kmem_cache * +__kmem_cache_alias(const char *name, unsigned int size, unsigned int align, + slab_flags_t flags, void (*ctor)(void *)) +{ return NULL; } + +static inline slab_flags_t kmem_cache_flags(unsigned int object_size, + slab_flags_t flags, const char *name) +{ + return flags; +} +#endif + + +/* Legal flag mask for kmem_cache_create(), for various configurations */ +#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA32 | SLAB_PANIC | \ + SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) + +#if defined(CONFIG_DEBUG_SLAB) +#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) +#elif defined(CONFIG_SLUB_DEBUG) +#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ + SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) +#else +#define SLAB_DEBUG_FLAGS (0) +#endif + +#if defined(CONFIG_SLAB) +#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ + SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \ + SLAB_ACCOUNT) +#elif defined(CONFIG_SLUB) +#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ + SLAB_TEMPORARY | SLAB_ACCOUNT) +#else +#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE) +#endif + +/* Common flags available with current configuration */ +#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) + +/* Common flags permitted for kmem_cache_create */ +#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | \ + SLAB_RED_ZONE | \ + SLAB_POISON | \ + SLAB_STORE_USER | \ + SLAB_TRACE | \ + SLAB_CONSISTENCY_CHECKS | \ + SLAB_MEM_SPREAD | \ + SLAB_NOLEAKTRACE | \ + SLAB_RECLAIM_ACCOUNT | \ + SLAB_TEMPORARY | \ + SLAB_ACCOUNT) + +bool __kmem_cache_empty(struct kmem_cache *); +int __kmem_cache_shutdown(struct kmem_cache *); +void __kmem_cache_release(struct kmem_cache *); +int __kmem_cache_shrink(struct kmem_cache *); +void slab_kmem_cache_release(struct kmem_cache *); + +struct seq_file; +struct file; + +struct slabinfo { + unsigned long active_objs; + unsigned long num_objs; + unsigned long active_slabs; + unsigned long num_slabs; + unsigned long shared_avail; + unsigned int limit; + unsigned int batchcount; + unsigned int shared; + unsigned int objects_per_slab; + unsigned int cache_order; +}; + +void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo); +void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s); +ssize_t slabinfo_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos); + +/* + * Generic implementation of bulk operations + * These are useful for situations in which the allocator cannot + * perform optimizations. In that case segments of the object listed + * may be allocated or freed using these operations. + */ +void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); +int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); + +static inline int cache_vmstat_idx(struct kmem_cache *s) +{ + return (s->flags & SLAB_RECLAIM_ACCOUNT) ? + NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; +} + +#ifdef CONFIG_SLUB_DEBUG +#ifdef CONFIG_SLUB_DEBUG_ON +DECLARE_STATIC_KEY_TRUE(slub_debug_enabled); +#else +DECLARE_STATIC_KEY_FALSE(slub_debug_enabled); +#endif +extern void print_tracking(struct kmem_cache *s, void *object); +#else +static inline void print_tracking(struct kmem_cache *s, void *object) +{ +} +#endif + +/* + * Returns true if any of the specified slub_debug flags is enabled for the + * cache. Use only for flags parsed by setup_slub_debug() as it also enables + * the static key. + */ +static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags) +{ +#ifdef CONFIG_SLUB_DEBUG + VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS)); + if (static_branch_unlikely(&slub_debug_enabled)) + return s->flags & flags; +#endif + return false; +} + +#ifdef CONFIG_MEMCG_KMEM +static inline struct obj_cgroup **page_obj_cgroups(struct page *page) +{ + /* + * page->mem_cgroup and page->obj_cgroups are sharing the same + * space. To distinguish between them in case we don't know for sure + * that the page is a slab page (e.g. page_cgroup_ino()), let's + * always set the lowest bit of obj_cgroups. + */ + return (struct obj_cgroup **) + ((unsigned long)page->obj_cgroups & ~0x1UL); +} + +static inline bool page_has_obj_cgroups(struct page *page) +{ + return ((unsigned long)page->obj_cgroups & 0x1UL); +} + +int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, + gfp_t gfp); + +static inline void memcg_free_page_obj_cgroups(struct page *page) +{ + kfree(page_obj_cgroups(page)); + page->obj_cgroups = NULL; +} + +static inline size_t obj_full_size(struct kmem_cache *s) +{ + /* + * For each accounted object there is an extra space which is used + * to store obj_cgroup membership. Charge it too. + */ + return s->size + sizeof(struct obj_cgroup *); +} + +/* + * Returns false if the allocation should fail. + */ +static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) +{ + struct obj_cgroup *objcg; + + if (!memcg_kmem_enabled()) + return true; + + if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)) + return true; + + objcg = get_obj_cgroup_from_current(); + if (!objcg) + return true; + + if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) { + obj_cgroup_put(objcg); + return false; + } + + *objcgp = objcg; + return true; +} + +static inline void mod_objcg_state(struct obj_cgroup *objcg, + struct pglist_data *pgdat, + int idx, int nr) +{ + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + rcu_read_lock(); + memcg = obj_cgroup_memcg(objcg); + lruvec = mem_cgroup_lruvec(memcg, pgdat); + mod_memcg_lruvec_state(lruvec, idx, nr); + rcu_read_unlock(); +} + +static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct obj_cgroup *objcg, + gfp_t flags, size_t size, + void **p) +{ + struct page *page; + unsigned long off; + size_t i; + + if (!memcg_kmem_enabled() || !objcg) + return; + + for (i = 0; i < size; i++) { + if (likely(p[i])) { + page = virt_to_head_page(p[i]); + + if (!page_has_obj_cgroups(page) && + memcg_alloc_page_obj_cgroups(page, s, flags)) { + obj_cgroup_uncharge(objcg, obj_full_size(s)); + continue; + } + + off = obj_to_index(s, page, p[i]); + obj_cgroup_get(objcg); + page_obj_cgroups(page)[off] = objcg; + mod_objcg_state(objcg, page_pgdat(page), + cache_vmstat_idx(s), obj_full_size(s)); + } else { + obj_cgroup_uncharge(objcg, obj_full_size(s)); + } + } + obj_cgroup_put(objcg); +} + +static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, + void **p, int objects) +{ + struct kmem_cache *s; + struct obj_cgroup *objcg; + struct page *page; + unsigned int off; + int i; + + if (!memcg_kmem_enabled()) + return; + + for (i = 0; i < objects; i++) { + if (unlikely(!p[i])) + continue; + + page = virt_to_head_page(p[i]); + if (!page_has_obj_cgroups(page)) + continue; + + if (!s_orig) + s = page->slab_cache; + else + s = s_orig; + + off = obj_to_index(s, page, p[i]); + objcg = page_obj_cgroups(page)[off]; + if (!objcg) + continue; + + page_obj_cgroups(page)[off] = NULL; + obj_cgroup_uncharge(objcg, obj_full_size(s)); + mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), + -obj_full_size(s)); + obj_cgroup_put(objcg); + } +} + +#else /* CONFIG_MEMCG_KMEM */ +static inline bool page_has_obj_cgroups(struct page *page) +{ + return false; +} + +static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) +{ + return NULL; +} + +static inline int memcg_alloc_page_obj_cgroups(struct page *page, + struct kmem_cache *s, gfp_t gfp) +{ + return 0; +} + +static inline void memcg_free_page_obj_cgroups(struct page *page) +{ +} + +static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) +{ + return true; +} + +static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct obj_cgroup *objcg, + gfp_t flags, size_t size, + void **p) +{ +} + +static inline void memcg_slab_free_hook(struct kmem_cache *s, + void **p, int objects) +{ +} +#endif /* CONFIG_MEMCG_KMEM */ + +static inline struct kmem_cache *virt_to_cache(const void *obj) +{ + struct page *page; + + page = virt_to_head_page(obj); + if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", + __func__)) + return NULL; + return page->slab_cache; +} + +static __always_inline void account_slab_page(struct page *page, int order, + struct kmem_cache *s) +{ + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + PAGE_SIZE << order); +} + +static __always_inline void unaccount_slab_page(struct page *page, int order, + struct kmem_cache *s) +{ + if (memcg_kmem_enabled()) + memcg_free_page_obj_cgroups(page); + + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + -(PAGE_SIZE << order)); +} + +static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) +{ + struct kmem_cache *cachep; + + if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) && + !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) + return s; + + cachep = virt_to_cache(x); + if (WARN(cachep && cachep != s, + "%s: Wrong slab cache. %s but object is from %s\n", + __func__, s->name, cachep->name)) + print_tracking(cachep, x); + return cachep; +} + +static inline size_t slab_ksize(const struct kmem_cache *s) +{ +#ifndef CONFIG_SLUB + return s->object_size; + +#else /* CONFIG_SLUB */ +# ifdef CONFIG_SLUB_DEBUG + /* + * Debugging requires use of the padding between object + * and whatever may come after it. + */ + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) + return s->object_size; +# endif + if (s->flags & SLAB_KASAN) + return s->object_size; + /* + * If we have the need to store the freelist pointer + * back there or track user information then we can + * only use the space before that information. + */ + if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) + return s->inuse; + /* + * Else we can use all the padding etc for the allocation + */ + return s->size; +#endif +} + +static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, + struct obj_cgroup **objcgp, + size_t size, gfp_t flags) +{ + flags &= gfp_allowed_mask; + + fs_reclaim_acquire(flags); + fs_reclaim_release(flags); + + might_sleep_if(gfpflags_allow_blocking(flags)); + + if (should_failslab(s, flags)) + return NULL; + + if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) + return NULL; + + return s; +} + +static inline void slab_post_alloc_hook(struct kmem_cache *s, + struct obj_cgroup *objcg, + gfp_t flags, size_t size, void **p) +{ + size_t i; + + flags &= gfp_allowed_mask; + for (i = 0; i < size; i++) { + p[i] = kasan_slab_alloc(s, p[i], flags); + /* As p[i] might get tagged, call kmemleak hook after KASAN. */ + kmemleak_alloc_recursive(p[i], s->object_size, 1, + s->flags, flags); + } + + memcg_slab_post_alloc_hook(s, objcg, flags, size, p); +} + +#ifndef CONFIG_SLOB +/* + * The slab lists for all objects. + */ +struct kmem_cache_node { + spinlock_t list_lock; + +#ifdef CONFIG_SLAB + struct list_head slabs_partial; /* partial list first, better asm code */ + struct list_head slabs_full; + struct list_head slabs_free; + unsigned long total_slabs; /* length of all slab lists */ + unsigned long free_slabs; /* length of free slab list only */ + unsigned long free_objects; + unsigned int free_limit; + unsigned int colour_next; /* Per-node cache coloring */ + struct array_cache *shared; /* shared per node */ + struct alien_cache **alien; /* on other nodes */ + unsigned long next_reap; /* updated without locking */ + int free_touched; /* updated without locking */ +#endif + +#ifdef CONFIG_SLUB + unsigned long nr_partial; + struct list_head partial; +#ifdef CONFIG_SLUB_DEBUG + atomic_long_t nr_slabs; + atomic_long_t total_objects; + struct list_head full; +#endif +#endif + +}; + +static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) +{ + return s->node[node]; +} + +/* + * Iterator over all nodes. The body will be executed for each node that has + * a kmem_cache_node structure allocated (which is true for all online nodes) + */ +#define for_each_kmem_cache_node(__s, __node, __n) \ + for (__node = 0; __node < nr_node_ids; __node++) \ + if ((__n = get_node(__s, __node))) + +#endif + +void *slab_start(struct seq_file *m, loff_t *pos); +void *slab_next(struct seq_file *m, void *p, loff_t *pos); +void slab_stop(struct seq_file *m, void *p); +int memcg_slab_show(struct seq_file *m, void *p); + +#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) +void dump_unreclaimable_slab(void); +#else +static inline void dump_unreclaimable_slab(void) +{ +} +#endif + +void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr); + +#ifdef CONFIG_SLAB_FREELIST_RANDOM +int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count, + gfp_t gfp); +void cache_random_seq_destroy(struct kmem_cache *cachep); +#else +static inline int cache_random_seq_create(struct kmem_cache *cachep, + unsigned int count, gfp_t gfp) +{ + return 0; +} +static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } +#endif /* CONFIG_SLAB_FREELIST_RANDOM */ + +static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) +{ + if (static_branch_unlikely(&init_on_alloc)) { + if (c->ctor) + return false; + if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) + return flags & __GFP_ZERO; + return true; + } + return flags & __GFP_ZERO; +} + +static inline bool slab_want_init_on_free(struct kmem_cache *c) +{ + if (static_branch_unlikely(&init_on_free)) + return !(c->ctor || + (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); + return false; +} + +#endif /* MM_SLAB_H */ |