From: Thomas Gleixner Date: Thu, 21 Jun 2018 17:29:19 +0200 Subject: [PATCH 075/353] mm/SLUB: delay giving back empty slubs to IRQ enabled regions Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=0bed7d4f488bcf540f7876a275217b9c398254bb __free_slab() is invoked with disabled interrupts which increases the irq-off time while __free_pages() is doing the work. Allow __free_slab() to be invoked with enabled interrupts and move everything from interrupts-off invocations to a temporary per-CPU list so it can be processed later. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- mm/slub.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 97d2bd78534b..c62e0bf85d8e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1350,6 +1350,12 @@ static bool freelist_corrupted(struct kmem_cache *s, struct page *page, } #endif /* CONFIG_SLUB_DEBUG */ +struct slub_free_list { + raw_spinlock_t lock; + struct list_head list; +}; +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); + /* * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. @@ -1711,6 +1717,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __free_pages(page, order); } +static void free_delayed(struct list_head *h) +{ + while (!list_empty(h)) { + struct page *page = list_first_entry(h, struct page, lru); + + list_del(&page->lru); + __free_slab(page->slab_cache, page); + } +} + static void rcu_free_slab(struct rcu_head *h) { struct page *page = container_of(h, struct page, rcu_head); @@ -1722,6 +1738,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) { if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { call_rcu(&page->rcu_head, rcu_free_slab); + } else if (irqs_disabled()) { + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); + + raw_spin_lock(&f->lock); + list_add(&page->lru, &f->list); + raw_spin_unlock(&f->lock); } else __free_slab(s, page); } @@ -2257,14 +2279,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) pobjects = oldpage->pobjects; pages = oldpage->pages; if (drain && pobjects > s->cpu_partial) { + struct slub_free_list *f; unsigned long flags; + LIST_HEAD(tofree); /* * partial array is full. Move the existing * set to the per node partial list. */ local_irq_save(flags); unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); + f = this_cpu_ptr(&slub_free_list); + raw_spin_lock(&f->lock); + list_splice_init(&f->list, &tofree); + raw_spin_unlock(&f->lock); local_irq_restore(flags); + free_delayed(&tofree); oldpage = NULL; pobjects = 0; pages = 0; @@ -2332,7 +2361,22 @@ static bool has_cpu_slab(int cpu, void *info) static void flush_all(struct kmem_cache *s) { + LIST_HEAD(tofree); + int cpu; + on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); + for_each_online_cpu(cpu) { + struct slub_free_list *f; + + if (!has_cpu_slab(cpu, s)) + continue; + + f = &per_cpu(slub_free_list, cpu); + raw_spin_lock_irq(&f->lock); + list_splice_init(&f->list, &tofree); + raw_spin_unlock_irq(&f->lock); + free_delayed(&tofree); + } } /* @@ -2530,8 +2574,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) * already disabled (which is the case for bulk allocation). */ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, - unsigned long addr, struct kmem_cache_cpu *c) + unsigned long addr, struct kmem_cache_cpu *c, + struct list_head *to_free) { + struct slub_free_list *f; void *freelist; struct page *page; @@ -2598,6 +2644,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); + +out: + f = this_cpu_ptr(&slub_free_list); + raw_spin_lock(&f->lock); + list_splice_init(&f->list, to_free); + raw_spin_unlock(&f->lock); + return freelist; new_slab: @@ -2613,7 +2666,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); - return NULL; + goto out; } page = c->page; @@ -2626,7 +2679,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, goto new_slab; /* Slab failed checks. Next slab needed */ deactivate_slab(s, page, get_freepointer(s, freelist), c); - return freelist; + goto out; } /* @@ -2638,6 +2691,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, { void *p; unsigned long flags; + LIST_HEAD(tofree); local_irq_save(flags); #ifdef CONFIG_PREEMPT @@ -2649,8 +2703,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, c = this_cpu_ptr(s->cpu_slab); #endif - p = ___slab_alloc(s, gfpflags, node, addr, c); + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); local_irq_restore(flags); + free_delayed(&tofree); return p; } @@ -3130,6 +3185,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { struct kmem_cache_cpu *c; + LIST_HEAD(to_free); int i; /* memcg and kmem_cache debug support */ @@ -3162,7 +3218,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, - _RET_IP_, c); + _RET_IP_, c, &to_free); if (unlikely(!p[i])) goto error; @@ -3174,6 +3230,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } c->tid = next_tid(c->tid); local_irq_enable(); + free_delayed(&to_free); /* Clear memory outside IRQ disabled fastpath loop */ if (unlikely(flags & __GFP_ZERO)) { @@ -3188,6 +3245,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, return i; error: local_irq_enable(); + free_delayed(&to_free); slab_post_alloc_hook(s, flags, i, p); __kmem_cache_free_bulk(s, i, p); return 0; @@ -4237,6 +4295,12 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; + int cpu; + + for_each_possible_cpu(cpu) { + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); + } if (debug_guardpage_minorder()) slub_max_order = 0;