summaryrefslogtreecommitdiffstats
path: root/mm/shrinker.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mm/shrinker.c809
1 files changed, 809 insertions, 0 deletions
diff --git a/mm/shrinker.c b/mm/shrinker.c
new file mode 100644
index 0000000000..dc5d2a6fcf
--- /dev/null
+++ b/mm/shrinker.c
@@ -0,0 +1,809 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memcontrol.h>
+#include <linux/rwsem.h>
+#include <linux/shrinker.h>
+#include <linux/rculist.h>
+#include <trace/events/vmscan.h>
+
+#include "internal.h"
+
+LIST_HEAD(shrinker_list);
+DEFINE_MUTEX(shrinker_mutex);
+
+#ifdef CONFIG_MEMCG
+static int shrinker_nr_max;
+
+static inline int shrinker_unit_size(int nr_items)
+{
+ return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
+}
+
+static inline void shrinker_unit_free(struct shrinker_info *info, int start)
+{
+ struct shrinker_info_unit **unit;
+ int nr, i;
+
+ if (!info)
+ return;
+
+ unit = info->unit;
+ nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);
+
+ for (i = start; i < nr; i++) {
+ if (!unit[i])
+ break;
+
+ kfree(unit[i]);
+ unit[i] = NULL;
+ }
+}
+
+static inline int shrinker_unit_alloc(struct shrinker_info *new,
+ struct shrinker_info *old, int nid)
+{
+ struct shrinker_info_unit *unit;
+ int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
+ int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
+ int i;
+
+ for (i = start; i < nr; i++) {
+ unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
+ if (!unit) {
+ shrinker_unit_free(new, start);
+ return -ENOMEM;
+ }
+
+ new->unit[i] = unit;
+ }
+
+ return 0;
+}
+
+void free_shrinker_info(struct mem_cgroup *memcg)
+{
+ struct mem_cgroup_per_node *pn;
+ struct shrinker_info *info;
+ int nid;
+
+ for_each_node(nid) {
+ pn = memcg->nodeinfo[nid];
+ info = rcu_dereference_protected(pn->shrinker_info, true);
+ shrinker_unit_free(info, 0);
+ kvfree(info);
+ rcu_assign_pointer(pn->shrinker_info, NULL);
+ }
+}
+
+int alloc_shrinker_info(struct mem_cgroup *memcg)
+{
+ struct shrinker_info *info;
+ int nid, ret = 0;
+ int array_size = 0;
+
+ mutex_lock(&shrinker_mutex);
+ array_size = shrinker_unit_size(shrinker_nr_max);
+ for_each_node(nid) {
+ info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
+ if (!info)
+ goto err;
+ info->map_nr_max = shrinker_nr_max;
+ if (shrinker_unit_alloc(info, NULL, nid))
+ goto err;
+ rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
+ }
+ mutex_unlock(&shrinker_mutex);
+
+ return ret;
+
+err:
+ mutex_unlock(&shrinker_mutex);
+ free_shrinker_info(memcg);
+ return -ENOMEM;
+}
+
+static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
+ int nid)
+{
+ return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
+ lockdep_is_held(&shrinker_mutex));
+}
+
+static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
+ int old_size, int new_nr_max)
+{
+ struct shrinker_info *new, *old;
+ struct mem_cgroup_per_node *pn;
+ int nid;
+
+ for_each_node(nid) {
+ pn = memcg->nodeinfo[nid];
+ old = shrinker_info_protected(memcg, nid);
+ /* Not yet online memcg */
+ if (!old)
+ return 0;
+
+ /* Already expanded this shrinker_info */
+ if (new_nr_max <= old->map_nr_max)
+ continue;
+
+ new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
+ if (!new)
+ return -ENOMEM;
+
+ new->map_nr_max = new_nr_max;
+
+ memcpy(new->unit, old->unit, old_size);
+ if (shrinker_unit_alloc(new, old, nid)) {
+ kvfree(new);
+ return -ENOMEM;
+ }
+
+ rcu_assign_pointer(pn->shrinker_info, new);
+ kvfree_rcu(old, rcu);
+ }
+
+ return 0;
+}
+
+static int expand_shrinker_info(int new_id)
+{
+ int ret = 0;
+ int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
+ int new_size, old_size = 0;
+ struct mem_cgroup *memcg;
+
+ if (!root_mem_cgroup)
+ goto out;
+
+ lockdep_assert_held(&shrinker_mutex);
+
+ new_size = shrinker_unit_size(new_nr_max);
+ old_size = shrinker_unit_size(shrinker_nr_max);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ ret = expand_one_shrinker_info(memcg, new_size, old_size,
+ new_nr_max);
+ if (ret) {
+ mem_cgroup_iter_break(NULL, memcg);
+ goto out;
+ }
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+out:
+ if (!ret)
+ shrinker_nr_max = new_nr_max;
+
+ return ret;
+}
+
+static inline int shrinker_id_to_index(int shrinker_id)
+{
+ return shrinker_id / SHRINKER_UNIT_BITS;
+}
+
+static inline int shrinker_id_to_offset(int shrinker_id)
+{
+ return shrinker_id % SHRINKER_UNIT_BITS;
+}
+
+static inline int calc_shrinker_id(int index, int offset)
+{
+ return index * SHRINKER_UNIT_BITS + offset;
+}
+
+void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
+{
+ if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
+ struct shrinker_info *info;
+ struct shrinker_info_unit *unit;
+
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ unit = info->unit[shrinker_id_to_index(shrinker_id)];
+ if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
+ /* Pairs with smp mb in shrink_slab() */
+ smp_mb__before_atomic();
+ set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
+ }
+ rcu_read_unlock();
+ }
+}
+
+static DEFINE_IDR(shrinker_idr);
+
+static int shrinker_memcg_alloc(struct shrinker *shrinker)
+{
+ int id, ret = -ENOMEM;
+
+ if (mem_cgroup_disabled())
+ return -ENOSYS;
+
+ mutex_lock(&shrinker_mutex);
+ id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
+ if (id < 0)
+ goto unlock;
+
+ if (id >= shrinker_nr_max) {
+ if (expand_shrinker_info(id)) {
+ idr_remove(&shrinker_idr, id);
+ goto unlock;
+ }
+ }
+ shrinker->id = id;
+ ret = 0;
+unlock:
+ mutex_unlock(&shrinker_mutex);
+ return ret;
+}
+
+static void shrinker_memcg_remove(struct shrinker *shrinker)
+{
+ int id = shrinker->id;
+
+ BUG_ON(id < 0);
+
+ lockdep_assert_held(&shrinker_mutex);
+
+ idr_remove(&shrinker_idr, id);
+}
+
+static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ struct shrinker_info *info;
+ struct shrinker_info_unit *unit;
+ long nr_deferred;
+
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ unit = info->unit[shrinker_id_to_index(shrinker->id)];
+ nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
+ rcu_read_unlock();
+
+ return nr_deferred;
+}
+
+static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ struct shrinker_info *info;
+ struct shrinker_info_unit *unit;
+ long nr_deferred;
+
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ unit = info->unit[shrinker_id_to_index(shrinker->id)];
+ nr_deferred =
+ atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
+ rcu_read_unlock();
+
+ return nr_deferred;
+}
+
+void reparent_shrinker_deferred(struct mem_cgroup *memcg)
+{
+ int nid, index, offset;
+ long nr;
+ struct mem_cgroup *parent;
+ struct shrinker_info *child_info, *parent_info;
+ struct shrinker_info_unit *child_unit, *parent_unit;
+
+ parent = parent_mem_cgroup(memcg);
+ if (!parent)
+ parent = root_mem_cgroup;
+
+ /* Prevent from concurrent shrinker_info expand */
+ mutex_lock(&shrinker_mutex);
+ for_each_node(nid) {
+ child_info = shrinker_info_protected(memcg, nid);
+ parent_info = shrinker_info_protected(parent, nid);
+ for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
+ child_unit = child_info->unit[index];
+ parent_unit = parent_info->unit[index];
+ for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
+ nr = atomic_long_read(&child_unit->nr_deferred[offset]);
+ atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
+ }
+ }
+ }
+ mutex_unlock(&shrinker_mutex);
+}
+#else
+static int shrinker_memcg_alloc(struct shrinker *shrinker)
+{
+ return -ENOSYS;
+}
+
+static void shrinker_memcg_remove(struct shrinker *shrinker)
+{
+}
+
+static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+#endif /* CONFIG_MEMCG */
+
+static long xchg_nr_deferred(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ int nid = sc->nid;
+
+ if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+ nid = 0;
+
+ if (sc->memcg &&
+ (shrinker->flags & SHRINKER_MEMCG_AWARE))
+ return xchg_nr_deferred_memcg(nid, shrinker,
+ sc->memcg);
+
+ return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+}
+
+
+static long add_nr_deferred(long nr, struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ int nid = sc->nid;
+
+ if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+ nid = 0;
+
+ if (sc->memcg &&
+ (shrinker->flags & SHRINKER_MEMCG_AWARE))
+ return add_nr_deferred_memcg(nr, nid, shrinker,
+ sc->memcg);
+
+ return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
+}
+
+#define SHRINK_BATCH 128
+
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+ struct shrinker *shrinker, int priority)
+{
+ unsigned long freed = 0;
+ unsigned long long delta;
+ long total_scan;
+ long freeable;
+ long nr;
+ long new_nr;
+ long batch_size = shrinker->batch ? shrinker->batch
+ : SHRINK_BATCH;
+ long scanned = 0, next_deferred;
+
+ freeable = shrinker->count_objects(shrinker, shrinkctl);
+ if (freeable == 0 || freeable == SHRINK_EMPTY)
+ return freeable;
+
+ /*
+ * copy the current shrinker scan count into a local variable
+ * and zero it so that other concurrent shrinker invocations
+ * don't also do this scanning work.
+ */
+ nr = xchg_nr_deferred(shrinker, shrinkctl);
+
+ if (shrinker->seeks) {
+ delta = freeable >> priority;
+ delta *= 4;
+ do_div(delta, shrinker->seeks);
+ } else {
+ /*
+ * These objects don't require any IO to create. Trim
+ * them aggressively under memory pressure to keep
+ * them from causing refetches in the IO caches.
+ */
+ delta = freeable / 2;
+ }
+
+ total_scan = nr >> priority;
+ total_scan += delta;
+ total_scan = min(total_scan, (2 * freeable));
+
+ trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+ freeable, delta, total_scan, priority);
+
+ /*
+ * Normally, we should not scan less than batch_size objects in one
+ * pass to avoid too frequent shrinker calls, but if the slab has less
+ * than batch_size objects in total and we are really tight on memory,
+ * we will try to reclaim all available objects, otherwise we can end
+ * up failing allocations although there are plenty of reclaimable
+ * objects spread over several slabs with usage less than the
+ * batch_size.
+ *
+ * We detect the "tight on memory" situations by looking at the total
+ * number of objects we want to scan (total_scan). If it is greater
+ * than the total number of objects on slab (freeable), we must be
+ * scanning at high prio and therefore should try to reclaim as much as
+ * possible.
+ */
+ while (total_scan >= batch_size ||
+ total_scan >= freeable) {
+ unsigned long ret;
+ unsigned long nr_to_scan = min(batch_size, total_scan);
+
+ shrinkctl->nr_to_scan = nr_to_scan;
+ shrinkctl->nr_scanned = nr_to_scan;
+ ret = shrinker->scan_objects(shrinker, shrinkctl);
+ if (ret == SHRINK_STOP)
+ break;
+ freed += ret;
+
+ count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
+ total_scan -= shrinkctl->nr_scanned;
+ scanned += shrinkctl->nr_scanned;
+
+ cond_resched();
+ }
+
+ /*
+ * The deferred work is increased by any new work (delta) that wasn't
+ * done, decreased by old deferred work that was done now.
+ *
+ * And it is capped to two times of the freeable items.
+ */
+ next_deferred = max_t(long, (nr + delta - scanned), 0);
+ next_deferred = min(next_deferred, (2 * freeable));
+
+ /*
+ * move the unused scan count back into the shrinker in a
+ * manner that handles concurrent updates.
+ */
+ new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);
+
+ trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
+ return freed;
+}
+
+#ifdef CONFIG_MEMCG
+static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
+ struct mem_cgroup *memcg, int priority)
+{
+ struct shrinker_info *info;
+ unsigned long ret, freed = 0;
+ int offset, index = 0;
+
+ if (!mem_cgroup_online(memcg))
+ return 0;
+
+ /*
+ * lockless algorithm of memcg shrink.
+ *
+ * The shrinker_info may be freed asynchronously via RCU in the
+ * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used
+ * to ensure the existence of the shrinker_info.
+ *
+ * The shrinker_info_unit is never freed unless its corresponding memcg
+ * is destroyed. Here we already hold the refcount of memcg, so the
+ * memcg will not be destroyed, and of course shrinker_info_unit will
+ * not be freed.
+ *
+ * So in the memcg shrink:
+ * step 1: use rcu_read_lock() to guarantee existence of the
+ * shrinker_info.
+ * step 2: after getting shrinker_info_unit we can safely release the
+ * RCU lock.
+ * step 3: traverse the bitmap and calculate shrinker_id
+ * step 4: use rcu_read_lock() to guarantee existence of the shrinker.
+ * step 5: use shrinker_id to find the shrinker, then use
+ * shrinker_try_get() to guarantee existence of the shrinker,
+ * then we can release the RCU lock to do do_shrink_slab() that
+ * may sleep.
+ * step 6: do shrinker_put() paired with step 5 to put the refcount,
+ * if the refcount reaches 0, then wake up the waiter in
+ * shrinker_free() by calling complete().
+ * Note: here is different from the global shrink, we don't
+ * need to acquire the RCU lock to guarantee existence of
+ * the shrinker, because we don't need to use this
+ * shrinker to traverse the next shrinker in the bitmap.
+ * step 7: we have already exited the read-side of rcu critical section
+ * before calling do_shrink_slab(), the shrinker_info may be
+ * released in expand_one_shrinker_info(), so go back to step 1
+ * to reacquire the shrinker_info.
+ */
+again:
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ if (unlikely(!info))
+ goto unlock;
+
+ if (index < shrinker_id_to_index(info->map_nr_max)) {
+ struct shrinker_info_unit *unit;
+
+ unit = info->unit[index];
+
+ rcu_read_unlock();
+
+ for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
+ struct shrink_control sc = {
+ .gfp_mask = gfp_mask,
+ .nid = nid,
+ .memcg = memcg,
+ };
+ struct shrinker *shrinker;
+ int shrinker_id = calc_shrinker_id(index, offset);
+
+ rcu_read_lock();
+ shrinker = idr_find(&shrinker_idr, shrinker_id);
+ if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
+ clear_bit(offset, unit->map);
+ rcu_read_unlock();
+ continue;
+ }
+ rcu_read_unlock();
+
+ /* Call non-slab shrinkers even though kmem is disabled */
+ if (!memcg_kmem_online() &&
+ !(shrinker->flags & SHRINKER_NONSLAB))
+ continue;
+
+ ret = do_shrink_slab(&sc, shrinker, priority);
+ if (ret == SHRINK_EMPTY) {
+ clear_bit(offset, unit->map);
+ /*
+ * After the shrinker reported that it had no objects to
+ * free, but before we cleared the corresponding bit in
+ * the memcg shrinker map, a new object might have been
+ * added. To make sure, we have the bit set in this
+ * case, we invoke the shrinker one more time and reset
+ * the bit if it reports that it is not empty anymore.
+ * The memory barrier here pairs with the barrier in
+ * set_shrinker_bit():
+ *
+ * list_lru_add() shrink_slab_memcg()
+ * list_add_tail() clear_bit()
+ * <MB> <MB>
+ * set_bit() do_shrink_slab()
+ */
+ smp_mb__after_atomic();
+ ret = do_shrink_slab(&sc, shrinker, priority);
+ if (ret == SHRINK_EMPTY)
+ ret = 0;
+ else
+ set_shrinker_bit(memcg, nid, shrinker_id);
+ }
+ freed += ret;
+ shrinker_put(shrinker);
+ }
+
+ index++;
+ goto again;
+ }
+unlock:
+ rcu_read_unlock();
+ return freed;
+}
+#else /* !CONFIG_MEMCG */
+static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
+ struct mem_cgroup *memcg, int priority)
+{
+ return 0;
+}
+#endif /* CONFIG_MEMCG */
+
+/**
+ * shrink_slab - shrink slab caches
+ * @gfp_mask: allocation context
+ * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
+ * @priority: the reclaim priority
+ *
+ * Call the shrink functions to age shrinkable caches.
+ *
+ * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
+ * unaware shrinkers will receive a node id of 0 instead.
+ *
+ * @memcg specifies the memory cgroup to target. Unaware shrinkers
+ * are called only if it is the root cgroup.
+ *
+ * @priority is sc->priority, we take the number of objects and >> by priority
+ * in order to get the scan target.
+ *
+ * Returns the number of reclaimed slab objects.
+ */
+unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
+ int priority)
+{
+ unsigned long ret, freed = 0;
+ struct shrinker *shrinker;
+
+ /*
+ * The root memcg might be allocated even though memcg is disabled
+ * via "cgroup_disable=memory" boot parameter. This could make
+ * mem_cgroup_is_root() return false, then just run memcg slab
+ * shrink, but skip global shrink. This may result in premature
+ * oom.
+ */
+ if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
+ return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
+
+ /*
+ * lockless algorithm of global shrink.
+ *
+ * In the unregistration setp, the shrinker will be freed asynchronously
+ * via RCU after its refcount reaches 0. So both rcu_read_lock() and
+ * shrinker_try_get() can be used to ensure the existence of the shrinker.
+ *
+ * So in the global shrink:
+ * step 1: use rcu_read_lock() to guarantee existence of the shrinker
+ * and the validity of the shrinker_list walk.
+ * step 2: use shrinker_try_get() to try get the refcount, if successful,
+ * then the existence of the shrinker can also be guaranteed,
+ * so we can release the RCU lock to do do_shrink_slab() that
+ * may sleep.
+ * step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(),
+ * which ensures that neither this shrinker nor the next shrinker
+ * will be freed in the next traversal operation.
+ * step 4: do shrinker_put() paired with step 2 to put the refcount,
+ * if the refcount reaches 0, then wake up the waiter in
+ * shrinker_free() by calling complete().
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
+ struct shrink_control sc = {
+ .gfp_mask = gfp_mask,
+ .nid = nid,
+ .memcg = memcg,
+ };
+
+ if (!shrinker_try_get(shrinker))
+ continue;
+
+ rcu_read_unlock();
+
+ ret = do_shrink_slab(&sc, shrinker, priority);
+ if (ret == SHRINK_EMPTY)
+ ret = 0;
+ freed += ret;
+
+ rcu_read_lock();
+ shrinker_put(shrinker);
+ }
+
+ rcu_read_unlock();
+ cond_resched();
+ return freed;
+}
+
+struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
+{
+ struct shrinker *shrinker;
+ unsigned int size;
+ va_list ap;
+ int err;
+
+ shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
+ if (!shrinker)
+ return NULL;
+
+ va_start(ap, fmt);
+ err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
+ va_end(ap);
+ if (err)
+ goto err_name;
+
+ shrinker->flags = flags | SHRINKER_ALLOCATED;
+ shrinker->seeks = DEFAULT_SEEKS;
+
+ if (flags & SHRINKER_MEMCG_AWARE) {
+ err = shrinker_memcg_alloc(shrinker);
+ if (err == -ENOSYS) {
+ /* Memcg is not supported, fallback to non-memcg-aware shrinker. */
+ shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
+ goto non_memcg;
+ }
+
+ if (err)
+ goto err_flags;
+
+ return shrinker;
+ }
+
+non_memcg:
+ /*
+ * The nr_deferred is available on per memcg level for memcg aware
+ * shrinkers, so only allocate nr_deferred in the following cases:
+ * - non-memcg-aware shrinkers
+ * - !CONFIG_MEMCG
+ * - memcg is disabled by kernel command line
+ */
+ size = sizeof(*shrinker->nr_deferred);
+ if (flags & SHRINKER_NUMA_AWARE)
+ size *= nr_node_ids;
+
+ shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+ if (!shrinker->nr_deferred)
+ goto err_flags;
+
+ return shrinker;
+
+err_flags:
+ shrinker_debugfs_name_free(shrinker);
+err_name:
+ kfree(shrinker);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(shrinker_alloc);
+
+void shrinker_register(struct shrinker *shrinker)
+{
+ if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
+ pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker");
+ return;
+ }
+
+ mutex_lock(&shrinker_mutex);
+ list_add_tail_rcu(&shrinker->list, &shrinker_list);
+ shrinker->flags |= SHRINKER_REGISTERED;
+ shrinker_debugfs_add(shrinker);
+ mutex_unlock(&shrinker_mutex);
+
+ init_completion(&shrinker->done);
+ /*
+ * Now the shrinker is fully set up, take the first reference to it to
+ * indicate that lookup operations are now allowed to use it via
+ * shrinker_try_get().
+ */
+ refcount_set(&shrinker->refcount, 1);
+}
+EXPORT_SYMBOL_GPL(shrinker_register);
+
+static void shrinker_free_rcu_cb(struct rcu_head *head)
+{
+ struct shrinker *shrinker = container_of(head, struct shrinker, rcu);
+
+ kfree(shrinker->nr_deferred);
+ kfree(shrinker);
+}
+
+void shrinker_free(struct shrinker *shrinker)
+{
+ struct dentry *debugfs_entry = NULL;
+ int debugfs_id;
+
+ if (!shrinker)
+ return;
+
+ if (shrinker->flags & SHRINKER_REGISTERED) {
+ /* drop the initial refcount */
+ shrinker_put(shrinker);
+ /*
+ * Wait for all lookups of the shrinker to complete, after that,
+ * no shrinker is running or will run again, then we can safely
+ * free it asynchronously via RCU and safely free the structure
+ * where the shrinker is located, such as super_block etc.
+ */
+ wait_for_completion(&shrinker->done);
+ }
+
+ mutex_lock(&shrinker_mutex);
+ if (shrinker->flags & SHRINKER_REGISTERED) {
+ /*
+ * Now we can safely remove it from the shrinker_list and then
+ * free it.
+ */
+ list_del_rcu(&shrinker->list);
+ debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
+ shrinker->flags &= ~SHRINKER_REGISTERED;
+ }
+
+ shrinker_debugfs_name_free(shrinker);
+
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+ shrinker_memcg_remove(shrinker);
+ mutex_unlock(&shrinker_mutex);
+
+ if (debugfs_entry)
+ shrinker_debugfs_remove(debugfs_entry, debugfs_id);
+
+ call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
+}
+EXPORT_SYMBOL_GPL(shrinker_free);