From: Thomas Gleixner Date: Wed, 26 Jun 2019 13:35:36 +0200 Subject: [PATCH 276/351] futex: Delay deallocation of pi_state Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=45894c2eaf4378a9492d096c550577887d9bce85 [ Upstream commit d7c7cf8cb68b7df17e6e50be1f25f35d83e686c7 ] On -RT we can't invoke kfree() in a non-preemptible context. Defer the deallocation of pi_state to preemptible context. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- kernel/futex.c | 55 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 8a49dd71b233..2fc6bedb460e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -871,13 +871,13 @@ static void get_pi_state(struct futex_pi_state *pi_state) * Drops a reference to the pi_state object and frees or caches it * when the last reference is gone. */ -static void put_pi_state(struct futex_pi_state *pi_state) +static struct futex_pi_state *__put_pi_state(struct futex_pi_state *pi_state) { if (!pi_state) - return; + return NULL; if (!atomic_dec_and_test(&pi_state->refcount)) - return; + return NULL; /* * If pi_state->owner is NULL, the owner is most probably dying @@ -892,9 +892,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } - if (current->pi_state_cache) { - kfree(pi_state); - } else { + if (!current->pi_state_cache) { /* * pi_state->list is already empty. * clear pi_state->owner. @@ -903,6 +901,30 @@ static void put_pi_state(struct futex_pi_state *pi_state) pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); current->pi_state_cache = pi_state; + pi_state = NULL; + } + return pi_state; +} + +static void put_pi_state(struct futex_pi_state *pi_state) +{ + kfree(__put_pi_state(pi_state)); +} + +static void put_pi_state_atomic(struct futex_pi_state *pi_state, + struct list_head *to_free) +{ + if (__put_pi_state(pi_state)) + list_add(&pi_state->list, to_free); +} + +static void free_pi_state_list(struct list_head *to_free) +{ + struct futex_pi_state *p, *next; + + list_for_each_entry_safe(p, next, to_free, list) { + list_del(&p->list); + kfree(p); } } @@ -919,6 +941,7 @@ static void exit_pi_state_list(struct task_struct *curr) struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; union futex_key key = FUTEX_KEY_INIT; + LIST_HEAD(to_free); if (!futex_cmpxchg_enabled) return; @@ -963,7 +986,7 @@ static void exit_pi_state_list(struct task_struct *curr) /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); raw_spin_unlock(&hb->lock); - put_pi_state(pi_state); + put_pi_state_atomic(pi_state, &to_free); continue; } @@ -982,6 +1005,8 @@ static void exit_pi_state_list(struct task_struct *curr) raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); + + free_pi_state_list(&to_free); } #else static inline void exit_pi_state_list(struct task_struct *curr) { } @@ -2017,6 +2042,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; DEFINE_WAKE_Q(wake_q); + LIST_HEAD(to_free); if (nr_wake < 0 || nr_requeue < 0) return -EINVAL; @@ -2265,7 +2291,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * object. */ this->pi_state = NULL; - put_pi_state(pi_state); + put_pi_state_atomic(pi_state, &to_free); /* * We stop queueing more waiters and let user * space deal with the mess. @@ -2282,7 +2308,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We * need to drop it here again. */ - put_pi_state(pi_state); + put_pi_state_atomic(pi_state, &to_free); out_unlock: double_unlock_hb(hb1, hb2); @@ -2303,6 +2329,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, out_put_key1: put_futex_key(&key1); out: + free_pi_state_list(&to_free); return ret ? ret : task_count; } @@ -2439,13 +2466,16 @@ static int unqueue_me(struct futex_q *q) static void unqueue_me_pi(struct futex_q *q) __releases(q->lock_ptr) { + struct futex_pi_state *ps; + __unqueue_futex(q); BUG_ON(!q->pi_state); - put_pi_state(q->pi_state); + ps = __put_pi_state(q->pi_state); q->pi_state = NULL; raw_spin_unlock(q->lock_ptr); + kfree(ps); } static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, @@ -3394,14 +3424,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { + struct futex_pi_state *ps_free; + raw_spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. */ - put_pi_state(q.pi_state); + ps_free = __put_pi_state(q.pi_state); spin_unlock(&hb2->lock); + kfree(ps_free); /* * Adjust the return value. It's either -EFAULT or * success (1) but the caller expects 0 for success.