From 47ba586f2ade0d9f8cc9281cb08529146bfc3c5d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 12 Oct 2017 17:34:38 +0200 Subject: [PATCH 176/323] locking/rtmutex: add ww_mutex addon for mutex-rt Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz Signed-off-by: Sebastian Andrzej Siewior --- include/linux/mutex.h | 8 - include/linux/ww_mutex.h | 8 + kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++-- kernel/locking/rtmutex_common.h | 2 + kernel/locking/rwsem-rt.c | 2 +- 5 files changed, 262 insertions(+), 20 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index e45774a337d2..90923d3008fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -82,14 +82,6 @@ struct mutex { struct ww_class; struct ww_acquire_ctx; -struct ww_mutex { - struct mutex base; - struct ww_acquire_ctx *ctx; -#ifdef CONFIG_DEBUG_MUTEXES - struct ww_class *ww_class; -#endif -}; - /* * This is the control structure for tasks blocked on mutex, * which resides on the blocked task's kernel stack: diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 6ecf2a0220db..3145de598645 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -28,6 +28,14 @@ struct ww_class { unsigned int is_wait_die; }; +struct ww_mutex { + struct mutex base; + struct ww_acquire_ctx *ctx; +#ifdef CONFIG_DEBUG_MUTEXES + struct ww_class *ww_class; +#endif +}; + struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1bf7a04688c5..c095d1b92f70 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "rtmutex_common.h" @@ -1234,6 +1235,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); #endif /* PREEMPT_RT */ +#ifdef CONFIG_PREEMPT_RT + static inline int __sched +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) +{ + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); + struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); + + if (!hold_ctx) + return 0; + + if (unlikely(ctx == hold_ctx)) + return -EALREADY; + + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); + ctx->contending_lock = ww; +#endif + return -EDEADLK; + } + + return 0; +} +#else + static inline int __sched +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) +{ + BUG(); + return 0; +} + +#endif + static inline int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter) @@ -1512,7 +1547,8 @@ void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - struct rt_mutex_waiter *waiter) + struct rt_mutex_waiter *waiter, + struct ww_acquire_ctx *ww_ctx) { int ret = 0; @@ -1530,6 +1566,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, break; } + if (ww_ctx && ww_ctx->acquired > 0) { + ret = __mutex_lock_check_stamp(lock, ww_ctx); + if (ret) + break; + } + raw_spin_unlock_irq(&lock->wait_lock); schedule(); @@ -1558,16 +1600,106 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, } } +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + /* + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, + * but released with a normal mutex_unlock in this call. + * + * This should never happen, always use ww_mutex_unlock. + */ + DEBUG_LOCKS_WARN_ON(ww->ctx); + + /* + * Not quite done after calling ww_acquire_done() ? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); + + if (ww_ctx->contending_lock) { + /* + * After -EDEADLK you tried to + * acquire a different ww_mutex? Bad! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); + + /* + * You called ww_mutex_lock after receiving -EDEADLK, + * but 'forgot' to unlock everything else first? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); + ww_ctx->contending_lock = NULL; + } + + /* + * Naughty, using a different class will lead to undefined behavior! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); +#endif + ww_ctx->acquired++; +} + +#ifdef CONFIG_PREEMPT_RT +static void ww_mutex_account_lock(struct rt_mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); + struct rt_mutex_waiter *waiter, *n; + + /* + * This branch gets optimized out for the common case, + * and is only important for ww_mutex_lock. + */ + ww_mutex_lock_acquired(ww, ww_ctx); + ww->ctx = ww_ctx; + + /* + * Give any possible sleeping processes the chance to wake up, + * so they can recheck if they have to back off. + */ + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, + tree_entry) { + /* XXX debug rt mutex waiter wakeup */ + + BUG_ON(waiter->lock != lock); + rt_mutex_wake_waiter(waiter); + } +} + +#else + +static void ww_mutex_account_lock(struct rt_mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + BUG(); +} +#endif + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx, struct rt_mutex_waiter *waiter) { int ret; +#ifdef CONFIG_PREEMPT_RT + if (ww_ctx) { + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base.lock); + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) + return -EALREADY; + } +#endif + /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock, current, NULL)) + if (try_to_take_rt_mutex(lock, current, NULL)) { + if (ww_ctx) + ww_mutex_account_lock(lock, ww_ctx); return 0; + } set_current_state(state); @@ -1577,14 +1709,24 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - if (likely(!ret)) + if (likely(!ret)) { /* sleep on the mutex */ - ret = __rt_mutex_slowlock(lock, state, timeout, waiter); + ret = __rt_mutex_slowlock(lock, state, timeout, waiter, + ww_ctx); + } else if (ww_ctx) { + /* ww_mutex received EDEADLK, let it become EALREADY */ + ret = __mutex_lock_check_stamp(lock, ww_ctx); + BUG_ON(!ret); + } if (unlikely(ret)) { __set_current_state(TASK_RUNNING); remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); + /* ww_mutex wants to report EDEADLK/EALREADY, let it */ + if (!ww_ctx) + rt_mutex_handle_deadlock(ret, chwalk, waiter); + } else if (ww_ctx) { + ww_mutex_account_lock(lock, ww_ctx); } /* @@ -1601,7 +1743,8 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk) + enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx) { struct rt_mutex_waiter waiter; unsigned long flags; @@ -1619,7 +1762,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, */ raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); + ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, + &waiter); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); @@ -1749,14 +1893,16 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, + struct ww_acquire_ctx *ww_ctx, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk)) + enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx)) { if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 0; - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); } static inline int @@ -1801,7 +1947,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock, int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) { might_sleep(); - return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); + return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); } /** @@ -2245,7 +2391,7 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. @@ -2315,3 +2461,97 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, return cleanup; } + +static inline int +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned int tmp; + + if (ctx->deadlock_inject_countdown-- == 0) { + tmp = ctx->deadlock_inject_interval; + if (tmp > UINT_MAX/4) + tmp = UINT_MAX; + else + tmp = tmp*2 + tmp + tmp/2; + + ctx->deadlock_inject_interval = tmp; + ctx->deadlock_inject_countdown = tmp; + ctx->contending_lock = lock; + + ww_mutex_unlock(lock); + + return -EDEADLK; + } +#endif + + return 0; +} + +#ifdef CONFIG_PREEMPT_RT +int __sched +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + mutex_acquire_nest(&lock->base.dep_map, 0, 0, + ctx ? &ctx->dep_map : NULL, _RET_IP_); + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, + ctx); + if (ret) + mutex_release(&lock->base.dep_map, _RET_IP_); + else if (!ret && ctx && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); + +int __sched +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + mutex_acquire_nest(&lock->base.dep_map, 0, 0, + ctx ? &ctx->dep_map : NULL, _RET_IP_); + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, + ctx); + if (ret) + mutex_release(&lock->base.dep_map, _RET_IP_); + else if (!ret && ctx && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock); + +void __sched ww_mutex_unlock(struct ww_mutex *lock) +{ + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ + if (lock->ctx) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); +#endif + if (lock->ctx->acquired > 0) + lock->ctx->acquired--; + lock->ctx = NULL; + } + + mutex_release(&lock->base.dep_map, _RET_IP_); + __rt_mutex_unlock(&lock->base.lock); +} +EXPORT_SYMBOL(ww_mutex_unlock); + +int __rt_mutex_owner_current(struct rt_mutex *lock) +{ + return rt_mutex_owner(lock) == current; +} +EXPORT_SYMBOL(__rt_mutex_owner_current); +#endif diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index c1a280167e3c..248a7d91583b 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -159,6 +159,7 @@ extern void rt_mutex_postunlock(struct wake_q_head *wake_q, struct wake_q_head *wake_sleeper_q); /* RW semaphore special interface */ +struct ww_acquire_ctx; extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); extern int __rt_mutex_trylock(struct rt_mutex *lock); @@ -166,6 +167,7 @@ extern void __rt_mutex_unlock(struct rt_mutex *lock); int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx, struct rt_mutex_waiter *waiter); void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c index a0771c150041..274172d5bb3a 100644 --- a/kernel/locking/rwsem-rt.c +++ b/kernel/locking/rwsem-rt.c @@ -138,7 +138,7 @@ static int __sched __down_read_common(struct rw_semaphore *sem, int state) */ rt_mutex_init_waiter(&waiter, false); ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, - &waiter); + NULL, &waiter); /* * The slowlock() above is guaranteed to return with the rtmutex (for * ret = 0) is now held, so there can't be a writer active. Increment -- 2.43.0