From: Sebastian Andrzej Siewior
Date: Thu, 12 Oct 2017 17:34:38 +0200
Subject: [PATCH 148/353] rtmutex: add ww_mutex addon for mutex-rt
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=5638b534fffb7e1464776909b8ea609101dceddb

Signed-off-by: Sebastian Andrzej Siewior
---
 kernel/locking/rtmutex.c        | 271 ++++++++++++++++++++++++++++++--
 kernel/locking/rtmutex_common.h |   2 +
 kernel/locking/rwsem-rt.c       |   2 +-
 3 files changed, 261 insertions(+), 14 deletions(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 5ff2189f1199..cc47d5aa93a1 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -23,6 +23,7 @@
 #include <linux/sched/wake_q.h>
 #include <linux/sched/debug.h>
 #include <linux/timer.h>
+#include <linux/ww_mutex.h>
 
 #include "rtmutex_common.h"
 
@@ -1245,6 +1246,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
 
 #endif /* PREEMPT_RT_FULL */
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+	static inline int __sched
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
+
+	if (!hold_ctx)
+		return 0;
+
+	if (unlikely(ctx == hold_ctx))
+		return -EALREADY;
+
+	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
+	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+#ifdef CONFIG_DEBUG_MUTEXES
+		DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
+		ctx->contending_lock = ww;
+#endif
+		return -EDEADLK;
+	}
+
+	return 0;
+}
+#else
+	static inline int __sched
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	BUG();
+	return 0;
+}
+
+#endif
+
 static inline int try_to_take_rt_mutex(struct rt_mutex *lock,
 				       struct task_struct *task,
 				       struct rt_mutex_waiter *waiter)
@@ -1523,7 +1558,8 @@ void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		    struct hrtimer_sleeper *timeout,
-		    struct rt_mutex_waiter *waiter)
+		    struct rt_mutex_waiter *waiter,
+		    struct ww_acquire_ctx *ww_ctx)
 {
 	int ret = 0;
 
@@ -1541,6 +1577,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 			break;
 		}
 
+		if (ww_ctx && ww_ctx->acquired > 0) {
+			ret = __mutex_lock_check_stamp(lock, ww_ctx);
+			if (ret)
+				break;
+		}
+
 		raw_spin_unlock_irq(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(waiter);
@@ -1575,16 +1617,106 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
 	}
 }
 
+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
+						   struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+	/*
+	 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
+	 * but released with a normal mutex_unlock in this call.
+	 *
+	 * This should never happen, always use ww_mutex_unlock.
+	 */
+	DEBUG_LOCKS_WARN_ON(ww->ctx);
+
+	/*
+	 * Not quite done after calling ww_acquire_done() ?
+	 */
+	DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
+
+	if (ww_ctx->contending_lock) {
+		/*
+		 * After -EDEADLK you tried to
+		 * acquire a different ww_mutex? Bad!
+		 */
+		DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
+
+		/*
+		 * You called ww_mutex_lock after receiving -EDEADLK,
+		 * but 'forgot' to unlock everything else first?
+		 */
+		DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
+		ww_ctx->contending_lock = NULL;
+	}
+
+	/*
+	 * Naughty, using a different class will lead to undefined behavior!
+ */ + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); +#endif + ww_ctx->acquired++; +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static void ww_mutex_account_lock(struct rt_mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); + struct rt_mutex_waiter *waiter, *n; + + /* + * This branch gets optimized out for the common case, + * and is only important for ww_mutex_lock. + */ + ww_mutex_lock_acquired(ww, ww_ctx); + ww->ctx = ww_ctx; + + /* + * Give any possible sleeping processes the chance to wake up, + * so they can recheck if they have to back off. + */ + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, + tree_entry) { + /* XXX debug rt mutex waiter wakeup */ + + BUG_ON(waiter->lock != lock); + rt_mutex_wake_waiter(waiter); + } +} + +#else + +static void ww_mutex_account_lock(struct rt_mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + BUG(); +} +#endif + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx, struct rt_mutex_waiter *waiter) { int ret; +#ifdef CONFIG_PREEMPT_RT_FULL + if (ww_ctx) { + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base.lock); + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) + return -EALREADY; + } +#endif + /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock, current, NULL)) + if (try_to_take_rt_mutex(lock, current, NULL)) { + if (ww_ctx) + ww_mutex_account_lock(lock, ww_ctx); return 0; + } set_current_state(state); @@ -1594,14 +1726,24 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - if (likely(!ret)) + if (likely(!ret)) { /* sleep on the mutex */ - ret = __rt_mutex_slowlock(lock, state, timeout, waiter); + ret = __rt_mutex_slowlock(lock, state, timeout, waiter, + ww_ctx); + } else if (ww_ctx) { + /* ww_mutex received EDEADLK, let it become EALREADY */ + ret = __mutex_lock_check_stamp(lock, ww_ctx); + BUG_ON(!ret); + } if (unlikely(ret)) { __set_current_state(TASK_RUNNING); remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); + /* ww_mutex wants to report EDEADLK/EALREADY, let it */ + if (!ww_ctx) + rt_mutex_handle_deadlock(ret, chwalk, waiter); + } else if (ww_ctx) { + ww_mutex_account_lock(lock, ww_ctx); } /* @@ -1618,7 +1760,8 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk) + enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx) { struct rt_mutex_waiter waiter; unsigned long flags; @@ -1636,7 +1779,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, */ raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); + ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, + &waiter); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); @@ -1766,29 +1910,33 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, + struct ww_acquire_ctx *ww_ctx, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk)) + enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx)) { if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 
return 0; - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); } static inline int rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk)) + enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx)) { if (chwalk == RT_MUTEX_MIN_CHAINWALK && likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 0; - return slowfn(lock, state, timeout, chwalk); + return slowfn(lock, state, timeout, chwalk, ww_ctx); } static inline int @@ -1833,7 +1981,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock, int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) { might_sleep(); - return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); + return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); } /** @@ -1953,6 +2101,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, RT_MUTEX_MIN_CHAINWALK, + NULL, rt_mutex_slowlock); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); @@ -2299,7 +2448,7 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. @@ -2384,3 +2533,99 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, return cleanup; } + +static inline int +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned tmp; + + if (ctx->deadlock_inject_countdown-- == 0) { + tmp = ctx->deadlock_inject_interval; + if (tmp > UINT_MAX/4) + tmp = UINT_MAX; + else + tmp = tmp*2 + tmp + tmp/2; + + ctx->deadlock_inject_interval = tmp; + ctx->deadlock_inject_countdown = tmp; + ctx->contending_lock = lock; + + ww_mutex_unlock(lock); + + return -EDEADLK; + } +#endif + + return 0; +} + +#ifdef CONFIG_PREEMPT_RT_FULL +int __sched +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + mutex_acquire_nest(&lock->base.dep_map, 0, 0, + ctx ? &ctx->dep_map : NULL, _RET_IP_); + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, + ctx); + if (ret) + mutex_release(&lock->base.dep_map, 1, _RET_IP_); + else if (!ret && ctx && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); + +int __sched +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + mutex_acquire_nest(&lock->base.dep_map, 0, 0, + ctx ? 
&ctx->dep_map : NULL, _RET_IP_); + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, + ctx); + if (ret) + mutex_release(&lock->base.dep_map, 1, _RET_IP_); + else if (!ret && ctx && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock); + +void __sched ww_mutex_unlock(struct ww_mutex *lock) +{ + int nest = !!lock->ctx; + + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ + if (nest) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); +#endif + if (lock->ctx->acquired > 0) + lock->ctx->acquired--; + lock->ctx = NULL; + } + + mutex_release(&lock->base.dep_map, nest, _RET_IP_); + __rt_mutex_unlock(&lock->base.lock); +} +EXPORT_SYMBOL(ww_mutex_unlock); + +int __rt_mutex_owner_current(struct rt_mutex *lock) +{ + return rt_mutex_owner(lock) == current; +} +EXPORT_SYMBOL(__rt_mutex_owner_current); +#endif diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 7c1dd1bd95c7..8e0c592273e6 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -164,6 +164,7 @@ extern void rt_mutex_postunlock(struct wake_q_head *wake_q, struct wake_q_head *wake_sleeper_q); /* RW semaphore special interface */ +struct ww_acquire_ctx; extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); extern int __rt_mutex_trylock(struct rt_mutex *lock); @@ -171,6 +172,7 @@ extern void __rt_mutex_unlock(struct rt_mutex *lock); int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx, struct rt_mutex_waiter *waiter); void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c index 7d3c5cf3d23d..660e22caf709 100644 --- a/kernel/locking/rwsem-rt.c +++ b/kernel/locking/rwsem-rt.c @@ -131,7 +131,7 @@ static int __sched __down_read_common(struct rw_semaphore *sem, int state) */ rt_mutex_init_waiter(&waiter, false); ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, - &waiter); + NULL, &waiter); /* * The slowlock() above is guaranteed to return with the rtmutex (for * ret = 0) is now held, so there can't be a writer active. Increment
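
For reference, the ww_mutex interface that this patch maps onto rtmutex is used in the
wait/wound pattern described in Documentation/locking/ww-mutex-design.txt. Below is a
minimal sketch of a caller taking two locks of the same class; the class, lock and
function names (demo_ww_class, demo_a, demo_b, demo_init, demo_lock_both) are made up
for illustration and are not part of this patch:

	#include <linux/ww_mutex.h>

	static DEFINE_WW_CLASS(demo_ww_class);	/* made-up class */
	static struct ww_mutex demo_a;		/* made-up locks */
	static struct ww_mutex demo_b;

	static void demo_init(void)
	{
		/* Locks taken under one acquire context must share a ww_class. */
		ww_mutex_init(&demo_a, &demo_ww_class);
		ww_mutex_init(&demo_b, &demo_ww_class);
	}

	static int demo_lock_both(void)
	{
		struct ww_acquire_ctx ctx;
		int err;

		ww_acquire_init(&ctx, &demo_ww_class);

		err = ww_mutex_lock(&demo_a, &ctx);
		if (err)
			goto out_fini;

		err = ww_mutex_lock(&demo_b, &ctx);
		if (err == -EDEADLK) {
			/*
			 * An older context holds demo_b, so this (younger) one
			 * has to back off: drop everything, wait for demo_b
			 * without a deadlock check, then reacquire demo_a under
			 * the same context. A general-purpose caller would loop
			 * here; see the ww-mutex documentation.
			 */
			ww_mutex_unlock(&demo_a);
			ww_mutex_lock_slow(&demo_b, &ctx);
			err = ww_mutex_lock(&demo_a, &ctx);
			if (err) {
				ww_mutex_unlock(&demo_b);
				goto out_fini;
			}
		} else if (err) {
			ww_mutex_unlock(&demo_a);
			goto out_fini;
		}

		ww_acquire_done(&ctx);

		/* ... both locks are held, do the actual work here ... */

		ww_mutex_unlock(&demo_b);
		ww_mutex_unlock(&demo_a);
	out_fini:
		ww_acquire_fini(&ctx);
		return err;
	}

ww_mutex_lock_slow() is simply ww_mutex_lock() called with no other lock held in the
context, so the ww_ctx->acquired > 0 stamp check added to __rt_mutex_slowlock() above
never fires for it and the waiter just blocks until the current owner drops the lock.
Conversely, the waiter wake-up in ww_mutex_account_lock() is what gives an already
sleeping, younger context the chance to notice the new owner and back off with -EDEADLK.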