Diffstat
-rw-r--r-- | debian/patches-rt/0170-locking-rtmutex-add-sleeping-lock-implementation.patch | 1214 |
1 file changed, 1214 insertions, 0 deletions
diff --git a/debian/patches-rt/0170-locking-rtmutex-add-sleeping-lock-implementation.patch b/debian/patches-rt/0170-locking-rtmutex-add-sleeping-lock-implementation.patch new file mode 100644 index 000000000..e8755a734 --- /dev/null +++ b/debian/patches-rt/0170-locking-rtmutex-add-sleeping-lock-implementation.patch @@ -0,0 +1,1214 @@ +From 4c85c347eeb7dae59269d3a4a07388a00de563bf Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 12 Oct 2017 17:11:19 +0200 +Subject: [PATCH 170/323] locking/rtmutex: add sleeping lock implementation +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com> +--- + include/linux/kernel.h | 5 + + include/linux/preempt.h | 4 + + include/linux/rtmutex.h | 19 +- + include/linux/sched.h | 7 + + include/linux/sched/wake_q.h | 13 +- + include/linux/spinlock_rt.h | 155 +++++++++++ + include/linux/spinlock_types_rt.h | 38 +++ + kernel/futex/core.c | 10 +- + kernel/locking/rtmutex.c | 444 +++++++++++++++++++++++++++--- + kernel/locking/rtmutex_common.h | 14 +- + kernel/sched/core.c | 39 ++- + 11 files changed, 693 insertions(+), 55 deletions(-) + create mode 100644 include/linux/spinlock_rt.h + create mode 100644 include/linux/spinlock_types_rt.h + +diff --git a/include/linux/kernel.h b/include/linux/kernel.h +index 55d48d5627c7..7b4fdd5b2f7b 100644 +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -223,6 +223,10 @@ extern void __cant_migrate(const char *file, int line); + */ + # define might_sleep() \ + do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) ++ ++# define might_sleep_no_state_check() \ ++ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) ++ + /** + * cant_sleep - annotation for functions that cannot sleep + * +@@ -266,6 +270,7 @@ extern void __cant_migrate(const char *file, int line); + static inline void __might_sleep(const char *file, int line, + int preempt_offset) { } + # define might_sleep() do { might_resched(); } while (0) ++# define might_sleep_no_state_check() do { might_resched(); } while (0) + # define cant_sleep() do { } while (0) + # define cant_migrate() do { } while (0) + # define sched_annotate_sleep() do { } while (0) +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 9881eac0698f..4d244e295e85 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -121,7 +121,11 @@ + /* + * The preempt_count offset after spin_lock() + */ ++#if !defined(CONFIG_PREEMPT_RT) + #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET ++#else ++#define PREEMPT_LOCK_OFFSET 0 ++#endif + + /* + * The preempt_count offset needed for things like: +diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h +index b828b938c876..b02009f53026 100644 +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -19,6 +19,10 @@ + + extern int max_lock_depth; /* for sysctl */ + ++#ifdef CONFIG_DEBUG_MUTEXES ++#include <linux/debug_locks.h> ++#endif ++ + /** + * The rt_mutex structure + * +@@ -31,6 +35,7 @@ struct rt_mutex { + raw_spinlock_t wait_lock; + struct rb_root_cached waiters; + struct task_struct *owner; ++ int save_state; + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +@@ -67,11 +72,19 @@ do { \ + #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) + #endif + +-#define 
__RT_MUTEX_INITIALIZER(mutexname) \ +- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ ++#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ + , .waiters = RB_ROOT_CACHED \ + , .owner = NULL \ +- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} ++ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) ++ ++#define __RT_MUTEX_INITIALIZER(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 0 } ++ ++#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 1 } + + #define DEFINE_RT_MUTEX(mutexname) \ + struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 73defe42fd23..3650fdaac4ca 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -141,6 +141,9 @@ struct io_uring_task; + smp_store_mb(current->state, (state_value)); \ + } while (0) + ++#define __set_current_state_no_track(state_value) \ ++ current->state = (state_value); ++ + #define set_special_state(state_value) \ + do { \ + unsigned long flags; /* may shadow */ \ +@@ -194,6 +197,9 @@ struct io_uring_task; + #define set_current_state(state_value) \ + smp_store_mb(current->state, (state_value)) + ++#define __set_current_state_no_track(state_value) \ ++ __set_current_state(state_value) ++ + /* + * set_special_state() should be used for those states when the blocking task + * can not use the regular condition based wait-loop. In that case we must +@@ -1021,6 +1027,7 @@ struct task_struct { + raw_spinlock_t pi_lock; + + struct wake_q_node wake_q; ++ struct wake_q_node wake_q_sleeper; + + #ifdef CONFIG_RT_MUTEXES + /* PI waiters blocked on a rt_mutex held by this task: */ +diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h +index 26a2013ac39c..6e2dff721547 100644 +--- a/include/linux/sched/wake_q.h ++++ b/include/linux/sched/wake_q.h +@@ -58,6 +58,17 @@ static inline bool wake_q_empty(struct wake_q_head *head) + + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); + extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); +-extern void wake_up_q(struct wake_q_head *head); ++extern void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task); ++extern void __wake_up_q(struct wake_q_head *head, bool sleeper); ++ ++static inline void wake_up_q(struct wake_q_head *head) ++{ ++ __wake_up_q(head, false); ++} ++ ++static inline void wake_up_q_sleeper(struct wake_q_head *head) ++{ ++ __wake_up_q(head, true); ++} + + #endif /* _LINUX_SCHED_WAKE_Q_H */ +diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h +new file mode 100644 +index 000000000000..3085132eae38 +--- /dev/null ++++ b/include/linux/spinlock_rt.h +@@ -0,0 +1,155 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_SPINLOCK_RT_H ++#define __LINUX_SPINLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_H ++#error Do not include directly. 
Use spinlock.h ++#endif ++ ++#include <linux/bug.h> ++ ++extern void ++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); ++ ++#define spin_lock_init(slock) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ rt_mutex_init(&(slock)->lock); \ ++ __rt_spin_lock_init(slock, #slock, &__key); \ ++} while (0) ++ ++extern void __lockfunc rt_spin_lock(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); ++extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); ++extern void __lockfunc rt_spin_unlock(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock); ++extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); ++extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); ++extern int __lockfunc rt_spin_trylock(spinlock_t *lock); ++extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); ++ ++/* ++ * lockdep-less calls, for derived types like rwlock: ++ * (for trylock they can use rt_mutex_trylock() directly. ++ * Migrate disable handling must be done at the call site. ++ */ ++extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); ++ ++#define spin_lock(lock) rt_spin_lock(lock) ++ ++#define spin_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_spin_lock(lock); \ ++ } while (0) ++ ++#define spin_lock_irq(lock) spin_lock(lock) ++ ++#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) ++ ++#define spin_trylock(lock) \ ++({ \ ++ int __locked; \ ++ __locked = spin_do_trylock(lock); \ ++ __locked; \ ++}) ++ ++#ifdef CONFIG_LOCKDEP ++# define spin_lock_nested(lock, subclass) \ ++ do { \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++#define spin_lock_bh_nested(lock, subclass) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++# define spin_lock_nest_lock(lock, subclass) \ ++ do { \ ++ typecheck(struct lockdep_map *, &(subclass)->dep_map); \ ++ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \ ++ } while (0) ++ ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++#else ++# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock))) ++ ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(((void)(subclass), (lock))); \ ++ } while (0) ++#endif ++ ++#define spin_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(lock); \ ++ } while (0) ++ ++#define spin_unlock(lock) rt_spin_unlock(lock) ++ ++#define spin_unlock_bh(lock) \ ++ do { \ ++ rt_spin_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define spin_unlock_irq(lock) spin_unlock(lock) ++ ++#define spin_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ spin_unlock(lock); \ ++ } while (0) ++ ++#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) ++#define spin_trylock_irq(lock) spin_trylock(lock) 
++ ++#define spin_trylock_irqsave(lock, flags) \ ++({ \ ++ int __locked; \ ++ \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ __locked = spin_trylock(lock); \ ++ __locked; \ ++}) ++ ++#ifdef CONFIG_GENERIC_LOCKBREAK ++# define spin_is_contended(lock) ((lock)->break_lock) ++#else ++# define spin_is_contended(lock) (((void)(lock), 0)) ++#endif ++ ++static inline int spin_can_lock(spinlock_t *lock) ++{ ++ return !rt_mutex_is_locked(&lock->lock); ++} ++ ++static inline int spin_is_locked(spinlock_t *lock) ++{ ++ return rt_mutex_is_locked(&lock->lock); ++} ++ ++static inline void assert_spin_locked(spinlock_t *lock) ++{ ++ BUG_ON(!spin_is_locked(lock)); ++} ++ ++#endif +diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h +new file mode 100644 +index 000000000000..446da786e5d5 +--- /dev/null ++++ b/include/linux/spinlock_types_rt.h +@@ -0,0 +1,38 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_SPINLOCK_TYPES_RT_H ++#define __LINUX_SPINLOCK_TYPES_RT_H ++ ++#ifndef __LINUX_SPINLOCK_TYPES_H ++#error "Do not include directly. Include spinlock_types.h instead" ++#endif ++ ++#include <linux/cache.h> ++ ++/* ++ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: ++ */ ++typedef struct spinlock { ++ struct rt_mutex lock; ++ unsigned int break_lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} spinlock_t; ++ ++#define __RT_SPIN_INITIALIZER(name) \ ++ { \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ .save_state = 1, \ ++ } ++/* ++.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) ++*/ ++ ++#define __SPIN_LOCK_UNLOCKED(name) \ ++ { .lock = __RT_SPIN_INITIALIZER(name.lock), \ ++ SPIN_DEP_MAP_INIT(name) } ++ ++#define DEFINE_SPINLOCK(name) \ ++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) ++ ++#endif +diff --git a/kernel/futex/core.c b/kernel/futex/core.c +index 706cd446ec71..909dcd708a52 100644 +--- a/kernel/futex/core.c ++++ b/kernel/futex/core.c +@@ -1508,6 +1508,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ + struct task_struct *new_owner; + bool postunlock = false; + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + int ret = 0; + + new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); +@@ -1557,14 +1558,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ + * not fail. + */ + pi_state_update_owner(pi_state, new_owner); +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, ++ &wake_sleeper_q); + } + + out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + + return ret; + } +@@ -2867,7 +2869,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, + goto no_block; + } + +- rt_mutex_init_waiter(&rt_waiter); ++ rt_mutex_init_waiter(&rt_waiter, false); + + /* + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not +@@ -3213,7 +3215,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. 
+ */ +- rt_mutex_init_waiter(&rt_waiter); ++ rt_mutex_init_waiter(&rt_waiter, false); + + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); + if (unlikely(ret != 0)) +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 40ae934c6caa..35cdbbb5daa9 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -8,6 +8,11 @@ + * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + * Copyright (C) 2006 Esben Nielsen ++ * Adaptive Spinlocks: ++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, ++ * and Peter Morreale, ++ * Adaptive Spinlocks simplification: ++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> + * + * See Documentation/locking/rt-mutex-design.rst for details. + */ +@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, + * Only use with rt_mutex_waiter_{less,equal}() + */ + #define task_to_waiter(p) \ +- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } ++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } + + static inline int + rt_mutex_waiter_less(struct rt_mutex_waiter *left, +@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + return 1; + } + ++#define STEAL_NORMAL 0 ++#define STEAL_LATERAL 1 ++ ++static inline int ++rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) ++{ ++ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); ++ ++ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) ++ return 1; ++ ++ /* ++ * Note that RT tasks are excluded from lateral-steals ++ * to prevent the introduction of an unbounded latency. ++ */ ++ if (mode == STEAL_NORMAL || rt_task(waiter->task)) ++ return 0; ++ ++ return rt_mutex_waiter_equal(waiter, top_waiter); ++} ++ + static void + rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) + { +@@ -377,6 +403,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, + return debug_rt_mutex_detect_deadlock(waiter, chwalk); + } + ++static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) ++{ ++ if (waiter->savestate) ++ wake_up_lock_sleeper(waiter->task); ++ else ++ wake_up_process(waiter->task); ++} ++ + /* + * Max number of times we'll walk the boosting chain: + */ +@@ -700,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + * follow here. This is the end of the chain we are walking. + */ + if (!rt_mutex_owner(lock)) { ++ struct rt_mutex_waiter *lock_top_waiter; ++ + /* + * If the requeue [7] above changed the top waiter, + * then we need to wake the new top waiter up to try + * to get the lock. 
+ */ +- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) +- wake_up_process(rt_mutex_top_waiter(lock)->task); ++ lock_top_waiter = rt_mutex_top_waiter(lock); ++ if (prerequeue_top_waiter != lock_top_waiter) ++ rt_mutex_wake_waiter(lock_top_waiter); + raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } +@@ -807,9 +844,11 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + * @task: The task which wants to acquire the lock + * @waiter: The waiter that is queued to the lock's wait tree if the + * callsite called task_blocked_on_lock(), otherwise NULL ++ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) + */ +-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +- struct rt_mutex_waiter *waiter) ++static int __try_to_take_rt_mutex(struct rt_mutex *lock, ++ struct task_struct *task, ++ struct rt_mutex_waiter *waiter, int mode) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -845,12 +884,11 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + */ + if (waiter) { + /* +- * If waiter is not the highest priority waiter of +- * @lock, give up. ++ * If waiter is not the highest priority waiter of @lock, ++ * or its peer when lateral steal is allowed, give up. + */ +- if (waiter != rt_mutex_top_waiter(lock)) ++ if (!rt_mutex_steal(lock, waiter, mode)) + return 0; +- + /* + * We can acquire the lock. Remove the waiter from the + * lock waiters tree. +@@ -868,14 +906,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + */ + if (rt_mutex_has_waiters(lock)) { + /* +- * If @task->prio is greater than or equal to +- * the top waiter priority (kernel view), +- * @task lost. ++ * If @task->prio is greater than the top waiter ++ * priority (kernel view), or equal to it when a ++ * lateral steal is forbidden, @task lost. + */ +- if (!rt_mutex_waiter_less(task_to_waiter(task), +- rt_mutex_top_waiter(lock))) ++ if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) + return 0; +- + /* + * The current top waiter stays enqueued. We + * don't have to change anything in the lock +@@ -922,6 +958,289 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + return 1; + } + ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * preemptible spin_lock functions: ++ */ ++static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ might_sleep_no_state_check(); ++ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return; ++ else ++ slowfn(lock); ++} ++ ++static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) ++ return; ++ else ++ slowfn(lock); ++} ++#ifdef CONFIG_SMP ++/* ++ * Note that owner is a speculative pointer and dereferencing relies ++ * on rcu_read_lock() and the check against the lock owner. ++ */ ++static int adaptive_wait(struct rt_mutex *lock, ++ struct task_struct *owner) ++{ ++ int res = 0; ++ ++ rcu_read_lock(); ++ for (;;) { ++ if (owner != rt_mutex_owner(lock)) ++ break; ++ /* ++ * Ensure that owner->on_cpu is dereferenced _after_ ++ * checking the above to be valid. 
++ */ ++ barrier(); ++ if (!owner->on_cpu) { ++ res = 1; ++ break; ++ } ++ cpu_relax(); ++ } ++ rcu_read_unlock(); ++ return res; ++} ++#else ++static int adaptive_wait(struct rt_mutex *lock, ++ struct task_struct *orig_owner) ++{ ++ return 1; ++} ++#endif ++ ++static int task_blocks_on_rt_mutex(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task, ++ enum rtmutex_chainwalk chwalk); ++/* ++ * Slow path lock function spin_lock style: this variant is very ++ * careful not to miss any non-lock wakeups. ++ * ++ * We store the current state under p->pi_lock in p->saved_state and ++ * the try_to_wake_up() code handles this accordingly. ++ */ ++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags) ++{ ++ struct task_struct *lock_owner, *self = current; ++ struct rt_mutex_waiter *top_waiter; ++ int ret; ++ ++ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) ++ return; ++ ++ BUG_ON(rt_mutex_owner(lock) == self); ++ ++ /* ++ * We save whatever state the task is in and we'll restore it ++ * after acquiring the lock taking real wakeups into account ++ * as well. We are serialized via pi_lock against wakeups. See ++ * try_to_wake_up(). ++ */ ++ raw_spin_lock(&self->pi_lock); ++ self->saved_state = self->state; ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ ++ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK); ++ BUG_ON(ret); ++ ++ for (;;) { ++ /* Try to acquire the lock again. */ ++ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL)) ++ break; ++ ++ top_waiter = rt_mutex_top_waiter(lock); ++ lock_owner = rt_mutex_owner(lock); ++ ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) ++ schedule(); ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ } ++ ++ /* ++ * Restore the task state to current->saved_state. We set it ++ * to the original state above and the try_to_wake_up() code ++ * has possibly updated it when a real (non-rtmutex) wakeup ++ * happened while we were blocked. Clear saved_state so ++ * try_to_wakeup() does not get confused. ++ */ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(self->saved_state); ++ self->saved_state = TASK_RUNNING; ++ raw_spin_unlock(&self->pi_lock); ++ ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit ++ * unconditionally. 
We might have to fix that up: ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock)); ++ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry)); ++} ++ ++static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) ++{ ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ ++ rt_mutex_init_waiter(&waiter, true); ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ rt_spin_lock_slowlock_locked(lock, &waiter, flags); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper); ++/* ++ * Slow path to release a rt_mutex spin_lock style ++ */ ++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) ++{ ++ unsigned long flags; ++ DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); ++ bool postunlock; ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); ++} ++ ++void __lockfunc rt_spin_lock(spinlock_t *lock) ++{ ++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock); ++ ++void __lockfunc __rt_spin_lock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); ++} ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) ++{ ++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock_nested); ++ ++void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, ++ struct lockdep_map *nest_lock) ++{ ++ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock_nest_lock); ++#endif ++ ++void __lockfunc rt_spin_unlock(spinlock_t *lock) ++{ ++ /* NOTE: we always pass in '1' for nested, for simplicity */ ++ spin_release(&lock->dep_map, _RET_IP_); ++ migrate_enable(); ++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(rt_spin_unlock); ++ ++void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(__rt_spin_unlock); ++ ++/* ++ * Wait for the lock to get unlocked: instead of polling for an unlock ++ * (like raw spinlocks do), we lock and unlock, to force the kernel to ++ * schedule if there's contention: ++ */ ++void __lockfunc rt_spin_lock_unlock(spinlock_t *lock) ++{ ++ spin_lock(lock); ++ spin_unlock(lock); ++} ++EXPORT_SYMBOL(rt_spin_lock_unlock); ++ ++int __lockfunc rt_spin_trylock(spinlock_t *lock) ++{ ++ int ret; ++ ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ migrate_disable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock); ++ ++int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) ++{ ++ int ret; ++ ++ local_bh_disable(); ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ migrate_disable(); ++ } else { ++ local_bh_enable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock_bh); ++ ++void 
++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++} ++EXPORT_SYMBOL(__rt_spin_lock_init); ++ ++#endif /* PREEMPT_RT */ ++ ++static inline int ++try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, ++ struct rt_mutex_waiter *waiter) ++{ ++ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); ++} ++ + /* + * Task blocks on lock. + * +@@ -1035,6 +1354,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + * Called with lock->wait_lock held and interrupts disabled. + */ + static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q, + struct rt_mutex *lock) + { + struct rt_mutex_waiter *waiter; +@@ -1074,7 +1394,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); +- wake_q_add(wake_q, waiter->task); ++ if (waiter->savestate) ++ wake_q_add_sleeper(wake_sleeper_q, waiter->task); ++ else ++ wake_q_add(wake_q, waiter->task); + raw_spin_unlock(¤t->pi_lock); + } + +@@ -1158,21 +1481,22 @@ void rt_mutex_adjust_pi(struct task_struct *task) + return; + } + next_lock = waiter->lock; +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(task); + ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, + next_lock, NULL, task); + } + +-void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) ++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) + { + debug_rt_mutex_init_waiter(waiter); + RB_CLEAR_NODE(&waiter->pi_tree_entry); + RB_CLEAR_NODE(&waiter->tree_entry); + waiter->task = NULL; ++ waiter->savestate = savestate; + } + + /** +@@ -1283,7 +1607,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, + unsigned long flags; + int ret = 0; + +- rt_mutex_init_waiter(&waiter); ++ rt_mutex_init_waiter(&waiter, false); + + /* + * Technically we could use raw_spin_[un]lock_irq() here, but this can +@@ -1356,7 +1680,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) + * Return whether the current task needs to call rt_mutex_postunlock(). + */ + static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q) + { + unsigned long flags; + +@@ -1410,7 +1735,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, + * + * Queue the next waiter for wakeup once we release the wait_lock. + */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + return true; /* call rt_mutex_postunlock() */ +@@ -1447,9 +1772,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, + /* + * Performs the wakeup of the top-waiter and re-enables preemption. 
+ */ +-void rt_mutex_postunlock(struct wake_q_head *wake_q) ++void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q) + { + wake_up_q(wake_q); ++ wake_up_q_sleeper(wake_sleeper_q); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +@@ -1458,15 +1785,17 @@ void rt_mutex_postunlock(struct wake_q_head *wake_q) + static inline void + rt_mutex_fastunlock(struct rt_mutex *lock, + bool (*slowfn)(struct rt_mutex *lock, +- struct wake_q_head *wqh)) ++ struct wake_q_head *wqh, ++ struct wake_q_head *wq_sleeper)) + { + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) + return; + +- if (slowfn(lock, &wake_q)) +- rt_mutex_postunlock(&wake_q); ++ if (slowfn(lock, &wake_q, &wake_sleeper_q)) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } + + int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) +@@ -1597,16 +1926,13 @@ void __sched __rt_mutex_unlock(struct rt_mutex *lock) + void __sched rt_mutex_unlock(struct rt_mutex *lock) + { + mutex_release(&lock->dep_map, _RET_IP_); +- rt_mutex_fastunlock(lock, rt_mutex_slowunlock); ++ __rt_mutex_unlock(lock); + } + EXPORT_SYMBOL_GPL(rt_mutex_unlock); + +-/** +- * Futex variant, that since futex variants do not use the fast-path, can be +- * simple and will not need to retry. +- */ +-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -1623,23 +1949,35 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); + + return true; /* call postunlock() */ + } + ++/** ++ * Futex variant, that since futex variants do not use the fast-path, can be ++ * simple and will not need to retry. ++ */ ++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) ++{ ++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); ++} ++ + void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) + { + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + unsigned long flags; + bool postunlock; + + raw_spin_lock_irqsave(&lock->wait_lock, flags); +- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } + + /** +@@ -1675,7 +2013,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name, + if (name && key) + debug_rt_mutex_init(lock, name, key); + } +-EXPORT_SYMBOL_GPL(__rt_mutex_init); ++EXPORT_SYMBOL(__rt_mutex_init); + + /** + * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a +@@ -1695,6 +2033,14 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner) + { + __rt_mutex_init(lock, NULL, NULL); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* ++ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is ++ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping ++ * lock. 
++ */ ++ raw_spin_lock_init(&lock->wait_lock); ++#endif + debug_rt_mutex_proxy_lock(lock, proxy_owner); + rt_mutex_set_owner(lock, proxy_owner); + } +@@ -1717,6 +2063,26 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock) + rt_mutex_set_owner(lock, NULL); + } + ++static void fixup_rt_mutex_blocked(struct rt_mutex *lock) ++{ ++ struct task_struct *tsk = current; ++ /* ++ * RT has a problem here when the wait got interrupted by a timeout ++ * or a signal. task->pi_blocked_on is still set. The task must ++ * acquire the hash bucket lock when returning from this function. ++ * ++ * If the hash bucket lock is contended then the ++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in ++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by ++ * clearing task->pi_blocked_on which removes the task from the ++ * boosting chain of the rtmutex. That's correct because the task ++ * is not longer blocked on it. ++ */ ++ raw_spin_lock(&tsk->pi_lock); ++ tsk->pi_blocked_on = NULL; ++ raw_spin_unlock(&tsk->pi_lock); ++} ++ + /** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take +@@ -1789,6 +2155,9 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, + ret = 0; + } + ++ if (ret) ++ fixup_rt_mutex_blocked(lock); ++ + return ret; + } + +@@ -1878,6 +2247,9 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); ++ if (ret) ++ fixup_rt_mutex_blocked(lock); ++ + raw_spin_unlock_irq(&lock->wait_lock); + + return ret; +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index 9d1e974ca9c3..c1a280167e3c 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -31,6 +31,7 @@ struct rt_mutex_waiter { + struct task_struct *task; + struct rt_mutex *lock; + int prio; ++ bool savestate; + u64 deadline; + }; + +@@ -133,7 +134,7 @@ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); + extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); +-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); ++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); + extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task); +@@ -151,9 +152,12 @@ extern int __rt_mutex_futex_trylock(struct rt_mutex *l); + + extern void rt_mutex_futex_unlock(struct rt_mutex *lock); + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wqh); ++ struct wake_q_head *wqh, ++ struct wake_q_head *wq_sleeper); ++ ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q); + +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + /* RW semaphore special interface */ + + extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); +@@ -163,6 +167,10 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *waiter); ++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags); ++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f6d40256c0d4..be5d41ed6ff2 
100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -511,9 +511,15 @@ static bool set_nr_if_polling(struct task_struct *p) + #endif + #endif + +-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) ++static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task, ++ bool sleeper) + { +- struct wake_q_node *node = &task->wake_q; ++ struct wake_q_node *node; ++ ++ if (sleeper) ++ node = &task->wake_q_sleeper; ++ else ++ node = &task->wake_q; + + /* + * Atomically grab the task, if ->wake_q is !nil already it means +@@ -549,7 +555,13 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) + */ + void wake_q_add(struct wake_q_head *head, struct task_struct *task) + { +- if (__wake_q_add(head, task)) ++ if (__wake_q_add(head, task, false)) ++ get_task_struct(task); ++} ++ ++void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) ++{ ++ if (__wake_q_add(head, task, true)) + get_task_struct(task); + } + +@@ -572,28 +584,39 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) + */ + void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) + { +- if (!__wake_q_add(head, task)) ++ if (!__wake_q_add(head, task, false)) + put_task_struct(task); + } + +-void wake_up_q(struct wake_q_head *head) ++void __wake_up_q(struct wake_q_head *head, bool sleeper) + { + struct wake_q_node *node = head->first; + + while (node != WAKE_Q_TAIL) { + struct task_struct *task; + +- task = container_of(node, struct task_struct, wake_q); ++ if (sleeper) ++ task = container_of(node, struct task_struct, wake_q_sleeper); ++ else ++ task = container_of(node, struct task_struct, wake_q); ++ + BUG_ON(!task); + /* Task can safely be re-inserted now: */ + node = node->next; +- task->wake_q.next = NULL; + ++ if (sleeper) ++ task->wake_q_sleeper.next = NULL; ++ else ++ task->wake_q.next = NULL; + /* + * wake_up_process() executes a full barrier, which pairs with + * the queueing in wake_q_add() so as not to miss wakeups. + */ +- wake_up_process(task); ++ if (sleeper) ++ wake_up_lock_sleeper(task); ++ else ++ wake_up_process(task); ++ + put_task_struct(task); + } + } +-- +2.43.0 + |
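On a PREEMPT_RT kernel the spinlock_rt.h wrappers added above keep the usual spin_lock_irqsave() interface, but the lock underneath is a sleeping rt_mutex: contention blocks the task, interrupts stay enabled, and the flags argument is simply zeroed by the macro. A minimal caller sketch (demo_lock, demo_count and demo_inc are hypothetical names, not part of the patch) therefore looks the same as non-RT code:

/*
 * Sketch only. On PREEMPT_RT, spin_lock_irqsave() above expands to
 * flags = 0; rt_spin_lock(): the task may sleep on contention,
 * interrupts are never disabled, and migration is disabled instead.
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static unsigned long demo_count;

static void demo_inc(void)
{
        unsigned long flags;

        spin_lock_irqsave(&demo_lock, flags);
        demo_count++;
        spin_unlock_irqrestore(&demo_lock, flags);
}

The only difference visible to such a caller is that demo_inc() may now sleep, which is why the fast path above uses might_sleep_no_state_check() rather than the state-checking might_sleep().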
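Dynamically allocated locks go through the spin_lock_init() macro from spinlock_rt.h, which initializes the embedded rt_mutex and registers a lockdep class via __rt_spin_lock_init(). A sketch with a hypothetical demo_dev structure:

/*
 * Sketch only: demo_dev and demo_dev_setup() are illustrative names.
 * spin_lock_init() on PREEMPT_RT calls rt_mutex_init() on the inner
 * lock and then __rt_spin_lock_init() for the lockdep mapping.
 */
#include <linux/spinlock.h>

struct demo_dev {
        spinlock_t lock;
        int value;
};

static void demo_dev_setup(struct demo_dev *dev)
{
        spin_lock_init(&dev->lock);
        dev->value = 0;
}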
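All unlock slow paths in the patch follow the same two-queue convention: ordinary waiters are collected on a wake_q and woken with wake_up_process(), while waiters of save_state (spinlock-style) locks go on a separate sleeper queue and are woken with wake_up_lock_sleeper(), so the task state stashed in saved_state is not clobbered. The sketch below mirrors rt_mutex_futex_unlock() from the patch; demo_futex_unlock is a hypothetical wrapper and would have to live beside the rtmutex code, since the helpers are only declared in kernel/locking/rtmutex_common.h.

/*
 * Sketch of the two-queue postunlock convention. Both queues are
 * always passed; rt_mutex_postunlock() flushes the normal queue, then
 * the sleeper queue, and re-enables the preemption that was disabled
 * while the next waiter was marked for wakeup.
 */
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include "rtmutex_common.h"        /* kernel-internal header */

static void demo_futex_unlock(struct rt_mutex *lock)
{
        DEFINE_WAKE_Q(wake_q);
        DEFINE_WAKE_Q(wake_sleeper_q);
        unsigned long flags;
        bool postunlock;

        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        if (postunlock)
                rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}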