 debian/patches-rt/0004-sched-Provide-rt_mutex-specific-scheduler-helpers.patch | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)
diff --git a/debian/patches-rt/0004-sched-Provide-rt_mutex-specific-scheduler-helpers.patch b/debian/patches-rt/0004-sched-Provide-rt_mutex-specific-scheduler-helpers.patch
new file mode 100644
index 0000000000..f61a67ea3f
--- /dev/null
+++ b/debian/patches-rt/0004-sched-Provide-rt_mutex-specific-scheduler-helpers.patch
@@ -0,0 +1,128 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 8 Sep 2023 18:22:51 +0200
+Subject: [PATCH 4/7] sched: Provide rt_mutex specific scheduler helpers
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.6/older/patches-6.6.7-rt18.tar.xz
+
+With PREEMPT_RT there is an rt_mutex recursion problem where
+sched_submit_work() can use an rtlock (aka spinlock_t). More
+specifically, what happens is:
+
+ mutex_lock() /* really rt_mutex */
+ ...
+ __rt_mutex_slowlock_locked()
+ task_blocks_on_rt_mutex()
+ // enqueue current task as waiter
+ // do PI chain walk
+ rt_mutex_slowlock_block()
+ schedule()
+ sched_submit_work()
+ ...
+ spin_lock() /* really rtlock */
+ ...
+ __rt_mutex_slowlock_locked()
+ task_blocks_on_rt_mutex()
+ // enqueue current task as waiter *AGAIN*
+ // *CONFUSION*
+
+Fix this by making rt_mutex do the sched_submit_work() early, before
+it enqueues itself as a waiter -- before it even knows *if* it will
+wait.
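+
+A sketch of the intended call pattern (hedged: the rtmutex call-site
+conversion happens in a later patch of this series; example_slowlock()
+and its exact arguments are illustrative, not code from this patch):
+
+	static int example_slowlock(struct rt_mutex_base *lock,
+				    unsigned int state)
+	{
+		unsigned long flags;
+		int ret;
+
+		/* Runs sched_submit_work() once, before any rt_mutex
+		 * state is touched, so nothing can recurse. */
+		rt_mutex_pre_schedule();
+
+		raw_spin_lock_irqsave(&lock->wait_lock, flags);
+		ret = __rt_mutex_slowlock_locked(lock, NULL, state);
+		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+		/* Runs sched_update_worker() on the way out. */
+		rt_mutex_post_schedule();
+
+		return ret;
+	}
+
+Inside the wait loop, schedule() is then replaced by
+rt_mutex_schedule(), which skips sched_submit_work() because it
+already ran before the task enqueued itself as a waiter.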
+
+[[ basically Thomas' patch but with different naming and a few asserts
+ added ]]
+
+Originally-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20230908162254.999499-5-bigeasy@linutronix.de
+---
+ include/linux/sched.h | 3 +++
+ include/linux/sched/rt.h | 4 ++++
+ kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++----
+ 3 files changed, 39 insertions(+), 4 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -911,6 +911,9 @@ struct task_struct {
+ * ->sched_remote_wakeup gets used, so it can be in this word.
+ */
+ unsigned sched_remote_wakeup:1;
++#ifdef CONFIG_RT_MUTEXES
++ unsigned sched_rt_mutex:1;
++#endif
+
+ /* Bit to tell LSMs we're in execve(): */
+ unsigned in_execve:1;
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -30,6 +30,10 @@ static inline bool task_is_realtime(stru
+ }
+
+ #ifdef CONFIG_RT_MUTEXES
++extern void rt_mutex_pre_schedule(void);
++extern void rt_mutex_schedule(void);
++extern void rt_mutex_post_schedule(void);
++
+ /*
+ * Must hold either p->pi_lock or task_rq(p)->lock.
+ */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6724,9 +6724,6 @@ static inline void sched_submit_work(str
+ static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG);
+ unsigned int task_flags;
+
+- if (task_is_running(tsk))
+- return;
+-
+ /*
+ * Establish LD_WAIT_CONFIG context to ensure none of the code called
+ * will use a blocking primitive -- which would lead to recursion.
+@@ -6784,7 +6781,12 @@ asmlinkage __visible void __sched schedu
+ {
+ struct task_struct *tsk = current;
+
+- sched_submit_work(tsk);
++#ifdef CONFIG_RT_MUTEXES
++ lockdep_assert(!tsk->sched_rt_mutex);
++#endif
++
++ if (!task_is_running(tsk))
++ sched_submit_work(tsk);
+ __schedule_loop(SM_NONE);
+ sched_update_worker(tsk);
+ }
+@@ -7045,6 +7047,32 @@ static void __setscheduler_prio(struct t
+
+ #ifdef CONFIG_RT_MUTEXES
+
++/*
++ * Would be more useful with typeof()/auto_type but they don't mix with
++ * bit-fields. Since it's a local thing, use int. Keep the generic sounding
++ * name such that if someone were to implement this function we get to compare
++ * notes.
++ */
++#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; })
++
++void rt_mutex_pre_schedule(void)
++{
++ lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1));
++ sched_submit_work(current);
++}
++
++void rt_mutex_schedule(void)
++{
++ lockdep_assert(current->sched_rt_mutex);
++ __schedule_loop(SM_NONE);
++}
++
++void rt_mutex_post_schedule(void)
++{
++ sched_update_worker(current);
++ lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0));
++}
++
+ static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
+ {
+ if (pi_task)