Diffstat:
 -rw-r--r-- debian/patches-rt/0003-sched-Fix-balance_callback.patch | 235
 1 file changed, 235 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0003-sched-Fix-balance_callback.patch b/debian/patches-rt/0003-sched-Fix-balance_callback.patch
new file mode 100644
index 000000000..265608440
--- /dev/null
+++ b/debian/patches-rt/0003-sched-Fix-balance_callback.patch
@@ -0,0 +1,235 @@
+From db31f287a66b122733214b4a2bac9d59a564b77d Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 23 Oct 2020 12:12:00 +0200
+Subject: [PATCH 003/323] sched: Fix balance_callback()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+The intent of balance_callback() has always been to delay executing
+balancing operations until the end of the current rq->lock section.
+This is because balance operations must often drop rq->lock, and that
+isn't safe in general.
+
+However, as noted by Scott, there were a few holes in that scheme;
+balance_callback() was called after rq->lock was dropped, which means
+another CPU can interleave and touch the callback list.
+
+Rework code to call the balance callbacks before dropping rq->lock
+where possible, and otherwise splice the balance list onto a local
+stack.
+
+This guarantees that the balance list must be empty when we take
+rq->lock. IOW, we'll only ever run our own balance callbacks.
+
+Reported-by: Scott Wood <swood@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 119 +++++++++++++++++++++++++++----------------
+ kernel/sched/sched.h | 3 ++
+ 2 files changed, 78 insertions(+), 44 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 40f40f359c5d..ab8b7fd46334 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3511,6 +3511,69 @@ static inline void finish_task(struct task_struct *prev)
+ #endif
+ }
+
++#ifdef CONFIG_SMP
++
++static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
++{
++ void (*func)(struct rq *rq);
++ struct callback_head *next;
++
++ lockdep_assert_held(&rq->lock);
++
++ while (head) {
++ func = (void (*)(struct rq *))head->func;
++ next = head->next;
++ head->next = NULL;
++ head = next;
++
++ func(rq);
++ }
++}
++
++static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
++{
++ struct callback_head *head = rq->balance_callback;
++
++ lockdep_assert_held(&rq->lock);
++ if (head)
++ rq->balance_callback = NULL;
++
++ return head;
++}
++
++static void __balance_callbacks(struct rq *rq)
++{
++ do_balance_callbacks(rq, splice_balance_callbacks(rq));
++}
++
++static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
++{
++ unsigned long flags;
++
++ if (unlikely(head)) {
++ raw_spin_lock_irqsave(&rq->lock, flags);
++ do_balance_callbacks(rq, head);
++ raw_spin_unlock_irqrestore(&rq->lock, flags);
++ }
++}
++
++#else
++
++static inline void __balance_callbacks(struct rq *rq)
++{
++}
++
++static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
++{
++ return NULL;
++}
++
++static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
++{
++}
++
++#endif
++
+ static inline void
+ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
+ {
+@@ -3536,6 +3599,7 @@ static inline void finish_lock_switch(struct rq *rq)
+ * prev into current:
+ */
+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
++ __balance_callbacks(rq);
+ raw_spin_unlock_irq(&rq->lock);
+ }
+
+@@ -3677,43 +3741,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
+ return rq;
+ }
+
+-#ifdef CONFIG_SMP
+-
+-/* rq->lock is NOT held, but preemption is disabled */
+-static void __balance_callback(struct rq *rq)
+-{
+- struct callback_head *head, *next;
+- void (*func)(struct rq *rq);
+- unsigned long flags;
+-
+- raw_spin_lock_irqsave(&rq->lock, flags);
+- head = rq->balance_callback;
+- rq->balance_callback = NULL;
+- while (head) {
+- func = (void (*)(struct rq *))head->func;
+- next = head->next;
+- head->next = NULL;
+- head = next;
+-
+- func(rq);
+- }
+- raw_spin_unlock_irqrestore(&rq->lock, flags);
+-}
+-
+-static inline void balance_callback(struct rq *rq)
+-{
+- if (unlikely(rq->balance_callback))
+- __balance_callback(rq);
+-}
+-
+-#else
+-
+-static inline void balance_callback(struct rq *rq)
+-{
+-}
+-
+-#endif
+-
+ /**
+ * schedule_tail - first thing a freshly forked thread must call.
+ * @prev: the thread we just switched away from.
+@@ -3733,7 +3760,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
+ */
+
+ rq = finish_task_switch(prev);
+- balance_callback(rq);
+ preempt_enable();
+
+ if (current->set_child_tid)
+@@ -4548,10 +4574,11 @@ static void __sched notrace __schedule(bool preempt)
+ rq = context_switch(rq, prev, next, &rf);
+ } else {
+ rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
+- rq_unlock_irq(rq, &rf);
+- }
+
+- balance_callback(rq);
++ rq_unpin_lock(rq, &rf);
++ __balance_callbacks(rq);
++ raw_spin_unlock_irq(&rq->lock);
++ }
+ }
+
+ void __noreturn do_task_dead(void)
+@@ -4972,9 +4999,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
+ out_unlock:
+ /* Avoid rq from going away on us: */
+ preempt_disable();
+- __task_rq_unlock(rq, &rf);
+
+- balance_callback(rq);
++ rq_unpin_lock(rq, &rf);
++ __balance_callbacks(rq);
++ raw_spin_unlock(&rq->lock);
++
+ preempt_enable();
+ }
+ #else
+@@ -5217,6 +5246,7 @@ static int __sched_setscheduler(struct task_struct *p,
+ int oldpolicy = -1, policy = attr->sched_policy;
+ int retval, oldprio, newprio, queued, running;
+ const struct sched_class *prev_class;
++ struct callback_head *head;
+ struct rq_flags rf;
+ int reset_on_fork;
+ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+@@ -5466,6 +5496,7 @@ static int __sched_setscheduler(struct task_struct *p,
+
+ /* Avoid rq from going away on us: */
+ preempt_disable();
++ head = splice_balance_callbacks(rq);
+ task_rq_unlock(rq, p, &rf);
+
+ if (pi) {
+@@ -5475,7 +5506,7 @@ static int __sched_setscheduler(struct task_struct *p,
+ }
+
+ /* Run balance callbacks after we've adjusted the PI chain: */
+- balance_callback(rq);
++ balance_callbacks(rq, head);
+ preempt_enable();
+
+ return 0;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 8de07aba8bdd..fd71da3a4f0f 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1225,6 +1225,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
+ rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+ rf->clock_update_flags = 0;
+ #endif
++#ifdef CONFIG_SMP
++ SCHED_WARN_ON(rq->balance_callback);
++#endif
+ }
+
+ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
+--
+2.43.0
+
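As a companion to the changelog above (and separate from the patch itself), here is a minimal user-space sketch of the splice-then-run scheme it introduces: callbacks queued on rq->balance_callback while rq->lock is held are spliced off under that same lock and run either before the lock is dropped (__balance_callbacks(), the finish_lock_switch()/__schedule() paths) or from the privately spliced head after the lock is re-taken (balance_callbacks(), the __sched_setscheduler() path). The struct layouts, the pthread mutex standing in for rq->lock, and the demo_cb() callback are simplified stand-ins for illustration, not the kernel's definitions.

/*
 * Minimal user-space sketch of the splice-then-run scheme; struct rq,
 * struct callback_head and the pthread mutex standing in for rq->lock
 * are simplified illustrations, not the kernel's definitions.
 *
 * Build: cc -o balance_sketch balance_sketch.c -pthread
 */
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct rq;

struct callback_head {
	struct callback_head *next;
	void (*func)(struct rq *rq);
};

struct rq {
	pthread_mutex_t lock;			/* stand-in for rq->lock */
	struct callback_head *balance_callback;	/* pending callback list */
};

/* Detach the whole pending list; the caller must hold rq->lock. */
static struct callback_head *splice_balance_callbacks(struct rq *rq)
{
	struct callback_head *head = rq->balance_callback;

	if (head)
		rq->balance_callback = NULL;
	return head;
}

/* Run a spliced (now private) list; the caller must hold rq->lock. */
static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
{
	void (*func)(struct rq *rq);
	struct callback_head *next;

	while (head) {
		func = head->func;
		next = head->next;
		head->next = NULL;
		head = next;

		func(rq);
	}
}

/* Run our own callbacks just before dropping the lock. */
static void __balance_callbacks(struct rq *rq)
{
	do_balance_callbacks(rq, splice_balance_callbacks(rq));
}

/* Run a list spliced earlier, re-taking the lock around it. */
static void balance_callbacks(struct rq *rq, struct callback_head *head)
{
	if (head) {
		pthread_mutex_lock(&rq->lock);
		do_balance_callbacks(rq, head);
		pthread_mutex_unlock(&rq->lock);
	}
}

static void demo_cb(struct rq *rq)
{
	(void)rq;
	puts("balance callback ran under the lock");
}

int main(void)
{
	struct rq rq = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct callback_head cb1 = { .func = demo_cb };
	struct callback_head cb2 = { .func = demo_cb };
	struct callback_head *head;

	/* Path 1: run the callbacks before dropping the lock. */
	pthread_mutex_lock(&rq.lock);
	rq.balance_callback = &cb1;		/* queued under the lock */
	__balance_callbacks(&rq);		/* list is empty again here */
	pthread_mutex_unlock(&rq.lock);

	/* Path 2: splice under the lock, drop it, run the private list later. */
	pthread_mutex_lock(&rq.lock);
	rq.balance_callback = &cb2;
	head = splice_balance_callbacks(&rq);
	pthread_mutex_unlock(&rq.lock);		/* balance_callback is NULL here */
	balance_callbacks(&rq, head);

	return 0;
}

Because the pending list is always detached while the lock is held and run either before that lock section ends or from a head no other CPU can reach, a lock holder only ever executes the callbacks it queued itself, which is the guarantee the changelog states and what the new SCHED_WARN_ON(rq->balance_callback) in rq_pin_lock() asserts.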