1 files changed, 142 insertions, 0 deletions
diff --git a/debian/patches-rt/0290-sched-Simplify-migration_cpu_stop.patch b/debian/patches-rt/0290-sched-Simplify-migration_cpu_stop.patch
new file mode 100644
index 000000000..4d0841707
--- /dev/null
+++ b/debian/patches-rt/0290-sched-Simplify-migration_cpu_stop.patch
@@ -0,0 +1,142 @@
+From 4f468832c2a5bb5c0bb23b453038ff4fed722061 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 8 Jun 2021 00:37:31 -0400
+Subject: [PATCH 290/323] sched: Simplify migration_cpu_stop()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+commit c20cf065d4a619d394d23290093b1002e27dff86 upstream.
+
+When affine_move_task() issues a migration_cpu_stop(), the purpose of
+that function is to complete that @pending, not any random other
+p->migration_pending that might have gotten installed since.
+
+This realization greatly simplifies migration_cpu_stop() and enables
+the further steps needed to fix all this, because it guarantees that
+@pending's stopper will complete @pending (and not some random other
+@pending).
+
+Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
+Cc: stable@kernel.org
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Link: https://lkml.kernel.org/r/20210224131355.430014682@infradead.org
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+---
+ kernel/sched/core.c | 56 +++++++--------------------------------------
+ 1 file changed, 8 insertions(+), 48 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index df9c4c3838c0..31fa925852e5 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1950,8 +1950,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
+  */
+ static int migration_cpu_stop(void *data)
+ {
+-	struct set_affinity_pending *pending;
+ 	struct migration_arg *arg = data;
++	struct set_affinity_pending *pending = arg->pending;
+ 	struct task_struct *p = arg->task;
+ 	int dest_cpu = arg->dest_cpu;
+ 	struct rq *rq = this_rq();
+@@ -1973,25 +1973,6 @@ static int migration_cpu_stop(void *data)
+ 	raw_spin_lock(&p->pi_lock);
+ 	rq_lock(rq, &rf);
+ 
+-	pending = p->migration_pending;
+-	if (pending && !arg->pending) {
+-		/*
+-		 * This happens from sched_exec() and migrate_task_to(),
+-		 * neither of them care about pending and just want a task to
+-		 * maybe move about.
+-		 *
+-		 * Even if there is a pending, we can ignore it, since
+-		 * affine_move_task() will have it's own stop_work's in flight
+-		 * which will manage the completion.
+-		 *
+-		 * Notably, pending doesn't need to match arg->pending. This can
+-		 * happen when tripple concurrent affine_move_task() first sets
+-		 * pending, then clears pending and eventually sets another
+-		 * pending.
+-		 */
+-		pending = NULL;
+-	}
+-
+ 	/*
+ 	 * If task_rq(p) != rq, it cannot be migrated here, because we're
+ 	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
+@@ -2002,31 +1983,20 @@ static int migration_cpu_stop(void *data)
+ 			goto out;
+ 
+ 		if (pending) {
+-			p->migration_pending = NULL;
++			if (p->migration_pending == pending)
++				p->migration_pending = NULL;
+ 			complete = true;
+ 		}
+ 
+-		/* migrate_enable() --  we must not race against SCA */
+-		if (dest_cpu < 0) {
+-			/*
+-			 * When this was migrate_enable() but we no longer
+-			 * have a @pending, a concurrent SCA 'fixed' things
+-			 * and we should be valid again. Nothing to do.
+-			 */
+-			if (!pending) {
+-				WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
+-				goto out;
+-			}
+-
++		if (dest_cpu < 0)
+ 			dest_cpu = cpumask_any_distribute(&p->cpus_mask);
+-		}
+ 
+ 		if (task_on_rq_queued(p))
+ 			rq = __migrate_task(rq, &rf, p, dest_cpu);
+ 		else
+ 			p->wake_cpu = dest_cpu;
+ 
+-	} else if (dest_cpu < 0 || pending) {
++	} else if (pending) {
+ 		/*
+ 		 * This happens when we get migrated between migrate_enable()'s
+ 		 * preempt_enable() and scheduling the stopper task. At that
+@@ -2041,22 +2011,13 @@ static int migration_cpu_stop(void *data)
+ 		 * ->pi_lock, so the allowed mask is stable - if it got
+ 		 * somewhere allowed, we're done.
+ 		 */
+-		if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+-			p->migration_pending = NULL;
++		if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
++			if (p->migration_pending == pending)
++				p->migration_pending = NULL;
+ 			complete = true;
+ 			goto out;
+ 		}
+ 
+-		/*
+-		 * When this was migrate_enable() but we no longer have an
+-		 * @pending, a concurrent SCA 'fixed' things and we should be
+-		 * valid again. Nothing to do.
+-		 */
+-		if (!pending) {
+-			WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
+-			goto out;
+-		}
+-
+ 		/*
+ 		 * When migrate_enable() hits a rq mis-match we can't reliably
+ 		 * determine is_migration_disabled() and so have to chase after
+@@ -2074,7 +2035,6 @@ static int migration_cpu_stop(void *data)
+ 		complete_all(&pending->done);
+ 
+ 	/* For pending->{arg,stop_work} */
+-	pending = arg->pending;
+ 	if (pending && refcount_dec_and_test(&pending->refs))
+ 		wake_up_var(&pending->refs);
+ 
+-- 
+2.43.0
+