Diffstat (limited to 'debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch')
-rw-r--r--  debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch  370
1 files changed, 370 insertions, 0 deletions
diff --git a/debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch b/debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch
new file mode 100644
index 000000000..d89f86138
--- /dev/null
+++ b/debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch
@@ -0,0 +1,370 @@
+From f6fcadc60ec427b9eeb4b734b77b7b110c050b83 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 23 Oct 2020 12:12:08 +0200
+Subject: [PATCH 011/323] sched: Fix migrate_disable() vs
+ set_cpus_allowed_ptr()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+Concurrent migrate_disable() and set_cpus_allowed_ptr() have
+interesting features. We rely on set_cpus_allowed_ptr() not to return
+until the task runs inside the provided mask. This expectation is
+exported to userspace.
+
+This means that any set_cpus_allowed_ptr() caller must wait until
+migrate_enable() allows migrations.
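+
+A rough userspace sketch of that contract (illustrative only, assuming
+CPU 0 is online): once sched_setaffinity() returns for the calling
+thread, the thread must already be running inside the new mask:
+
+	#define _GNU_SOURCE
+	#include <sched.h>
+	#include <assert.h>
+
+	int main(void)
+	{
+		cpu_set_t set;
+
+		CPU_ZERO(&set);
+		CPU_SET(0, &set);	/* allow only CPU 0 */
+
+		/* Must not return before we run inside the new mask. */
+		assert(sched_setaffinity(0, sizeof(set), &set) == 0);
+
+		/* The mask holds a single CPU, so this must now hold. */
+		assert(sched_getcpu() == 0);
+		return 0;
+	}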
+
+At the same time, we don't want migrate_enable() to schedule, due to
+patterns like:
+
+ preempt_disable();
+ migrate_disable();
+ ...
+ migrate_enable();
+ preempt_enable();
+
+And:
+
+ raw_spin_lock(&B);
+ spin_unlock(&A);
+
+This means that when migrate_enable() must restore the affinity
+mask, it cannot wait for that restore to complete. As luck would have
+it, that is exactly the case where there is a pending
+set_cpus_allowed_ptr(), so let that provide the storage for the async
+stop machine.
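+
+A loose userspace analogy of that storage hand-off (pthread stand-ins
+and illustrative names, not the kernel mechanism): the side that is
+allowed to block keeps the request on its own stack and waits for it
+to complete, while the side that must not block only hands that same
+storage to an asynchronous worker:
+
+	#include <pthread.h>
+	#include <stdbool.h>
+	#include <stddef.h>
+
+	/* One outstanding request; the waiter owns the storage. */
+	struct pending {
+		pthread_mutex_t	lock;
+		pthread_cond_t	cond;
+		bool		done;
+		int		dest_cpu;	/* payload for the worker */
+	};
+
+	static struct pending *pending_req;	/* cf. p->migration_pending */
+	static pthread_mutex_t publish_lock = PTHREAD_MUTEX_INITIALIZER;
+
+	/* Asynchronous worker, standing in for the stopper thread. */
+	static void *async_worker(void *data)
+	{
+		struct pending *req = data;
+
+		/* ... act on req->dest_cpu ... */
+
+		pthread_mutex_lock(&req->lock);
+		req->done = true;
+		pthread_cond_signal(&req->cond);
+		pthread_mutex_unlock(&req->lock);
+		return NULL;
+	}
+
+	/* Non-blocking side (cf. migrate_enable()): only queue work. */
+	static void restore_async(void)
+	{
+		struct pending *req;
+		pthread_t tid;
+
+		/* Take ownership of the published request, if any. */
+		pthread_mutex_lock(&publish_lock);
+		req = pending_req;
+		pending_req = NULL;
+		pthread_mutex_unlock(&publish_lock);
+
+		if (req && pthread_create(&tid, NULL, async_worker, req) == 0)
+			pthread_detach(tid);
+	}
+
+	/* Blocking side (cf. set_cpus_allowed_ptr()): provide storage, wait. */
+	static void set_affinity_and_wait(int dest_cpu)
+	{
+		struct pending req = { .done = false, .dest_cpu = dest_cpu };
+
+		pthread_mutex_init(&req.lock, NULL);
+		pthread_cond_init(&req.cond, NULL);
+
+		/* Publish the stack-allocated request. */
+		pthread_mutex_lock(&publish_lock);
+		pending_req = &req;
+		pthread_mutex_unlock(&publish_lock);
+
+		/* Stack storage stays valid until the worker completes it. */
+		pthread_mutex_lock(&req.lock);
+		while (!req.done)
+			pthread_cond_wait(&req.cond, &req.lock);
+		pthread_mutex_unlock(&req.lock);
+	}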
+
+Many thanks to Valentin, who used TLA+ most effectively and found lots
+of 'interesting' cases.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/sched.h | 1 +
+ kernel/sched/core.c | 234 ++++++++++++++++++++++++++++++++++++------
+ 2 files changed, 205 insertions(+), 30 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 76907e9876d5..5b5c194f5a62 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -726,6 +726,7 @@ struct task_struct {
+ int nr_cpus_allowed;
+ const cpumask_t *cpus_ptr;
+ cpumask_t cpus_mask;
++ void *migration_pending;
+ #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+ int migration_disabled;
+ #endif
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 5c9db4b2b6ec..3af7c42896c9 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1746,15 +1746,26 @@ void migrate_enable(void)
+ {
+ struct task_struct *p = current;
+
+- if (--p->migration_disabled)
++ if (p->migration_disabled > 1) {
++ p->migration_disabled--;
+ return;
++ }
+
++ /*
++ * Ensure stop_task runs either before or after this, and that
++ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
++ */
++ preempt_disable();
++ if (p->cpus_ptr != &p->cpus_mask)
++ __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
++ /*
++ * Mustn't clear migration_disabled() until cpus_ptr points back at the
++ * regular cpus_mask, otherwise things that race (eg.
++ * select_fallback_rq) get confused.
++ */
+ barrier();
+-
+- if (p->cpus_ptr == &p->cpus_mask)
+- return;
+-
+- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
++ p->migration_disabled = 0;
++ preempt_enable();
+ }
+ EXPORT_SYMBOL_GPL(migrate_enable);
+
+@@ -1819,8 +1830,16 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
+ }
+
+ struct migration_arg {
+- struct task_struct *task;
+- int dest_cpu;
++ struct task_struct *task;
++ int dest_cpu;
++ struct set_affinity_pending *pending;
++};
++
++struct set_affinity_pending {
++ refcount_t refs;
++ struct completion done;
++ struct cpu_stop_work stop_work;
++ struct migration_arg arg;
+ };
+
+ /*
+@@ -1852,16 +1871,19 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
+ */
+ static int migration_cpu_stop(void *data)
+ {
++ struct set_affinity_pending *pending;
+ struct migration_arg *arg = data;
+ struct task_struct *p = arg->task;
++ int dest_cpu = arg->dest_cpu;
+ struct rq *rq = this_rq();
++ bool complete = false;
+ struct rq_flags rf;
+
+ /*
+ * The original target CPU might have gone down and we might
+ * be on another CPU but it doesn't matter.
+ */
+- local_irq_disable();
++ local_irq_save(rf.flags);
+ /*
+ * We need to explicitly wake pending tasks before running
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
+@@ -1871,21 +1893,83 @@ static int migration_cpu_stop(void *data)
+
+ raw_spin_lock(&p->pi_lock);
+ rq_lock(rq, &rf);
++
++ pending = p->migration_pending;
+ /*
+ * If task_rq(p) != rq, it cannot be migrated here, because we're
+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
+ * we're holding p->pi_lock.
+ */
+ if (task_rq(p) == rq) {
++ if (is_migration_disabled(p))
++ goto out;
++
++ if (pending) {
++ p->migration_pending = NULL;
++ complete = true;
++ }
++
++ /* migrate_enable() -- we must not race against SCA */
++ if (dest_cpu < 0) {
++ /*
++ * When this was migrate_enable() but we no longer
++ * have a @pending, a concurrent SCA 'fixed' things
++ * and we should be valid again. Nothing to do.
++ */
++ if (!pending) {
++ WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
++ goto out;
++ }
++
++ dest_cpu = cpumask_any_distribute(&p->cpus_mask);
++ }
++
+ if (task_on_rq_queued(p))
+- rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
++ rq = __migrate_task(rq, &rf, p, dest_cpu);
+ else
+- p->wake_cpu = arg->dest_cpu;
++ p->wake_cpu = dest_cpu;
++
++ } else if (dest_cpu < 0) {
++ /*
++ * This happens when we get migrated between migrate_enable()'s
++ * preempt_enable() and scheduling the stopper task. At that
++ * point we're a regular task again and not current anymore.
++ *
++ * A !PREEMPT kernel has a giant hole here, which makes it far
++ * more likely.
++ */
++
++ /*
++ * When this was migrate_enable() but we no longer have an
++ * @pending, a concurrent SCA 'fixed' things and we should be
++ * valid again. Nothing to do.
++ */
++ if (!pending) {
++ WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
++ goto out;
++ }
++
++ /*
++ * When migrate_enable() hits a rq mis-match we can't reliably
++ * determine is_migration_disabled() and so have to chase after
++ * it.
++ */
++ task_rq_unlock(rq, p, &rf);
++ stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
++ &pending->arg, &pending->stop_work);
++ return 0;
+ }
+- rq_unlock(rq, &rf);
+- raw_spin_unlock(&p->pi_lock);
++out:
++ task_rq_unlock(rq, p, &rf);
++
++ if (complete)
++ complete_all(&pending->done);
++
++ /* For pending->{arg,stop_work} */
++ pending = arg->pending;
++ if (pending && refcount_dec_and_test(&pending->refs))
++ wake_up_var(&pending->refs);
+
+- local_irq_enable();
+ return 0;
+ }
+
+@@ -1954,6 +2038,110 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ __do_set_cpus_allowed(p, new_mask, 0);
+ }
+
++/*
++ * This function is wildly self concurrent, consider at least 3 times.
++ */
++static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
++ int dest_cpu, unsigned int flags)
++{
++ struct set_affinity_pending my_pending = { }, *pending = NULL;
++ struct migration_arg arg = {
++ .task = p,
++ .dest_cpu = dest_cpu,
++ };
++ bool complete = false;
++
++ /* Can the task run on the task's current CPU? If so, we're done */
++ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
++ pending = p->migration_pending;
++ if (pending) {
++ refcount_inc(&pending->refs);
++ p->migration_pending = NULL;
++ complete = true;
++ }
++ task_rq_unlock(rq, p, rf);
++
++ if (complete)
++ goto do_complete;
++
++ return 0;
++ }
++
++ if (!(flags & SCA_MIGRATE_ENABLE)) {
++ /* serialized by p->pi_lock */
++ if (!p->migration_pending) {
++ refcount_set(&my_pending.refs, 1);
++ init_completion(&my_pending.done);
++ p->migration_pending = &my_pending;
++ } else {
++ pending = p->migration_pending;
++ refcount_inc(&pending->refs);
++ }
++ }
++ pending = p->migration_pending;
++ /*
++ * - !MIGRATE_ENABLE:
++ * we'll have installed a pending if there wasn't one already.
++ *
++ * - MIGRATE_ENABLE:
++ * we're here because the current CPU isn't matching anymore,
++ * the only way that can happen is because of a concurrent
++ * set_cpus_allowed_ptr() call, which should then still be
++ * pending completion.
++ *
++ * Either way, we really should have a @pending here.
++ */
++ if (WARN_ON_ONCE(!pending))
++ return -EINVAL;
++
++ if (flags & SCA_MIGRATE_ENABLE) {
++
++ refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
++ task_rq_unlock(rq, p, rf);
++
++ pending->arg = (struct migration_arg) {
++ .task = p,
++ .dest_cpu = -1,
++ .pending = pending,
++ };
++
++ stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
++ &pending->arg, &pending->stop_work);
++
++ return 0;
++ }
++
++ if (task_running(rq, p) || p->state == TASK_WAKING) {
++
++ task_rq_unlock(rq, p, rf);
++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
++
++ } else {
++
++ if (!is_migration_disabled(p)) {
++ if (task_on_rq_queued(p))
++ rq = move_queued_task(rq, rf, p, dest_cpu);
++
++ p->migration_pending = NULL;
++ complete = true;
++ }
++ task_rq_unlock(rq, p, rf);
++
++do_complete:
++ if (complete)
++ complete_all(&pending->done);
++ }
++
++ wait_for_completion(&pending->done);
++
++ if (refcount_dec_and_test(&pending->refs))
++ wake_up_var(&pending->refs);
++
++ wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
++
++ return 0;
++}
++
+ /*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+@@ -2023,23 +2211,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+ p->nr_cpus_allowed != 1);
+ }
+
+- /* Can the task run on the task's current CPU? If so, we're done */
+- if (cpumask_test_cpu(task_cpu(p), new_mask))
+- goto out;
++ return affine_move_task(rq, p, &rf, dest_cpu, flags);
+
+- if (task_running(rq, p) || p->state == TASK_WAKING) {
+- struct migration_arg arg = { p, dest_cpu };
+- /* Need help from migration thread: drop lock and wait. */
+- task_rq_unlock(rq, p, &rf);
+- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+- return 0;
+- } else if (task_on_rq_queued(p)) {
+- /*
+- * OK, since we're going to drop the lock immediately
+- * afterwards anyway.
+- */
+- rq = move_queued_task(rq, &rf, p, dest_cpu);
+- }
+ out:
+ task_rq_unlock(rq, p, &rf);
+
+@@ -3230,6 +3403,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ init_numa_balancing(clone_flags, p);
+ #ifdef CONFIG_SMP
+ p->wake_entry.u_flags = CSD_TYPE_TTWU;
++ p->migration_pending = NULL;
+ #endif
+ }
+
+--
+2.43.0
+