| | | |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:06:00 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:06:00 +0000 |
| commit | b15a952c52a6825376d3e7f6c1bf5c886c6d8b74 (patch) | |
| tree | 1500f2f8f276908a36d8126cb632c0d6b1276764 /debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch | |
| parent | Adding upstream version 5.10.209. (diff) | |
| download | linux-b15a952c52a6825376d3e7f6c1bf5c886c6d8b74.tar.xz, linux-b15a952c52a6825376d3e7f6c1bf5c886c6d8b74.zip | |
Adding debian version 5.10.209-2. (tag: debian/5.10.209-2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch')
-rw-r--r-- | debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch | 370 |
1 file changed, 370 insertions, 0 deletions
diff --git a/debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch b/debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch
new file mode 100644
index 000000000..d89f86138
--- /dev/null
+++ b/debian/patches-rt/0011-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch
@@ -0,0 +1,370 @@
+From f6fcadc60ec427b9eeb4b734b77b7b110c050b83 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 23 Oct 2020 12:12:08 +0200
+Subject: [PATCH 011/323] sched: Fix migrate_disable() vs
+ set_cpus_allowed_ptr()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+Concurrent migrate_disable() and set_cpus_allowed_ptr() has
+interesting features. We rely on set_cpus_allowed_ptr() to not return
+until the task runs inside the provided mask. This expectation is
+exported to userspace.
+
+This means that any set_cpus_allowed_ptr() caller must wait until
+migrate_enable() allows migrations.
+
+At the same time, we don't want migrate_enable() to schedule, due to
+patterns like:
+
+	preempt_disable();
+	migrate_disable();
+	...
+	migrate_enable();
+	preempt_enable();
+
+And:
+
+	raw_spin_lock(&B);
+	spin_unlock(&A);
+
+this means that when migrate_enable() must restore the affinity
+mask, it cannot wait for completion thereof. Luck will have it that
+that is exactly the case where there is a pending
+set_cpus_allowed_ptr(), so let that provide storage for the async stop
+machine.
+
+Much thanks to Valentin who used TLA+ most effective and found lots of
+'interesting' cases.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/sched.h |   1 +
+ kernel/sched/core.c   | 234 ++++++++++++++++++++++++++++++++++++------
+ 2 files changed, 205 insertions(+), 30 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 76907e9876d5..5b5c194f5a62 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -726,6 +726,7 @@ struct task_struct {
+ 	int				nr_cpus_allowed;
+ 	const cpumask_t			*cpus_ptr;
+ 	cpumask_t			cpus_mask;
++	void				*migration_pending;
+ #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+ 	int				migration_disabled;
+ #endif
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 5c9db4b2b6ec..3af7c42896c9 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1746,15 +1746,26 @@ void migrate_enable(void)
+ {
+ 	struct task_struct *p = current;
+
+-	if (--p->migration_disabled)
++	if (p->migration_disabled > 1) {
++		p->migration_disabled--;
+ 		return;
++	}
+
++	/*
++	 * Ensure stop_task runs either before or after this, and that
++	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
++	 */
++	preempt_disable();
++	if (p->cpus_ptr != &p->cpus_mask)
++		__set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
++	/*
++	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
++	 * regular cpus_mask, otherwise things that race (eg.
++	 * select_fallback_rq) get confused.
++	 */
+ 	barrier();
+-
+-	if (p->cpus_ptr == &p->cpus_mask)
+-		return;
+-
+-	__set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
++	p->migration_disabled = 0;
++	preempt_enable();
+ }
+ EXPORT_SYMBOL_GPL(migrate_enable);
+
+@@ -1819,8 +1830,16 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
+ }
+
+ struct migration_arg {
+-	struct task_struct *task;
+-	int dest_cpu;
++	struct task_struct		*task;
++	int				dest_cpu;
++	struct set_affinity_pending	*pending;
++};
++
++struct set_affinity_pending {
++	refcount_t		refs;
++	struct completion	done;
++	struct cpu_stop_work	stop_work;
++	struct migration_arg	arg;
+ };
+
+ /*
+@@ -1852,16 +1871,19 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
+  */
+ static int migration_cpu_stop(void *data)
+ {
++	struct set_affinity_pending *pending;
+ 	struct migration_arg *arg = data;
+ 	struct task_struct *p = arg->task;
++	int dest_cpu = arg->dest_cpu;
+ 	struct rq *rq = this_rq();
++	bool complete = false;
+ 	struct rq_flags rf;
+
+ 	/*
+ 	 * The original target CPU might have gone down and we might
+ 	 * be on another CPU but it doesn't matter.
+ 	 */
+-	local_irq_disable();
++	local_irq_save(rf.flags);
+ 	/*
+ 	 * We need to explicitly wake pending tasks before running
+ 	 * __migrate_task() such that we will not miss enforcing cpus_ptr
+@@ -1871,21 +1893,83 @@ static int migration_cpu_stop(void *data)
+
+ 	raw_spin_lock(&p->pi_lock);
+ 	rq_lock(rq, &rf);
++
++	pending = p->migration_pending;
+ 	/*
+ 	 * If task_rq(p) != rq, it cannot be migrated here, because we're
+ 	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
+ 	 * we're holding p->pi_lock.
+ 	 */
+ 	if (task_rq(p) == rq) {
++		if (is_migration_disabled(p))
++			goto out;
++
++		if (pending) {
++			p->migration_pending = NULL;
++			complete = true;
++		}
++
++		/* migrate_enable() -- we must not race against SCA */
++		if (dest_cpu < 0) {
++			/*
++			 * When this was migrate_enable() but we no longer
++			 * have a @pending, a concurrent SCA 'fixed' things
++			 * and we should be valid again. Nothing to do.
++			 */
++			if (!pending) {
++				WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
++				goto out;
++			}
++
++			dest_cpu = cpumask_any_distribute(&p->cpus_mask);
++		}
++
+ 		if (task_on_rq_queued(p))
+-			rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
++			rq = __migrate_task(rq, &rf, p, dest_cpu);
+ 		else
+-			p->wake_cpu = arg->dest_cpu;
++			p->wake_cpu = dest_cpu;
++
++	} else if (dest_cpu < 0) {
++		/*
++		 * This happens when we get migrated between migrate_enable()'s
++		 * preempt_enable() and scheduling the stopper task. At that
++		 * point we're a regular task again and not current anymore.
++		 *
++		 * A !PREEMPT kernel has a giant hole here, which makes it far
++		 * more likely.
++		 */
++
++		/*
++		 * When this was migrate_enable() but we no longer have an
++		 * @pending, a concurrent SCA 'fixed' things and we should be
++		 * valid again. Nothing to do.
++		 */
++		if (!pending) {
++			WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
++			goto out;
++		}
++
++		/*
++		 * When migrate_enable() hits a rq mis-match we can't reliably
++		 * determine is_migration_disabled() and so have to chase after
++		 * it.
++		 */
++		task_rq_unlock(rq, p, &rf);
++		stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
++				    &pending->arg, &pending->stop_work);
++		return 0;
+ 	}
+-	rq_unlock(rq, &rf);
+-	raw_spin_unlock(&p->pi_lock);
++out:
++	task_rq_unlock(rq, p, &rf);
++
++	if (complete)
++		complete_all(&pending->done);
++
++	/* For pending->{arg,stop_work} */
++	pending = arg->pending;
++	if (pending && refcount_dec_and_test(&pending->refs))
++		wake_up_var(&pending->refs);
+
+-	local_irq_enable();
+ 	return 0;
+ }
+
+@@ -1954,6 +2038,110 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ 	__do_set_cpus_allowed(p, new_mask, 0);
+ }
+
++/*
++ * This function is wildly self concurrent, consider at least 3 times.
++ */
++static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
++			    int dest_cpu, unsigned int flags)
++{
++	struct set_affinity_pending my_pending = { }, *pending = NULL;
++	struct migration_arg arg = {
++		.task = p,
++		.dest_cpu = dest_cpu,
++	};
++	bool complete = false;
++
++	/* Can the task run on the task's current CPU? If so, we're done */
++	if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
++		pending = p->migration_pending;
++		if (pending) {
++			refcount_inc(&pending->refs);
++			p->migration_pending = NULL;
++			complete = true;
++		}
++		task_rq_unlock(rq, p, rf);
++
++		if (complete)
++			goto do_complete;
++
++		return 0;
++	}
++
++	if (!(flags & SCA_MIGRATE_ENABLE)) {
++		/* serialized by p->pi_lock */
++		if (!p->migration_pending) {
++			refcount_set(&my_pending.refs, 1);
++			init_completion(&my_pending.done);
++			p->migration_pending = &my_pending;
++		} else {
++			pending = p->migration_pending;
++			refcount_inc(&pending->refs);
++		}
++	}
++	pending = p->migration_pending;
++	/*
++	 * - !MIGRATE_ENABLE:
++	 *	we'll have installed a pending if there wasn't one already.
++	 *
++	 * - MIGRATE_ENABLE:
++	 *	we're here because the current CPU isn't matching anymore,
++	 *	the only way that can happen is because of a concurrent
++	 *	set_cpus_allowed_ptr() call, which should then still be
++	 *	pending completion.
++	 *
++	 * Either way, we really should have a @pending here.
++	 */
++	if (WARN_ON_ONCE(!pending))
++		return -EINVAL;
++
++	if (flags & SCA_MIGRATE_ENABLE) {
++
++		refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
++		task_rq_unlock(rq, p, rf);
++
++		pending->arg = (struct migration_arg) {
++			.task = p,
++			.dest_cpu = -1,
++			.pending = pending,
++		};
++
++		stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
++				    &pending->arg, &pending->stop_work);
++
++		return 0;
++	}
++
++	if (task_running(rq, p) || p->state == TASK_WAKING) {
++
++		task_rq_unlock(rq, p, rf);
++		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
++
++	} else {
++
++		if (!is_migration_disabled(p)) {
++			if (task_on_rq_queued(p))
++				rq = move_queued_task(rq, rf, p, dest_cpu);
++
++			p->migration_pending = NULL;
++			complete = true;
++		}
++		task_rq_unlock(rq, p, rf);
++
++do_complete:
++		if (complete)
++			complete_all(&pending->done);
++	}
++
++	wait_for_completion(&pending->done);
++
++	if (refcount_dec_and_test(&pending->refs))
++		wake_up_var(&pending->refs);
++
++	wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
++
++	return 0;
++}
++
+ /*
+  * Change a given task's CPU affinity. Migrate the thread to a
+  * proper CPU and schedule it away if the CPU it's executing on
+@@ -2023,23 +2211,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+ 					  p->nr_cpus_allowed != 1);
+ 	}
+
+-	/* Can the task run on the task's current CPU? If so, we're done */
+-	if (cpumask_test_cpu(task_cpu(p), new_mask))
+-		goto out;
++	return affine_move_task(rq, p, &rf, dest_cpu, flags);
+
+-	if (task_running(rq, p) || p->state == TASK_WAKING) {
+-		struct migration_arg arg = { p, dest_cpu };
+-		/* Need help from migration thread: drop lock and wait. */
+-		task_rq_unlock(rq, p, &rf);
+-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+-		return 0;
+-	} else if (task_on_rq_queued(p)) {
+-		/*
+-		 * OK, since we're going to drop the lock immediately
+-		 * afterwards anyway.
+-		 */
+-		rq = move_queued_task(rq, &rf, p, dest_cpu);
+-	}
+ out:
+ 	task_rq_unlock(rq, p, &rf);
+
+@@ -3230,6 +3403,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 	init_numa_balancing(clone_flags, p);
+ #ifdef CONFIG_SMP
+ 	p->wake_entry.u_flags = CSD_TYPE_TTWU;
++	p->migration_pending = NULL;
+ #endif
+ }
+
+-- 
+2.43.0
+
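The changelog relies on a contract that is visible from userspace: once set_cpus_allowed_ptr() returns, the target task only runs inside the new mask, and sched_setaffinity()/pthread_setaffinity_np() inherit that guarantee. The standalone C sketch below is an illustration added here, not part of the patch; the file name, the hard-coded target CPU 1 and the probe/flag handshake are assumptions for the example, and it presumes at least two online CPUs with no restrictive cpuset.

```c
/* affinity_contract_demo.c (hypothetical name)
 *
 * Build: gcc -O2 -pthread affinity_contract_demo.c -o affinity_contract_demo
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int probe_requested;   /* main -> worker: sample your CPU now      */
static atomic_int probed_cpu = -1;   /* worker -> main: CPU seen after the switch */

static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		if (atomic_load(&probe_requested)) {
			/* Runs strictly after pthread_setaffinity_np() returned,
			 * so this CPU must already be inside the new mask. */
			atomic_store(&probed_cpu, sched_getcpu());
			return NULL;
		}
	}
}

int main(void)
{
	pthread_t tid;
	cpu_set_t mask;

	if (pthread_create(&tid, NULL, worker, NULL) != 0)
		return 1;

	/* Move the busy worker onto CPU 1 (assumes at least two online CPUs). */
	CPU_ZERO(&mask);
	CPU_SET(1, &mask);
	if (pthread_setaffinity_np(tid, sizeof(mask), &mask) != 0) {
		fprintf(stderr, "pthread_setaffinity_np failed\n");
		return 1;
	}

	/* Ask for a sample only after the affinity call has returned. */
	atomic_store(&probe_requested, 1);
	pthread_join(tid, NULL);

	printf("worker sampled CPU %d (expected: 1)\n", atomic_load(&probed_cpu));
	return 0;
}
```

On a machine that meets those assumptions the printed CPU should be 1; the point of the exercise is that the worker is only asked to sample its CPU after the affinity call has returned.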
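The core of the patch is the lifetime rule in affine_move_task(): the blocking set_cpus_allowed_ptr() caller keeps struct set_affinity_pending on its own stack, the completion signals that the affinity change took effect, and the refcount together with the final wait_var_event() keeps that stack object alive until the stopper callback and migrate_enable() have stopped borrowing it. The sketch below is a userspace analogy of that rule, with a mutex/condvar standing in for the kernel's completion, wake_up_var() and wait_var_event(); every name in it is invented for the example.

```c
#include <pthread.h>
#include <stdio.h>

/* Analogue of struct set_affinity_pending: refcount + completion,
 * owned by the stack frame of the thread that blocks. */
struct pending {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             refs;  /* users of this on-stack object       */
	int             done;  /* "the affinity change has completed" */
};

static void pending_get(struct pending *p)
{
	pthread_mutex_lock(&p->lock);
	p->refs++;
	pthread_mutex_unlock(&p->lock);
}

static void pending_put(struct pending *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->refs == 0)
		pthread_cond_broadcast(&p->cond);  /* like wake_up_var() */
	pthread_mutex_unlock(&p->lock);
}

/* Stands in for the async side (stopper callback / migrate_enable()):
 * it only borrows the waiter's storage and drops its reference last. */
static void *async_user(void *arg)
{
	struct pending *p = arg;

	pthread_mutex_lock(&p->lock);
	p->done = 1;                       /* like complete_all() */
	pthread_cond_broadcast(&p->cond);
	pthread_mutex_unlock(&p->lock);

	pending_put(p);
	return NULL;
}

int main(void)
{
	struct pending my_pending = { .refs = 1 }; /* waiter's own reference */
	pthread_t tid;

	pthread_mutex_init(&my_pending.lock, NULL);
	pthread_cond_init(&my_pending.cond, NULL);

	pending_get(&my_pending);          /* reference handed to the async user */
	pthread_create(&tid, NULL, async_user, &my_pending);

	/* wait_for_completion(&pending->done) */
	pthread_mutex_lock(&my_pending.lock);
	while (!my_pending.done)
		pthread_cond_wait(&my_pending.cond, &my_pending.lock);
	pthread_mutex_unlock(&my_pending.lock);

	pending_put(&my_pending);          /* drop the waiter's own reference */

	/* wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)):
	 * only now is it safe to let the stack frame (and my_pending) go away. */
	pthread_mutex_lock(&my_pending.lock);
	while (my_pending.refs)
		pthread_cond_wait(&my_pending.cond, &my_pending.lock);
	pthread_mutex_unlock(&my_pending.lock);

	pthread_join(tid, NULL);
	puts("on-stack pending released only after the last user dropped its ref");
	return 0;
}
```

The design point this mirrors is that the asynchronous user never owns storage of its own: it borrows the waiter's, and the waiter must not leave its stack frame until the last reference is gone.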