Diffstat
-rw-r--r-- | debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch | 283 ++++++++++
1 file changed, 283 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch b/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch
new file mode 100644
index 000000000..606e32fa1
--- /dev/null
+++ b/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch
@@ -0,0 +1,283 @@
+From 61ebefbbf2500f6ade2182806061526bbde76a28 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 23 Oct 2020 12:12:04 +0200
+Subject: [PATCH 007/323] sched/hotplug: Consolidate task migration on CPU
+ unplug
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+With the new mechanism which kicks tasks off the outgoing CPU at the end of
+schedule(), the situation on an outgoing CPU right before the stopper thread
+brings it down completely is:
+
+  - All user tasks and all unbound kernel threads have either been migrated
+    away or are not running, and the next wakeup will move them to an online CPU.
+
+  - All per-CPU kernel threads, except the CPU hotplug thread and the stopper
+    thread, have either been unbound or parked by the responsible CPU hotplug
+    callback.
+
+That means that at the last step before the stopper thread is invoked, the
+CPU hotplug thread is the last legitimate running task on the outgoing
+CPU.
+
+Add a final wait step right before the stopper thread is kicked which
+ensures that any still-running tasks on the way to park or on the way to
+kick themselves off the CPU are either sleeping or gone.
+
+This allows removing the migrate_tasks() crutch in sched_cpu_dying(). If
+sched_cpu_dying() detects that there is still another running task aside
+from the stopper thread, then it will explode with the appropriate fireworks.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/cpuhotplug.h    |   1 +
+ include/linux/sched/hotplug.h |   2 +
+ kernel/cpu.c                  |   9 +-
+ kernel/sched/core.c           | 154 ++++++++--------------------
+ 4 files changed, 46 insertions(+), 120 deletions(-)
+
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index f5a5df3a8cfd..daf39c04a552 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -155,6 +155,7 @@ enum cpuhp_state {
+ 	CPUHP_AP_ONLINE,
+ 	CPUHP_TEARDOWN_CPU,
+ 	CPUHP_AP_ONLINE_IDLE,
++	CPUHP_AP_SCHED_WAIT_EMPTY,
+ 	CPUHP_AP_SMPBOOT_THREADS,
+ 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
+diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
+index 9a62ffdd296f..412cdaba33eb 100644
+--- a/include/linux/sched/hotplug.h
++++ b/include/linux/sched/hotplug.h
+@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
+ extern int sched_cpu_deactivate(unsigned int cpu);
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
++extern int sched_cpu_wait_empty(unsigned int cpu);
+ extern int sched_cpu_dying(unsigned int cpu);
+ #else
++# define sched_cpu_wait_empty	NULL
+ # define sched_cpu_dying	NULL
+ #endif
+ 
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index abf717c4f57c..f8280edb679f 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1674,7 +1674,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ 		.name			= "ap:online",
+ 	},
+ 	/*
+-	 * Handled on controll processor until the plugged processor manages
++	 * Handled on control processor until the plugged processor manages
+ 	 * this itself.
+ 	 */
+ 	[CPUHP_TEARDOWN_CPU] = {
+@@ -1683,6 +1683,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ 		.teardown.single	= takedown_cpu,
+ 		.cant_stop		= true,
+ 	},
++
++	[CPUHP_AP_SCHED_WAIT_EMPTY] = {
++		.name			= "sched:waitempty",
++		.startup.single		= NULL,
++		.teardown.single	= sched_cpu_wait_empty,
++	},
++
+ 	/* Handle smpboot threads park/unpark */
+ 	[CPUHP_AP_SMPBOOT_THREADS] = {
+ 		.name			= "smpboot/threads:online",
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b902755615d7..a26a82c3e939 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6741,120 +6741,6 @@ void idle_task_exit(void)
+ 	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
+ }
+ 
+-/*
+- * Since this CPU is going 'away' for a while, fold any nr_active delta
+- * we might have. Assumes we're called after migrate_tasks() so that the
+- * nr_active count is stable. We need to take the teardown thread which
+- * is calling this into account, so we hand in adjust = 1 to the load
+- * calculation.
+- *
+- * Also see the comment "Global load-average calculations".
+- */
+-static void calc_load_migrate(struct rq *rq)
+-{
+-	long delta = calc_load_fold_active(rq, 1);
+-	if (delta)
+-		atomic_long_add(delta, &calc_load_tasks);
+-}
+-
+-static struct task_struct *__pick_migrate_task(struct rq *rq)
+-{
+-	const struct sched_class *class;
+-	struct task_struct *next;
+-
+-	for_each_class(class) {
+-		next = class->pick_next_task(rq);
+-		if (next) {
+-			next->sched_class->put_prev_task(rq, next);
+-			return next;
+-		}
+-	}
+-
+-	/* The idle class should always have a runnable task */
+-	BUG();
+-}
+-
+-/*
+- * Migrate all tasks from the rq, sleeping tasks will be migrated by
+- * try_to_wake_up()->select_task_rq().
+- *
+- * Called with rq->lock held even though we'er in stop_machine() and
+- * there's no concurrency possible, we hold the required locks anyway
+- * because of lock validation efforts.
+- */
+-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
+-{
+-	struct rq *rq = dead_rq;
+-	struct task_struct *next, *stop = rq->stop;
+-	struct rq_flags orf = *rf;
+-	int dest_cpu;
+-
+-	/*
+-	 * Fudge the rq selection such that the below task selection loop
+-	 * doesn't get stuck on the currently eligible stop task.
+-	 *
+-	 * We're currently inside stop_machine() and the rq is either stuck
+-	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
+-	 * either way we should never end up calling schedule() until we're
+-	 * done here.
+-	 */
+-	rq->stop = NULL;
+-
+-	/*
+-	 * put_prev_task() and pick_next_task() sched
+-	 * class method both need to have an up-to-date
+-	 * value of rq->clock[_task]
+-	 */
+-	update_rq_clock(rq);
+-
+-	for (;;) {
+-		/*
+-		 * There's this thread running, bail when that's the only
+-		 * remaining thread:
+-		 */
+-		if (rq->nr_running == 1)
+-			break;
+-
+-		next = __pick_migrate_task(rq);
+-
+-		/*
+-		 * Rules for changing task_struct::cpus_mask are holding
+-		 * both pi_lock and rq->lock, such that holding either
+-		 * stabilizes the mask.
+-		 *
+-		 * Drop rq->lock is not quite as disastrous as it usually is
+-		 * because !cpu_active at this point, which means load-balance
+-		 * will not interfere. Also, stop-machine.
+-		 */
+-		rq_unlock(rq, rf);
+-		raw_spin_lock(&next->pi_lock);
+-		rq_relock(rq, rf);
+-
+-		/*
+-		 * Since we're inside stop-machine, _nothing_ should have
+-		 * changed the task, WARN if weird stuff happened, because in
+-		 * that case the above rq->lock drop is a fail too.
+-		 */
+-		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+-			raw_spin_unlock(&next->pi_lock);
+-			continue;
+-		}
+-
+-		/* Find suitable destination for @next, with force if needed. */
+-		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+-		rq = __migrate_task(rq, rf, next, dest_cpu);
+-		if (rq != dead_rq) {
+-			rq_unlock(rq, rf);
+-			rq = dead_rq;
+-			*rf = orf;
+-			rq_relock(rq, rf);
+-		}
+-		raw_spin_unlock(&next->pi_lock);
+-	}
+-
+-	rq->stop = stop;
+-}
+-
+ static int __balance_push_cpu_stop(void *arg)
+ {
+ 	struct task_struct *p = arg;
+@@ -7125,10 +7011,6 @@ int sched_cpu_deactivate(unsigned int cpu)
+ 		return ret;
+ 	}
+ 	sched_domains_numa_masks_clear(cpu);
+-
+-	/* Wait for all non per CPU kernel threads to vanish. */
+-	balance_hotplug_wait();
+-
+ 	return 0;
+ }
+ 
+@@ -7148,6 +7030,41 @@ int sched_cpu_starting(unsigned int cpu)
+ }
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
++
++/*
++ * Invoked immediately before the stopper thread is invoked to bring the
++ * CPU down completely. At this point all per CPU kthreads except the
++ * hotplug thread (current) and the stopper thread (inactive) have been
++ * either parked or have been unbound from the outgoing CPU. Ensure that
++ * any of those which might be on the way out are gone.
++ *
++ * If after this point a bound task is being woken on this CPU then the
++ * responsible hotplug callback has failed to do its job.
++ * sched_cpu_dying() will catch it with the appropriate fireworks.
++ */
++int sched_cpu_wait_empty(unsigned int cpu)
++{
++	balance_hotplug_wait();
++	return 0;
++}
++
++/*
++ * Since this CPU is going 'away' for a while, fold any nr_active delta we
++ * might have. Called from the CPU stopper task after ensuring that the
++ * stopper is the last running task on the CPU, so nr_active count is
++ * stable. We need to take the teardown thread which is calling this into
++ * account, so we hand in adjust = 1 to the load calculation.
++ *
++ * Also see the comment "Global load-average calculations".
++ */
++static void calc_load_migrate(struct rq *rq)
++{
++	long delta = calc_load_fold_active(rq, 1);
++
++	if (delta)
++		atomic_long_add(delta, &calc_load_tasks);
++}
++
+ int sched_cpu_dying(unsigned int cpu)
+ {
+ 	struct rq *rq = cpu_rq(cpu);
+@@ -7161,7 +7078,6 @@ int sched_cpu_dying(unsigned int cpu)
+ 		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+ 		set_rq_offline(rq);
+ 	}
+-	migrate_tasks(rq, &rf);
+ 	BUG_ON(rq->nr_running != 1);
+ 	rq_unlock_irqrestore(rq, &rf);
+ 
+-- 
+2.43.0
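
The enum placement above is what makes the new wait step run at the right moment: teardown callbacks execute in reverse state order, so on unplug sched_cpu_wait_empty() runs after every higher AP state (including smpboot thread parking) has been torn down and immediately before takedown_cpu() summons the stopper. For readers less familiar with this machinery, below is a minimal, hypothetical sketch of the same startup/teardown pairing done from a module through the public cpuhp API; it is not part of the patch. The module name, the "demo:online" state name, and the messages are invented for illustration, and it uses a dynamically allocated state (CPUHP_AP_ONLINE_DYN) because fixed states such as CPUHP_AP_SCHED_WAIT_EMPTY are reserved for core code.

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Illustrative sketch only: register a dynamic CPU hotplug state whose
 * teardown callback runs for each outgoing CPU, mirroring the
 * startup/teardown pairing that CPUHP_AP_SCHED_WAIT_EMPTY uses above.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cpuhotplug.h>

static enum cpuhp_state demo_state;

/* startup.single: invoked for each CPU as it comes online. */
static int demo_cpu_online(unsigned int cpu)
{
	pr_info("demo: CPU %u is online\n", cpu);
	return 0;
}

/*
 * teardown.single: invoked while a CPU goes down, before lower states
 * such as CPUHP_TEARDOWN_CPU run -- just as sched_cpu_wait_empty()
 * runs before takedown_cpu() in the patched enum ordering.
 */
static int demo_cpu_offline(unsigned int cpu)
{
	pr_info("demo: CPU %u is going away\n", cpu);
	return 0;
}

static int __init demo_init(void)
{
	int ret;

	/*
	 * CPUHP_AP_ONLINE_DYN allocates a state in the AP range and
	 * returns its number; the startup callback is also invoked once
	 * for every CPU that is already online.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
				demo_cpu_online, demo_cpu_offline);
	if (ret < 0)
		return ret;
	demo_state = ret;
	return 0;
}

static void __exit demo_exit(void)
{
	/* Runs the teardown callback on all online CPUs, then frees the state. */
	cpuhp_remove_state(demo_state);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("cpuhp startup/teardown pairing demo");

Note the symmetry the sketch relies on: a state's callbacks always come as a startup/teardown pair, and either side may be NULL, which is exactly how the patch wires in sched_cpu_wait_empty() as a teardown-only step.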