Diffstat (limited to 'debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch')
-rw-r--r--  debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch  283
1 file changed, 283 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch b/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch
new file mode 100644
index 000000000..606e32fa1
--- /dev/null
+++ b/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch
@@ -0,0 +1,283 @@
+From 61ebefbbf2500f6ade2182806061526bbde76a28 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 23 Oct 2020 12:12:04 +0200
+Subject: [PATCH 007/323] sched/hotplug: Consolidate task migration on CPU
+ unplug
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+With the new mechanism which kicks tasks off the outgoing CPU at the end of
+schedule() the situation on an outgoing CPU right before the stopper thread
+brings it down completely is:
+
+ - All user tasks and all unbound kernel threads have either been migrated
+ away or are not running, and the next wakeup will move them to an online CPU.
+
+ - All per CPU kernel threads, except the CPU hotplug thread and the stopper
+ thread, have either been unbound or parked by the responsible CPU hotplug
+ callback.
+
+That means that at the last step before the stopper thread is invoked, the
+CPU hotplug thread is the last legitimate running task on the outgoing
+CPU.
+
+Add a final wait step right before the stopper thread is kicked, which
+ensures that any still-running tasks on the way to park or on the way to
+kick themselves off the CPU are either sleeping or gone.
+
+This allows removing the migrate_tasks() crutch in sched_cpu_dying(). If
+sched_cpu_dying() detects that there is still another running task aside
+from the stopper thread, it will explode with the appropriate fireworks.
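+
+As a rough illustration (a simplified sketch of the hunks below, not the
+literal diff), the consolidated teardown boils down to:
+
+	/* New CPUHP_AP_SCHED_WAIT_EMPTY teardown callback. */
+	int sched_cpu_wait_empty(unsigned int cpu)
+	{
+		/* Wait until the hotplug thread is the last task left on the CPU. */
+		balance_hotplug_wait();
+		return 0;
+	}
+
+	int sched_cpu_dying(unsigned int cpu)
+	{
+		/* ... */
+		/* migrate_tasks() is gone; any leftover task is a bug. */
+		BUG_ON(rq->nr_running != 1);
+		/* ... */
+	}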
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/cpuhotplug.h | 1 +
+ include/linux/sched/hotplug.h | 2 +
+ kernel/cpu.c | 9 +-
+ kernel/sched/core.c | 154 ++++++++--------------------------
+ 4 files changed, 46 insertions(+), 120 deletions(-)
+
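+For orientation (an informal, annotated excerpt of the cpuhotplug.h hunk
+below, not literal code), the new state slots in between the online-idle
+step and the smpboot thread handling:
+
+	enum cpuhp_state {
+		/* ... */
+		CPUHP_TEARDOWN_CPU,		/* takedown_cpu(): stopper runs  */
+		CPUHP_AP_ONLINE_IDLE,
+		CPUHP_AP_SCHED_WAIT_EMPTY,	/* new: sched_cpu_wait_empty()   */
+		CPUHP_AP_SMPBOOT_THREADS,	/* smpboot kthreads park/unpark  */
+		/* ... */
+	};
+
+Since teardown callbacks run from the highest reached state downwards, the
+wait for the CPU to become empty happens after the smpboot threads have
+been parked and right before takedown_cpu() kicks the stopper thread.
+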
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index f5a5df3a8cfd..daf39c04a552 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -155,6 +155,7 @@ enum cpuhp_state {
+ CPUHP_AP_ONLINE,
+ CPUHP_TEARDOWN_CPU,
+ CPUHP_AP_ONLINE_IDLE,
++ CPUHP_AP_SCHED_WAIT_EMPTY,
+ CPUHP_AP_SMPBOOT_THREADS,
+ CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ CPUHP_AP_IRQ_AFFINITY_ONLINE,
+diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
+index 9a62ffdd296f..412cdaba33eb 100644
+--- a/include/linux/sched/hotplug.h
++++ b/include/linux/sched/hotplug.h
+@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
+ extern int sched_cpu_deactivate(unsigned int cpu);
+
+ #ifdef CONFIG_HOTPLUG_CPU
++extern int sched_cpu_wait_empty(unsigned int cpu);
+ extern int sched_cpu_dying(unsigned int cpu);
+ #else
++# define sched_cpu_wait_empty NULL
+ # define sched_cpu_dying NULL
+ #endif
+
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index abf717c4f57c..f8280edb679f 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1674,7 +1674,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .name = "ap:online",
+ },
+ /*
+- * Handled on controll processor until the plugged processor manages
++ * Handled on control processor until the plugged processor manages
+ * this itself.
+ */
+ [CPUHP_TEARDOWN_CPU] = {
+@@ -1683,6 +1683,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .teardown.single = takedown_cpu,
+ .cant_stop = true,
+ },
++
++ [CPUHP_AP_SCHED_WAIT_EMPTY] = {
++ .name = "sched:waitempty",
++ .startup.single = NULL,
++ .teardown.single = sched_cpu_wait_empty,
++ },
++
+ /* Handle smpboot threads park/unpark */
+ [CPUHP_AP_SMPBOOT_THREADS] = {
+ .name = "smpboot/threads:online",
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b902755615d7..a26a82c3e939 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6741,120 +6741,6 @@ void idle_task_exit(void)
+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
+ }
+
+-/*
+- * Since this CPU is going 'away' for a while, fold any nr_active delta
+- * we might have. Assumes we're called after migrate_tasks() so that the
+- * nr_active count is stable. We need to take the teardown thread which
+- * is calling this into account, so we hand in adjust = 1 to the load
+- * calculation.
+- *
+- * Also see the comment "Global load-average calculations".
+- */
+-static void calc_load_migrate(struct rq *rq)
+-{
+- long delta = calc_load_fold_active(rq, 1);
+- if (delta)
+- atomic_long_add(delta, &calc_load_tasks);
+-}
+-
+-static struct task_struct *__pick_migrate_task(struct rq *rq)
+-{
+- const struct sched_class *class;
+- struct task_struct *next;
+-
+- for_each_class(class) {
+- next = class->pick_next_task(rq);
+- if (next) {
+- next->sched_class->put_prev_task(rq, next);
+- return next;
+- }
+- }
+-
+- /* The idle class should always have a runnable task */
+- BUG();
+-}
+-
+-/*
+- * Migrate all tasks from the rq, sleeping tasks will be migrated by
+- * try_to_wake_up()->select_task_rq().
+- *
+- * Called with rq->lock held even though we'er in stop_machine() and
+- * there's no concurrency possible, we hold the required locks anyway
+- * because of lock validation efforts.
+- */
+-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
+-{
+- struct rq *rq = dead_rq;
+- struct task_struct *next, *stop = rq->stop;
+- struct rq_flags orf = *rf;
+- int dest_cpu;
+-
+- /*
+- * Fudge the rq selection such that the below task selection loop
+- * doesn't get stuck on the currently eligible stop task.
+- *
+- * We're currently inside stop_machine() and the rq is either stuck
+- * in the stop_machine_cpu_stop() loop, or we're executing this code,
+- * either way we should never end up calling schedule() until we're
+- * done here.
+- */
+- rq->stop = NULL;
+-
+- /*
+- * put_prev_task() and pick_next_task() sched
+- * class method both need to have an up-to-date
+- * value of rq->clock[_task]
+- */
+- update_rq_clock(rq);
+-
+- for (;;) {
+- /*
+- * There's this thread running, bail when that's the only
+- * remaining thread:
+- */
+- if (rq->nr_running == 1)
+- break;
+-
+- next = __pick_migrate_task(rq);
+-
+- /*
+- * Rules for changing task_struct::cpus_mask are holding
+- * both pi_lock and rq->lock, such that holding either
+- * stabilizes the mask.
+- *
+- * Drop rq->lock is not quite as disastrous as it usually is
+- * because !cpu_active at this point, which means load-balance
+- * will not interfere. Also, stop-machine.
+- */
+- rq_unlock(rq, rf);
+- raw_spin_lock(&next->pi_lock);
+- rq_relock(rq, rf);
+-
+- /*
+- * Since we're inside stop-machine, _nothing_ should have
+- * changed the task, WARN if weird stuff happened, because in
+- * that case the above rq->lock drop is a fail too.
+- */
+- if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+- raw_spin_unlock(&next->pi_lock);
+- continue;
+- }
+-
+- /* Find suitable destination for @next, with force if needed. */
+- dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+- rq = __migrate_task(rq, rf, next, dest_cpu);
+- if (rq != dead_rq) {
+- rq_unlock(rq, rf);
+- rq = dead_rq;
+- *rf = orf;
+- rq_relock(rq, rf);
+- }
+- raw_spin_unlock(&next->pi_lock);
+- }
+-
+- rq->stop = stop;
+-}
+-
+ static int __balance_push_cpu_stop(void *arg)
+ {
+ struct task_struct *p = arg;
+@@ -7125,10 +7011,6 @@ int sched_cpu_deactivate(unsigned int cpu)
+ return ret;
+ }
+ sched_domains_numa_masks_clear(cpu);
+-
+- /* Wait for all non per CPU kernel threads to vanish. */
+- balance_hotplug_wait();
+-
+ return 0;
+ }
+
+@@ -7148,6 +7030,41 @@ int sched_cpu_starting(unsigned int cpu)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
++
++/*
++ * Invoked immediately before the stopper thread is invoked to bring the
++ * CPU down completely. At this point all per CPU kthreads except the
++ * hotplug thread (current) and the stopper thread (inactive) have been
++ * either parked or have been unbound from the outgoing CPU. Ensure that
++ * any of those which might be on the way out are gone.
++ *
++ * If after this point a bound task is being woken on this CPU then the
++ * responsible hotplug callback has failed to do its job.
++ * sched_cpu_dying() will catch it with the appropriate fireworks.
++ */
++int sched_cpu_wait_empty(unsigned int cpu)
++{
++ balance_hotplug_wait();
++ return 0;
++}
++
++/*
++ * Since this CPU is going 'away' for a while, fold any nr_active delta we
++ * might have. Called from the CPU stopper task after ensuring that the
++ * stopper is the last running task on the CPU, so nr_active count is
++ * stable. We need to take the teardown thread which is calling this into
++ * account, so we hand in adjust = 1 to the load calculation.
++ *
++ * Also see the comment "Global load-average calculations".
++ */
++static void calc_load_migrate(struct rq *rq)
++{
++ long delta = calc_load_fold_active(rq, 1);
++
++ if (delta)
++ atomic_long_add(delta, &calc_load_tasks);
++}
++
+ int sched_cpu_dying(unsigned int cpu)
+ {
+ struct rq *rq = cpu_rq(cpu);
+@@ -7161,7 +7078,6 @@ int sched_cpu_dying(unsigned int cpu)
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+ set_rq_offline(rq);
+ }
+- migrate_tasks(rq, &rf);
+ BUG_ON(rq->nr_running != 1);
+ rq_unlock_irqrestore(rq, &rf);
+
+--
+2.43.0
+