Diffstat (limited to 'debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch')
-rw-r--r--  debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch  283
1 file changed, 283 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch b/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch
new file mode 100644
index 000000000..606e32fa1
--- /dev/null
+++ b/debian/patches-rt/0007-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch
@@ -0,0 +1,283 @@
+From 61ebefbbf2500f6ade2182806061526bbde76a28 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 23 Oct 2020 12:12:04 +0200
+Subject: [PATCH 007/323] sched/hotplug: Consolidate task migration on CPU
+ unplug
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+With the new mechanism which kicks tasks off the outgoing CPU at the end of
+schedule() the situation on an outgoing CPU right before the stopper thread
+brings it down completely is:
+
+ - All user tasks and all unbound kernel threads have either been migrated
+ away or are not running, and the next wakeup will move them to an online CPU.
+
+ - All per CPU kernel threads, except the CPU hotplug thread and the stopper
+ thread, have either been unbound or parked by the responsible CPU hotplug
+ callback.
+
+That means that at the last step before the stopper thread is invoked, the
+CPU hotplug thread is the last legitimate running task on the outgoing
+CPU.
+
+Add a final wait step right before the stopper thread is kicked, which
+ensures that any still-running tasks on the way to park or on the way to
+kick themselves off the CPU are either sleeping or gone.
+
+This allows removing the migrate_tasks() crutch in sched_cpu_dying(). If
+sched_cpu_dying() detects that there is still another running task aside
+from the stopper thread, it will explode with the appropriate fireworks.
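+
+As a rough illustration (a simplified sketch of the hunks below, not the
+literal diff), the consolidated teardown boils down to:
+
+	/* New CPUHP_AP_SCHED_WAIT_EMPTY teardown callback. */
+	int sched_cpu_wait_empty(unsigned int cpu)
+	{
+		/* Wait until the hotplug thread is the last task left on the CPU. */
+		balance_hotplug_wait();
+		return 0;
+	}
+
+	int sched_cpu_dying(unsigned int cpu)
+	{
+		/* ... */
+		/* migrate_tasks() is gone; any leftover task is a bug. */
+		BUG_ON(rq->nr_running != 1);
+		/* ... */
+	}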
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/cpuhotplug.h | 1 +
+ include/linux/sched/hotplug.h | 2 +
+ kernel/cpu.c | 9 +-
+ kernel/sched/core.c | 154 ++++++++--------------------------
+ 4 files changed, 46 insertions(+), 120 deletions(-)
+
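+For orientation (an informal, annotated excerpt of the cpuhotplug.h hunk
+below, not literal code), the new state slots in between the online-idle
+step and the smpboot thread handling:
+
+	enum cpuhp_state {
+		/* ... */
+		CPUHP_TEARDOWN_CPU,		/* takedown_cpu(): stopper runs  */
+		CPUHP_AP_ONLINE_IDLE,
+		CPUHP_AP_SCHED_WAIT_EMPTY,	/* new: sched_cpu_wait_empty()   */
+		CPUHP_AP_SMPBOOT_THREADS,	/* smpboot kthreads park/unpark  */
+		/* ... */
+	};
+
+Since teardown callbacks run from the highest reached state downwards, the
+wait for the CPU to become empty happens after the smpboot threads have
+been parked and right before takedown_cpu() kicks the stopper thread.
+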
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index f5a5df3a8cfd..daf39c04a552 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -155,6 +155,7 @@ enum cpuhp_state {
+ CPUHP_AP_ONLINE,
+ CPUHP_TEARDOWN_CPU,
+ CPUHP_AP_ONLINE_IDLE,
++ CPUHP_AP_SCHED_WAIT_EMPTY,
+ CPUHP_AP_SMPBOOT_THREADS,
+ CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ CPUHP_AP_IRQ_AFFINITY_ONLINE,
+diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
+index 9a62ffdd296f..412cdaba33eb 100644
+--- a/include/linux/sched/hotplug.h
++++ b/include/linux/sched/hotplug.h
+@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
+ extern int sched_cpu_deactivate(unsigned int cpu);
+
+ #ifdef CONFIG_HOTPLUG_CPU
++extern int sched_cpu_wait_empty(unsigned int cpu);
+ extern int sched_cpu_dying(unsigned int cpu);
+ #else
++# define sched_cpu_wait_empty NULL
+ # define sched_cpu_dying NULL
+ #endif
+
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index abf717c4f57c..f8280edb679f 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1674,7 +1674,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .name = "ap:online",
+ },
+ /*
+- * Handled on controll processor until the plugged processor manages
++ * Handled on control processor until the plugged processor manages
+ * this itself.
+ */
+ [CPUHP_TEARDOWN_CPU] = {
+@@ -1683,6 +1683,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .teardown.single = takedown_cpu,
+ .cant_stop = true,
+ },
++
++ [CPUHP_AP_SCHED_WAIT_EMPTY] = {
++ .name = "sched:waitempty",
++ .startup.single = NULL,
++ .teardown.single = sched_cpu_wait_empty,
++ },
++
+ /* Handle smpboot threads park/unpark */
+ [CPUHP_AP_SMPBOOT_THREADS] = {
+ .name = "smpboot/threads:online",
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b902755615d7..a26a82c3e939 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6741,120 +6741,6 @@ void idle_task_exit(void)
+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
+ }
+
+-/*
+- * Since this CPU is going 'away' for a while, fold any nr_active delta
+- * we might have. Assumes we're called after migrate_tasks() so that the
+- * nr_active count is stable. We need to take the teardown thread which
+- * is calling this into account, so we hand in adjust = 1 to the load
+- * calculation.
+- *
+- * Also see the comment "Global load-average calculations".
+- */
+-static void calc_load_migrate(struct rq *rq)
+-{
+- long delta = calc_load_fold_active(rq, 1);
+- if (delta)
+- atomic_long_add(delta, &calc_load_tasks);
+-}
+-
+-static struct task_struct *__pick_migrate_task(struct rq *rq)
+-{
+- const struct sched_class *class;
+- struct task_struct *next;
+-
+- for_each_class(class) {
+- next = class->pick_next_task(rq);
+- if (next) {
+- next->sched_class->put_prev_task(rq, next);
+- return next;
+- }
+- }
+-
+- /* The idle class should always have a runnable task */
+- BUG();
+-}
+-
+-/*
+- * Migrate all tasks from the rq, sleeping tasks will be migrated by
+- * try_to_wake_up()->select_task_rq().
+- *
+- * Called with rq->lock held even though we'er in stop_machine() and
+- * there's no concurrency possible, we hold the required locks anyway
+- * because of lock validation efforts.
+- */
+-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
+-{
+- struct rq *rq = dead_rq;
+- struct task_struct *next, *stop = rq->stop;
+- struct rq_flags orf = *rf;
+- int dest_cpu;
+-
+- /*
+- * Fudge the rq selection such that the below task selection loop
+- * doesn't get stuck on the currently eligible stop task.
+- *
+- * We're currently inside stop_machine() and the rq is either stuck
+- * in the stop_machine_cpu_stop() loop, or we're executing this code,
+- * either way we should never end up calling schedule() until we're
+- * done here.
+- */
+- rq->stop = NULL;
+-
+- /*
+- * put_prev_task() and pick_next_task() sched
+- * class method both need to have an up-to-date
+- * value of rq->clock[_task]
+- */
+- update_rq_clock(rq);
+-
+- for (;;) {
+- /*
+- * There's this thread running, bail when that's the only
+- * remaining thread:
+- */
+- if (rq->nr_running == 1)
+- break;
+-
+- next = __pick_migrate_task(rq);
+-
+- /*
+- * Rules for changing task_struct::cpus_mask are holding
+- * both pi_lock and rq->lock, such that holding either
+- * stabilizes the mask.
+- *
+- * Drop rq->lock is not quite as disastrous as it usually is
+- * because !cpu_active at this point, which means load-balance
+- * will not interfere. Also, stop-machine.
+- */
+- rq_unlock(rq, rf);
+- raw_spin_lock(&next->pi_lock);
+- rq_relock(rq, rf);
+-
+- /*
+- * Since we're inside stop-machine, _nothing_ should have
+- * changed the task, WARN if weird stuff happened, because in
+- * that case the above rq->lock drop is a fail too.
+- */
+- if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+- raw_spin_unlock(&next->pi_lock);
+- continue;
+- }
+-
+- /* Find suitable destination for @next, with force if needed. */
+- dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+- rq = __migrate_task(rq, rf, next, dest_cpu);
+- if (rq != dead_rq) {
+- rq_unlock(rq, rf);
+- rq = dead_rq;
+- *rf = orf;
+- rq_relock(rq, rf);
+- }
+- raw_spin_unlock(&next->pi_lock);
+- }
+-
+- rq->stop = stop;
+-}
+-
+ static int __balance_push_cpu_stop(void *arg)
+ {
+ struct task_struct *p = arg;
+@@ -7125,10 +7011,6 @@ int sched_cpu_deactivate(unsigned int cpu)
+ return ret;
+ }
+ sched_domains_numa_masks_clear(cpu);
+-
+- /* Wait for all non per CPU kernel threads to vanish. */
+- balance_hotplug_wait();
+-
+ return 0;
+ }
+
+@@ -7148,6 +7030,41 @@ int sched_cpu_starting(unsigned int cpu)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
++
++/*
++ * Invoked immediately before the stopper thread is invoked to bring the
++ * CPU down completely. At this point all per CPU kthreads except the
++ * hotplug thread (current) and the stopper thread (inactive) have been
++ * either parked or have been unbound from the outgoing CPU. Ensure that
++ * any of those which might be on the way out are gone.
++ *
++ * If after this point a bound task is being woken on this CPU then the
++ * responsible hotplug callback has failed to do its job.
++ * sched_cpu_dying() will catch it with the appropriate fireworks.
++ */
++int sched_cpu_wait_empty(unsigned int cpu)
++{
++ balance_hotplug_wait();
++ return 0;
++}
++
++/*
++ * Since this CPU is going 'away' for a while, fold any nr_active delta we
++ * might have. Called from the CPU stopper task after ensuring that the
++ * stopper is the last running task on the CPU, so nr_active count is
++ * stable. We need to take the teardown thread which is calling this into
++ * account, so we hand in adjust = 1 to the load calculation.
++ *
++ * Also see the comment "Global load-average calculations".
++ */
++static void calc_load_migrate(struct rq *rq)
++{
++ long delta = calc_load_fold_active(rq, 1);
++
++ if (delta)
++ atomic_long_add(delta, &calc_load_tasks);
++}
++
+ int sched_cpu_dying(unsigned int cpu)
+ {
+ struct rq *rq = cpu_rq(cpu);
+@@ -7161,7 +7078,6 @@ int sched_cpu_dying(unsigned int cpu)
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+ set_rq_offline(rq);
+ }
+- migrate_tasks(rq, &rf);
+ BUG_ON(rq->nr_running != 1);
+ rq_unlock_irqrestore(rq, &rf);
+
+--
+2.43.0
+