1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
From 94b59ab31222fc252603987e1ee316264426a015 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 23 Oct 2020 12:12:02 +0200
Subject: [PATCH 005/323] sched/core: Wait for tasks being pushed away on
hotplug
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
RT kernels need to ensure that all tasks which are not per CPU kthreads
have left the outgoing CPU to guarantee that no tasks are force migrated
within a migrate disabled section.
There is also some desire to (ab)use fine grained CPU hotplug control to
clear a CPU from active state to force migrate tasks which are not per CPU
kthreads away for power control purposes.
Add a mechanism which waits until all tasks which should leave the CPU
after the CPU active flag is cleared have moved to a different online CPU.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/sched/core.c | 40 +++++++++++++++++++++++++++++++++++++++-
kernel/sched/sched.h | 4 ++++
2 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 32c3acef5781..b902755615d7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6896,8 +6896,21 @@ static void balance_push(struct rq *rq)
* Both the cpu-hotplug and stop task are in this case and are
* required to complete the hotplug process.
*/
- if (is_per_cpu_kthread(push_task))
+ if (is_per_cpu_kthread(push_task)) {
+ /*
+ * If this is the idle task on the outgoing CPU try to wake
+ * up the hotplug control thread which might wait for the
+ * last task to vanish. The rcuwait_active() check is
+ * accurate here because the waiter is pinned on this CPU
+ * and can't obviously be running in parallel.
+ */
+ if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
+ raw_spin_unlock(&rq->lock);
+ rcuwait_wake_up(&rq->hotplug_wait);
+ raw_spin_lock(&rq->lock);
+ }
return;
+ }
get_task_struct(push_task);
/*
@@ -6928,6 +6941,20 @@ static void balance_push_set(int cpu, bool on)
rq_unlock_irqrestore(rq, &rf);
}
+/*
+ * Invoked from a CPUs hotplug control thread after the CPU has been marked
+ * inactive. All tasks which are not per CPU kernel threads are either
+ * pushed off this CPU now via balance_push() or placed on a different CPU
+ * during wakeup. Wait until the CPU is quiescent.
+ */
+static void balance_hotplug_wait(void)
+{
+ struct rq *rq = this_rq();
+
+ rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
+ TASK_UNINTERRUPTIBLE);
+}
+
#else
static inline void balance_push(struct rq *rq)
@@ -6938,6 +6965,10 @@ static inline void balance_push_set(int cpu, bool on)
{
}
+static inline void balance_hotplug_wait(void)
+{
+}
+
#endif /* CONFIG_HOTPLUG_CPU */
void set_rq_online(struct rq *rq)
@@ -7094,6 +7125,10 @@ int sched_cpu_deactivate(unsigned int cpu)
return ret;
}
sched_domains_numa_masks_clear(cpu);
+
+ /* Wait for all non per CPU kernel threads to vanish. */
+ balance_hotplug_wait();
+
return 0;
}
@@ -7334,6 +7369,9 @@ void __init sched_init(void)
rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+ rcuwait_init(&rq->hotplug_wait);
+#endif
#endif /* CONFIG_SMP */
hrtick_rq_init(rq);
atomic_set(&rq->nr_iowait, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 81dc4212423a..a72464d370cd 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1007,6 +1007,10 @@ struct rq {
/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ struct rcuwait hotplug_wait;
+#endif
#endif /* CONFIG_SMP */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
--
2.43.0
|