1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 20 Jan 2016 16:34:17 +0100
Subject: [PATCH 124/342] softirq: split timer softirqs out of ksoftirqd
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=df12f0ccfce0f4ac8542c56f719e8f2f71a65f2a
The softirqd runs in -RT with SCHED_FIFO (prio 1) and deals mostly with
timer wakeup which can not happen in hardirq context. The prio has been
risen from the normal SCHED_OTHER so the timer wakeup does not happen
too late.
With enough networking load it is possible that the system never goes
idle and schedules ksoftirqd and everything else with a higher priority.
One of the tasks left behind is one of RCU's threads and so we see stalls
and eventually run out of memory.
This patch moves the TIMER and HRTIMER softirqs out of the `ksoftirqd`
thread into its own `ktimersoftd`. The former can now run SCHED_OTHER
(same as mainline) and the latter at SCHED_FIFO due to the wakeups.
From networking point of view: The NAPI callback runs after the network
interrupt thread completes. If its run time takes too long the NAPI code
itself schedules the `ksoftirqd`. Here in the thread it can run at
SCHED_OTHER priority and it won't defer RCU anymore.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/softirq.c | 85 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 73 insertions(+), 12 deletions(-)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3e9333d148ad..fe4e59c80a08 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -59,6 +59,10 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+#ifdef CONFIG_PREEMPT_RT_FULL
+#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
+DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
+#endif
const char * const softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
@@ -172,6 +176,17 @@ static void wakeup_softirqd(void)
wake_up_process(tsk);
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void wakeup_timer_softirqd(void)
+{
+ /* Interrupts are disabled: no need to stop preemption */
+ struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
+
+ if (tsk && tsk->state != TASK_RUNNING)
+ wake_up_process(tsk);
+}
+#endif
+
static void handle_softirq(unsigned int vec_nr)
{
struct softirq_action *h = softirq_vec + vec_nr;
@@ -493,7 +508,6 @@ void __raise_softirq_irqoff(unsigned int nr)
static inline void local_bh_disable_nort(void) { local_bh_disable(); }
static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
static void ksoftirqd_set_sched_params(unsigned int cpu) { }
-static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
#else /* !PREEMPT_RT_FULL */
@@ -640,8 +654,12 @@ void thread_do_softirq(void)
static void do_raise_softirq_irqoff(unsigned int nr)
{
+ unsigned int mask;
+
+ mask = 1UL << nr;
+
trace_softirq_raise(nr);
- or_softirq_pending(1UL << nr);
+ or_softirq_pending(mask);
/*
* If we are not in a hard interrupt and inside a bh disabled
@@ -650,16 +668,29 @@ static void do_raise_softirq_irqoff(unsigned int nr)
* delegate it to ksoftirqd.
*/
if (!in_irq() && current->softirq_nestcnt)
- current->softirqs_raised |= (1U << nr);
- else if (__this_cpu_read(ksoftirqd))
- __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
+ current->softirqs_raised |= mask;
+ else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
+ return;
+
+ if (mask & TIMER_SOFTIRQS)
+ __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
+ else
+ __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
+}
+
+static void wakeup_proper_softirq(unsigned int nr)
+{
+ if ((1UL << nr) & TIMER_SOFTIRQS)
+ wakeup_timer_softirqd();
+ else
+ wakeup_softirqd();
}
void __raise_softirq_irqoff(unsigned int nr)
{
do_raise_softirq_irqoff(nr);
if (!in_irq() && !current->softirq_nestcnt)
- wakeup_softirqd();
+ wakeup_proper_softirq(nr);
}
/*
@@ -685,7 +716,7 @@ void raise_softirq_irqoff(unsigned int nr)
* raise a WARN() if the condition is met.
*/
if (!current->softirq_nestcnt)
- wakeup_softirqd();
+ wakeup_proper_softirq(nr);
}
static inline int ksoftirqd_softirq_pending(void)
@@ -697,23 +728,38 @@ static inline void local_bh_disable_nort(void) { }
static inline void _local_bh_enable_nort(void) { }
static inline void ksoftirqd_set_sched_params(unsigned int cpu)
+{
+ /* Take over all but timer pending softirqs when starting */
+ local_irq_disable();
+ current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
+ local_irq_enable();
+}
+
+static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
{
struct sched_param param = { .sched_priority = 1 };
sched_setscheduler(current, SCHED_FIFO, ¶m);
- /* Take over all pending softirqs when starting */
+
+ /* Take over timer pending softirqs when starting */
local_irq_disable();
- current->softirqs_raised = local_softirq_pending();
+ current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
local_irq_enable();
}
-static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
+static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
+ bool online)
{
struct sched_param param = { .sched_priority = 0 };
sched_setscheduler(current, SCHED_NORMAL, ¶m);
}
+static int ktimer_softirqd_should_run(unsigned int cpu)
+{
+ return current->softirqs_raised;
+}
+
#endif /* PREEMPT_RT_FULL */
/*
* Enter an interrupt context.
@@ -766,6 +812,9 @@ static inline void invoke_softirq(void)
if (__this_cpu_read(ksoftirqd) &&
__this_cpu_read(ksoftirqd)->softirqs_raised)
wakeup_softirqd();
+ if (__this_cpu_read(ktimer_softirqd) &&
+ __this_cpu_read(ktimer_softirqd)->softirqs_raised)
+ wakeup_timer_softirqd();
local_irq_restore(flags);
#endif
}
@@ -1153,18 +1202,30 @@ static int takeover_tasklets(unsigned int cpu)
static struct smp_hotplug_thread softirq_threads = {
.store = &ksoftirqd,
.setup = ksoftirqd_set_sched_params,
- .cleanup = ksoftirqd_clr_sched_params,
.thread_should_run = ksoftirqd_should_run,
.thread_fn = run_ksoftirqd,
.thread_comm = "ksoftirqd/%u",
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static struct smp_hotplug_thread softirq_timer_threads = {
+ .store = &ktimer_softirqd,
+ .setup = ktimer_softirqd_set_sched_params,
+ .cleanup = ktimer_softirqd_clr_sched_params,
+ .thread_should_run = ktimer_softirqd_should_run,
+ .thread_fn = run_ksoftirqd,
+ .thread_comm = "ktimersoftd/%u",
+};
+#endif
+
static __init int spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
+#ifdef CONFIG_PREEMPT_RT_FULL
+ BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
+#endif
return 0;
}
early_initcall(spawn_ksoftirqd);
|