1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 1 Dec 2021 17:41:09 +0100
Subject: [PATCH] softirq: Use a dedicated thread for timer wakeups.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.6/older/patches-6.6.7-rt18.tar.xz
A timer/hrtimer softirq is raised in-IRQ context. With threaded
interrupts enabled or on PREEMPT_RT this leads to waking the ksoftirqd
for the processing of the softirq.
Once the ksoftirqd is marked as pending (or is running) it will collect
all raised softirqs. This in turn means that a softirq which would have
been processed at the end of the threaded interrupt, which runs at an
elevated priority, is now moved to ksoftirqd which runs at SCHED_OTHER
priority and competes with every regular task for CPU resources.
This introduces long delays on heavy loaded systems and is not desired
especially if the system is not overloaded by the softirqs.
Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated
timers thread and let it run at the lowest SCHED_FIFO priority.
RT tasks are are woken up from hardirq context so only timer_list timers
and hrtimers for "regular" tasks are processed here. The higher priority
ensures that wakeups are performed before scheduling SCHED_OTHER tasks.
Using a dedicated variable to store the pending softirq bits values
ensure that the timer are not accidentally picked up by ksoftirqd and
other threaded interrupts.
It shouldn't be picked up by ksoftirqd since it runs at lower priority.
However if the timer bits are ORed while a threaded interrupt is
running, then the timer softirq would be performed at higher priority.
The new timer thread will block on the softirq lock before it starts
softirq work. This "race window" isn't closed because while timer thread
is performing the softirq it can get PI-boosted via the softirq lock by
a random force-threaded thread.
The timer thread can pick up pending softirqs from ksoftirqd but only
if the softirq load is high. It is not be desired that the picked up
softirqs are processed at SCHED_FIFO priority under high softirq load
but this can already happen by a PI-boost by a force-threaded interrupt.
Reported-by: kernel test robot <lkp@intel.com> [ static timer_threads ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/interrupt.h | 16 ++++++++
kernel/softirq.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--
kernel/time/hrtimer.c | 4 +-
kernel/time/timer.c | 2 -
4 files changed, 108 insertions(+), 6 deletions(-)
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -609,6 +609,22 @@ extern void __raise_softirq_irqoff(unsig
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+#ifdef CONFIG_PREEMPT_RT
+extern void raise_timer_softirq(void);
+extern void raise_hrtimer_softirq(void);
+
+#else
+static inline void raise_timer_softirq(void)
+{
+ raise_softirq(TIMER_SOFTIRQ);
+}
+
+static inline void raise_hrtimer_softirq(void)
+{
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+}
+#endif
+
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
static inline struct task_struct *this_cpu_ksoftirqd(void)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -619,6 +619,29 @@ static inline void tick_irq_exit(void)
#endif
}
+#ifdef CONFIG_PREEMPT_RT
+static DEFINE_PER_CPU(struct task_struct *, timersd);
+static DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
+
+static unsigned int local_pending_timers(void)
+{
+ return __this_cpu_read(pending_timer_softirq);
+}
+
+static void wake_timersd(void)
+{
+ struct task_struct *tsk = __this_cpu_read(timersd);
+
+ if (tsk)
+ wake_up_process(tsk);
+}
+
+#else
+
+static inline void wake_timersd(void) { }
+
+#endif
+
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -628,8 +651,13 @@ static inline void __irq_exit_rcu(void)
#endif
account_hardirq_exit(current);
preempt_count_sub(HARDIRQ_OFFSET);
- if (!in_interrupt() && local_softirq_pending())
- invoke_softirq();
+ if (!in_interrupt()) {
+ if (local_softirq_pending())
+ invoke_softirq();
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers())
+ wake_timersd();
+ }
tick_irq_exit();
}
@@ -963,12 +991,70 @@ static struct smp_hotplug_thread softirq
.thread_comm = "ksoftirqd/%u",
};
+#ifdef CONFIG_PREEMPT_RT
+static void timersd_setup(unsigned int cpu)
+{
+ sched_set_fifo_low(current);
+}
+
+static int timersd_should_run(unsigned int cpu)
+{
+ return local_pending_timers();
+}
+
+static void run_timersd(unsigned int cpu)
+{
+ unsigned int timer_si;
+
+ ksoftirqd_run_begin();
+
+ timer_si = local_pending_timers();
+ __this_cpu_write(pending_timer_softirq, 0);
+ or_softirq_pending(timer_si);
+
+ __do_softirq();
+
+ ksoftirqd_run_end();
+}
+
+static void raise_ktimers_thread(unsigned int nr)
+{
+ trace_softirq_raise(nr);
+ __this_cpu_or(pending_timer_softirq, 1 << nr);
+}
+
+void raise_hrtimer_softirq(void)
+{
+ raise_ktimers_thread(HRTIMER_SOFTIRQ);
+}
+
+void raise_timer_softirq(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ raise_ktimers_thread(TIMER_SOFTIRQ);
+ wake_timersd();
+ local_irq_restore(flags);
+}
+
+static struct smp_hotplug_thread timer_threads = {
+ .store = &timersd,
+ .setup = timersd_setup,
+ .thread_should_run = timersd_should_run,
+ .thread_fn = run_timersd,
+ .thread_comm = "ktimers/%u",
+};
+#endif
+
static __init int spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
+#ifdef CONFIG_PREEMPT_RT
+ BUG_ON(smpboot_register_percpu_thread(&timer_threads));
+#endif
return 0;
}
early_initcall(spawn_ksoftirqd);
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1808,7 +1808,7 @@ void hrtimer_interrupt(struct clock_even
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1921,7 +1921,7 @@ void hrtimer_run_queues(void)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2054,7 +2054,7 @@ static void run_local_timers(void)
if (time_before(jiffies, base->next_expiry))
return;
}
- raise_softirq(TIMER_SOFTIRQ);
+ raise_timer_softirq();
}
/*
|