1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:31 -0500
Subject: [PATCH 100/342] hrtimer: by timers by default into the softirq
context
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=f68468a5cbe2c273bed160e13102b7ed2a2dc653
We can't have hrtimers callbacks running in hardirq context on RT. Therefore
the timers are deferred to the softirq context by default.
There are few timers which expect to be run in hardirq context even on RT.
Those are:
- very short running where low latency is critical (kvm lapic)
- timers which take raw locks and need run in hard-irq context (perf, sched)
- wake up related timer (kernel side of clock_nanosleep() and so on)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/kvm/lapic.c | 2 +-
include/linux/hrtimer.h | 6 ++++++
kernel/events/core.c | 4 ++--
kernel/sched/core.c | 2 +-
kernel/sched/deadline.c | 2 +-
kernel/sched/fair.c | 4 ++--
kernel/sched/rt.c | 4 ++--
kernel/time/hrtimer.c | 21 +++++++++++++++++++--
kernel/time/tick-broadcast-hrtimer.c | 2 +-
kernel/time/tick-sched.c | 2 +-
kernel/watchdog.c | 2 +-
11 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 89d07312e58c..037dbe04fda2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2258,7 +2258,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS_PINNED_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
/*
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 8714f1a37d84..082147c07831 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -42,6 +42,7 @@ enum hrtimer_mode {
HRTIMER_MODE_REL = 0x01,
HRTIMER_MODE_PINNED = 0x02,
HRTIMER_MODE_SOFT = 0x04,
+ HRTIMER_MODE_HARD = 0x08,
HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -52,6 +53,11 @@ enum hrtimer_mode {
HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
+ HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+ HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+ HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+ HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
};
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ba66ea3ca705..11e9bbad4878 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1108,7 +1108,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
raw_spin_lock_init(&cpuctx->hrtimer_lock);
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
timer->function = perf_mux_hrtimer_handler;
}
@@ -9311,7 +9311,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
if (!is_sampling_event(event))
return;
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hwc->hrtimer.function = perf_swevent_hrtimer;
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3c5999476609..0f949b7738fd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -315,7 +315,7 @@ static void hrtick_rq_init(struct rq *rq)
rq->hrtick_csd.info = rq;
#endif
- hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
}
#else /* CONFIG_SCHED_HRTICK */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 95ebbb2074c7..1a285367081c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1086,7 +1086,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
{
struct hrtimer *timer = &dl_se->dl_timer;
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
timer->function = dl_task_timer;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 16940416d526..d804b5443bb9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4951,9 +4951,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period = ns_to_ktime(default_cfs_period());
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
cfs_b->period_timer.function = sched_cfs_period_timer;
- hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
cfs_b->distribute_running = 0;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 52b55144d8ad..a2f2c6c91e05 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
raw_spin_lock_init(&rt_b->rt_runtime_lock);
- hrtimer_init(&rt_b->rt_period_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_HARD);
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 8bbde17325c7..e947f3b344ac 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1202,7 +1202,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match.
*/
+#ifndef CONFIG_PREEMPT_RT_BASE
WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+#endif
base = lock_hrtimer_base(timer, &flags);
@@ -1362,10 +1364,17 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
- bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
- int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+ bool softtimer;
+ int base;
struct hrtimer_cpu_base *cpu_base;
+ softtimer = !!(mode & HRTIMER_MODE_SOFT);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!softtimer && !(mode & HRTIMER_MODE_HARD))
+ softtimer = true;
+#endif
+ base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+
memset(timer, 0, sizeof(struct hrtimer));
cpu_base = raw_cpu_ptr(&hrtimer_bases);
@@ -1748,6 +1757,14 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
enum hrtimer_mode mode,
struct task_struct *task)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) {
+ if (task_is_realtime(current) || system_state != SYSTEM_RUNNING)
+ mode |= HRTIMER_MODE_HARD;
+ else
+ mode |= HRTIMER_MODE_SOFT;
+ }
+#endif
__hrtimer_init(&sl->timer, clock_id, mode);
sl->timer.function = hrtimer_wakeup;
sl->task = task;
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a836efd34589..c50e8f3262de 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -107,7 +107,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
void tick_setup_hrtimer_broadcast(void)
{
- hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
bctimer.function = bc_handler;
clockevents_register_device(&ce_broadcast_hrtimer);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e774a49176cc..012bc81879bf 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1314,7 +1314,7 @@ void tick_setup_sched_timer(void)
/*
* Emulate tick processing via per-CPU hrtimers:
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per-CPU) */
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6d60701dc636..328620fe85f6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -485,7 +485,7 @@ static void watchdog_enable(unsigned int cpu)
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
*/
- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hrtimer->function = watchdog_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
|