1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:31 -0500
Subject: [PATCH 100/353] hrtimer: move timers by default into the softirq
context
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=6117abca429d5fae96db4e3437aa1d6f818fe1d2
We can't have hrtimer callbacks running in hardirq context on RT. Therefore
the timers are deferred to the softirq context by default.
There are a few timers which are expected to run in hardirq context even on RT.
Those are:
- very short-running timers where low latency is critical (kvm lapic)
- timers which take raw locks and need to run in hardirq context (perf, sched)
- wakeup-related timers (kernel side of clock_nanosleep() and so on)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/kvm/lapic.c | 2 +-
include/linux/hrtimer.h | 6 ++++++
kernel/events/core.c | 4 ++--
kernel/sched/core.c | 2 +-
kernel/sched/deadline.c | 2 +-
kernel/sched/fair.c | 4 ++--
kernel/sched/rt.c | 4 ++--
kernel/time/hrtimer.c | 21 +++++++++++++++++++--
kernel/time/tick-broadcast-hrtimer.c | 2 +-
kernel/time/tick-sched.c | 2 +-
kernel/watchdog.c | 2 +-
11 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 027941e3df68..9699f3735a55 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2262,7 +2262,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS_PINNED_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
/*
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 8714f1a37d84..082147c07831 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -42,6 +42,7 @@ enum hrtimer_mode {
HRTIMER_MODE_REL = 0x01,
HRTIMER_MODE_PINNED = 0x02,
HRTIMER_MODE_SOFT = 0x04,
+ HRTIMER_MODE_HARD = 0x08,
HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -52,6 +53,11 @@ enum hrtimer_mode {
HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
+ HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+ HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+ HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+ HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
};
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 668e5492e4c4..f374d3a250eb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1108,7 +1108,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
raw_spin_lock_init(&cpuctx->hrtimer_lock);
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
timer->function = perf_mux_hrtimer_handler;
}
@@ -9311,7 +9311,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
if (!is_sampling_event(event))
return;
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hwc->hrtimer.function = perf_swevent_hrtimer;
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cdaf83d26695..b307f7c8ce14 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -315,7 +315,7 @@ static void hrtick_rq_init(struct rq *rq)
rq->hrtick_csd.info = rq;
#endif
- hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
}
#else /* CONFIG_SCHED_HRTICK */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9243d0049714..52acbb31a777 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1105,7 +1105,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
{
struct hrtimer *timer = &dl_se->dl_timer;
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
timer->function = dl_task_timer;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bd9a375c45f4..6d1911b08b7f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4998,9 +4998,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period = ns_to_ktime(default_cfs_period());
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
cfs_b->period_timer.function = sched_cfs_period_timer;
- hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
cfs_b->distribute_running = 0;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a068884c369f..b15428ede6cf 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
raw_spin_lock_init(&rt_b->rt_runtime_lock);
- hrtimer_init(&rt_b->rt_period_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_HARD);
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index ab38b51a96cc..beb81024c287 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1202,7 +1202,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match.
*/
+#ifndef CONFIG_PREEMPT_RT_BASE
WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+#endif
base = lock_hrtimer_base(timer, &flags);
@@ -1362,10 +1364,17 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
- bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
- int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+ bool softtimer;
+ int base;
struct hrtimer_cpu_base *cpu_base;
+ softtimer = !!(mode & HRTIMER_MODE_SOFT);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!softtimer && !(mode & HRTIMER_MODE_HARD))
+ softtimer = true;
+#endif
+ base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+
memset(timer, 0, sizeof(struct hrtimer));
cpu_base = raw_cpu_ptr(&hrtimer_bases);
@@ -1748,6 +1757,14 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
enum hrtimer_mode mode,
struct task_struct *task)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) {
+ if (task_is_realtime(current) || system_state != SYSTEM_RUNNING)
+ mode |= HRTIMER_MODE_HARD;
+ else
+ mode |= HRTIMER_MODE_SOFT;
+ }
+#endif
__hrtimer_init(&sl->timer, clock_id, mode);
sl->timer.function = hrtimer_wakeup;
sl->task = task;
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a836efd34589..c50e8f3262de 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -107,7 +107,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
void tick_setup_hrtimer_broadcast(void)
{
- hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
bctimer.function = bc_handler;
clockevents_register_device(&ce_broadcast_hrtimer);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e774a49176cc..012bc81879bf 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1314,7 +1314,7 @@ void tick_setup_sched_timer(void)
/*
* Emulate tick processing via per-CPU hrtimers:
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per-CPU) */
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 44096c4f4d60..bd381bfe7fa4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -485,7 +485,7 @@ static void watchdog_enable(unsigned int cpu)
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
*/
- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hrtimer->function = watchdog_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
|