From: Sebastian Andrzej Siewior Date: Fri, 3 Jul 2009 08:44:31 -0500 Subject: [PATCH 100/353] hrtimer: by timers by default into the softirq context Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=f29c49b257f3002eb54b6303ac0fc6551f3180b2 We can't have hrtimers callbacks running in hardirq context on RT. Therefore the timers are deferred to the softirq context by default. There are few timers which expect to be run in hardirq context even on RT. Those are: - very short running where low latency is critical (kvm lapic) - timers which take raw locks and need run in hard-irq context (perf, sched) - wake up related timer (kernel side of clock_nanosleep() and so on) Signed-off-by: Sebastian Andrzej Siewior --- arch/x86/kvm/lapic.c | 2 +- include/linux/hrtimer.h | 6 ++++++ kernel/events/core.c | 4 ++-- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 4 ++-- kernel/sched/rt.c | 4 ++-- kernel/time/hrtimer.c | 21 +++++++++++++++++++-- kernel/time/tick-broadcast-hrtimer.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/watchdog.c | 2 +- 11 files changed, 37 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 027941e3df68..9699f3735a55 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2262,7 +2262,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) apic->vcpu = vcpu; hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED); + HRTIMER_MODE_ABS_PINNED_HARD); apic->lapic_timer.timer.function = apic_timer_fn; /* diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 8714f1a37d84..082147c07831 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -42,6 +42,7 @@ enum hrtimer_mode { HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, + HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, @@ -52,6 +53,11 @@ enum hrtimer_mode { HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, + + HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 2bf4b6b109bf..69524886cc89 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1108,7 +1108,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); raw_spin_lock_init(&cpuctx->hrtimer_lock); - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); timer->function = perf_mux_hrtimer_handler; } @@ -9311,7 +9311,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) if (!is_sampling_event(event)) return; - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); hwc->hrtimer.function = perf_swevent_hrtimer; /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cdaf83d26695..b307f7c8ce14 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -315,7 +315,7 @@ static void hrtick_rq_init(struct rq *rq) rq->hrtick_csd.info = rq; #endif - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); rq->hrtick_timer.function = hrtick; } #else /* CONFIG_SCHED_HRTICK */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9243d0049714..52acbb31a777 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1105,7 +1105,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) { struct hrtimer *timer = &dl_se->dl_timer; - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); timer->function = dl_task_timer; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bd9a375c45f4..6d1911b08b7f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4998,9 +4998,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period = ns_to_ktime(default_cfs_period()); INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); - hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); cfs_b->period_timer.function = sched_cfs_period_timer; - hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); cfs_b->slack_timer.function = sched_cfs_slack_timer; cfs_b->distribute_running = 0; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a068884c369f..b15428ede6cf 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) raw_spin_lock_init(&rt_b->rt_runtime_lock); - hrtimer_init(&rt_b->rt_period_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_HARD); rt_b->rt_period_timer.function = sched_rt_period_timer; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index ab38b51a96cc..beb81024c287 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1202,7 +1202,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match. */ +#ifndef CONFIG_PREEMPT_RT_BASE WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); +#endif base = lock_hrtimer_base(timer, &flags); @@ -1362,10 +1364,17 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { - bool softtimer = !!(mode & HRTIMER_MODE_SOFT); - int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; + bool softtimer; + int base; struct hrtimer_cpu_base *cpu_base; + softtimer = !!(mode & HRTIMER_MODE_SOFT); +#ifdef CONFIG_PREEMPT_RT_FULL + if (!softtimer && !(mode & HRTIMER_MODE_HARD)) + softtimer = true; +#endif + base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; + memset(timer, 0, sizeof(struct hrtimer)); cpu_base = raw_cpu_ptr(&hrtimer_bases); @@ -1748,6 +1757,14 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, enum hrtimer_mode mode, struct task_struct *task) { +#ifdef CONFIG_PREEMPT_RT_FULL + if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) { + if (task_is_realtime(current) || system_state != SYSTEM_RUNNING) + mode |= HRTIMER_MODE_HARD; + else + mode |= HRTIMER_MODE_SOFT; + } +#endif __hrtimer_init(&sl->timer, clock_id, mode); sl->timer.function = hrtimer_wakeup; sl->task = task; diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index a836efd34589..c50e8f3262de 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -107,7 +107,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) void tick_setup_hrtimer_broadcast(void) { - hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); bctimer.function = bc_handler; clockevents_register_device(&ce_broadcast_hrtimer); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0491c4c4a5cf..721e70e1f0b9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1326,7 +1326,7 @@ void tick_setup_sched_timer(void) /* * Emulate tick processing via per-CPU hrtimers: */ - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ts->sched_timer.function = tick_sched_timer; /* Get the next period (per-CPU) */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 44096c4f4d60..bd381bfe7fa4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -485,7 +485,7 @@ static void watchdog_enable(unsigned int cpu) * Start the timer first to prevent the NMI watchdog triggering * before the timer has a chance to fire. */ - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); hrtimer->function = watchdog_timer_fn; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED);