From: Sebastian Andrzej Siewior Date: Fri, 3 Jul 2009 08:44:31 -0500 Subject: [PATCH 100/354] hrtimer: by timers by default into the softirq context Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=9e206d0caa6d175533fd2973ecbb54d48c6f1bd1 We can't have hrtimers callbacks running in hardirq context on RT. Therefore the timers are deferred to the softirq context by default. There are few timers which expect to be run in hardirq context even on RT. Those are: - very short running where low latency is critical (kvm lapic) - timers which take raw locks and need run in hard-irq context (perf, sched) - wake up related timer (kernel side of clock_nanosleep() and so on) Signed-off-by: Sebastian Andrzej Siewior --- arch/x86/kvm/lapic.c | 2 +- include/linux/hrtimer.h | 6 ++++++ kernel/events/core.c | 4 ++-- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 4 ++-- kernel/sched/rt.c | 4 ++-- kernel/time/hrtimer.c | 21 +++++++++++++++++++-- kernel/time/tick-broadcast-hrtimer.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/watchdog.c | 2 +- 11 files changed, 37 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 256b00f456e6..83c3421499aa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2266,7 +2266,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) apic->vcpu = vcpu; hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED); + HRTIMER_MODE_ABS_PINNED_HARD); apic->lapic_timer.timer.function = apic_timer_fn; /* diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 684241f87e4d..f8b4818e6e08 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -42,6 +42,7 @@ enum hrtimer_mode { HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, + HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, @@ -52,6 +53,11 @@ enum hrtimer_mode { HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, + + HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 4182e265176d..daeff994ae9c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1108,7 +1108,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); raw_spin_lock_init(&cpuctx->hrtimer_lock); - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); timer->function = perf_mux_hrtimer_handler; } @@ -9372,7 +9372,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) if (!is_sampling_event(event)) return; - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); hwc->hrtimer.function = perf_swevent_hrtimer; /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cdaf83d26695..b307f7c8ce14 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -315,7 +315,7 @@ static void hrtick_rq_init(struct rq *rq) rq->hrtick_csd.info = rq; #endif - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); rq->hrtick_timer.function = hrtick; } #else /* CONFIG_SCHED_HRTICK */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9243d0049714..52acbb31a777 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1105,7 +1105,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) { struct hrtimer *timer = &dl_se->dl_timer; - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); timer->function = dl_task_timer; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3f6a613be8c3..802471dbb24d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4998,9 +4998,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period = ns_to_ktime(default_cfs_period()); INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); - hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); cfs_b->period_timer.function = sched_cfs_period_timer; - hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); cfs_b->slack_timer.function = sched_cfs_slack_timer; cfs_b->distribute_running = 0; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 0768ba9d2860..41e73baf41ab 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) raw_spin_lock_init(&rt_b->rt_runtime_lock); - hrtimer_init(&rt_b->rt_period_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_HARD); rt_b->rt_period_timer.function = sched_rt_period_timer; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 478ff82abd33..764c360568f1 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1202,7 +1202,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match. */ +#ifndef CONFIG_PREEMPT_RT_BASE WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); +#endif base = lock_hrtimer_base(timer, &flags); @@ -1362,10 +1364,17 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { - bool softtimer = !!(mode & HRTIMER_MODE_SOFT); - int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; + bool softtimer; + int base; struct hrtimer_cpu_base *cpu_base; + softtimer = !!(mode & HRTIMER_MODE_SOFT); +#ifdef CONFIG_PREEMPT_RT_FULL + if (!softtimer && !(mode & HRTIMER_MODE_HARD)) + softtimer = true; +#endif + base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; + memset(timer, 0, sizeof(struct hrtimer)); cpu_base = raw_cpu_ptr(&hrtimer_bases); @@ -1748,6 +1757,14 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, enum hrtimer_mode mode, struct task_struct *task) { +#ifdef CONFIG_PREEMPT_RT_FULL + if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) { + if (task_is_realtime(current) || system_state != SYSTEM_RUNNING) + mode |= HRTIMER_MODE_HARD; + else + mode |= HRTIMER_MODE_SOFT; + } +#endif __hrtimer_init(&sl->timer, clock_id, mode); sl->timer.function = hrtimer_wakeup; sl->task = task; diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index a836efd34589..c50e8f3262de 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -107,7 +107,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) void tick_setup_hrtimer_broadcast(void) { - hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); bctimer.function = bc_handler; clockevents_register_device(&ce_broadcast_hrtimer); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0491c4c4a5cf..721e70e1f0b9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1326,7 +1326,7 @@ void tick_setup_sched_timer(void) /* * Emulate tick processing via per-CPU hrtimers: */ - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ts->sched_timer.function = tick_sched_timer; /* Get the next period (per-CPU) */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 44096c4f4d60..bd381bfe7fa4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -485,7 +485,7 @@ static void watchdog_enable(unsigned int cpu) * Start the timer first to prevent the NMI watchdog triggering * before the timer has a chance to fire. */ - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); hrtimer->function = watchdog_timer_fn; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED);