diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/Kconfig | 2 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 2 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 2 | ||||
-rw-r--r-- | kernel/time/hrtimer.c | 43 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 23 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 96 | ||||
-rw-r--r-- | kernel/time/timer.c | 3 | ||||
-rw-r--r-- | kernel/time/timer_migration.c | 33 | ||||
-rw-r--r-- | kernel/time/timer_migration.h | 12 | ||||
-rw-r--r-- | kernel/time/vsyscall.c | 6 |
10 files changed, 119 insertions, 103 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index fc3b1a06c9..8ebb6d5a10 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -202,7 +202,7 @@ config HIGH_RES_TIMERS the size of the kernel image. config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US - int "Clocksource watchdog maximum allowable skew (in μs)" + int "Clocksource watchdog maximum allowable skew (in microseconds)" depends on CLOCKSOURCE_WATCHDOG range 50 1000 default 125 diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index a7ca458cdd..60a6484831 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -677,7 +677,7 @@ static ssize_t current_device_show(struct device *dev, raw_spin_lock_irq(&clockevents_lock); td = tick_get_tick_dev(dev); if (td && td->evtdev) - count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); + count = sysfs_emit(buf, "%s\n", td->evtdev->name); raw_spin_unlock_irq(&clockevents_lock); return count; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 4d50d53ac7..d25ba49e31 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -1334,7 +1334,7 @@ static ssize_t current_clocksource_show(struct device *dev, ssize_t count = 0; mutex_lock(&clocksource_mutex); - count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); + count = sysfs_emit(buf, "%s\n", curr_clocksource->name); mutex_unlock(&clocksource_mutex); return count; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 70625dff62..b8ee320208 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -644,17 +644,12 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) /* * Is the high resolution mode active ? */ -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? cpu_base->hres_active : 0; } -static inline int hrtimer_hres_active(void) -{ - return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); -} - static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer, ktime_t expires_next) @@ -678,7 +673,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, * set. So we'd effectively block all timers until the T2 event * fires. */ - if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) + if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(expires_next, 1); @@ -789,12 +784,12 @@ static void retrigger_next_event(void *arg) * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. */ - if (!__hrtimer_hres_active(base) && !tick_nohz_active) + if (!hrtimer_hres_active(base) && !tick_nohz_active) return; raw_spin_lock(&base->lock); hrtimer_update_base(base); - if (__hrtimer_hres_active(base)) + if (hrtimer_hres_active(base)) hrtimer_force_reprogram(base, 0); else hrtimer_update_next_event(base); @@ -951,7 +946,7 @@ void clock_was_set(unsigned int bases) cpumask_var_t mask; int cpu; - if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active) + if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active) goto out_timerfd; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -1290,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; + if (WARN_ON_ONCE(!timer->function)) + return; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard @@ -1491,7 +1488,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (!__hrtimer_hres_active(cpu_base)) + if (!hrtimer_hres_active(cpu_base)) expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1514,7 +1511,7 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (__hrtimer_hres_active(cpu_base)) { + if (hrtimer_hres_active(cpu_base)) { unsigned int active; if (!cpu_base->softirq_activated) { @@ -1875,25 +1872,7 @@ retry: tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } - -/* called with interrupts disabled */ -static inline void __hrtimer_peek_ahead_timers(void) -{ - struct tick_device *td; - - if (!hrtimer_hres_active()) - return; - - td = this_cpu_ptr(&tick_cpu_device); - if (td && td->evtdev) - hrtimer_interrupt(td->evtdev); -} - -#else /* CONFIG_HIGH_RES_TIMERS */ - -static inline void __hrtimer_peek_ahead_timers(void) { } - -#endif /* !CONFIG_HIGH_RES_TIMERS */ +#endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from run_local_timers in hardirq context every jiffy @@ -1904,7 +1883,7 @@ void hrtimer_run_queues(void) unsigned long flags; ktime_t now; - if (__hrtimer_hres_active(cpu_base)) + if (hrtimer_hres_active(cpu_base)) return; /* diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 771d1e0403..b4843099a8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1141,6 +1141,7 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1148,6 +1149,28 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) bc = tick_broadcast_device.evtdev; if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* + * If the broadcast force bit of the current CPU is set, + * then the current CPU has not yet reprogrammed the local + * timer device to avoid a ping-pong race. See + * ___tick_broadcast_oneshot_control(). + * + * If the broadcast device is hrtimer based then + * programming the broadcast event below does not have any + * effect because the local clockevent device is not + * running and not programmed because the broadcast event + * is not earlier than the pending event of the local clock + * event device. As a consequence all CPUs waiting for a + * broadcast event are stuck forever. + * + * Detect this condition and reprogram the cpu local timer + * device to avoid the starvation. + */ + if (tick_check_broadcast_expired()) { + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); + tick_program_event(td->evtdev->next_event, 1); + } + /* This moves the broadcast assignment to this CPU: */ clockevents_program_event(bc, bc->next_event, 1); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b58dffc58a..4e18db1819 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -237,7 +237,9 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset) } } -static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) +static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles); + +static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) { struct timekeeper *tk = &tk_core.timekeeper; u64 now, last, mask, max, delta; @@ -264,34 +266,23 @@ static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) * Try to catch underflows by checking if we are seeing small * mask-relative negative values. */ - if (unlikely((~delta & mask) < (mask >> 3))) { + if (unlikely((~delta & mask) < (mask >> 3))) tk->underflow_seen = 1; - delta = 0; - } - /* Cap delta value to the max_cycles values to avoid mult overflows */ - if (unlikely(delta > max)) { + /* Check for multiplication overflows */ + if (unlikely(delta > max)) tk->overflow_seen = 1; - delta = tkr->clock->max_cycles; - } - return delta; + /* timekeeping_cycles_to_ns() handles both under and overflow */ + return timekeeping_cycles_to_ns(tkr, now); } #else static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) { } -static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) +static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) { - u64 cycle_now, delta; - - /* read clocksource */ - cycle_now = tk_clock_read(tkr); - - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); - - return delta; + BUG(); } #endif @@ -370,32 +361,46 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) } /* Timekeeper helper functions. */ +static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta) +{ + return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift); +} -static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta) +static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { - u64 nsec; + /* Calculate the delta since the last update_wall_time() */ + u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask; - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; + /* + * This detects both negative motion and the case where the delta + * overflows the multiplication with tkr->mult. + */ + if (unlikely(delta > tkr->clock->max_cycles)) { + /* + * Handle clocksource inconsistency between CPUs to prevent + * time from going backwards by checking for the MSB of the + * mask being set in the delta. + */ + if (delta & ~(mask >> 1)) + return tkr->xtime_nsec >> tkr->shift; + + return delta_to_ns_safe(tkr, delta); + } - return nsec; + return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; } -static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) +static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) { - u64 delta; - - delta = timekeeping_get_delta(tkr); - return timekeeping_delta_to_ns(tkr, delta); + return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr)); } -static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) +static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) { - u64 delta; + if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING)) + return timekeeping_debug_get_ns(tkr); - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); - return timekeeping_delta_to_ns(tkr, delta); + return __timekeeping_get_ns(tkr); } /** @@ -431,14 +436,6 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr, memcpy(base + 1, base, sizeof(*base)); } -static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr) -{ - u64 delta, cycles = tk_clock_read(tkr); - - delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); - return timekeeping_delta_to_ns(tkr, delta); -} - static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; @@ -449,7 +446,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); - now += fast_tk_get_delta_ns(tkr); + now += __timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); return now; @@ -565,7 +562,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) tkr = tkf->base + (seq & 0x01); basem = ktime_to_ns(tkr->base); baser = ktime_to_ns(tkr->base_real); - delta = fast_tk_get_delta_ns(tkr); + delta = __timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); if (mono) @@ -800,10 +797,15 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; - tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; - tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult; + while (delta > 0) { + u64 max = tk->tkr_mono.clock->max_cycles; + u64 incr = delta < max ? delta : max; - tk_normalize_xtime(tk); + tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult; + tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult; + tk_normalize_xtime(tk); + delta -= incr; + } } /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3baf2fbe68..48288dd4a1 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -312,7 +312,6 @@ static struct ctl_table timer_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; static int __init timer_sysctl_init(void) @@ -2488,7 +2487,7 @@ void update_process_times(int user_tick) if (in_irq()) irq_work_tick(); #endif - scheduler_tick(); + sched_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(); } diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 84413114db..d91efe1dc3 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -507,7 +507,14 @@ static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc) * (get_next_timer_interrupt()) * @firstexp: Contains the first event expiry information when last * active CPU of hierarchy is on the way to idle to make - * sure CPU will be back in time. + * sure CPU will be back in time. It is updated in top + * level group only. Be aware, there could occur a new top + * level of the hierarchy between the 'top level call' in + * tmigr_update_events() and the check for the parent group + * in walk_groups(). Then @firstexp might contain a value + * != KTIME_MAX even if it was not the final top + * level. This is not a problem, as the worst outcome is a + * CPU which might wake up a little early. * @evt: Pointer to tmigr_event which needs to be queued (of idle * child group) * @childmask: childmask of child group @@ -649,7 +656,7 @@ static bool tmigr_active_up(struct tmigr_group *group, } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); - if ((walk_done == false) && group->parent) + if (walk_done == false) data->childmask = group->childmask; /* @@ -1317,20 +1324,9 @@ static bool tmigr_inactive_up(struct tmigr_group *group, /* Event Handling */ tmigr_update_events(group, child, data); - if (group->parent && (walk_done == false)) + if (walk_done == false) data->childmask = group->childmask; - /* - * data->firstexp was set by tmigr_update_events() and contains the - * expiry of the first global event which needs to be handled. It - * differs from KTIME_MAX if: - * - group is the top level group and - * - group is idle (which means CPU was the last active CPU in the - * hierarchy) and - * - there is a pending event in the hierarchy - */ - WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent); - trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); return walk_done; @@ -1552,10 +1548,11 @@ static void tmigr_connect_child_parent(struct tmigr_group *child, data.childmask = child->childmask; /* - * There is only one new level per time. When connecting the - * child and the parent and set the child active when the parent - * is inactive, the parent needs to be the uppermost - * level. Otherwise there went something wrong! + * There is only one new level per time (which is protected by + * tmigr_mutex). When connecting the child and the parent and + * set the child active when the parent is inactive, the parent + * needs to be the uppermost level. Otherwise there went + * something wrong! */ WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); } diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h index 6c37d94a37..494f68cc13 100644 --- a/kernel/time/timer_migration.h +++ b/kernel/time/timer_migration.h @@ -22,7 +22,17 @@ struct tmigr_event { * struct tmigr_group - timer migration hierarchy group * @lock: Lock protecting the event information and group hierarchy * information during setup - * @parent: Pointer to the parent group + * @parent: Pointer to the parent group. Pointer is updated when a + * new hierarchy level is added because of a CPU coming + * online the first time. Once it is set, the pointer will + * not be removed or updated. When accessing parent pointer + * lock less to decide whether to abort a propagation or + * not, it is not a problem. The worst outcome is an + * unnecessary/early CPU wake up. But do not access parent + * pointer several times in the same 'action' (like + * activation, deactivation, check for remote expiry,...) + * without holding the lock as it is not ensured that value + * will not change. * @groupevt: Next event of the group which is only used when the * group is !active. The group event is then queued into * the parent timer queue. diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f0d5062d9c..9193d6133e 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -22,10 +22,16 @@ static inline void update_vdso_data(struct vdso_data *vdata, u64 nsec, sec; vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + vdata[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles; +#endif vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult; vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift; vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + vdata[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles; +#endif vdata[CS_RAW].mask = tk->tkr_raw.mask; vdata[CS_RAW].mult = tk->tkr_raw.mult; vdata[CS_RAW].shift = tk->tkr_raw.shift; |