author:    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-08-07 13:18:06 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>  2024-08-07 13:18:06 +0000
commit:    638a9e433ecd61e64761352dbec1fa4f5874c941 (patch)
tree:      fdbff74a238d7a5a7d1cef071b7230bc064b9f25 /kernel/time
parent:    Releasing progress-linux version 6.9.12-1~progress7.99u1. (diff)
Merging upstream version 6.10.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'kernel/time')
-rw-r--r--   kernel/time/Kconfig              2
-rw-r--r--   kernel/time/clockevents.c        2
-rw-r--r--   kernel/time/clocksource.c        2
-rw-r--r--   kernel/time/hrtimer.c           43
-rw-r--r--   kernel/time/tick-broadcast.c    23
-rw-r--r--   kernel/time/timekeeping.c       96
-rw-r--r--   kernel/time/timer.c              3
-rw-r--r--   kernel/time/timer_migration.c   33
-rw-r--r--   kernel/time/timer_migration.h   12
-rw-r--r--   kernel/time/vsyscall.c           6
10 files changed, 119 insertions, 103 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fc3b1a06c9..8ebb6d5a10 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -202,7 +202,7 @@ config HIGH_RES_TIMERS
the size of the kernel image.
config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
- int "Clocksource watchdog maximum allowable skew (in μs)"
+ int "Clocksource watchdog maximum allowable skew (in microseconds)"
depends on CLOCKSOURCE_WATCHDOG
range 50 1000
default 125
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index a7ca458cdd..60a6484831 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -677,7 +677,7 @@ static ssize_t current_device_show(struct device *dev,
raw_spin_lock_irq(&clockevents_lock);
td = tick_get_tick_dev(dev);
if (td && td->evtdev)
- count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name);
+ count = sysfs_emit(buf, "%s\n", td->evtdev->name);
raw_spin_unlock_irq(&clockevents_lock);
return count;
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4d50d53ac7..d25ba49e31 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -1334,7 +1334,7 @@ static ssize_t current_clocksource_show(struct device *dev,
ssize_t count = 0;
mutex_lock(&clocksource_mutex);
- count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
+ count = sysfs_emit(buf, "%s\n", curr_clocksource->name);
mutex_unlock(&clocksource_mutex);
return count;
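
Both sysfs hunks above replace snprintf(buf, PAGE_SIZE, ...) with sysfs_emit(), the preferred helper for sysfs show() callbacks: it verifies that buf is the page-sized buffer sysfs passed in and bounds the output to PAGE_SIZE. A minimal sketch of a show() callback written in this style; the attribute name is made up for illustration:

	#include <linux/device.h>
	#include <linux/sysfs.h>

	/* Hypothetical read-only attribute; not part of this patch. */
	static ssize_t example_name_show(struct device *dev,
					 struct device_attribute *attr, char *buf)
	{
		/* sysfs_emit() caps the write at the PAGE_SIZE buffer sysfs provides. */
		return sysfs_emit(buf, "%s\n", dev_name(dev));
	}
	static DEVICE_ATTR_RO(example_name);
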
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 70625dff62..b8ee320208 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -644,17 +644,12 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
/*
* Is the high resolution mode active ?
*/
-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
+static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
{
return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
cpu_base->hres_active : 0;
}
-static inline int hrtimer_hres_active(void)
-{
- return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
-}
-
static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
struct hrtimer *next_timer,
ktime_t expires_next)
@@ -678,7 +673,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
* set. So we'd effectively block all timers until the T2 event
* fires.
*/
- if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
+ if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
return;
tick_program_event(expires_next, 1);
@@ -789,12 +784,12 @@ static void retrigger_next_event(void *arg)
* function call will take care of the reprogramming in case the
* CPU was in a NOHZ idle sleep.
*/
- if (!__hrtimer_hres_active(base) && !tick_nohz_active)
+ if (!hrtimer_hres_active(base) && !tick_nohz_active)
return;
raw_spin_lock(&base->lock);
hrtimer_update_base(base);
- if (__hrtimer_hres_active(base))
+ if (hrtimer_hres_active(base))
hrtimer_force_reprogram(base, 0);
else
hrtimer_update_next_event(base);
@@ -951,7 +946,7 @@ void clock_was_set(unsigned int bases)
cpumask_var_t mask;
int cpu;
- if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
+ if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active)
goto out_timerfd;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -1290,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;
+ if (WARN_ON_ONCE(!timer->function))
+ return;
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
@@ -1491,7 +1488,7 @@ u64 hrtimer_get_next_event(void)
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (!__hrtimer_hres_active(cpu_base))
+ if (!hrtimer_hres_active(cpu_base))
expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1514,7 +1511,7 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude)
raw_spin_lock_irqsave(&cpu_base->lock, flags);
- if (__hrtimer_hres_active(cpu_base)) {
+ if (hrtimer_hres_active(cpu_base)) {
unsigned int active;
if (!cpu_base->softirq_activated) {
@@ -1875,25 +1872,7 @@ retry:
tick_program_event(expires_next, 1);
pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
}
-
-/* called with interrupts disabled */
-static inline void __hrtimer_peek_ahead_timers(void)
-{
- struct tick_device *td;
-
- if (!hrtimer_hres_active())
- return;
-
- td = this_cpu_ptr(&tick_cpu_device);
- if (td && td->evtdev)
- hrtimer_interrupt(td->evtdev);
-}
-
-#else /* CONFIG_HIGH_RES_TIMERS */
-
-static inline void __hrtimer_peek_ahead_timers(void) { }
-
-#endif /* !CONFIG_HIGH_RES_TIMERS */
+#endif /* !CONFIG_HIGH_RES_TIMERS */
/*
* Called from run_local_timers in hardirq context every jiffy
@@ -1904,7 +1883,7 @@ void hrtimer_run_queues(void)
unsigned long flags;
ktime_t now;
- if (__hrtimer_hres_active(cpu_base))
+ if (hrtimer_hres_active(cpu_base))
return;
/*
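
The hrtimer.c changes fold __hrtimer_hres_active() into hrtimer_hres_active(), drop the now-unused __hrtimer_peek_ahead_timers(), and refuse to arm a timer whose callback was never assigned via the new WARN_ON_ONCE(!timer->function) check. A correctly ordered caller looks roughly like this; the timer and function names are illustrative only:

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer demo_timer;	/* hypothetical timer */

	static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
	{
		/* ... one-shot work ... */
		return HRTIMER_NORESTART;
	}

	static void demo_timer_arm(void)
	{
		hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		/*
		 * The callback must be set before hrtimer_start(); with this
		 * patch an unset callback triggers the WARN_ON_ONCE() and the
		 * timer is not armed.
		 */
		demo_timer.function = demo_timer_fn;
		hrtimer_start(&demo_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
	}
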
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 771d1e0403..b4843099a8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -1141,6 +1141,7 @@ void tick_broadcast_switch_to_oneshot(void)
#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
+ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
struct clock_event_device *bc;
unsigned long flags;
@@ -1148,6 +1149,28 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
bc = tick_broadcast_device.evtdev;
if (bc && broadcast_needs_cpu(bc, deadcpu)) {
+ /*
+ * If the broadcast force bit of the current CPU is set,
+ * then the current CPU has not yet reprogrammed the local
+ * timer device to avoid a ping-pong race. See
+ * ___tick_broadcast_oneshot_control().
+ *
+ * If the broadcast device is hrtimer based then
+ * programming the broadcast event below does not have any
+ * effect because the local clockevent device is not
+ * running and not programmed because the broadcast event
+ * is not earlier than the pending event of the local clock
+ * event device. As a consequence all CPUs waiting for a
+ * broadcast event are stuck forever.
+ *
+ * Detect this condition and reprogram the cpu local timer
+ * device to avoid the starvation.
+ */
+ if (tick_check_broadcast_expired()) {
+ cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
+ tick_program_event(td->evtdev->next_event, 1);
+ }
+
/* This moves the broadcast assignment to this CPU: */
clockevents_program_event(bc, bc->next_event, 1);
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b58dffc58a..4e18db1819 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -237,7 +237,9 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
}
}
-static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
+static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles);
+
+static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
{
struct timekeeper *tk = &tk_core.timekeeper;
u64 now, last, mask, max, delta;
@@ -264,34 +266,23 @@ static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
* Try to catch underflows by checking if we are seeing small
* mask-relative negative values.
*/
- if (unlikely((~delta & mask) < (mask >> 3))) {
+ if (unlikely((~delta & mask) < (mask >> 3)))
tk->underflow_seen = 1;
- delta = 0;
- }
- /* Cap delta value to the max_cycles values to avoid mult overflows */
- if (unlikely(delta > max)) {
+ /* Check for multiplication overflows */
+ if (unlikely(delta > max))
tk->overflow_seen = 1;
- delta = tkr->clock->max_cycles;
- }
- return delta;
+ /* timekeeping_cycles_to_ns() handles both under and overflow */
+ return timekeeping_cycles_to_ns(tkr, now);
}
#else
static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
{
}
-static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
+static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
{
- u64 cycle_now, delta;
-
- /* read clocksource */
- cycle_now = tk_clock_read(tkr);
-
- /* calculate the delta since the last update_wall_time */
- delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
-
- return delta;
+ BUG();
}
#endif
@@ -370,32 +361,46 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
}
/* Timekeeper helper functions. */
+static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
+{
+ return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
+}
-static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
+static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
- u64 nsec;
+ /* Calculate the delta since the last update_wall_time() */
+ u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;
- nsec = delta * tkr->mult + tkr->xtime_nsec;
- nsec >>= tkr->shift;
+ /*
+ * This detects both negative motion and the case where the delta
+ * overflows the multiplication with tkr->mult.
+ */
+ if (unlikely(delta > tkr->clock->max_cycles)) {
+ /*
+ * Handle clocksource inconsistency between CPUs to prevent
+ * time from going backwards by checking for the MSB of the
+ * mask being set in the delta.
+ */
+ if (delta & ~(mask >> 1))
+ return tkr->xtime_nsec >> tkr->shift;
+
+ return delta_to_ns_safe(tkr, delta);
+ }
- return nsec;
+ return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}
-static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
+static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
{
- u64 delta;
-
- delta = timekeeping_get_delta(tkr);
- return timekeeping_delta_to_ns(tkr, delta);
+ return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}
-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
+static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
- u64 delta;
+ if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
+ return timekeeping_debug_get_ns(tkr);
- /* calculate the delta since the last update_wall_time */
- delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
- return timekeeping_delta_to_ns(tkr, delta);
+ return __timekeeping_get_ns(tkr);
}
/**
@@ -431,14 +436,6 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
memcpy(base + 1, base, sizeof(*base));
}
-static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
-{
- u64 delta, cycles = tk_clock_read(tkr);
-
- delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
- return timekeeping_delta_to_ns(tkr, delta);
-}
-
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
struct tk_read_base *tkr;
@@ -449,7 +446,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
- now += fast_tk_get_delta_ns(tkr);
+ now += __timekeeping_get_ns(tkr);
} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
return now;
@@ -565,7 +562,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
tkr = tkf->base + (seq & 0x01);
basem = ktime_to_ns(tkr->base);
baser = ktime_to_ns(tkr->base_real);
- delta = fast_tk_get_delta_ns(tkr);
+ delta = __timekeeping_get_ns(tkr);
} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
if (mono)
@@ -800,10 +797,15 @@ static void timekeeping_forward_now(struct timekeeper *tk)
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
- tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
- tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+ while (delta > 0) {
+ u64 max = tk->tkr_mono.clock->max_cycles;
+ u64 incr = delta < max ? delta : max;
- tk_normalize_xtime(tk);
+ tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult;
+ tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult;
+ tk_normalize_xtime(tk);
+ delta -= incr;
+ }
}
/**
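
In the reworked timekeeping_cycles_to_ns() the common case still computes (delta * mult + xtime_nsec) >> shift directly; only a delta above max_cycles takes the delta_to_ns_safe() path, where mul_u64_u32_add_u64_shr() (include/linux/math64.h) keeps the intermediate product in 128 bits, and a delta with the mask's top bit set is treated as negative motion and clamped. A small user-space model of the overflow-safe arithmetic, using made-up clocksource constants:

	#include <stdint.h>
	#include <stdio.h>

	/* Model of mul_u64_u32_add_u64_shr(): (a * mul + b) >> shift,
	 * evaluated in 128 bits so a * mul cannot wrap. */
	static uint64_t mul_add_shr(uint64_t a, uint32_t mul, uint64_t b,
				    unsigned int shift)
	{
		return (uint64_t)((((unsigned __int128)a * mul) + b) >> shift);
	}

	int main(void)
	{
		uint32_t mult = 4194304;	/* hypothetical mult (2^22) */
		unsigned int shift = 22;	/* hypothetical shift */
		uint64_t xtime_nsec = 123;
		uint64_t delta = 1ULL << 46;	/* delta * mult would need 68 bits */

		/* Plain 64-bit math would wrap here; the widened form does not. */
		printf("safe: %llu ns\n",
		       (unsigned long long)mul_add_shr(delta, mult, xtime_nsec, shift));
		return 0;
	}
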
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 3baf2fbe68..48288dd4a1 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -312,7 +312,6 @@ static struct ctl_table timer_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- {}
};
static int __init timer_sysctl_init(void)
@@ -2488,7 +2487,7 @@ void update_process_times(int user_tick)
if (in_irq())
irq_work_tick();
#endif
- scheduler_tick();
+ sched_tick();
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
run_posix_cpu_timers();
}
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 84413114db..d91efe1dc3 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -507,7 +507,14 @@ static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc)
* (get_next_timer_interrupt())
* @firstexp: Contains the first event expiry information when last
* active CPU of hierarchy is on the way to idle to make
- * sure CPU will be back in time.
+ * sure CPU will be back in time. It is updated in top
+ * level group only. Be aware, there could occur a new top
+ * level of the hierarchy between the 'top level call' in
+ * tmigr_update_events() and the check for the parent group
+ * in walk_groups(). Then @firstexp might contain a value
+ * != KTIME_MAX even if it was not the final top
+ * level. This is not a problem, as the worst outcome is a
+ * CPU which might wake up a little early.
* @evt: Pointer to tmigr_event which needs to be queued (of idle
* child group)
* @childmask: childmask of child group
@@ -649,7 +656,7 @@ static bool tmigr_active_up(struct tmigr_group *group,
} while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state));
- if ((walk_done == false) && group->parent)
+ if (walk_done == false)
data->childmask = group->childmask;
/*
@@ -1317,20 +1324,9 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
/* Event Handling */
tmigr_update_events(group, child, data);
- if (group->parent && (walk_done == false))
+ if (walk_done == false)
data->childmask = group->childmask;
- /*
- * data->firstexp was set by tmigr_update_events() and contains the
- * expiry of the first global event which needs to be handled. It
- * differs from KTIME_MAX if:
- * - group is the top level group and
- * - group is idle (which means CPU was the last active CPU in the
- * hierarchy) and
- * - there is a pending event in the hierarchy
- */
- WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);
-
trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
return walk_done;
@@ -1552,10 +1548,11 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
data.childmask = child->childmask;
/*
- * There is only one new level per time. When connecting the
- * child and the parent and set the child active when the parent
- * is inactive, the parent needs to be the uppermost
- * level. Otherwise there went something wrong!
+ * There is only one new level per time (which is protected by
+ * tmigr_mutex). When connecting the child and the parent and
+ * set the child active when the parent is inactive, the parent
+ * needs to be the uppermost level. Otherwise there went
+ * something wrong!
*/
WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
}
diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h
index 6c37d94a37..494f68cc13 100644
--- a/kernel/time/timer_migration.h
+++ b/kernel/time/timer_migration.h
@@ -22,7 +22,17 @@ struct tmigr_event {
* struct tmigr_group - timer migration hierarchy group
* @lock: Lock protecting the event information and group hierarchy
* information during setup
- * @parent: Pointer to the parent group
+ * @parent: Pointer to the parent group. Pointer is updated when a
+ * new hierarchy level is added because of a CPU coming
+ * online the first time. Once it is set, the pointer will
+ * not be removed or updated. When accessing parent pointer
+ * lock less to decide whether to abort a propagation or
+ * not, it is not a problem. The worst outcome is an
+ * unnecessary/early CPU wake up. But do not access parent
+ * pointer several times in the same 'action' (like
+ * activation, deactivation, check for remote expiry,...)
+ * without holding the lock as it is not ensured that value
+ * will not change.
* @groupevt: Next event of the group which is only used when the
* group is !active. The group event is then queued into
* the parent timer queue.
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index f0d5062d9c..9193d6133e 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -22,10 +22,16 @@ static inline void update_vdso_data(struct vdso_data *vdata,
u64 nsec, sec;
vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+ vdata[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles;
+#endif
vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+ vdata[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles;
+#endif
vdata[CS_RAW].mask = tk->tkr_raw.mask;
vdata[CS_RAW].mult = tk->tkr_raw.mult;
vdata[CS_RAW].shift = tk->tkr_raw.shift;
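
With CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT, the timekeeper now exports max_cycles into the vDSO data pages so the user-space gettime fast path can apply the same delta bound as timekeeping_cycles_to_ns() before multiplying. A rough sketch of such a reader-side guard; apart from the max_cycles and mult fields visible in the hunk above, the names are illustrative rather than the actual lib/vdso code:

	/* Hypothetical helper: decide whether delta * vd->mult fits in 64 bits. */
	static __always_inline bool demo_vdso_delta_ok(const struct vdso_data *vd,
						       u64 delta)
	{
		/* Deltas above max_cycles must take a slower, wider fallback. */
		return delta <= vd->max_cycles;
	}
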