summaryrefslogtreecommitdiffstats
path: root/debian/patches-rt/0014-tick-Fix-timer-storm-since-introduction-of-timersd.patch
blob: d6f4c894e38108e463ac041a3db55aea27ae8e22 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
From 4198b74f0e8708aae4d35f92853c006348b773b5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 5 Apr 2022 03:07:52 +0200
Subject: [PATCH 14/64] tick: Fix timer storm since introduction of timersd
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.82-rt27.tar.xz

If timers are pending while the tick is reprogrammed on nohz_mode, the
next expiry is not armed to fire now, it is delayed one jiffy forward
instead so as not to raise an inextinguishable timer storm with such
scenario:

1) IRQ triggers and queue a timer
2) ksoftirqd() is woken up
3) IRQ tail: timer is reprogrammed to fire now
4) IRQ exit
5) TIMER interrupt
6) goto 3)

...all that until we finally reach ksoftirqd.

Unfortunately we are checking the wrong softirq vector bitmask since
timersd kthread has split from ksoftirqd. Timers now have their own
vector state field that must be checked separately. As a result, the
old timer storm is back. This shows up early on boot with extremely long
initcalls:

	[  333.004807] initcall dquot_init+0x0/0x111 returned 0 after 323822879 usecs

and the cause is uncovered with the right trace events showing just
10 microseconds between ticks (~100 000 Hz):

|swapper/-1 1dn.h111 60818582us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415486608
|swapper/-1 1dn.h111 60818592us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415496082
|swapper/-1 1dn.h111 60818601us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415505550

Fix this by checking the right timer vector state from the nohz code.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org
---
 include/linux/interrupt.h | 12 ++++++++++++
 kernel/softirq.c          |  7 +------
 kernel/time/tick-sched.c  |  2 +-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f459b0f27c94..a5091ac97fc6 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -611,9 +611,16 @@ extern void raise_softirq(unsigned int nr);
 
 #ifdef CONFIG_PREEMPT_RT
 DECLARE_PER_CPU(struct task_struct *, timersd);
+DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
+
 extern void raise_timer_softirq(void);
 extern void raise_hrtimer_softirq(void);
 
+static inline unsigned int local_pending_timers(void)
+{
+        return __this_cpu_read(pending_timer_softirq);
+}
+
 #else
 static inline void raise_timer_softirq(void)
 {
@@ -624,6 +631,11 @@ static inline void raise_hrtimer_softirq(void)
 {
 	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
+
+static inline unsigned int local_pending_timers(void)
+{
+        return local_softirq_pending();
+}
 #endif
 
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 1892af494cdd..ab1fe34326ba 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -639,12 +639,7 @@ static inline void tick_irq_exit(void)
 
 #ifdef CONFIG_PREEMPT_RT
 DEFINE_PER_CPU(struct task_struct *, timersd);
-static DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
-
-static unsigned int local_pending_timers(void)
-{
-        return __this_cpu_read(pending_timer_softirq);
-}
+DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
 
 static void wake_timersd(void)
 {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8cfdc6b978d7..48fd460a5d0e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -800,7 +800,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 static inline bool local_timer_softirq_pending(void)
 {
-	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+	return local_pending_timers() & BIT(TIMER_SOFTIRQ);
 }
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
-- 
2.44.0