From: Clark Williams <williams@redhat.com>
Date: Fri, 17 Dec 2021 14:31:31 -0600
Subject: [PATCH 198/353] net: move xmit_recursion to per-task variable on -RT
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=9fcf3310d12df456718d378ff2826b302f35c28b

A softirq on -RT can be preempted. That means one task is in
__dev_queue_xmit(), gets preempted, and another task may enter
__dev_queue_xmit() as well. netperf together with a bridge device
will then trigger the `recursion alert` because each task increments
the per-CPU xmit_recursion variable, so the counter hits the limit
even though neither task is actually recursing.
A virtual device like br0 is required to trigger this warning.
This patch moves the lock owner and counter to be per-task instead of
per-CPU so the recursion is counted properly on -RT. The owner is now a
task pointer rather than a CPU number. A minimal userspace sketch of the
per-task counter is appended after the diff.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Clark Williams <williams@redhat.com>
---
include/linux/netdevice.h | 71 +++++++++++++++++++++++++++++++++++++--
include/linux/sched.h | 3 ++
net/core/dev.c | 6 +++-
3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d3e4b3f195ff..63a0574e2ac2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -590,7 +590,11 @@ struct netdev_queue {
* write-mostly part
*/
spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct task_struct *xmit_lock_owner;
+#else
int xmit_lock_owner;
+#endif
/*
* Time (in jiffies) of last Tx
*/
@@ -3011,14 +3015,38 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
#endif
}
+#define XMIT_RECURSION_LIMIT 8
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline int dev_recursion_level(void)
+{
+ return current->xmit_recursion;
+}
+
+static inline bool dev_xmit_recursion(void)
+{
+ return unlikely(current->xmit_recursion >
+ XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+ current->xmit_recursion++;
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+ current->xmit_recursion--;
+}
+
+#else
+
static inline int dev_recursion_level(void)
{
return this_cpu_read(softnet_data.xmit.recursion);
}
-#define XMIT_RECURSION_LIMIT 8
static inline bool dev_xmit_recursion(void)
{
return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
@@ -3034,6 +3062,7 @@ static inline void dev_xmit_recursion_dec(void)
{
__this_cpu_dec(softnet_data.xmit.recursion);
}
+#endif
void __netif_schedule(struct Qdisc *q);
void netif_schedule_queue(struct netdev_queue *txq);
@@ -3843,6 +3872,44 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
return (1U << debug_value) - 1;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+ txq->xmit_lock_owner = current;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+ txq->xmit_lock_owner = NULL;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+ if (txq->xmit_lock_owner != NULL)
+ return true;
+ return false;
+}
+
+#else
+
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+ txq->xmit_lock_owner = cpu;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+ txq->xmit_lock_owner = -1;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+ if (txq->xmit_lock_owner != -1)
+ return true;
+ return false;
+}
+#endif
+
static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
{
spin_lock(&txq->_xmit_lock);
@@ -3895,7 +3962,7 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
static inline void txq_trans_update(struct netdev_queue *txq)
{
- if (txq->xmit_lock_owner != -1)
+ if (netdev_queue_has_owner(txq))
txq->trans_start = jiffies;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3bc6b3206e14..7aa299de5ebf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1222,6 +1222,9 @@ struct task_struct {
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
+#endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+ int xmit_recursion;
#endif
int pagefault_disabled;
#ifdef CONFIG_MMU
diff --git a/net/core/dev.c b/net/core/dev.c
index 7bce4581d6f0..32e8b237496c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3837,10 +3837,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (READ_ONCE(txq->xmit_lock_owner) != current) {
+#else
/* Other cpus might concurrently change txq->xmit_lock_owner
* to -1 or to their cpu id, but not to our id.
*/
if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
+#endif
if (dev_xmit_recursion())
goto recursion_alert;
@@ -8598,7 +8602,7 @@ static void netdev_init_one_queue(struct net_device *dev,
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
- queue->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(queue);
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
#ifdef CONFIG_BQL
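
For illustration only, not part of the patch: a minimal userspace sketch of
the recursion guard that the patch moves from per-CPU to per-task storage.
A C11 thread-local counter stands in for the new task_struct::xmit_recursion
field; the helper and variable names merely mirror the kernel ones, and the
toy "transmit" path is an assumption made up for the example.

/*
 * Toy model of the -RT per-task xmit recursion guard.  Each thread of
 * execution tracks only its own nesting depth, so a preemption cannot
 * mix two tasks' increments the way a shared per-CPU counter can.
 */
#include <stdio.h>

#define XMIT_RECURSION_LIMIT 8		/* same limit the kernel uses */

/* stand-in for current->xmit_recursion */
static _Thread_local int xmit_recursion;

static int dev_xmit_recursion(void)
{
	return xmit_recursion > XMIT_RECURSION_LIMIT;
}

/* toy transmit path that re-enters itself, like a stacked virtual device */
static int queue_xmit(int depth)
{
	int ret = 0;

	if (dev_xmit_recursion()) {
		fprintf(stderr, "recursion alert at depth %d\n", depth);
		return -1;
	}

	xmit_recursion++;			/* dev_xmit_recursion_inc() */
	printf("xmit at depth %d, recursion %d\n", depth, xmit_recursion);
	if (depth < 12)				/* pretend br0 requeues onto a lower device */
		ret = queue_xmit(depth + 1);
	xmit_recursion--;			/* dev_xmit_recursion_dec() */

	return ret;
}

int main(void)
{
	/*
	 * A single task nesting past the limit trips the guard; a second
	 * thread would have an independent counter, which is the point of
	 * making the kernel counter per-task on -RT.
	 */
	return queue_xmit(0) ? 1 : 0;
}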