From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 15 Aug 2022 17:29:50 +0200
Subject: [PATCH] net: Avoid the IPI to free the
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.6/older/patches-6.6.7-rt18.tar.xz

skb_attempt_defer_free() collects skbs, which were allocated on a
remote CPU, on a per-CPU list. These skbs are either freed on that
remote CPU once it enters NET_RX processing, or a remote IPI is
sent to raise the NET_RX softirq if the number of pending skbs
exceeds a threshold.
This remote IPI can cause a wakeup of ksoftirqd on PREEMPT_RT if the
remote CPU was idle. This is undesired because once ksoftirqd is
running it will take over all pending softirqs, and they will not be
executed as part of the threaded interrupt until ksoftirqd goes idle
again.
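
For reference, the existing (non-PREEMPT_RT) kick path, condensed from
the hunks below into a simplified sketch: the producer arms a one-shot
flag with cmpxchg() and fires the IPI, and the IPI handler raises
NET_RX and clears the flag so a later kick can be sent again.

    /* producer side, in skb_attempt_defer_free() */
    if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
            smp_call_function_single_async(cpu, &sd->defer_csd);

    /* IPI handler, runs in hardirq context on the remote CPU */
    static void trigger_rx_softirq(void *data)
    {
            struct softnet_data *sd = data;

            __raise_softirq_irqoff(NET_RX_SOFTIRQ);
            smp_store_release(&sd->defer_ipi_scheduled, 0);
    }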

To avoid all this, schedule the deferred clean-up from a worker.
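
Condensed, the worker-based variant applied below (a sketch of the
hunks that follow): schedule_work_on() replaces the IPI, and the work
item flushes the deferred list itself with bottom halves disabled, so
no softirq has to be raised on the remote CPU at all.

    /* producer side, PREEMPT_RT */
    if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
            schedule_work_on(cpu, &sd->defer_work);

    /* worker, runs in preemptible kworker context on the remote CPU */
    static void trigger_rx_softirq(struct work_struct *defer_work)
    {
            struct softnet_data *sd;

            sd = container_of(defer_work, struct softnet_data, defer_work);
            smp_store_release(&sd->defer_ipi_scheduled, 0);
            local_bh_disable();
            skb_defer_free_flush(sd);
            local_bh_enable();
    }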

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/netdevice.h |    4 ++++
 net/core/dev.c            |   39 ++++++++++++++++++++++++++++++---------
 net/core/skbuff.c         |    7 ++++++-
 3 files changed, 40 insertions(+), 10 deletions(-)

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3258,7 +3258,11 @@ struct softnet_data {
 	int			defer_count;
 	int			defer_ipi_scheduled;
 	struct sk_buff		*defer_list;
+#ifndef CONFIG_PREEMPT_RT
 	call_single_data_t	defer_csd;
+#else
+	struct work_struct	defer_work;
+#endif
 };
 
 static inline void input_queue_head_incr(struct softnet_data *sd)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4702,15 +4702,6 @@ static void rps_trigger_softirq(void *da
 
 #endif /* CONFIG_RPS */
 
-/* Called from hardirq (IPI) context */
-static void trigger_rx_softirq(void *data)
-{
-	struct softnet_data *sd = data;
-
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-	smp_store_release(&sd->defer_ipi_scheduled, 0);
-}
-
 /*
  * After we queued a packet into sd->input_pkt_queue,
  * we need to make sure this queue is serviced soon.
@@ -6679,6 +6670,32 @@ static void skb_defer_free_flush(struct
 	}
 }
 
+#ifndef CONFIG_PREEMPT_RT
+
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data)
+{
+	struct softnet_data *sd = data;
+
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	smp_store_release(&sd->defer_ipi_scheduled, 0);
+}
+
+#else
+
+static void trigger_rx_softirq(struct work_struct *defer_work)
+{
+	struct softnet_data *sd;
+
+	sd = container_of(defer_work, struct softnet_data, defer_work);
+	smp_store_release(&sd->defer_ipi_scheduled, 0);
+	local_bh_disable();
+	skb_defer_free_flush(sd);
+	local_bh_enable();
+}
+
+#endif
+
 static int napi_threaded_poll(void *data)
 {
 	struct napi_struct *napi = data;
@@ -11603,7 +11620,11 @@ static int __init net_dev_init(void)
 		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
 		sd->cpu = i;
 #endif
+#ifndef CONFIG_PREEMPT_RT
 		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
+#else
+		INIT_WORK(&sd->defer_work, trigger_rx_softirq);
+#endif
 		spin_lock_init(&sd->defer_lock);
 
 		init_gro_hash(&sd->backlog);
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6841,8 +6841,13 @@ nodefer:	__kfree_skb(skb);
 	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
 	 * if we are unlucky enough (this seems very unlikely).
 	 */
-	if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
+	if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) {
+#ifndef CONFIG_PREEMPT_RT
 		smp_call_function_single_async(cpu, &sd->defer_csd);
+#else
+		schedule_work_on(cpu, &sd->defer_work);
+#endif
+	}
 }
 
 static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,