1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 31 Aug 2016 17:21:56 +0200
Subject: [PATCH 202/351] net: add back the missing serialization in
ip_send_unicast_reply()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=507b155f20020ad477cfe379fe332536dd19a7d2
Some time ago Sami Pietikäinen reported a crash on -RT in
ip_send_unicast_reply() which was later fixed by Nicholas Mc Guire
(v3.12.8-rt11). Later (v3.18.8) the code was reworked and I dropped the
patch. As it turns out it was mistake.
I have reports that the same crash is possible with a similar backtrace.
It seems that vanilla protects access to this_cpu_ptr() via
local_bh_disable(). This does not work the on -RT since we can have
NET_RX and NET_TX running in parallel on the same CPU.
This is brings back the old locks.
|Unable to handle kernel NULL pointer dereference at virtual address 00000010
|PC is at __ip_make_skb+0x198/0x3e8
|[<c04e39d8>] (__ip_make_skb) from [<c04e3ca8>] (ip_push_pending_frames+0x20/0x40)
|[<c04e3ca8>] (ip_push_pending_frames) from [<c04e3ff0>] (ip_send_unicast_reply+0x210/0x22c)
|[<c04e3ff0>] (ip_send_unicast_reply) from [<c04fbb54>] (tcp_v4_send_reset+0x190/0x1c0)
|[<c04fbb54>] (tcp_v4_send_reset) from [<c04fcc1c>] (tcp_v4_do_rcv+0x22c/0x288)
|[<c04fcc1c>] (tcp_v4_do_rcv) from [<c0474364>] (release_sock+0xb4/0x150)
|[<c0474364>] (release_sock) from [<c04ed904>] (tcp_close+0x240/0x454)
|[<c04ed904>] (tcp_close) from [<c0511408>] (inet_release+0x74/0x7c)
|[<c0511408>] (inet_release) from [<c0470728>] (sock_release+0x30/0xb0)
|[<c0470728>] (sock_release) from [<c0470abc>] (sock_close+0x1c/0x24)
|[<c0470abc>] (sock_close) from [<c0115ec4>] (__fput+0xe8/0x20c)
|[<c0115ec4>] (__fput) from [<c0116050>] (____fput+0x18/0x1c)
|[<c0116050>] (____fput) from [<c0058138>] (task_work_run+0xa4/0xb8)
|[<c0058138>] (task_work_run) from [<c0011478>] (do_work_pending+0xd0/0xe4)
|[<c0011478>] (do_work_pending) from [<c000e740>] (work_pending+0xc/0x20)
|Code: e3530001 8a000001 e3a00040 ea000011 (e5973010)
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
net/ipv4/tcp_ipv4.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6549e07ce19c..84adb4755c5e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,6 +62,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -637,6 +638,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_v4_send_check);
+static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
/*
* This routine will send an RST to the other tcp.
*
@@ -771,6 +773,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
+ local_lock(tcp_sk_lock);
ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
if (sk)
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -783,6 +786,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
ctl_sk->sk_mark = 0;
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
+ local_unlock(tcp_sk_lock);
local_bh_enable();
#ifdef CONFIG_TCP_MD5SIG
@@ -863,6 +867,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
+ local_lock(tcp_sk_lock);
ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
if (sk)
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -874,6 +879,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk->sk_mark = 0;
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+ local_unlock(tcp_sk_lock);
local_bh_enable();
}
|