summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-08 17:45:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-08 17:45:29 +0000
commit83506c85f8d4332b3edfdc8f1fd07aa691415350 (patch)
tree316b9630e093bb3b80e5d6e1c304151b5597901e /net/ipv4
parentAdding upstream version 5.10.209. (diff)
downloadlinux-f56a81268c77b9694116d72b0068d6a36437c782.tar.xz
linux-f56a81268c77b9694116d72b0068d6a36437c782.zip
Adding upstream version 5.10.216.upstream/5.10.216
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--net/ipv4/af_inet.c9
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c21
-rw-r--r--net/ipv4/igmp.c28
-rw-r--r--net/ipv4/inet_connection_sock.c18
-rw-r--r--net/ipv4/inet_diag.c6
-rw-r--r--net/ipv4/inet_timewait_sock.c32
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_output.c12
-rw-r--r--net/ipv4/ip_sockglue.c100
-rw-r--r--net/ipv4/ip_tunnel.c43
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ipmr.c13
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/tcp.c51
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/udp_offload.c13
19 files changed, 273 insertions, 122 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index acb488735..5f1b334e6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -327,6 +327,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
@@ -1597,10 +1600,12 @@ EXPORT_SYMBOL(inet_current_timestamp);
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
- if (sk->sk_family == AF_INET)
+ unsigned int family = READ_ONCE(sk->sk_family);
+
+ if (family == AF_INET)
return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
+ if (family == AF_INET6)
return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
return -EINVAL;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 83a47998c..8ae9bd6f9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1104,7 +1104,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
if (neigh) {
if (!(neigh->nud_state & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min)));
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da1ca8081..9ac7d47d2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1798,6 +1798,21 @@ done:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
@@ -1849,8 +1864,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
head = &tgt_net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
- tgt_net->dev_base_seq;
+ cb->seq = inet_base_seq(tgt_net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -2249,8 +2263,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index cb55fede0..f0a313747 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2493,8 +2493,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
goto done;
}
newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
- memcpy(newpsl->sl_addr, msf->imsf_slist,
- msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+ memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+ flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
@@ -2526,11 +2526,10 @@ done:
err = ip_mc_leave_group(sk, &imr);
return err;
}
-
int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
- struct ip_msfilter __user *optval, int __user *optlen)
+ sockptr_t optval, sockptr_t optlen)
{
- int err, len, count, copycount;
+ int err, len, count, copycount, msf_size;
struct ip_mreqn imr;
__be32 addr = msf->imsf_multiaddr;
struct ip_mc_socklist *pmc;
@@ -2571,14 +2570,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
count = psl->sl_count;
}
copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
- len = copycount * sizeof(psl->sl_addr[0]);
+ len = flex_array_size(psl, sl_addr, copycount);
msf->imsf_numsrc = count;
- if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
- copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
+ msf_size = IP_MSFILTER_SIZE(copycount);
+ if (copy_to_sockptr(optlen, &msf_size, sizeof(int)) ||
+ copy_to_sockptr(optval, msf, IP_MSFILTER_SIZE(0))) {
return -EFAULT;
}
if (len &&
- copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+ copy_to_sockptr_offset(optval,
+ offsetof(struct ip_msfilter, imsf_slist_flex),
+ psl->sl_addr, len))
return -EFAULT;
return 0;
done:
@@ -2586,7 +2588,7 @@ done:
}
int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
- struct sockaddr_storage __user *p)
+ sockptr_t optval, size_t ss_offset)
{
int i, count, copycount;
struct sockaddr_in *psin;
@@ -2616,15 +2618,17 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
- for (i = 0; i < copycount; i++, p++) {
+ for (i = 0; i < copycount; i++) {
struct sockaddr_storage ss;
psin = (struct sockaddr_in *)&ss;
memset(&ss, 0, sizeof(ss));
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = psl->sl_addr[i];
- if (copy_to_user(p, &ss, sizeof(ss)))
+ if (copy_to_sockptr_offset(optval, ss_offset,
+ &ss, sizeof(ss)))
return -EFAULT;
+ ss_offset += sizeof(ss);
}
return 0;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5f71a1c74..6ebe43b4d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -536,6 +536,10 @@ out:
}
if (req)
reqsk_put(req);
+
+ if (newsk)
+ inet_init_csk_locks(newsk);
+
return newsk;
out_err:
newsk = NULL;
@@ -576,6 +580,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fa9f1de58..27a5a7d66 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
return ERR_PTR(-ENOENT);
}
- if (!inet_diag_table[proto])
+ if (!READ_ONCE(inet_diag_table[proto]))
sock_load_diag_module(AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
@@ -1413,7 +1413,7 @@ int inet_diag_register(const struct inet_diag_handler *h)
mutex_lock(&inet_diag_table_mutex);
err = -EEXIST;
if (!inet_diag_table[type]) {
- inet_diag_table[type] = h;
+ WRITE_ONCE(inet_diag_table[type], h);
err = 0;
}
mutex_unlock(&inet_diag_table_mutex);
@@ -1430,7 +1430,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
return;
mutex_lock(&inet_diag_table_mutex);
- inet_diag_table[type] = NULL;
+ WRITE_ONCE(inet_diag_table[type], NULL);
mutex_unlock(&inet_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c411c87ae..85cb44bfa 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -254,12 +254,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
{
- struct inet_timewait_sock *tw;
- struct sock *sk;
struct hlist_nulls_node *node;
unsigned int slot;
+ struct sock *sk;
for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
@@ -268,25 +268,35 @@ restart_rcu:
rcu_read_lock();
restart:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
- if (sk->sk_state != TCP_TIME_WAIT)
+ int state = inet_sk_state_load(sk);
+
+ if ((1 << state) & ~(TCPF_TIME_WAIT |
+ TCPF_NEW_SYN_RECV))
continue;
- tw = inet_twsk(sk);
- if ((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->count))
+
+ if (sk->sk_family != family ||
+ refcount_read(&sock_net(sk)->count))
continue;
- if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (unlikely((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->count))) {
- inet_twsk_put(tw);
+ if (unlikely(sk->sk_family != family ||
+ refcount_read(&sock_net(sk)->count))) {
+ sock_gen_put(sk);
goto restart;
}
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule_put(tw);
+ if (state == TCP_TIME_WAIT) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ } else {
+ struct request_sock *req = inet_reqsk(sk);
+
+ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
+ req);
+ }
local_bh_enable();
goto restart_rcu;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a6ad0fe13..0ac652fef 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -278,8 +278,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a99c37410..12ee857d6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1260,6 +1260,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
if (unlikely(!rt))
return -EFAULT;
+ cork->fragsize = ip_sk_use_pmtu(sk) ?
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
/*
* setup for corking.
*/
@@ -1276,12 +1282,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->addr = ipc->addr;
}
- cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
-
- if (!inetdev_valid_mtu(cork->fragsize))
- return -ENETUNREACH;
-
cork->gso_size = ipc->gso_size;
cork->dst = &rt->dst;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1b35afd32..b300d0988 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -670,12 +670,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
struct sockaddr_storage *group,
struct sockaddr_storage *list)
{
- int msize = IP_MSFILTER_SIZE(numsrc);
struct ip_msfilter *msf;
struct sockaddr_in *psin;
int err, i;
- msf = kmalloc(msize, GFP_KERNEL);
+ msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
if (!msf)
return -ENOBUFS;
@@ -691,7 +690,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
if (psin->sin_family != AF_INET)
goto Eaddrnotavail;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
}
err = ip_mc_msfilter(sk, msf, ifindex);
kfree(msf);
@@ -798,7 +797,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
- gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+ gsf->gf_fmode, &gsf->gf_group,
+ gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return err;
@@ -807,7 +807,7 @@ out_free_gsf:
static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
unsigned int n;
void *p;
@@ -821,7 +821,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
p = kmalloc(optlen + 4, GFP_KERNEL);
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
err = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
@@ -834,7 +834,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_gsf;
err = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_gsf;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -842,7 +842,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
- &gf32->gf_group, gf32->gf_slist);
+ &gf32->gf_group, gf32->gf_slist_flex);
out_free_gsf:
kfree(p);
return err;
@@ -1460,37 +1460,37 @@ static bool getsockopt_needs_rtnl(int optname)
return false;
}
-static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
- struct group_filter __user *p = optval;
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter gsf;
- int num;
+ int num, gsf_size;
int err;
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gsf, p, size0))
+ if (copy_from_sockptr(&gsf, optval, size0))
return -EFAULT;
num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gsf, optval,
+ offsetof(struct group_filter, gf_slist_flex));
if (err)
return err;
if (gsf.gf_numsrc < num)
num = gsf.gf_numsrc;
- if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
- copy_to_user(p, &gsf, size0))
+ gsf_size = GROUP_FILTER_SIZE(num);
+ if (copy_to_sockptr(optlen, &gsf_size, sizeof(int)) ||
+ copy_to_sockptr(optval, &gsf, size0))
return -EFAULT;
return 0;
}
-static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int compat_ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
- struct compat_group_filter __user *p = optval;
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter gf32;
struct group_filter gf;
int num;
@@ -1498,7 +1498,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gf32, p, size0))
+ if (copy_from_sockptr(&gf32, optval, size0))
return -EFAULT;
gf.gf_interface = gf32.gf_interface;
@@ -1506,21 +1506,24 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
num = gf.gf_numsrc = gf32.gf_numsrc;
gf.gf_group = gf32.gf_group;
- err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gf, optval,
+ offsetof(struct compat_group_filter, gf_slist_flex));
if (err)
return err;
if (gf.gf_numsrc < num)
num = gf.gf_numsrc;
len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
- if (put_user(len, optlen) ||
- put_user(gf.gf_fmode, &p->gf_fmode) ||
- put_user(gf.gf_numsrc, &p->gf_numsrc))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode),
+ &gf.gf_fmode, sizeof(gf.gf_fmode)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc),
+ &gf.gf_numsrc, sizeof(gf.gf_numsrc)))
return -EFAULT;
return 0;
}
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockptr_t optval, sockptr_t optlen)
{
struct inet_sock *inet = inet_sk(sk);
bool needs_rtnl = getsockopt_needs_rtnl(optname);
@@ -1533,7 +1536,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_getsockopt(sk, optname, optval, optlen);
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len < 0)
return -EINVAL;
@@ -1558,15 +1561,17 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
inet_opt->opt.optlen);
release_sock(sk);
- if (opt->optlen == 0)
- return put_user(0, optlen);
+ if (opt->optlen == 0) {
+ len = 0;
+ return copy_to_sockptr(optlen, &len, sizeof(int));
+ }
ip_options_undo(opt);
len = min_t(unsigned int, len, opt->optlen);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, opt->__data, len))
+ if (copy_to_sockptr(optval, opt->__data, len))
return -EFAULT;
return 0;
}
@@ -1657,9 +1662,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
addr.s_addr = inet->mc_addr;
release_sock(sk);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &addr, len))
+ if (copy_to_sockptr(optval, &addr, len))
return -EFAULT;
return 0;
}
@@ -1671,12 +1676,11 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
err = -EINVAL;
goto out;
}
- if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
+ if (copy_from_sockptr(&msf, optval, IP_MSFILTER_SIZE(0))) {
err = -EFAULT;
goto out;
}
- err = ip_mc_msfget(sk, &msf,
- (struct ip_msfilter __user *)optval, optlen);
+ err = ip_mc_msfget(sk, &msf, optval, optlen);
goto out;
}
case MCAST_MSFILTER:
@@ -1698,8 +1702,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control_is_user = true;
- msg.msg_control_user = optval;
+ if (optval.is_kernel) {
+ msg.msg_control_is_user = false;
+ msg.msg_control = optval.kernel;
+ } else {
+ msg.msg_control_is_user = true;
+ msg.msg_control_user = optval.user;
+ }
msg.msg_controllen = len;
msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
@@ -1720,7 +1729,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
}
len -= msg.msg_controllen;
- return put_user(len, optlen);
+ return copy_to_sockptr(optlen, &len, sizeof(int));
}
case IP_FREEBIND:
val = inet->freebind;
@@ -1743,15 +1752,15 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &ucval, 1))
+ if (copy_to_sockptr(optval, &ucval, 1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, len))
+ if (copy_to_sockptr(optval, &val, len))
return -EFAULT;
}
return 0;
@@ -1768,7 +1777,8 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname,
+ USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 99f70b990..0953d805c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -364,7 +364,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
bool log_ecn_error)
{
const struct iphdr *iph = ip_hdr(skb);
- int err;
+ int nh, err;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
@@ -390,8 +390,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+ iph = (struct iphdr *)(skb->head + nh);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -540,6 +553,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -613,13 +640,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -797,16 +824,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index da9a55c68..ba1388ba6 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
};
skb_reset_network_header(skb);
- csum = csum_partial(icmp6h, len, 0);
+ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
IPPROTO_ICMPV6, csum);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index be1976536..db184cb82 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1540,7 +1540,8 @@ out:
}
/* Getsock opt support for the multicast routing system. */
-int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
+int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
+ sockptr_t optlen)
{
int olr;
int val;
@@ -1571,14 +1572,16 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
return -ENOPROTOOPT;
}
- if (get_user(olr, optlen))
+ if (copy_from_sockptr(&olr, optlen, sizeof(int)))
return -EFAULT;
- olr = min_t(unsigned int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
- if (put_user(olr, optlen))
+
+ olr = min_t(unsigned int, olr, sizeof(int));
+
+ if (copy_to_sockptr(optlen, &olr, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, olr))
+ if (copy_to_sockptr(optval, &val, olr))
return -EFAULT;
return 0;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d6d45d820..5823e89b8 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -955,6 +955,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -963,6 +965,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1253,6 +1257,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1261,6 +1267,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ec981618b..22e9ff592 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1109,6 +1109,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1117,6 +1119,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1493,6 +1497,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1501,6 +1507,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d360c7d70..cc409cc07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -955,13 +955,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
@@ -2090,6 +2088,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
int err = -EINVAL;
u32 tag = 0;
+ if (!in_dev)
+ return -EINVAL;
+
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3dd9b76f4..ac6cb2dc6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -726,6 +726,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now,
if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+ smp_mb__after_atomic();
}
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
@@ -1777,6 +1778,36 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
return frag;
}
+static bool can_map_frag(const skb_frag_t *frag)
+{
+ struct page *page;
+
+ if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+ return false;
+
+ page = skb_frag_page(frag);
+
+ if (PageCompound(page) || page->mapping)
+ return false;
+
+ return true;
+}
+
+static int find_next_mappable_frag(const skb_frag_t *frag,
+ int remaining_in_skb)
+{
+ int offset = 0;
+
+ if (likely(can_map_frag(frag)))
+ return 0;
+
+ while (offset < remaining_in_skb && !can_map_frag(frag)) {
+ offset += skb_frag_size(frag);
+ ++frag;
+ }
+ return offset;
+}
+
static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
struct sk_buff *skb, u32 copylen,
u32 *offset, u32 *seq)
@@ -1902,6 +1933,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
ret = 0;
curr_addr = address;
while (length + PAGE_SIZE <= zc->length) {
+ int mappable_offset;
+
if (zc->recv_skip_hint < PAGE_SIZE) {
u32 offset_frag;
@@ -1929,15 +1962,11 @@ static int tcp_zerocopy_receive(struct sock *sk,
if (!frags || offset_frag)
break;
}
- if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
- int remaining = zc->recv_skip_hint;
- while (remaining && (skb_frag_size(frags) != PAGE_SIZE ||
- skb_frag_off(frags))) {
- remaining -= skb_frag_size(frags);
- frags++;
- }
- zc->recv_skip_hint -= remaining;
+ mappable_offset = find_next_mappable_frag(frags,
+ zc->recv_skip_hint);
+ if (mappable_offset) {
+ zc->recv_skip_hint = mappable_offset;
break;
}
pages[pg_idx] = skb_frag_page(frags);
@@ -2688,6 +2717,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
@@ -3713,11 +3744,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 476f79f15..16ff3962b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -602,6 +602,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
@@ -1118,16 +1125,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip_cmsg_send(sk, msg, &ipc,
sk->sk_family == AF_INET6);
+ connected = 0;
+ }
if (unlikely(err < 0)) {
kfree(ipc.opt);
return err;
}
if (ipc.opt)
free = 1;
- connected = 0;
}
if (!ipc.opt) {
struct ip_options_rcu *inet_opt;
@@ -2748,11 +2756,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case UDP_CORK:
val = READ_ONCE(up->corkflag);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f4b8e5606..445d8bc30 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -512,6 +512,11 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
+ */
NAPI_GRO_CB(skb)->is_flist = 0;
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
@@ -668,13 +673,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}