summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_connection_sock.c33
-rw-r--r--net/ipv4/inet_diag.c6
-rw-r--r--net/ipv4/inet_fragment.c70
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c41
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_tunnel.c15
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv4/udp_offload.c23
16 files changed, 163 insertions, 68 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 79fa19a36..8407098a5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
struct sock_reuseport *reuseport_cb;
struct inet_bind_hashbucket *head2;
struct inet_bind2_bucket *tb2;
+ bool conflict = false;
bool reuseport_cb_ok;
rcu_read_lock();
@@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
spin_lock(&head2->lock);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
- break;
+ inet_bind_bucket_for_each(tb2, &head2->chain) {
+ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ continue;
- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok)) {
- spin_unlock(&head2->lock);
- return true;
+ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
+
+ conflict = true;
+ break;
}
spin_unlock(&head2->lock);
- return false;
+
+ return conflict;
}
/*
@@ -771,6 +774,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f7426926a..8f690a6e6 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
return ERR_PTR(-ENOENT);
}
- if (!inet_diag_table[proto])
+ if (!READ_ONCE(inet_diag_table[proto]))
sock_load_diag_module(AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
@@ -1419,7 +1419,7 @@ int inet_diag_register(const struct inet_diag_handler *h)
mutex_lock(&inet_diag_table_mutex);
err = -EEXIST;
if (!inet_diag_table[type]) {
- inet_diag_table[type] = h;
+ WRITE_ONCE(inet_diag_table[type], h);
err = 0;
}
mutex_unlock(&inet_diag_table_mutex);
@@ -1436,7 +1436,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
return;
mutex_lock(&inet_diag_table_mutex);
- inet_diag_table[type] = NULL;
+ WRITE_ONCE(inet_diag_table[type], NULL);
mutex_unlock(&inet_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c9f9ac501..834cdc577 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,6 +24,8 @@
#include <net/ip.h>
#include <net/ipv6.h>
+#include "../core/sock_destructor.h"
+
/* Use skb->cb to track consecutive/adjacent fragments coming at
* the end of the queue. Nodes in the rb-tree queue will
* contain "runs" of one or more adjacent fragments.
@@ -39,6 +41,7 @@ struct ipfrag_skb_cb {
};
struct sk_buff *next_frag;
int frag_run_len;
+ int ip_defrag_offset;
};
#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
@@ -390,12 +393,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
*/
if (!last)
fragrun_create(q, skb); /* First fragment. */
- else if (last->ip_defrag_offset + last->len < end) {
+ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
/* This is the common case: skb goes to the end. */
/* Detect and discard overlaps. */
- if (offset < last->ip_defrag_offset + last->len)
+ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
return IPFRAG_OVERLAP;
- if (offset == last->ip_defrag_offset + last->len)
+ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
fragrun_append_to_last(q, skb);
else
fragrun_create(q, skb);
@@ -412,13 +415,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
parent = *rbn;
curr = rb_to_skb(parent);
- curr_run_end = curr->ip_defrag_offset +
+ curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
FRAG_CB(curr)->frag_run_len;
- if (end <= curr->ip_defrag_offset)
+ if (end <= FRAG_CB(curr)->ip_defrag_offset)
rbn = &parent->rb_left;
else if (offset >= curr_run_end)
rbn = &parent->rb_right;
- else if (offset >= curr->ip_defrag_offset &&
+ else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
end <= curr_run_end)
return IPFRAG_DUP;
else
@@ -432,7 +435,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
rb_insert_color(&skb->rbnode, &q->rb_fragments);
}
- skb->ip_defrag_offset = offset;
+ FRAG_CB(skb)->ip_defrag_offset = offset;
return IPFRAG_OK;
}
@@ -442,13 +445,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
struct sk_buff *parent)
{
struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
- struct sk_buff **nextp;
+ void (*destructor)(struct sk_buff *);
+ unsigned int orig_truesize = 0;
+ struct sk_buff **nextp = NULL;
+ struct sock *sk = skb->sk;
int delta;
+ if (sk && is_skb_wmem(skb)) {
+ /* TX: skb->sk might have been passed as argument to
+ * dst->output and must remain valid until tx completes.
+ *
+ * Move sk to reassembled skb and fix up wmem accounting.
+ */
+ orig_truesize = skb->truesize;
+ destructor = skb->destructor;
+ }
+
if (head != skb) {
fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- return NULL;
+ if (!fp) {
+ head = skb;
+ goto out_restore_sk;
+ }
FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
if (RB_EMPTY_NODE(&skb->rbnode))
FRAG_CB(parent)->next_frag = fp;
@@ -457,6 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
&q->rb_fragments);
if (q->fragments_tail == skb)
q->fragments_tail = fp;
+
+ if (orig_truesize) {
+ /* prevent skb_morph from releasing sk */
+ skb->sk = NULL;
+ skb->destructor = NULL;
+ }
skb_morph(skb, head);
FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
rb_replace_node(&head->rbnode, &skb->rbnode,
@@ -464,13 +488,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
consume_skb(head);
head = skb;
}
- WARN_ON(head->ip_defrag_offset != 0);
+ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
delta = -head->truesize;
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
- return NULL;
+ goto out_restore_sk;
delta += head->truesize;
if (delta)
@@ -486,7 +510,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
- return NULL;
+ goto out_restore_sk;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
@@ -503,6 +527,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
nextp = &skb_shinfo(head)->frag_list;
}
+out_restore_sk:
+ if (orig_truesize) {
+ int ts_delta = head->truesize - orig_truesize;
+
+ /* if this reassembled skb is fragmented later,
+ * fraglist skbs will get skb->sk assigned from head->sk,
+ * and each frag skb will be released via sock_wfree.
+ *
+ * Update sk_wmem_alloc.
+ */
+ head->sk = sk;
+ head->destructor = destructor;
+ refcount_add(ts_delta, &sk->sk_wmem_alloc);
+ }
+
return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);
@@ -510,6 +549,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
+ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
+ const unsigned int head_truesize = head->truesize;
struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
@@ -573,6 +614,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
head->prev = NULL;
head->tstamp = q->stamp;
head->mono_delivery_time = q->mono_delivery_time;
+
+ if (sk)
+ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 56776e1b1..0ad25e678 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1117,7 +1117,7 @@ error:
sock_prot_inuse_add(net, sk->sk_prot, -1);
spin_lock(lock);
- sk_nulls_del_node_init_rcu(sk);
+ __sk_nulls_del_node_init_rcu(sk);
spin_unlock(lock);
sk->sk_hash = 0;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 1d77d992e..340a8f0c2 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -281,12 +281,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
{
- struct inet_timewait_sock *tw;
- struct sock *sk;
struct hlist_nulls_node *node;
unsigned int slot;
+ struct sock *sk;
for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
@@ -295,38 +295,35 @@ restart_rcu:
rcu_read_lock();
restart:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
- if (sk->sk_state != TCP_TIME_WAIT) {
- /* A kernel listener socket might not hold refcnt for net,
- * so reqsk_timer_handler() could be fired after net is
- * freed. Userspace listener and reqsk never exist here.
- */
- if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV &&
- hashinfo->pernet)) {
- struct request_sock *req = inet_reqsk(sk);
-
- inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
- }
+ int state = inet_sk_state_load(sk);
+ if ((1 << state) & ~(TCPF_TIME_WAIT |
+ TCPF_NEW_SYN_RECV))
continue;
- }
- tw = inet_twsk(sk);
- if ((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->ns.count))
+ if (sk->sk_family != family ||
+ refcount_read(&sock_net(sk)->ns.count))
continue;
- if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (unlikely((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->ns.count))) {
- inet_twsk_put(tw);
+ if (unlikely(sk->sk_family != family ||
+ refcount_read(&sock_net(sk)->ns.count))) {
+ sock_gen_put(sk);
goto restart;
}
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule_put(tw);
+ if (state == TCP_TIME_WAIT) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ } else {
+ struct request_sock *req = inet_reqsk(sk);
+
+ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
+ req);
+ }
local_bh_enable();
goto restart_rcu;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fb1535698..6c309c1ec 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -378,6 +378,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
@@ -480,7 +481,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d67d026d7..0267fa053 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 328f9068c..3445e576b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -364,7 +364,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
bool log_ecn_error)
{
const struct iphdr *iph = ip_hdr(skb);
- int err;
+ int nh, err;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
@@ -390,8 +390,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+ iph = (struct iphdr *)(skb->head + nh);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d5421c38c..3ed9ed2bf 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1581,9 +1581,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
if (copy_from_sockptr(&olr, optlen, sizeof(int)))
return -EFAULT;
- olr = min_t(unsigned int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
+
+ olr = min_t(unsigned int, olr, sizeof(int));
+
if (copy_to_sockptr(optlen, &olr, sizeof(int)))
return -EFAULT;
if (copy_to_sockptr(optval, &val, olr))
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2407066b0..b150c9929 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index da5998011..1f365e28e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1110,6 +1110,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1494,6 +1496,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7c63b91ed..ee0efd0ef 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -348,6 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);
+ skb->protocol = htons(ETH_P_IP);
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 86e7695d9..f01c0a5d2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3052,6 +3052,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
@@ -4102,11 +4104,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 42844d20d..b3bfa1a09 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -357,10 +357,6 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family)
/* Even if tw_refcount == 1, we must clean up kernel reqsk */
inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
} else if (!purged_once) {
- /* The last refcount is decremented in tcp_sk_exit_batch() */
- if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1)
- continue;
-
inet_twsk_purge(&tcp_hashinfo, family);
purged_once = true;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 87d759bab..2a78c7818 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -603,6 +603,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
@@ -2790,11 +2797,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case UDP_CORK:
val = udp_test_bit(CORK, sk);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 8096576fd..84b7d6089 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -441,8 +441,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
@@ -543,11 +544,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- /* we can do L4 aggregation only if the packet can't land in a tunnel
- * otherwise we could corrupt the inner stream
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
*/
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
+ /* If the packet was locally encapsulated in a UDP tunnel that
+ * wasn't detected above, do not GRO.
+ */
+ if (skb->encapsulation)
+ goto out;
+
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
@@ -707,13 +716,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}