diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 4 | ||||
-rw-r--r-- | net/ipv4/esp4_offload.c | 3 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 16 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 12 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_dup_ipv4.c | 18 | ||||
-rw-r--r-- | net/ipv4/raw.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 36 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 | ||||
-rw-r--r-- | net/ipv4/udplite.c | 2 |
11 files changed, 76 insertions, 31 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4b26ae525..7c902a1ef 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -578,6 +578,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; + sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -593,6 +594,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; + sk->sk_wait_pending--; return timeo; } @@ -865,7 +867,7 @@ int inet_shutdown(struct socket *sock, int how) EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ /* fall through */ default: - sk->sk_shutdown |= how; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how); if (sk->sk_prot->shutdown) sk->sk_prot->shutdown(sk, how); break; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 58834a10c..93045373e 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -237,6 +237,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0f9085220..7392a744c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -826,6 +826,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); + newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 898753328..e16373640 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -440,7 +440,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, flags, proto, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -546,7 +546,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -635,7 +635,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) } gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 06a981676..92fa11e75 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1443,9 +1443,19 @@ struct sk_buff *__ip_make_skb(struct sock *sk, cork->dst = NULL; skb_dst_set(skb, &rt->dst); - if (iph->protocol == IPPROTO_ICMP) - icmp_out_count(net, ((struct icmphdr *) - skb_transport_header(skb))->type); + if (iph->protocol == IPPROTO_ICMP) { + u8 icmp_type; + + /* For such sockets, transhdrlen is zero when do ip_append_data(), + * so icmphdr does not in skb linear region and can not get icmp_type + * by icmp_hdr(skb)->type. + */ + if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl) + icmp_type = fl4->fl4_icmp_type; + else + icmp_type = icmp_hdr(skb)->type; + icmp_out_count(net, icmp_type); + } ip_cork_release(cork); out: diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 82f341e84..fbf39077f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -316,7 +316,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->tos = val; ipc->priority = rt_tos2priority(ipc->tos); break; - + case IP_PROTOCOL: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 1 || val > 255) + return -EINVAL; + ipc->protocol = val; + break; default: return -EINVAL; } @@ -1522,6 +1529,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MINTTL: val = inet->min_ttl; break; + case IP_PROTOCOL: + val = inet_sk(sk)->inet_num; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index 0af3d8df7..157bca240 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -16,8 +16,8 @@ #include <net/netfilter/ipv4/nf_dup_ipv4.h> struct nft_dup_ipv4 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv4_eval(const struct nft_expr *expr, @@ -43,16 +43,16 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 654f586fc..8ad120c07 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,6 +563,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } ipcm_init_sk(&ipc, inet); + /* Keep backward compat */ + if (hdrincl) + ipc.protocol = IPPROTO_RAW; if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); @@ -630,7 +633,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68f89fe7f..cb96775fc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -515,6 +515,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + u8 shutdown; int state; sock_poll_wait(file, sock, wait); @@ -557,9 +558,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * NOTE. Check for TCP_CLOSE is added. The goal is to prevent * blocking on fresh not-connected or disconnected socket. --ANK */ - if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) + shutdown = READ_ONCE(sk->sk_shutdown); + if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected or passive Fast Open socket? */ @@ -575,8 +577,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tcp_stream_is_readable(tp, target, sk)) mask |= EPOLLIN | EPOLLRDNORM; - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - if (sk_stream_is_writeable(sk)) { + if (!(shutdown & SEND_SHUTDOWN)) { + if (__sk_stream_is_writeable(sk, 1)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -588,7 +590,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * pairs with the input side. */ smp_mb__after_atomic(); - if (sk_stream_is_writeable(sk)) + if (__sk_stream_is_writeable(sk, 1)) mask |= EPOLLOUT | EPOLLWRNORM; } } else @@ -2338,14 +2340,13 @@ bool tcp_check_oom(struct sock *sk, int shift) return too_many_orphans || out_of_socket_memory; } -void tcp_close(struct sock *sk, long timeout) +void __tcp_close(struct sock *sk, long timeout) { struct sk_buff *skb; int data_was_unread = 0; int state; - lock_sock(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); @@ -2505,6 +2506,12 @@ adjudge_to_death: out: bh_unlock_sock(sk); local_bh_enable(); +} + +void tcp_close(struct sock *sk, long timeout) +{ + lock_sock(sk); + __tcp_close(sk, timeout); release_sock(sk); sock_put(sk); } @@ -2561,6 +2568,12 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); @@ -2593,7 +2606,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); - sk->sk_shutdown = 0; + WRITE_ONCE(sk->sk_shutdown, 0); sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; tp->rcv_rtt_last_tsecr = 0; @@ -3363,7 +3376,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; - if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + if (tp->rx_opt.user_mss && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->rx_opt.user_mss; if (tp->repair) val = tp->rx_opt.mss_clamp; @@ -3802,7 +3816,7 @@ void tcp_done(struct sock *sk) if (req) reqsk_fastopen_remove(sk, req, false); - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 117167806..bd921fa7b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4130,7 +4130,7 @@ void tcp_fin(struct sock *sk) inet_csk_schedule_ack(sk); - sk->sk_shutdown |= RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); sock_set_flag(sk, SOCK_DONE); switch (sk->sk_state) { @@ -6209,7 +6209,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN); sk_dst_confirm(sk); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 6beab353b..27173549b 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -64,6 +64,8 @@ struct proto udplite_prot = { .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT |