diff options
Diffstat (limited to 'net/ipv6')
36 files changed, 962 insertions, 490 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 3036a45e8a..d283c59df4 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -52,4 +52,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o obj-y += mcast_snoop.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b007d098ff..5a839c5fb1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -706,6 +708,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -1397,6 +1414,7 @@ retry: idev->cnf.temp_valid_lft + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft); + cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); cfg.plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; @@ -2657,22 +2675,23 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = !create && stored_lft; + + if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); - - /* RFC4862 Section 5.5.3e: - * "Note that the preferred lifetime of the - * corresponding address is always reset to - * the Preferred Lifetime in the received - * Prefix Information option, regardless of - * whether the valid lifetime is also reset or - * ignored." - * - * So we should always update prefered_lft here. - */ - update_lft = 1; } if (update_lft) { @@ -5358,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; @@ -6842,6 +6861,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ra_honor_pio_life", + .data = &ipv6_devconf.ra_honor_pio_life, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #ifdef CONFIG_IPV6_ROUTER_PREF { .procname = "accept_ra_rtr_pref", diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6..c008d21925 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4375bfa4f6..959bfd9f63 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -220,10 +220,11 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; - np->mc_loop = 1; - np->mc_all = 1; + inet6_set_bit(MC6_LOOP, sk); + inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; - np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; + inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & + FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); @@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); @@ -1052,6 +1053,7 @@ static const struct ipv6_stub ipv6_stub_impl = { #if IS_ENABLED(CONFIG_XFRM) .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, @@ -1064,6 +1066,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 01005035ad..2016e90e6e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -13,8 +13,8 @@ #define pr_fmt(fmt) "IPv6: " fmt -#include <crypto/algapi.h> #include <crypto/hash.h> +#include <crypto/utils.h> #include <linux/module.h> #include <linux/slab.h> #include <net/ip.h> @@ -51,9 +51,7 @@ static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, { unsigned int len; - len = size + crypto_ahash_digestsize(ahash) + - (crypto_ahash_alignmask(ahash) & - ~(crypto_tfm_ctx_alignment() - 1)); + len = size + crypto_ahash_digestsize(ahash); len = ALIGN(len, crypto_tfm_ctx_alignment()); @@ -75,10 +73,9 @@ static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset) return tmp + offset; } -static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp, - unsigned int offset) +static inline u8 *ah_tmp_icv(void *tmp, unsigned int offset) { - return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1); + return tmp + offset; } static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, @@ -299,7 +296,7 @@ static void ah6_output_done(void *data, int err) iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen); + icv = ah_tmp_icv(iph_ext, extlen); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -362,7 +359,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) iph_ext = ah_tmp_ext(iph_base); seqhi = (__be32 *)((char *)iph_ext + extlen); - icv = ah_tmp_icv(ahash, seqhi, seqhi_len); + icv = ah_tmp_icv(seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; @@ -468,7 +465,7 @@ static void ah6_input_done(void *data, int err) work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); + icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) @@ -576,7 +573,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); - icv = ah_tmp_icv(ahash, seqhi, seqhi_len); + icv = ah_tmp_icv(seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 41ebc4e574..cc6a502db3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) struct flowi6 fl6; int err = 0; - if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (np->flow_label & IPV6_FLOWLABEL_MASK)) { flowlabel = fl6_sock_lookup(sk, np->flow_label); if (IS_ERR(flowlabel)) return -EINVAL; @@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (ipv6_addr_any(&usin->sin6_addr)) { @@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = icmp6_hdr(skb); struct sock_exterr_skb *serr; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = skb_clone(skb, GFP_ATOMIC); @@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __skb_pull(skb, payload - skb->data); - if (inet6_sk(sk)->recverr_rfc4884) + if (inet6_test_bit(RECVERR6_RFC4884, sk)) ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); skb_reset_transport_header(skb); @@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); @@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = ip6_flowinfo(ip6h); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index a189e08370..527b7caddb 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen) int off = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr; - if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP)) + /* ESP or ESPINUDP */ + if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP || + ipv6_hdr->nexthdr == NEXTHDR_UDP)) return offsetof(struct ipv6hdr, nexthdr); while (off < nhlen) { @@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; int nhoff; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = UDP_ENCAP_ESPINUDP; + if (!pskb_pull(skb, offset)) return NULL; @@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, -2); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae7924..02e9ffb63a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93a594a901..f624270971 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, return dst; } - err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); + err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; @@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); @@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = type; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c50dcd35f..80043e4611 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f6f5b83dd9..7563f8c6aa 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -46,7 +46,7 @@ struct ioam6_lwt { struct ioam6_lwt_encap tuninfo; }; -static struct netlink_range_validation freq_range = { +static const struct netlink_range_validation freq_range = { .min = IOAM6_IPTUNNEL_FREQ_MIN, .max = IOAM6_IPTUNNEL_FREQ_MAX, }; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b3ca4beb44..eca07e10e2 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - if (np->repflow) { + if (inet6_test_bit(REPFLOW, sk)) { freq->flr_label = np->flow_label; return 0; } @@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - if (!np->repflow) + if (!inet6_test_bit(REPFLOW, sk)) return -ESRCH; np->flow_label = 0; - np->repflow = 0; + inet6_clear_bit(REPFLOW, sk); return 0; } @@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - np->repflow = 1; + inet6_set_bit(REPFLOW, sk); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1121082901..a722a43dd6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -117,6 +117,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -176,6 +178,16 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, return ret; } +static int ip6_finish_output_gso(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && + !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + return ip6_finish_output2(net, sk, skb); +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; @@ -189,17 +201,14 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #endif mtu = ip6_skb_dst_mtu(skb); - if (skb_is_gso(skb) && - !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && - !skb_gso_validate_network_len(skb, mtu)) - return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + if (skb_is_gso(skb)) + return ip6_finish_output_gso(net, sk, skb, mtu); - if ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)) || + if (skb->len > mtu || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); - else - return ip6_finish_output2(net, sk, skb); + + return ip6_finish_output2(net, sk, skb); } static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -238,12 +247,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_output); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { - if (!np->autoflowlabel_set) + if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); - else - return np->autoflowlabel; + return inet6_test_bit(AUTOFLOWLABEL, sk); } /* @@ -315,12 +323,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * Fill in the IPv6 header */ if (np) - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -335,7 +343,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -375,9 +383,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { - struct ipv6_pinfo *np = inet6_sk(sk); - if (np && np->rtalert_isolate && + if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } @@ -454,10 +461,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - - __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -625,6 +628,8 @@ int ip6_forward(struct sk_buff *skb) } } + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -887,9 +892,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, mtu = IPV6_MIN_MTU; } - if (np && np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; + if (np) { + u32 frag_size = READ_ONCE(np->frag_size); + + if (frag_size && frag_size < mtu) + mtu = frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; @@ -1023,9 +1030,6 @@ slow_path: return err; fail_toobig: - if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_gso_disable(skb->sk); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; @@ -1119,7 +1123,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, + sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, &fl6->saddr); rcu_read_unlock(); @@ -1289,74 +1293,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -/** - * ip6_dst_lookup_tunnel - perform route lookup on tunnel - * @skb: Packet for which lookup is done - * @dev: Tunnel device - * @net: Network namespace of tunnel device - * @sock: Socket which provides route info - * @saddr: Memory to store the src ip address - * @info: Tunnel information - * @protocol: IP protocol - * @use_cache: Flag to enable cache usage - * This function performs a route lookup on a tunnel - * - * It returns a valid dst pointer and stores src address to be used in - * tunnel in param saddr on success, else a pointer encoded error code. - */ - -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, - struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, - bool use_cache) -{ - struct dst_entry *dst = NULL; -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct flowi6 fl6; - __u8 prio; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - dst = dst_cache_get_ip6(dst_cache, saddr); - if (dst) - return dst; - } -#endif - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = protocol; - fl6.daddr = info->key.u.ipv6.dst; - fl6.saddr = info->key.u.ipv6.src; - prio = info->key.tos; - fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); - - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, - NULL); - if (IS_ERR(dst)) { - netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); - return ERR_PTR(-ENETUNREACH); - } - if (dst->dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); - dst_release(dst); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); -#endif - *saddr = fl6.saddr; - return dst; -} -EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1398,7 +1334,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); - unsigned int mtu; + unsigned int mtu, frag_size; struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so @@ -1442,25 +1378,23 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } + + frag_size = READ_ONCE(np->frag_size); + if (frag_size && frag_size < mtu) + mtu = frag_size; + cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); - if (dst_allfrag(xfrm_dst_path(&rt->dst))) - cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; - cork->base.transmit_time = ipc6->sockc.transmit_time; return 0; @@ -1517,8 +1451,6 @@ static int __ip6_append_data(struct sock *sk, headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; if (mtu <= fragheaderlen || @@ -1628,7 +1560,7 @@ emsgsize: while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; + copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -1659,7 +1591,7 @@ alloc_new_skb: */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; fraglen = datalen + fragheaderlen; pagedlen = 0; @@ -1908,7 +1840,6 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) struct dst_entry *dst = cork->base.dst; cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; skb_dst_set(skb, dst); } @@ -1929,7 +1860,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, if (cork->base.dst) { dst_release(cork->base.dst); cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; } } @@ -1941,7 +1871,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1984,18 +1913,18 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; ip6_cork_steal_dst(skb, cork); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; @@ -2097,7 +2026,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, return ERR_PTR(err); } if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_sk(sk)->dontfrag; + ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46c19bd489..9bbabf750a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index cdc4d4ee24..a7bf0327b3 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -1,3 +1,4 @@ + // SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/errno.h> @@ -75,8 +76,9 @@ EXPORT_SYMBOL_GPL(udp_sock_create6); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck) { @@ -111,4 +113,73 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); +/** + * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel + * @skb: Packet for which lookup is done + * @dev: Tunnel device + * @net: Network namespace of tunnel device + * @sock: Socket which provides route info + * @oif: Index of the output interface + * @saddr: Memory to store the src ip address + * @key: Tunnel information + * @sport: UDP source port + * @dport: UDP destination port + * @dsfield: The traffic class field + * @dst_cache: The dst cache to use for lookup + * This function performs a route lookup on a UDP tunnel + * + * It returns a valid dst pointer and stores src address to be used in + * tunnel in param saddr on success, else a pointer encoded error code. + */ + +struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, + int oif, + struct in6_addr *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 dsfield, + struct dst_cache *dst_cache) +{ + struct dst_entry *dst = NULL; + struct flowi6 fl6; + +#ifdef CONFIG_DST_CACHE + if (dst_cache) { + dst = dst_cache_get_ip6(dst_cache, saddr); + if (dst) + return dst; + } +#endif + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = IPPROTO_UDP; + fl6.flowi6_oif = oif; + fl6.daddr = key->u.ipv6.dst; + fl6.saddr = key->u.ipv6.src; + fl6.fl6_sport = sport; + fl6.fl6_dport = dport; + fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label); + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, + NULL); + if (IS_ERR(dst)) { + netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (dst->dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); + dst_release(dst); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (dst_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); +#endif + *saddr = fl6.saddr; + return dst; +} +EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup); + MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 73c85d4e0e..e550240c85 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; default: goto tx_err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0e2a0847b3..7d661735cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -415,6 +415,101 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + /* Handle options that can be set without locking the socket. */ + switch (optname) { + case IPV6_UNICAST_HOPS: + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->hop_limit, val); + return 0; + case IPV6_MULTICAST_LOOP: + if (optlen < sizeof(int)) + return -EINVAL; + if (val != valbool) + return -EINVAL; + inet6_assign_bit(MC6_LOOP, sk, valbool); + return 0; + case IPV6_MULTICAST_HOPS: + if (sk->sk_type == SOCK_STREAM) + return retv; + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->mcast_hops, + val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); + return 0; + case IPV6_MTU: + if (optlen < sizeof(int)) + return -EINVAL; + if (val && val < IPV6_MIN_MTU) + return -EINVAL; + WRITE_ONCE(np->frag_size, val); + return 0; + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 255) + return -EINVAL; + + if (val) + static_branch_enable(&ip6_min_hopcount); + + /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount + * while we are changing it. + */ + WRITE_ONCE(np->min_hopcount, val); + return 0; + case IPV6_RECVERR_RFC4884: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 1) + return -EINVAL; + inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); + return 0; + case IPV6_MULTICAST_ALL: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(MC6_ALL, sk, valbool); + return 0; + case IPV6_AUTOFLOWLABEL: + inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); + inet6_set_bit(AUTOFLOWLABEL_SET, sk); + return 0; + case IPV6_DONTFRAG: + inet6_assign_bit(DONTFRAG, sk, valbool); + return 0; + case IPV6_RECVERR: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RECVERR6, sk, valbool); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); + return 0; + case IPV6_MTU_DISCOVER: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) + return -EINVAL; + WRITE_ONCE(np->pmtudisc, val); + return 0; + case IPV6_FLOWINFO_SEND: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(SNDFLOW, sk, valbool); + return 0; + case IPV6_ADDR_PREFERENCES: + if (optlen < sizeof(int)) + return -EINVAL; + return ip6_sock_set_addr_preferences(sk, val); + } if (needs_rtnl) rtnl_lock(); sockopt_lock_sock(sk); @@ -733,34 +828,7 @@ done: } break; } - case IPV6_UNICAST_HOPS: - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->hop_limit = val; - retv = 0; - break; - case IPV6_MULTICAST_HOPS: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); - retv = 0; - break; - - case IPV6_MULTICAST_LOOP: - if (optlen < sizeof(int)) - goto e_inval; - if (val != valbool) - goto e_inval; - np->mc_loop = valbool; - retv = 0; - break; case IPV6_UNICAST_IF: { @@ -862,13 +930,6 @@ done: retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } - case IPV6_MULTICAST_ALL: - if (optlen < sizeof(int)) - goto e_inval; - np->mc_all = valbool; - retv = 0; - break; - case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) @@ -896,42 +957,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_ROUTER_ALERT_ISOLATE: - if (optlen < sizeof(int)) - goto e_inval; - np->rtalert_isolate = valbool; - retv = 0; - break; - case IPV6_MTU_DISCOVER: - if (optlen < sizeof(int)) - goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) - goto e_inval; - np->pmtudisc = val; - retv = 0; - break; - case IPV6_MTU: - if (optlen < sizeof(int)) - goto e_inval; - if (val && val < IPV6_MIN_MTU) - goto e_inval; - np->frag_size = val; - retv = 0; - break; - case IPV6_RECVERR: - if (optlen < sizeof(int)) - goto e_inval; - np->recverr = valbool; - if (!val) - skb_errqueue_purge(&sk->sk_error_queue); - retv = 0; - break; - case IPV6_FLOWINFO_SEND: - if (optlen < sizeof(int)) - goto e_inval; - np->sndflow = valbool; - retv = 0; - break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; @@ -943,47 +968,10 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; - case IPV6_ADDR_PREFERENCES: - if (optlen < sizeof(int)) - goto e_inval; - retv = __ip6_sock_set_addr_preferences(sk, val); - break; - case IPV6_MINHOPCOUNT: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 255) - goto e_inval; - - if (val) - static_branch_enable(&ip6_min_hopcount); - - /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount - * while we are changing it. - */ - WRITE_ONCE(np->min_hopcount, val); - retv = 0; - break; - case IPV6_DONTFRAG: - np->dontfrag = valbool; - retv = 0; - break; - case IPV6_AUTOFLOWLABEL: - np->autoflowlabel = valbool; - np->autoflowlabel_set = 1; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; - case IPV6_RECVERR_RFC4884: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 1) - goto e_inval; - np->recverr_rfc4884 = valbool; - retv = 0; - break; } unlock: @@ -1180,7 +1168,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { @@ -1197,7 +1186,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { @@ -1347,9 +1337,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) - val = np->hop_limit; + val = READ_ONCE(np->hop_limit); else - val = np->mcast_hops; + val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); @@ -1365,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_MULTICAST_LOOP: - val = np->mc_loop; + val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: @@ -1373,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_ALL: - val = np->mc_all; + val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: @@ -1381,15 +1371,15 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MTU_DISCOVER: - val = np->pmtudisc; + val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: - val = np->recverr; + val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: - val = np->sndflow; + val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: @@ -1424,33 +1414,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_ADDR_PREFERENCES: + { + u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; - if (np->srcprefs & IPV6_PREFER_SRC_TMP) + if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; - else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? */ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } - if (np->srcprefs & IPV6_PREFER_SRC_COA) + if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; - + } case IPV6_MINHOPCOUNT: - val = np->min_hopcount; + val = READ_ONCE(np->min_hopcount); break; case IPV6_DONTFRAG: - val = np->dontfrag; + val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: - val = ip6_autoflowlabel(sock_net(sk), np); + val = ip6_autoflowlabel(sock_net(sk), sk); break; case IPV6_RECVFRAGSIZE: @@ -1458,11 +1450,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_ROUTER_ALERT_ISOLATE: - val = np->rtalert_isolate; + val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: - val = np->recverr_rfc4884; + val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f948cf7bfc..bc6e0a0bad 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return np->mc_all; + return inet6_test_bit(MC6_ALL, sk); } psl = rcu_dereference(mc->sflist); if (!psl) { @@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb, hdr->payload_len = htons(len); hdr->nexthdr = proto; - hdr->hop_limit = inet6_sk(sk)->hop_limit; + hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit); hdr->saddr = *saddr; hdr->daddr = *daddr; @@ -1789,7 +1789,7 @@ static void mld_sendpack(struct sk_buff *skb) rcu_read_lock(); idev = __in6_dev_get(skb->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - sizeof(*pip6); @@ -2147,8 +2147,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) full_len = sizeof(struct ipv6hdr) + payload_len; rcu_read_lock(); - IP6_UPD_PO_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUT, full_len); + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); @@ -3015,8 +3014,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; psf = rcu_dereference(state->im->mca_sources); } out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 553c8664e0..a19999b30b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -500,11 +500,11 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, csum_partial(icmp6h, skb->len, 0)); - ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); + ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, @@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net) np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ - np->mc_loop = 0; + inet6_clear_bit(MC6_LOOP, sk); return 0; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 857713d7a3..53d255838e 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { + xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a88b2ce4a3..8dd4cd0c47 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = { static unsigned int ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u_int32_t flowlabel, mark; + unsigned int ret, verdict; + u32 flowlabel, mark; + u8 hop_limit; int err; /* save source/dest address, mark, hoplimit, flowlabel, priority, */ @@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta flowlabel = *((u_int32_t *)ipv6_hdr(skb)); ret = ip6t_do_table(priv, skb, state); + verdict = ret & NF_VERDICT_MASK; - if (ret != NF_DROP && ret != NF_STOLEN && + if (verdict != NF_DROP && verdict != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bf3cb3a136..52cf104e34 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -170,3 +170,4 @@ module_init(ip6table_nat_init); module_exit(ip6table_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy nat table"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 08861d5d1f..fc9f675402 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void) module_init(ip6table_raw_init); module_exit(ip6table_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy raw table"); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index d59b296b4f..be7817fbc0 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -182,3 +182,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 defragmentation support"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 71d6927282..196dd4ecb5 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -418,3 +418,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, EXPORT_SYMBOL_GPL(nf_send_unreach6); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 packet rejection core"); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 25243737fb..d2098dd4ce 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; @@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index e20b3705c2..6d1d922164 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -61,7 +61,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), - SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS), + SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -84,6 +84,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS), + SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 42fcec3ecf..dd0a4e73e6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. */ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); return; } - if (np->recverr) { + if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } @@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; @@ -651,7 +651,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, * have been queued for deletion. */ rcu_read_lock(); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) @@ -668,7 +668,7 @@ out: error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: - if (err == -ENOBUFS && !np->recverr) + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } @@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); @@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 56525b5b95..ea1dec8448 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 1, DST_OBSOLETE_FORCE_CHK, flags); + DST_OBSOLETE_FORCE_CHK, flags); if (rt) { rt6_info_init(rt); @@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net, if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) - flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); + flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } @@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec..35508abd76 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8698b49dfc..12eedc6ca2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -140,6 +140,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -215,6 +216,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_isn = cookie; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c new file mode 100644 index 0000000000..3c09ac2620 --- /dev/null +++ b/net/ipv6/tcp_ao.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. + * + * Authors: Dmitry Safonov <dima@arista.com> + * Francesco Ruggeri <fruggeri@arista.com> + * Salam Noureddine <noureddine@arista.com> + */ +#include <crypto/hash.h> +#include <linux/tcp.h> + +#include <net/tcp.h> +#include <net/ipv6.h> + +static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + struct kdf_input_block { + u8 counter; + u8 label[6]; + struct tcp6_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = *saddr; + tmp->ctx.daddr = *daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v6_ao_calc_key(mkt, key, &iph->saddr, + &iph->daddr, th->source, + th->dest, sisn, disn); +} + +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send) +{ + if (send) + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_rcv_saddr, + &sk->sk_v6_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v6_ao_calc_key(mkt, key, + &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr, + htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid) +{ + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); + struct in6_addr *addr = &addr_sk->sk_v6_daddr; + + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); +} + +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct in6_addr *addr = &ireq->ir_v6_rmt_addr; + int l3index; + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); +} + +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes) +{ + struct tcp6_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + /* 1. TCP pseudo-header (RFC2460) */ + bp->saddr = *saddr; + bp->daddr = *daddr; + bp->len = cpu_to_be32(nbytes); + bp->protocol = cpu_to_be32(IPPROTO_TCP); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET6, ao_hash, key, sk, skb, tkey, + hash_offset, sne); +} + +int tcp_v6_parse_ao(struct sock *sk, int cmd, + sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen); +} + +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v6_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET6, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3783334ef2..8c6623496d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,16 +76,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; -#else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int l3index) -{ - return NULL; -} #endif /* Helper returning the inet6 address from a given tcp socket. @@ -163,7 +156,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { @@ -239,7 +232,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -252,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; @@ -286,6 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } + tp->tcp_usec_ts = dst_tcp_usec_ts(dst); tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { @@ -402,6 +396,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -412,6 +408,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); @@ -508,7 +509,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tcp_ld_RTO_revert(sk, seq); } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { @@ -548,7 +549,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && ireq->pktopts) + if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? @@ -565,7 +566,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), - opt, tclass, sk->sk_priority); + opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -606,8 +607,10 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -620,6 +623,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -660,17 +664,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { + addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; + + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET, + l3flag ? l3index : -1, false)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); + } + + addr = (union tcp_md5_addr *)&sin6->sin6_addr; - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index, flags, + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) + return -EKEYREJECTED; + + return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) @@ -691,39 +711,36 @@ static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -733,10 +750,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, const struct sock *sk, const struct sk_buff *skb) { - const struct in6_addr *saddr, *daddr; - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + const struct in6_addr *saddr, *daddr; + struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; @@ -747,34 +763,31 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, daddr = &ip6h->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } - #endif static void tcp_v6_init_req(struct request_sock *req, @@ -797,7 +810,7 @@ static void tcp_v6_init_req(struct request_sock *req, (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || - np->rxopt.bits.rxohlim || np->repflow)) { + np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } @@ -833,6 +846,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup_rsk, + .ao_calc_key = tcp_v6_ao_calc_key_rsk, + .ao_synack_hash = tcp_v6_ao_synack_hash, +#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -844,8 +862,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, __be32 label, u32 priority, u32 txhash) + int oif, int rst, u8 tclass, __be32 label, + u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -860,13 +878,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; -#ifdef CONFIG_TCP_MD5SIG - if (key) + if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif + if (tcp_key_is_ao(key)) + tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP - if (rst && !key) { + if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) @@ -907,14 +925,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_md5_hash_hdr((__u8 *)topt, key, + tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + *topt++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (key->rcv_next)); + + tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, + key->traffic_key, + (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, + (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, + t1, key->sne); + } +#endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; @@ -977,19 +1009,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 seq = 0, ack_seq = 0; - struct tcp_md5sig_key *key = NULL; -#ifdef CONFIG_TCP_MD5SIG - const __u8 *hash_location = NULL; - unsigned char newhash[16]; - int genhash; - struct sock *sk1 = NULL; + const __u8 *md5_hash_location = NULL; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + bool allocated_traffic_key = false; #endif + const struct tcp_ao_hdr *aoh; + struct tcp_key key = {}; + u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; +#ifdef CONFIG_TCP_MD5SIG + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -1001,9 +1037,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) + return; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); +#endif +#ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; @@ -1011,8 +1051,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; - key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); - } else if (hash_location) { + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; + } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; @@ -1036,12 +1078,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) */ l3index = tcp_v6_sdif(skb) ? dif : 0; - key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); - if (!key) + key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); + if (!key.md5_key) goto out; + key.type = TCP_KEY_MD5; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif @@ -1052,15 +1095,27 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); +#ifdef CONFIG_TCP_AO + if (aoh) { + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, + &key.ao_key, &key.traffic_key, + &allocated_traffic_key, + &key.rcv_next, &key.sne)) + goto out; + key.type = TCP_KEY_AO; + } +#endif + if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { - const struct ipv6_pinfo *np = tcp_inet6_sk(sk); - trace_tcp_send_reset(sk, skb); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); - priority = sk->sk_priority; + priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { @@ -1073,45 +1128,141 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } - tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, + ipv6_get_dsfield(ipv6h), label, priority, txhash, + &key); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: + if (allocated_traffic_key) + kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, u8 tclass, + struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { - tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, - tclass, label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, + tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + if (static_branch_unlikely(&tcp_ao_needed.key)) { + + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto out; + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, + aoh->rnext_keyid, -1); + } + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + /* rcv_next switches to our rcv_next */ + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.sne = READ_ONCE(ao_info->snd_sne); + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), + tcp_tw_tsval(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); +#ifdef CONFIG_TCP_AO +out: +#endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - int l3index; + struct tcp_key key = {}; + +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { + const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; + const struct tcp_ao_hdr *aoh; + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. + */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; - l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, + l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -1125,12 +1276,13 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, + tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), - ipv6_get_dsfield(ipv6_hdr(skb)), 0, + &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } @@ -1235,7 +1387,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (sk_is_mptcp(newsk)) mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -1247,7 +1399,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* @@ -1320,7 +1472,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. @@ -1360,19 +1512,26 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); - /* Copy over the MD5 key from the original socket */ - key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); - if (key) { - const union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; - if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; + if (!tcp_rsk_used_ao(req)) { + /* Copy over the MD5 key from the original socket */ + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); + if (key) { + const union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto out; + } } } #endif +#ifdef CONFIG_TCP_AO + /* Copy over tcp_ao_info if any */ + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) + goto out; /* OOM */ +#endif if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); @@ -1542,10 +1701,11 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, + ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); @@ -1643,9 +1803,9 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1719,8 +1879,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) goto discard_and_relse; @@ -1895,7 +2055,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), - .net_frag_header_len = sizeof(struct frag_hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, @@ -1903,11 +2062,19 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .mtu_reduced = tcp_v6_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v6_ao_hash_skb, + .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, +#endif }; #endif @@ -1929,11 +2096,19 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .mtu_reduced = tcp_v4_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v4_ao_hash_skb, + .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, +#endif }; #endif @@ -1948,7 +2123,7 @@ static int tcp_v6_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; #endif diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 438476a313..a1a79ff46f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -599,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); - if (np->pmtudisc != IPV6_PMTUDISC_DONT) + if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { @@ -620,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!np->recverr) { + if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -1284,7 +1284,7 @@ csum_partial: send: err = ip6_send_skb(skb); if (err) { - if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; @@ -1430,7 +1430,7 @@ do_udp_sendmsg: fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); @@ -1597,7 +1597,7 @@ back_from_confirm: do_append_data: if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, (struct rt6_info *)dst, @@ -1610,7 +1610,7 @@ do_append_data: WRITE_ONCE(up->pending, 0); if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4156387248..6e36e5047f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,6 +16,8 @@ #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> #include <net/xfrm.h> +#include <net/protocol.h> +#include <net/gro.h> int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) @@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. - * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -85,9 +80,6 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; u16 encap_type; - if (skb->protocol == htons(ETH_P_IP)) - return xfrm4_udp_encap_rcv(sk, skb); - encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) @@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); if (skb->len < ip6hlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_udp_encap_rcv(sk, skb); + + ret = __xfrm6_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} + +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_gro_udp_encap_rcv(sk, head, skb); + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm6_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index ad07904642..5f7b1fdbff 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -95,7 +95,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) return -EMSGSIZE; } - if (toobig || dst_allfrag(skb_dst(skb))) + if (toobig) return ip6_fragment(net, sk, skb, __xfrm6_output_finish); |