diff options
Diffstat (limited to 'drivers/net/vxlan')
-rw-r--r-- | drivers/net/vxlan/vxlan_core.c | 450 | ||||
-rw-r--r-- | drivers/net/vxlan/vxlan_mdb.c | 190 | ||||
-rw-r--r-- | drivers/net/vxlan/vxlan_private.h | 2 |
3 files changed, 443 insertions, 199 deletions
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 5b5597073b..412c3c0b69 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2215,114 +2215,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, return 0; } -static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev, - struct vxlan_sock *sock4, - struct sk_buff *skb, int oif, u8 tos, - __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport, - __u8 flow_flags, struct dst_cache *dst_cache, - const struct ip_tunnel_info *info) -{ - bool use_cache = ip_tunnel_dst_cache_usable(skb, info); - struct rtable *rt = NULL; - struct flowi4 fl4; - - if (!sock4) - return ERR_PTR(-EIO); - - if (tos && !info) - use_cache = false; - if (use_cache) { - rt = dst_cache_get_ip4(dst_cache, saddr); - if (rt) - return rt; - } - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = oif; - fl4.flowi4_tos = RT_TOS(tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = daddr; - fl4.saddr = *saddr; - fl4.fl4_dport = dport; - fl4.fl4_sport = sport; - fl4.flowi4_flags = flow_flags; - - rt = ip_route_output_key(vxlan->net, &fl4); - if (!IS_ERR(rt)) { - if (rt->dst.dev == dev) { - netdev_dbg(dev, "circular route to %pI4\n", &daddr); - ip_rt_put(rt); - return ERR_PTR(-ELOOP); - } - - *saddr = fl4.saddr; - if (use_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); - } else { - netdev_dbg(dev, "no route to %pI4\n", &daddr); - return ERR_PTR(-ENETUNREACH); - } - return rt; -} - -#if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, - struct net_device *dev, - struct vxlan_sock *sock6, - struct sk_buff *skb, int oif, u8 tos, - __be32 label, - const struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 dport, __be16 sport, - struct dst_cache *dst_cache, - const struct ip_tunnel_info *info) -{ - bool use_cache = ip_tunnel_dst_cache_usable(skb, info); - struct dst_entry *ndst; - struct flowi6 fl6; - - if (!sock6) - return ERR_PTR(-EIO); - - if (tos && !info) - use_cache = false; - if (use_cache) { - ndst = dst_cache_get_ip6(dst_cache, saddr); - if (ndst) - return ndst; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = oif; - fl6.daddr = *daddr; - fl6.saddr = *saddr; - fl6.flowlabel = ip6_make_flowinfo(tos, label); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = IPPROTO_UDP; - fl6.fl6_dport = dport; - fl6.fl6_sport = sport; - - ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, - &fl6, NULL); - if (IS_ERR(ndst)) { - netdev_dbg(dev, "no route to %pI6\n", daddr); - return ERR_PTR(-ENETUNREACH); - } - - if (unlikely(ndst->dev == dev)) { - netdev_dbg(dev, "circular route to %pI6\n", daddr); - dst_release(ndst); - return ERR_PTR(-ELOOP); - } - - *saddr = fl6.saddr; - if (use_cache) - dst_cache_set_ip6(dst_cache, ndst, saddr); - return ndst; -} -#endif - /* Bypass encapsulation if the destination is local */ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct vxlan_dev *dst_vxlan, __be32 vni, @@ -2376,7 +2268,7 @@ drop: static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *vxlan, - union vxlan_addr *daddr, + int addr_family, __be16 dst_port, int dst_ifindex, __be32 vni, struct dst_entry *dst, u32 rt_flags) @@ -2396,7 +2288,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, dst_release(dst); dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni, - daddr->sa.sa_family, dst_port, + addr_family, dst_port, vxlan->cfg.flags); if (!dst_vxlan) { dev->stats.tx_errors++; @@ -2418,31 +2310,33 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, { struct dst_cache *dst_cache; struct ip_tunnel_info *info; + struct ip_tunnel_key *pkey; + struct ip_tunnel_key key; struct vxlan_dev *vxlan = netdev_priv(dev); const struct iphdr *old_iph = ip_hdr(skb); - union vxlan_addr *dst; - union vxlan_addr remote_ip, local_ip; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; - __u8 tos, ttl, flow_flags = 0; + int addr_family; + __u8 tos, ttl; int ifindex; int err; u32 flags = vxlan->cfg.flags; + bool use_cache; bool udp_sum = false; bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); __be32 vni = 0; -#if IS_ENABLED(CONFIG_IPV6) - __be32 label; -#endif info = skb_tunnel_info(skb); + use_cache = ip_tunnel_dst_cache_usable(skb, info); if (rdst) { - dst = &rdst->remote_ip; - if (vxlan_addr_any(dst)) { + memset(&key, 0, sizeof(key)); + pkey = &key; + + if (vxlan_addr_any(&rdst->remote_ip)) { if (did_rsc) { /* short-circuited back to local bridge */ vxlan_encap_bypass(skb, vxlan, vxlan, @@ -2452,30 +2346,40 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } + addr_family = vxlan->cfg.saddr.sa.sa_family; dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = (rdst->remote_vni) ? : default_vni; ifindex = rdst->remote_ifindex; - local_ip = vxlan->cfg.saddr; + + if (addr_family == AF_INET) { + key.u.ipv4.src = vxlan->cfg.saddr.sin.sin_addr.s_addr; + key.u.ipv4.dst = rdst->remote_ip.sin.sin_addr.s_addr; + } else { + key.u.ipv6.src = vxlan->cfg.saddr.sin6.sin6_addr; + key.u.ipv6.dst = rdst->remote_ip.sin6.sin6_addr; + } + dst_cache = &rdst->dst_cache; md->gbp = skb->mark; if (flags & VXLAN_F_TTL_INHERIT) { ttl = ip_tunnel_get_ttl(old_iph, skb); } else { ttl = vxlan->cfg.ttl; - if (!ttl && vxlan_addr_multicast(dst)) + if (!ttl && vxlan_addr_multicast(&rdst->remote_ip)) ttl = 1; } - tos = vxlan->cfg.tos; if (tos == 1) tos = ip_tunnel_get_dsfield(old_iph, skb); + if (tos && !info) + use_cache = false; - if (dst->sa.sa_family == AF_INET) + if (addr_family == AF_INET) udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); else udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); #if IS_ENABLED(CONFIG_IPV6) - label = vxlan->cfg.label; + key.label = vxlan->cfg.label; #endif } else { if (!info) { @@ -2483,17 +2387,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dev->name); goto drop; } - remote_ip.sa.sa_family = ip_tunnel_info_af(info); - if (remote_ip.sa.sa_family == AF_INET) { - remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; - local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src; - } else { - remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; - local_ip.sin6.sin6_addr = info->key.u.ipv6.src; - } - dst = &remote_ip; + pkey = &info->key; + addr_family = ip_tunnel_info_af(info); dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - flow_flags = info->key.flow_flags; vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; @@ -2504,28 +2400,24 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ttl = info->key.ttl; tos = info->key.tos; -#if IS_ENABLED(CONFIG_IPV6) - label = info->key.label; -#endif udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); } src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); rcu_read_lock(); - if (dst->sa.sa_family == AF_INET) { + if (addr_family == AF_INET) { struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; __be16 df = 0; + __be32 saddr; if (!ifindex) ifindex = sock4->sock->sk->sk_bound_dev_if; - rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos, - dst->sin.sin_addr.s_addr, - &local_ip.sin.sin_addr.s_addr, - dst_port, src_port, flow_flags, - dst_cache, info); + rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, ifindex, + &saddr, pkey, src_port, dst_port, + tos, use_cache ? dst_cache : NULL); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto tx_error; @@ -2533,7 +2425,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (!info) { /* Bypass encapsulation if the destination is local */ - err = encap_bypass_if_local(skb, dev, vxlan, dst, + err = encap_bypass_if_local(skb, dev, vxlan, AF_INET, dst_port, ifindex, vni, &rt->dst, rt->rt_flags); if (err) @@ -2561,16 +2453,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } else if (err) { if (info) { struct ip_tunnel_info *unclone; - struct in_addr src, dst; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) goto tx_error; - src = remote_ip.sin.sin_addr; - dst = local_ip.sin.sin_addr; - unclone->key.u.ipv4.src = src.s_addr; - unclone->key.u.ipv4.dst = dst.s_addr; + unclone->key.u.ipv4.src = pkey->u.ipv4.dst; + unclone->key.u.ipv4.dst = saddr; } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); dst_release(ndst); @@ -2584,21 +2473,21 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) goto tx_error; - udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr, - dst->sin.sin_addr.s_addr, tos, ttl, df, + udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr, + pkey->u.ipv4.dst, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); + struct in6_addr saddr; if (!ifindex) ifindex = sock6->sock->sk->sk_bound_dev_if; - ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos, - label, &dst->sin6.sin6_addr, - &local_ip.sin6.sin6_addr, - dst_port, src_port, - dst_cache, info); + ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock, + ifindex, &saddr, pkey, + src_port, dst_port, tos, + use_cache ? dst_cache : NULL); if (IS_ERR(ndst)) { err = PTR_ERR(ndst); ndst = NULL; @@ -2608,7 +2497,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (!info) { u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; - err = encap_bypass_if_local(skb, dev, vxlan, dst, + err = encap_bypass_if_local(skb, dev, vxlan, AF_INET6, dst_port, ifindex, vni, ndst, rt6i_flags); if (err) @@ -2623,16 +2512,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } else if (err) { if (info) { struct ip_tunnel_info *unclone; - struct in6_addr src, dst; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) goto tx_error; - src = remote_ip.sin6.sin6_addr; - dst = local_ip.sin6.sin6_addr; - unclone->key.u.ipv6.src = src; - unclone->key.u.ipv6.dst = dst; + unclone->key.u.ipv6.src = pkey->u.ipv6.dst; + unclone->key.u.ipv6.dst = saddr; } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); @@ -2649,9 +2535,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto tx_error; udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev, - &local_ip.sin6.sin6_addr, - &dst->sin6.sin6_addr, tos, ttl, - label, src_port, dst_port, !udp_sum); + &saddr, &pkey->u.ipv6.dst, tos, ttl, + pkey->label, src_port, dst_port, !udp_sum); #endif } vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len); @@ -3022,9 +2907,101 @@ static int vxlan_open(struct net_device *dev) return ret; } +struct vxlan_fdb_flush_desc { + bool ignore_default_entry; + unsigned long state; + unsigned long state_mask; + unsigned long flags; + unsigned long flags_mask; + __be32 src_vni; + u32 nhid; + __be32 vni; + __be16 port; + union vxlan_addr dst_ip; +}; + +static bool vxlan_fdb_is_default_entry(const struct vxlan_fdb *f, + const struct vxlan_dev *vxlan) +{ + return is_zero_ether_addr(f->eth_addr) && f->vni == vxlan->cfg.vni; +} + +static bool vxlan_fdb_nhid_matches(const struct vxlan_fdb *f, u32 nhid) +{ + struct nexthop *nh = rtnl_dereference(f->nh); + + return nh && nh->id == nhid; +} + +static bool vxlan_fdb_flush_matches(const struct vxlan_fdb *f, + const struct vxlan_dev *vxlan, + const struct vxlan_fdb_flush_desc *desc) +{ + if (desc->state_mask && (f->state & desc->state_mask) != desc->state) + return false; + + if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags) + return false; + + if (desc->ignore_default_entry && vxlan_fdb_is_default_entry(f, vxlan)) + return false; + + if (desc->src_vni && f->vni != desc->src_vni) + return false; + + if (desc->nhid && !vxlan_fdb_nhid_matches(f, desc->nhid)) + return false; + + return true; +} + +static bool +vxlan_fdb_flush_should_match_remotes(const struct vxlan_fdb_flush_desc *desc) +{ + return desc->vni || desc->port || desc->dst_ip.sa.sa_family; +} + +static bool +vxlan_fdb_flush_remote_matches(const struct vxlan_fdb_flush_desc *desc, + const struct vxlan_rdst *rd) +{ + if (desc->vni && rd->remote_vni != desc->vni) + return false; + + if (desc->port && rd->remote_port != desc->port) + return false; + + if (desc->dst_ip.sa.sa_family && + !vxlan_addr_equal(&rd->remote_ip, &desc->dst_ip)) + return false; + + return true; +} + +static void +vxlan_fdb_flush_match_remotes(struct vxlan_fdb *f, struct vxlan_dev *vxlan, + const struct vxlan_fdb_flush_desc *desc, + bool *p_destroy_fdb) +{ + bool remotes_flushed = false; + struct vxlan_rdst *rd, *tmp; + + list_for_each_entry_safe(rd, tmp, &f->remotes, list) { + if (!vxlan_fdb_flush_remote_matches(desc, rd)) + continue; + + vxlan_fdb_dst_destroy(vxlan, f, rd, true); + remotes_flushed = true; + } + + *p_destroy_fdb = remotes_flushed && list_empty(&f->remotes); +} + /* Purge the forwarding table */ -static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) +static void vxlan_flush(struct vxlan_dev *vxlan, + const struct vxlan_fdb_flush_desc *desc) { + bool match_remotes = vxlan_fdb_flush_should_match_remotes(desc); unsigned int h; for (h = 0; h < FDB_HASH_SIZE; ++h) { @@ -3034,28 +3011,122 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { struct vxlan_fdb *f = container_of(p, struct vxlan_fdb, hlist); - if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP))) - continue; - /* the all_zeros_mac entry is deleted at vxlan_uninit */ - if (is_zero_ether_addr(f->eth_addr) && - f->vni == vxlan->cfg.vni) + + if (!vxlan_fdb_flush_matches(f, vxlan, desc)) continue; + + if (match_remotes) { + bool destroy_fdb = false; + + vxlan_fdb_flush_match_remotes(f, vxlan, desc, + &destroy_fdb); + + if (!destroy_fdb) + continue; + } + vxlan_fdb_destroy(vxlan, f, true, true); } spin_unlock_bh(&vxlan->hash_lock[h]); } } +static const struct nla_policy vxlan_del_bulk_policy[NDA_MAX + 1] = { + [NDA_SRC_VNI] = { .type = NLA_U32 }, + [NDA_NH_ID] = { .type = NLA_U32 }, + [NDA_VNI] = { .type = NLA_U32 }, + [NDA_PORT] = { .type = NLA_U16 }, + [NDA_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), + sizeof(struct in6_addr)), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + +#define VXLAN_FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF) +#define VXLAN_FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP) +#define VXLAN_FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_EXT_LEARNED | NTF_OFFLOADED | \ + NTF_ROUTER) + +static int vxlan_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb_flush_desc desc = {}; + struct ndmsg *ndm = nlmsg_data(nlh); + struct nlattr *tb[NDA_MAX + 1]; + u8 ndm_flags; + int err; + + ndm_flags = ndm->ndm_flags & ~VXLAN_FDB_FLUSH_IGNORED_NDM_FLAGS; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, vxlan_del_bulk_policy, + extack); + if (err) + return err; + + if (ndm_flags & ~VXLAN_FDB_FLUSH_ALLOWED_NDM_FLAGS) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); + return -EINVAL; + } + if (ndm->ndm_state & ~VXLAN_FDB_FLUSH_ALLOWED_NDM_STATES) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set"); + return -EINVAL; + } + + desc.state = ndm->ndm_state; + desc.flags = ndm_flags; + + if (tb[NDA_NDM_STATE_MASK]) + desc.state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]); + + if (tb[NDA_NDM_FLAGS_MASK]) + desc.flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]); + + if (tb[NDA_SRC_VNI]) + desc.src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI])); + + if (tb[NDA_NH_ID]) + desc.nhid = nla_get_u32(tb[NDA_NH_ID]); + + if (tb[NDA_VNI]) + desc.vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI])); + + if (tb[NDA_PORT]) + desc.port = nla_get_be16(tb[NDA_PORT]); + + if (tb[NDA_DST]) { + union vxlan_addr ip; + + err = vxlan_nla_get_addr(&ip, tb[NDA_DST]); + if (err) { + NL_SET_ERR_MSG_ATTR(extack, tb[NDA_DST], + "Unsupported address family"); + return err; + } + desc.dst_ip = ip; + } + + vxlan_flush(vxlan, &desc); + + return 0; +} + /* Cleanup timer and forwarding table on shutdown */ static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb_flush_desc desc = { + /* Default entry is deleted at vxlan_uninit. */ + .ignore_default_entry = true, + .state = 0, + .state_mask = NUD_PERMANENT | NUD_NOARP, + }; vxlan_multicast_leave(vxlan); del_timer_sync(&vxlan->age_timer); - vxlan_flush(vxlan, false); + vxlan_flush(vxlan, &desc); vxlan_sock_release(vxlan); return 0; @@ -3100,11 +3171,14 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; - rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, - info->key.u.ipv4.dst, - &info->key.u.ipv4.src, dport, sport, - info->key.flow_flags, &info->dst_cache, - info); + if (!sock4) + return -EIO; + + rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, 0, + &info->key.u.ipv4.src, + &info->key, + sport, dport, info->key.tos, + &info->dst_cache); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); @@ -3113,10 +3187,14 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); struct dst_entry *ndst; - ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos, - info->key.label, &info->key.u.ipv6.dst, - &info->key.u.ipv6.src, dport, sport, - &info->dst_cache, info); + if (!sock6) + return -EIO; + + ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock, + 0, &info->key.u.ipv6.src, + &info->key, + sport, dport, info->key.tos, + &info->dst_cache); if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); @@ -3142,11 +3220,13 @@ static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_fdb_add = vxlan_fdb_add, .ndo_fdb_del = vxlan_fdb_delete, + .ndo_fdb_del_bulk = vxlan_fdb_delete_bulk, .ndo_fdb_dump = vxlan_fdb_dump, .ndo_fdb_get = vxlan_fdb_get, .ndo_mdb_add = vxlan_mdb_add, .ndo_mdb_del = vxlan_mdb_del, .ndo_mdb_dump = vxlan_mdb_dump, + .ndo_mdb_get = vxlan_mdb_get, .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, }; @@ -4294,8 +4374,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], static void vxlan_dellink(struct net_device *dev, struct list_head *head) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb_flush_desc desc = { + /* Default entry is deleted at vxlan_uninit. */ + .ignore_default_entry = true, + }; - vxlan_flush(vxlan, true); + vxlan_flush(vxlan, &desc); list_del(&vxlan->next); unregister_netdevice_queue(dev, head); @@ -4305,7 +4389,6 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) static size_t vxlan_get_size(const struct net_device *dev) { - return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ @@ -4323,7 +4406,6 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ - nla_total_size(sizeof(struct ifla_vxlan_port_range)) + nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */ @@ -4331,6 +4413,8 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */ + /* IFLA_VXLAN_PORT_RANGE */ + nla_total_size(sizeof(struct ifla_vxlan_port_range)) + nla_total_size(0) + /* IFLA_VXLAN_GBP */ nla_total_size(0) + /* IFLA_VXLAN_GPE */ nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */ diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index 5e04162226..eb4c580b5c 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -311,7 +311,7 @@ vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = { [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol), }; -static struct netlink_range_validation vni_range = { +static const struct netlink_range_validation vni_range = { .max = VXLAN_N_VID - 1, }; @@ -370,12 +370,10 @@ static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto, return true; } -static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg, - const struct br_mdb_entry *entry, - const struct nlattr *source_attr) +static void vxlan_mdb_group_set(struct vxlan_mdb_entry_key *group, + const struct br_mdb_entry *entry, + const struct nlattr *source_attr) { - struct vxlan_mdb_entry_key *group = &cfg->group; - switch (entry->addr.proto) { case htons(ETH_P_IP): group->dst.sa.sa_family = AF_INET; @@ -503,7 +501,7 @@ static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg, entry->addr.proto, extack)) return -EINVAL; - vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); + vxlan_mdb_group_set(&cfg->group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); /* rtnetlink code only validates that IPv4 group address is * multicast. @@ -927,23 +925,20 @@ vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group, return nlmsg_size; } -static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan, - const struct vxlan_mdb_entry *mdb_entry, - const struct vxlan_mdb_remote *remote) +static size_t +vxlan_mdb_nlmsg_remote_size(const struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote) { const struct vxlan_mdb_entry_key *group = &mdb_entry->key; struct vxlan_rdst *rd = rtnl_dereference(remote->rd); size_t nlmsg_size; - nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + - /* MDBA_MDB */ - nla_total_size(0) + - /* MDBA_MDB_ENTRY */ - nla_total_size(0) + /* MDBA_MDB_ENTRY_INFO */ - nla_total_size(sizeof(struct br_mdb_entry)) + + nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) + /* MDBA_MDB_EATTR_TIMER */ nla_total_size(sizeof(u32)); + /* MDBA_MDB_EATTR_SOURCE */ if (vxlan_mdb_is_sg(group)) nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst)); @@ -971,6 +966,19 @@ static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan, return nlmsg_size; } +static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + /* MDBA_MDB */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY */ + nla_total_size(0) + + /* Remote entry */ + vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, remote); +} + static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, const struct vxlan_mdb_entry *mdb_entry, @@ -1298,6 +1306,156 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], return err; } +static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), + [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), +}; + +static int vxlan_mdb_get_parse(struct net_device *dev, struct nlattr *tb[], + struct vxlan_mdb_entry_key *group, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]); + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + + memset(group, 0, sizeof(*group)); + group->vni = vxlan->default_dst.remote_vni; + + if (!tb[MDBA_GET_ENTRY_ATTRS]) { + vxlan_mdb_group_set(group, entry, NULL); + return 0; + } + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, + tb[MDBA_GET_ENTRY_ATTRS], + vxlan_mdbe_attrs_get_pol, extack); + if (err) + return err; + + if (mdbe_attrs[MDBE_ATTR_SOURCE] && + !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE], + entry->addr.proto, extack)) + return -EINVAL; + + vxlan_mdb_group_set(group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); + + if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) + group->vni = + cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); + + return 0; +} + +static struct sk_buff * +vxlan_mdb_get_reply_alloc(const struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry) +{ + struct vxlan_mdb_remote *remote; + size_t nlmsg_size; + + nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + + /* MDBA_MDB */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY */ + nla_total_size(0); + + list_for_each_entry(remote, &mdb_entry->remotes, list) + nlmsg_size += vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, + remote); + + return nlmsg_new(nlmsg_size, GFP_KERNEL); +} + +static int +vxlan_mdb_get_reply_fill(const struct vxlan_dev *vxlan, + struct sk_buff *skb, + const struct vxlan_mdb_entry *mdb_entry, + u32 portid, u32 seq) +{ + struct nlattr *mdb_nest, *mdb_entry_nest; + struct vxlan_mdb_remote *remote; + struct br_port_msg *bpm; + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = vxlan->dev->ifindex; + mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); + if (!mdb_nest) { + err = -EMSGSIZE; + goto cancel; + } + mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); + if (!mdb_entry_nest) { + err = -EMSGSIZE; + goto cancel; + } + + list_for_each_entry(remote, &mdb_entry->remotes, list) { + err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote); + if (err) + goto cancel; + } + + nla_nest_end(skb, mdb_entry_nest); + nla_nest_end(skb, mdb_nest); + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return err; +} + +int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, + u32 seq, struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_mdb_entry *mdb_entry; + struct vxlan_mdb_entry_key group; + struct sk_buff *skb; + int err; + + ASSERT_RTNL(); + + err = vxlan_mdb_get_parse(dev, tb, &group, extack); + if (err) + return err; + + mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); + if (!mdb_entry) { + NL_SET_ERR_MSG_MOD(extack, "MDB entry not found"); + return -ENOENT; + } + + skb = vxlan_mdb_get_reply_alloc(vxlan, mdb_entry); + if (!skb) + return -ENOMEM; + + err = vxlan_mdb_get_reply_fill(vxlan, skb, mdb_entry, portid, seq); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply"); + goto free; + } + + return rtnl_unicast(skb, dev_net(dev), portid); + +free: + kfree_skb(skb); + return err; +} + struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, struct sk_buff *skb, __be32 src_vni) diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 817fa30758..db679c3809 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -235,6 +235,8 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); +int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, + u32 seq, struct netlink_ext_ack *extack); struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, struct sk_buff *skb, __be32 src_vni); |