summaryrefslogtreecommitdiffstats
path: root/drivers/net/vxlan
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vxlan')
-rw-r--r--drivers/net/vxlan/vxlan_core.c450
-rw-r--r--drivers/net/vxlan/vxlan_mdb.c190
-rw-r--r--drivers/net/vxlan/vxlan_private.h2
3 files changed, 443 insertions, 199 deletions
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 5b5597073b..412c3c0b69 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2215,114 +2215,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
return 0;
}
-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
- struct vxlan_sock *sock4,
- struct sk_buff *skb, int oif, u8 tos,
- __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
- __u8 flow_flags, struct dst_cache *dst_cache,
- const struct ip_tunnel_info *info)
-{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct rtable *rt = NULL;
- struct flowi4 fl4;
-
- if (!sock4)
- return ERR_PTR(-EIO);
-
- if (tos && !info)
- use_cache = false;
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, saddr);
- if (rt)
- return rt;
- }
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_oif = oif;
- fl4.flowi4_tos = RT_TOS(tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = daddr;
- fl4.saddr = *saddr;
- fl4.fl4_dport = dport;
- fl4.fl4_sport = sport;
- fl4.flowi4_flags = flow_flags;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
- if (!IS_ERR(rt)) {
- if (rt->dst.dev == dev) {
- netdev_dbg(dev, "circular route to %pI4\n", &daddr);
- ip_rt_put(rt);
- return ERR_PTR(-ELOOP);
- }
-
- *saddr = fl4.saddr;
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
- } else {
- netdev_dbg(dev, "no route to %pI4\n", &daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- return rt;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
- struct net_device *dev,
- struct vxlan_sock *sock6,
- struct sk_buff *skb, int oif, u8 tos,
- __be32 label,
- const struct in6_addr *daddr,
- struct in6_addr *saddr,
- __be16 dport, __be16 sport,
- struct dst_cache *dst_cache,
- const struct ip_tunnel_info *info)
-{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct dst_entry *ndst;
- struct flowi6 fl6;
-
- if (!sock6)
- return ERR_PTR(-EIO);
-
- if (tos && !info)
- use_cache = false;
- if (use_cache) {
- ndst = dst_cache_get_ip6(dst_cache, saddr);
- if (ndst)
- return ndst;
- }
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
- fl6.daddr = *daddr;
- fl6.saddr = *saddr;
- fl6.flowlabel = ip6_make_flowinfo(tos, label);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = IPPROTO_UDP;
- fl6.fl6_dport = dport;
- fl6.fl6_sport = sport;
-
- ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
- &fl6, NULL);
- if (IS_ERR(ndst)) {
- netdev_dbg(dev, "no route to %pI6\n", daddr);
- return ERR_PTR(-ENETUNREACH);
- }
-
- if (unlikely(ndst->dev == dev)) {
- netdev_dbg(dev, "circular route to %pI6\n", daddr);
- dst_release(ndst);
- return ERR_PTR(-ELOOP);
- }
-
- *saddr = fl6.saddr;
- if (use_cache)
- dst_cache_set_ip6(dst_cache, ndst, saddr);
- return ndst;
-}
-#endif
-
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct vxlan_dev *dst_vxlan, __be32 vni,
@@ -2376,7 +2268,7 @@ drop:
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *vxlan,
- union vxlan_addr *daddr,
+ int addr_family,
__be16 dst_port, int dst_ifindex, __be32 vni,
struct dst_entry *dst,
u32 rt_flags)
@@ -2396,7 +2288,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
dst_release(dst);
dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
- daddr->sa.sa_family, dst_port,
+ addr_family, dst_port,
vxlan->cfg.flags);
if (!dst_vxlan) {
dev->stats.tx_errors++;
@@ -2418,31 +2310,33 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
+ struct ip_tunnel_key *pkey;
+ struct ip_tunnel_key key;
struct vxlan_dev *vxlan = netdev_priv(dev);
const struct iphdr *old_iph = ip_hdr(skb);
- union vxlan_addr *dst;
- union vxlan_addr remote_ip, local_ip;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
unsigned int pkt_len = skb->len;
__be16 src_port = 0, dst_port;
struct dst_entry *ndst = NULL;
- __u8 tos, ttl, flow_flags = 0;
+ int addr_family;
+ __u8 tos, ttl;
int ifindex;
int err;
u32 flags = vxlan->cfg.flags;
+ bool use_cache;
bool udp_sum = false;
bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
__be32 vni = 0;
-#if IS_ENABLED(CONFIG_IPV6)
- __be32 label;
-#endif
info = skb_tunnel_info(skb);
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
if (rdst) {
- dst = &rdst->remote_ip;
- if (vxlan_addr_any(dst)) {
+ memset(&key, 0, sizeof(key));
+ pkey = &key;
+
+ if (vxlan_addr_any(&rdst->remote_ip)) {
if (did_rsc) {
/* short-circuited back to local bridge */
vxlan_encap_bypass(skb, vxlan, vxlan,
@@ -2452,30 +2346,40 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto drop;
}
+ addr_family = vxlan->cfg.saddr.sa.sa_family;
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = (rdst->remote_vni) ? : default_vni;
ifindex = rdst->remote_ifindex;
- local_ip = vxlan->cfg.saddr;
+
+ if (addr_family == AF_INET) {
+ key.u.ipv4.src = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+ key.u.ipv4.dst = rdst->remote_ip.sin.sin_addr.s_addr;
+ } else {
+ key.u.ipv6.src = vxlan->cfg.saddr.sin6.sin6_addr;
+ key.u.ipv6.dst = rdst->remote_ip.sin6.sin6_addr;
+ }
+
dst_cache = &rdst->dst_cache;
md->gbp = skb->mark;
if (flags & VXLAN_F_TTL_INHERIT) {
ttl = ip_tunnel_get_ttl(old_iph, skb);
} else {
ttl = vxlan->cfg.ttl;
- if (!ttl && vxlan_addr_multicast(dst))
+ if (!ttl && vxlan_addr_multicast(&rdst->remote_ip))
ttl = 1;
}
-
tos = vxlan->cfg.tos;
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
+ if (tos && !info)
+ use_cache = false;
- if (dst->sa.sa_family == AF_INET)
+ if (addr_family == AF_INET)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
else
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
#if IS_ENABLED(CONFIG_IPV6)
- label = vxlan->cfg.label;
+ key.label = vxlan->cfg.label;
#endif
} else {
if (!info) {
@@ -2483,17 +2387,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dev->name);
goto drop;
}
- remote_ip.sa.sa_family = ip_tunnel_info_af(info);
- if (remote_ip.sa.sa_family == AF_INET) {
- remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
- local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
- } else {
- remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
- local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
- }
- dst = &remote_ip;
+ pkey = &info->key;
+ addr_family = ip_tunnel_info_af(info);
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- flow_flags = info->key.flow_flags;
vni = tunnel_id_to_key32(info->key.tun_id);
ifindex = 0;
dst_cache = &info->dst_cache;
@@ -2504,28 +2400,24 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ttl = info->key.ttl;
tos = info->key.tos;
-#if IS_ENABLED(CONFIG_IPV6)
- label = info->key.label;
-#endif
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
}
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
rcu_read_lock();
- if (dst->sa.sa_family == AF_INET) {
+ if (addr_family == AF_INET) {
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
__be16 df = 0;
+ __be32 saddr;
if (!ifindex)
ifindex = sock4->sock->sk->sk_bound_dev_if;
- rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
- dst->sin.sin_addr.s_addr,
- &local_ip.sin.sin_addr.s_addr,
- dst_port, src_port, flow_flags,
- dst_cache, info);
+ rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, ifindex,
+ &saddr, pkey, src_port, dst_port,
+ tos, use_cache ? dst_cache : NULL);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto tx_error;
@@ -2533,7 +2425,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (!info) {
/* Bypass encapsulation if the destination is local */
- err = encap_bypass_if_local(skb, dev, vxlan, dst,
+ err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
dst_port, ifindex, vni,
&rt->dst, rt->rt_flags);
if (err)
@@ -2561,16 +2453,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else if (err) {
if (info) {
struct ip_tunnel_info *unclone;
- struct in_addr src, dst;
unclone = skb_tunnel_info_unclone(skb);
if (unlikely(!unclone))
goto tx_error;
- src = remote_ip.sin.sin_addr;
- dst = local_ip.sin.sin_addr;
- unclone->key.u.ipv4.src = src.s_addr;
- unclone->key.u.ipv4.dst = dst.s_addr;
+ unclone->key.u.ipv4.src = pkey->u.ipv4.dst;
+ unclone->key.u.ipv4.dst = saddr;
}
vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
dst_release(ndst);
@@ -2584,21 +2473,21 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (err < 0)
goto tx_error;
- udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
- dst->sin.sin_addr.s_addr, tos, ttl, df,
+ udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
+ pkey->u.ipv4.dst, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+ struct in6_addr saddr;
if (!ifindex)
ifindex = sock6->sock->sk->sk_bound_dev_if;
- ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
- label, &dst->sin6.sin6_addr,
- &local_ip.sin6.sin6_addr,
- dst_port, src_port,
- dst_cache, info);
+ ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock,
+ ifindex, &saddr, pkey,
+ src_port, dst_port, tos,
+ use_cache ? dst_cache : NULL);
if (IS_ERR(ndst)) {
err = PTR_ERR(ndst);
ndst = NULL;
@@ -2608,7 +2497,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (!info) {
u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- err = encap_bypass_if_local(skb, dev, vxlan, dst,
+ err = encap_bypass_if_local(skb, dev, vxlan, AF_INET6,
dst_port, ifindex, vni,
ndst, rt6i_flags);
if (err)
@@ -2623,16 +2512,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else if (err) {
if (info) {
struct ip_tunnel_info *unclone;
- struct in6_addr src, dst;
unclone = skb_tunnel_info_unclone(skb);
if (unlikely(!unclone))
goto tx_error;
- src = remote_ip.sin6.sin6_addr;
- dst = local_ip.sin6.sin6_addr;
- unclone->key.u.ipv6.src = src;
- unclone->key.u.ipv6.dst = dst;
+ unclone->key.u.ipv6.src = pkey->u.ipv6.dst;
+ unclone->key.u.ipv6.dst = saddr;
}
vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
@@ -2649,9 +2535,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
- &local_ip.sin6.sin6_addr,
- &dst->sin6.sin6_addr, tos, ttl,
- label, src_port, dst_port, !udp_sum);
+ &saddr, &pkey->u.ipv6.dst, tos, ttl,
+ pkey->label, src_port, dst_port, !udp_sum);
#endif
}
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
@@ -3022,9 +2907,101 @@ static int vxlan_open(struct net_device *dev)
return ret;
}
+struct vxlan_fdb_flush_desc {
+ bool ignore_default_entry;
+ unsigned long state;
+ unsigned long state_mask;
+ unsigned long flags;
+ unsigned long flags_mask;
+ __be32 src_vni;
+ u32 nhid;
+ __be32 vni;
+ __be16 port;
+ union vxlan_addr dst_ip;
+};
+
+static bool vxlan_fdb_is_default_entry(const struct vxlan_fdb *f,
+ const struct vxlan_dev *vxlan)
+{
+ return is_zero_ether_addr(f->eth_addr) && f->vni == vxlan->cfg.vni;
+}
+
+static bool vxlan_fdb_nhid_matches(const struct vxlan_fdb *f, u32 nhid)
+{
+ struct nexthop *nh = rtnl_dereference(f->nh);
+
+ return nh && nh->id == nhid;
+}
+
+static bool vxlan_fdb_flush_matches(const struct vxlan_fdb *f,
+ const struct vxlan_dev *vxlan,
+ const struct vxlan_fdb_flush_desc *desc)
+{
+ if (desc->state_mask && (f->state & desc->state_mask) != desc->state)
+ return false;
+
+ if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
+ return false;
+
+ if (desc->ignore_default_entry && vxlan_fdb_is_default_entry(f, vxlan))
+ return false;
+
+ if (desc->src_vni && f->vni != desc->src_vni)
+ return false;
+
+ if (desc->nhid && !vxlan_fdb_nhid_matches(f, desc->nhid))
+ return false;
+
+ return true;
+}
+
+static bool
+vxlan_fdb_flush_should_match_remotes(const struct vxlan_fdb_flush_desc *desc)
+{
+ return desc->vni || desc->port || desc->dst_ip.sa.sa_family;
+}
+
+static bool
+vxlan_fdb_flush_remote_matches(const struct vxlan_fdb_flush_desc *desc,
+ const struct vxlan_rdst *rd)
+{
+ if (desc->vni && rd->remote_vni != desc->vni)
+ return false;
+
+ if (desc->port && rd->remote_port != desc->port)
+ return false;
+
+ if (desc->dst_ip.sa.sa_family &&
+ !vxlan_addr_equal(&rd->remote_ip, &desc->dst_ip))
+ return false;
+
+ return true;
+}
+
+static void
+vxlan_fdb_flush_match_remotes(struct vxlan_fdb *f, struct vxlan_dev *vxlan,
+ const struct vxlan_fdb_flush_desc *desc,
+ bool *p_destroy_fdb)
+{
+ bool remotes_flushed = false;
+ struct vxlan_rdst *rd, *tmp;
+
+ list_for_each_entry_safe(rd, tmp, &f->remotes, list) {
+ if (!vxlan_fdb_flush_remote_matches(desc, rd))
+ continue;
+
+ vxlan_fdb_dst_destroy(vxlan, f, rd, true);
+ remotes_flushed = true;
+ }
+
+ *p_destroy_fdb = remotes_flushed && list_empty(&f->remotes);
+}
+
/* Purge the forwarding table */
-static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
+static void vxlan_flush(struct vxlan_dev *vxlan,
+ const struct vxlan_fdb_flush_desc *desc)
{
+ bool match_remotes = vxlan_fdb_flush_should_match_remotes(desc);
unsigned int h;
for (h = 0; h < FDB_HASH_SIZE; ++h) {
@@ -3034,28 +3011,122 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
- if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
- continue;
- /* the all_zeros_mac entry is deleted at vxlan_uninit */
- if (is_zero_ether_addr(f->eth_addr) &&
- f->vni == vxlan->cfg.vni)
+
+ if (!vxlan_fdb_flush_matches(f, vxlan, desc))
continue;
+
+ if (match_remotes) {
+ bool destroy_fdb = false;
+
+ vxlan_fdb_flush_match_remotes(f, vxlan, desc,
+ &destroy_fdb);
+
+ if (!destroy_fdb)
+ continue;
+ }
+
vxlan_fdb_destroy(vxlan, f, true, true);
}
spin_unlock_bh(&vxlan->hash_lock[h]);
}
}
+static const struct nla_policy vxlan_del_bulk_policy[NDA_MAX + 1] = {
+ [NDA_SRC_VNI] = { .type = NLA_U32 },
+ [NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_VNI] = { .type = NLA_U32 },
+ [NDA_PORT] = { .type = NLA_U16 },
+ [NDA_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
+ [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
+};
+
+#define VXLAN_FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF)
+#define VXLAN_FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP)
+#define VXLAN_FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_EXT_LEARNED | NTF_OFFLOADED | \
+ NTF_ROUTER)
+
+static int vxlan_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb_flush_desc desc = {};
+ struct ndmsg *ndm = nlmsg_data(nlh);
+ struct nlattr *tb[NDA_MAX + 1];
+ u8 ndm_flags;
+ int err;
+
+ ndm_flags = ndm->ndm_flags & ~VXLAN_FDB_FLUSH_IGNORED_NDM_FLAGS;
+
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, vxlan_del_bulk_policy,
+ extack);
+ if (err)
+ return err;
+
+ if (ndm_flags & ~VXLAN_FDB_FLUSH_ALLOWED_NDM_FLAGS) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state & ~VXLAN_FDB_FLUSH_ALLOWED_NDM_STATES) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set");
+ return -EINVAL;
+ }
+
+ desc.state = ndm->ndm_state;
+ desc.flags = ndm_flags;
+
+ if (tb[NDA_NDM_STATE_MASK])
+ desc.state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]);
+
+ if (tb[NDA_NDM_FLAGS_MASK])
+ desc.flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]);
+
+ if (tb[NDA_SRC_VNI])
+ desc.src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
+
+ if (tb[NDA_NH_ID])
+ desc.nhid = nla_get_u32(tb[NDA_NH_ID]);
+
+ if (tb[NDA_VNI])
+ desc.vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
+
+ if (tb[NDA_PORT])
+ desc.port = nla_get_be16(tb[NDA_PORT]);
+
+ if (tb[NDA_DST]) {
+ union vxlan_addr ip;
+
+ err = vxlan_nla_get_addr(&ip, tb[NDA_DST]);
+ if (err) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[NDA_DST],
+ "Unsupported address family");
+ return err;
+ }
+ desc.dst_ip = ip;
+ }
+
+ vxlan_flush(vxlan, &desc);
+
+ return 0;
+}
+
/* Cleanup timer and forwarding table on shutdown */
static int vxlan_stop(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb_flush_desc desc = {
+ /* Default entry is deleted at vxlan_uninit. */
+ .ignore_default_entry = true,
+ .state = 0,
+ .state_mask = NUD_PERMANENT | NUD_NOARP,
+ };
vxlan_multicast_leave(vxlan);
del_timer_sync(&vxlan->age_timer);
- vxlan_flush(vxlan, false);
+ vxlan_flush(vxlan, &desc);
vxlan_sock_release(vxlan);
return 0;
@@ -3100,11 +3171,14 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
- rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
- info->key.u.ipv4.dst,
- &info->key.u.ipv4.src, dport, sport,
- info->key.flow_flags, &info->dst_cache,
- info);
+ if (!sock4)
+ return -EIO;
+
+ rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, 0,
+ &info->key.u.ipv4.src,
+ &info->key,
+ sport, dport, info->key.tos,
+ &info->dst_cache);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
@@ -3113,10 +3187,14 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
struct dst_entry *ndst;
- ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
- info->key.label, &info->key.u.ipv6.dst,
- &info->key.u.ipv6.src, dport, sport,
- &info->dst_cache, info);
+ if (!sock6)
+ return -EIO;
+
+ ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock,
+ 0, &info->key.u.ipv6.src,
+ &info->key,
+ sport, dport, info->key.tos,
+ &info->dst_cache);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
@@ -3142,11 +3220,13 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
+ .ndo_fdb_del_bulk = vxlan_fdb_delete_bulk,
.ndo_fdb_dump = vxlan_fdb_dump,
.ndo_fdb_get = vxlan_fdb_get,
.ndo_mdb_add = vxlan_mdb_add,
.ndo_mdb_del = vxlan_mdb_del,
.ndo_mdb_dump = vxlan_mdb_dump,
+ .ndo_mdb_get = vxlan_mdb_get,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
@@ -4294,8 +4374,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb_flush_desc desc = {
+ /* Default entry is deleted at vxlan_uninit. */
+ .ignore_default_entry = true,
+ };
- vxlan_flush(vxlan, true);
+ vxlan_flush(vxlan, &desc);
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
@@ -4305,7 +4389,6 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
static size_t vxlan_get_size(const struct net_device *dev)
{
-
return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
@@ -4323,7 +4406,6 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA */
nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
- nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
@@ -4331,6 +4413,8 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
+ /* IFLA_VXLAN_PORT_RANGE */
+ nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
nla_total_size(0) + /* IFLA_VXLAN_GBP */
nla_total_size(0) + /* IFLA_VXLAN_GPE */
nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */
diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c
index 5e04162226..eb4c580b5c 100644
--- a/drivers/net/vxlan/vxlan_mdb.c
+++ b/drivers/net/vxlan/vxlan_mdb.c
@@ -311,7 +311,7 @@ vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = {
[MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol),
};
-static struct netlink_range_validation vni_range = {
+static const struct netlink_range_validation vni_range = {
.max = VXLAN_N_VID - 1,
};
@@ -370,12 +370,10 @@ static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto,
return true;
}
-static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg,
- const struct br_mdb_entry *entry,
- const struct nlattr *source_attr)
+static void vxlan_mdb_group_set(struct vxlan_mdb_entry_key *group,
+ const struct br_mdb_entry *entry,
+ const struct nlattr *source_attr)
{
- struct vxlan_mdb_entry_key *group = &cfg->group;
-
switch (entry->addr.proto) {
case htons(ETH_P_IP):
group->dst.sa.sa_family = AF_INET;
@@ -503,7 +501,7 @@ static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg,
entry->addr.proto, extack))
return -EINVAL;
- vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
+ vxlan_mdb_group_set(&cfg->group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
/* rtnetlink code only validates that IPv4 group address is
* multicast.
@@ -927,23 +925,20 @@ vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group,
return nlmsg_size;
}
-static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
- const struct vxlan_mdb_entry *mdb_entry,
- const struct vxlan_mdb_remote *remote)
+static size_t
+vxlan_mdb_nlmsg_remote_size(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote)
{
const struct vxlan_mdb_entry_key *group = &mdb_entry->key;
struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
size_t nlmsg_size;
- nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
- /* MDBA_MDB */
- nla_total_size(0) +
- /* MDBA_MDB_ENTRY */
- nla_total_size(0) +
/* MDBA_MDB_ENTRY_INFO */
- nla_total_size(sizeof(struct br_mdb_entry)) +
+ nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) +
/* MDBA_MDB_EATTR_TIMER */
nla_total_size(sizeof(u32));
+
/* MDBA_MDB_EATTR_SOURCE */
if (vxlan_mdb_is_sg(group))
nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst));
@@ -971,6 +966,19 @@ static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
return nlmsg_size;
}
+static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote)
+{
+ return NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ /* MDBA_MDB */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY */
+ nla_total_size(0) +
+ /* Remote entry */
+ vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry, remote);
+}
+
static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan,
struct sk_buff *skb,
const struct vxlan_mdb_entry *mdb_entry,
@@ -1298,6 +1306,156 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
return err;
}
+static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
+};
+
+static int vxlan_mdb_get_parse(struct net_device *dev, struct nlattr *tb[],
+ struct vxlan_mdb_entry_key *group,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]);
+ struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
+
+ memset(group, 0, sizeof(*group));
+ group->vni = vxlan->default_dst.remote_vni;
+
+ if (!tb[MDBA_GET_ENTRY_ATTRS]) {
+ vxlan_mdb_group_set(group, entry, NULL);
+ return 0;
+ }
+
+ err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
+ tb[MDBA_GET_ENTRY_ATTRS],
+ vxlan_mdbe_attrs_get_pol, extack);
+ if (err)
+ return err;
+
+ if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
+ !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE],
+ entry->addr.proto, extack))
+ return -EINVAL;
+
+ vxlan_mdb_group_set(group, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
+
+ if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
+ group->vni =
+ cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));
+
+ return 0;
+}
+
+static struct sk_buff *
+vxlan_mdb_get_reply_alloc(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry)
+{
+ struct vxlan_mdb_remote *remote;
+ size_t nlmsg_size;
+
+ nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ /* MDBA_MDB */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY */
+ nla_total_size(0);
+
+ list_for_each_entry(remote, &mdb_entry->remotes, list)
+ nlmsg_size += vxlan_mdb_nlmsg_remote_size(vxlan, mdb_entry,
+ remote);
+
+ return nlmsg_new(nlmsg_size, GFP_KERNEL);
+}
+
+static int
+vxlan_mdb_get_reply_fill(const struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ const struct vxlan_mdb_entry *mdb_entry,
+ u32 portid, u32 seq)
+{
+ struct nlattr *mdb_nest, *mdb_entry_nest;
+ struct vxlan_mdb_remote *remote;
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = vxlan->dev->ifindex;
+ mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
+ if (!mdb_nest) {
+ err = -EMSGSIZE;
+ goto cancel;
+ }
+ mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+ if (!mdb_entry_nest) {
+ err = -EMSGSIZE;
+ goto cancel;
+ }
+
+ list_for_each_entry(remote, &mdb_entry->remotes, list) {
+ err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote);
+ if (err)
+ goto cancel;
+ }
+
+ nla_nest_end(skb, mdb_entry_nest);
+ nla_nest_end(skb, mdb_nest);
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return err;
+}
+
+int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid,
+ u32 seq, struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_mdb_entry *mdb_entry;
+ struct vxlan_mdb_entry_key group;
+ struct sk_buff *skb;
+ int err;
+
+ ASSERT_RTNL();
+
+ err = vxlan_mdb_get_parse(dev, tb, &group, extack);
+ if (err)
+ return err;
+
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
+ if (!mdb_entry) {
+ NL_SET_ERR_MSG_MOD(extack, "MDB entry not found");
+ return -ENOENT;
+ }
+
+ skb = vxlan_mdb_get_reply_alloc(vxlan, mdb_entry);
+ if (!skb)
+ return -ENOMEM;
+
+ err = vxlan_mdb_get_reply_fill(vxlan, skb, mdb_entry, portid, seq);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply");
+ goto free;
+ }
+
+ return rtnl_unicast(skb, dev_net(dev), portid);
+
+free:
+ kfree_skb(skb);
+ return err;
+}
+
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
struct sk_buff *skb,
__be32 src_vni)
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
index 817fa30758..db679c3809 100644
--- a/drivers/net/vxlan/vxlan_private.h
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -235,6 +235,8 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
struct netlink_ext_ack *extack);
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack);
+int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid,
+ u32 seq, struct netlink_ext_ack *extack);
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
struct sk_buff *skb,
__be32 src_vni);