diff options
Diffstat (limited to 'drivers/net/vxlan/vxlan_vnifilter.c')
-rw-r--r-- | drivers/net/vxlan/vxlan_vnifilter.c | 1010 |
1 files changed, 1010 insertions, 0 deletions
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c new file mode 100644 index 0000000000..9c59d0bf8c --- /dev/null +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -0,0 +1,1010 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Vxlan vni filter for collect metadata mode + * + * Authors: Roopa Prabhu <roopa@nvidia.com> + * + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include <linux/rhashtable.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <net/vxlan.h> + +#include "vxlan_private.h" + +static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct vxlan_vni_node *vnode = ptr; + __be32 vni = *(__be32 *)arg->key; + + return vnode->vni != vni; +} + +const struct rhashtable_params vxlan_vni_rht_params = { + .head_offset = offsetof(struct vxlan_vni_node, vnode), + .key_offset = offsetof(struct vxlan_vni_node, vni), + .key_len = sizeof(__be32), + .nelem_hint = 3, + .max_size = VXLAN_N_VID, + .obj_cmpfn = vxlan_vni_cmp, + .automatic_shrinking = true, +}; + +static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan, + struct vxlan_vni_node *v, + bool del) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_dev_node *node; + struct vxlan_sock *vs; + + spin_lock(&vn->sock_lock); + if (del) { + if (!hlist_unhashed(&v->hlist4.hlist)) + hlist_del_init_rcu(&v->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + if (!hlist_unhashed(&v->hlist6.hlist)) + hlist_del_init_rcu(&v->hlist6.hlist); +#endif + goto out; + } + +#if IS_ENABLED(CONFIG_IPV6) + vs = rtnl_dereference(vxlan->vn6_sock); + if (vs && v) { + node = &v->hlist6; + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); + } +#endif + vs = rtnl_dereference(vxlan->vn4_sock); + if (vs && v) { + node = &v->hlist4; + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); + } +out: + spin_unlock(&vn->sock_lock); +} + +void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, + struct vxlan_sock *vs, + bool ipv6) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + struct vxlan_vni_node *v, *tmp; + struct vxlan_dev_node *node; + + if (!vg) + return; + + spin_lock(&vn->sock_lock); + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6) + node = &v->hlist6; + else +#endif + node = &v->hlist4; + node->vxlan = vxlan; + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); + } + spin_unlock(&vn->sock_lock); +} + +void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_vni_node *v, *tmp; + + if (!vg) + return; + + spin_lock(&vn->sock_lock); + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + hlist_del_init_rcu(&v->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&v->hlist6.hlist); +#endif + } + spin_unlock(&vn->sock_lock); +} + +static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode, + struct vxlan_vni_stats *dest) +{ + int i; + + memset(dest, 0, sizeof(*dest)); + for_each_possible_cpu(i) { + struct vxlan_vni_stats_pcpu *pstats; + struct vxlan_vni_stats temp; + unsigned int start; + + pstats = per_cpu_ptr(vninode->stats, i); + do { + start = u64_stats_fetch_begin(&pstats->syncp); + memcpy(&temp, &pstats->stats, sizeof(temp)); + } while (u64_stats_fetch_retry(&pstats->syncp, start)); + + dest->rx_packets += temp.rx_packets; + dest->rx_bytes += temp.rx_bytes; + dest->rx_drops += temp.rx_drops; + dest->rx_errors += temp.rx_errors; + dest->tx_packets += temp.tx_packets; + dest->tx_bytes += temp.tx_bytes; + dest->tx_drops += temp.tx_drops; + dest->tx_errors += temp.tx_errors; + } +} + +static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode, + int type, unsigned int len) +{ + struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats); + + u64_stats_update_begin(&pstats->syncp); + switch (type) { + case VXLAN_VNI_STATS_RX: + pstats->stats.rx_bytes += len; + pstats->stats.rx_packets++; + break; + case VXLAN_VNI_STATS_RX_DROPS: + pstats->stats.rx_drops++; + break; + case VXLAN_VNI_STATS_RX_ERRORS: + pstats->stats.rx_errors++; + break; + case VXLAN_VNI_STATS_TX: + pstats->stats.tx_bytes += len; + pstats->stats.tx_packets++; + break; + case VXLAN_VNI_STATS_TX_DROPS: + pstats->stats.tx_drops++; + break; + case VXLAN_VNI_STATS_TX_ERRORS: + pstats->stats.tx_errors++; + break; + } + u64_stats_update_end(&pstats->syncp); +} + +void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni, + struct vxlan_vni_node *vninode, + int type, unsigned int len) +{ + struct vxlan_vni_node *vnode; + + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + return; + + if (vninode) { + vnode = vninode; + } else { + vnode = vxlan_vnifilter_lookup(vxlan, vni); + if (!vnode) + return; + } + + vxlan_vnifilter_stats_add(vnode, type, len); +} + +static u32 vnirange(struct vxlan_vni_node *vbegin, + struct vxlan_vni_node *vend) +{ + return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni)); +} + +static size_t vxlan_vnifilter_entry_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct tunnel_msg)) + + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */ + + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */ + + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */ + + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */ +} + +static int __vnifilter_entry_fill_stats(struct sk_buff *skb, + const struct vxlan_vni_node *vbegin) +{ + struct vxlan_vni_stats vstats; + struct nlattr *vstats_attr; + + vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS); + if (!vstats_attr) + goto out_stats_err; + + vxlan_vnifilter_stats_get(vbegin, &vstats); + if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES, + vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS, + vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS, + vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS, + vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES, + vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS, + vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS, + vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) || + nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS, + vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD)) + goto out_stats_err; + + nla_nest_end(skb, vstats_attr); + + return 0; + +out_stats_err: + nla_nest_cancel(skb, vstats_attr); + return -EMSGSIZE; +} + +static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb, + struct vxlan_vni_node *vbegin, + struct vxlan_vni_node *vend, + bool fill_stats) +{ + struct nlattr *ventry; + u32 vs = be32_to_cpu(vbegin->vni); + u32 ve = 0; + + if (vbegin != vend) + ve = be32_to_cpu(vend->vni); + + ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY); + if (!ventry) + return false; + + if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs)) + goto out_err; + + if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve)) + goto out_err; + + if (!vxlan_addr_any(&vbegin->remote_ip)) { + if (vbegin->remote_ip.sa.sa_family == AF_INET) { + if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP, + vbegin->remote_ip.sin.sin_addr.s_addr)) + goto out_err; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6, + &vbegin->remote_ip.sin6.sin6_addr)) + goto out_err; +#endif + } + } + + if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin)) + goto out_err; + + nla_nest_end(skb, ventry); + + return true; + +out_err: + nla_nest_cancel(skb, ventry); + + return false; +} + +static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan, + struct vxlan_vni_node *vninode, int cmd) +{ + struct tunnel_msg *tmsg; + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct net *net = dev_net(vxlan->dev); + int err = -ENOBUFS; + + skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out_err; + + err = -EMSGSIZE; + nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0); + if (!nlh) + goto out_err; + tmsg = nlmsg_data(nlh); + memset(tmsg, 0, sizeof(*tmsg)); + tmsg->family = AF_BRIDGE; + tmsg->ifindex = vxlan->dev->ifindex; + + if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false)) + goto out_err; + + nlmsg_end(skb, nlh); + rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL); + + return; + +out_err: + rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err); + + kfree_skb(skb); +} + +static int vxlan_vnifilter_dump_dev(const struct net_device *dev, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL; + struct vxlan_dev *vxlan = netdev_priv(dev); + struct tunnel_msg *new_tmsg, *tmsg; + int idx = 0, s_idx = cb->args[1]; + struct vxlan_vni_group *vg; + struct nlmsghdr *nlh; + bool dump_stats; + int err = 0; + + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + return -EINVAL; + + /* RCU needed because of the vni locking rules (rcu || rtnl) */ + vg = rcu_dereference(vxlan->vnigrp); + if (!vg || !vg->num_vnis) + return 0; + + tmsg = nlmsg_data(cb->nlh); + dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS); + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + new_tmsg = nlmsg_data(nlh); + memset(new_tmsg, 0, sizeof(*new_tmsg)); + new_tmsg->family = PF_BRIDGE; + new_tmsg->ifindex = dev->ifindex; + + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + if (idx < s_idx) { + idx++; + continue; + } + if (!vbegin) { + vbegin = v; + vend = v; + continue; + } + if (!dump_stats && vnirange(vend, v) == 1 && + vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) { + goto update_end; + } else { + if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, + dump_stats)) { + err = -EMSGSIZE; + break; + } + idx += vnirange(vbegin, vend) + 1; + vbegin = v; + } +update_end: + vend = v; + } + + if (!err && vbegin) { + if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats)) + err = -EMSGSIZE; + } + + cb->args[1] = err ? idx : 0; + + nlmsg_end(skb, nlh); + + return err; +} + +static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx = 0, err = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + struct tunnel_msg *tmsg; + struct net_device *dev; + + tmsg = nlmsg_data(cb->nlh); + + if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { + NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header"); + return -EINVAL; + } + + rcu_read_lock(); + if (tmsg->ifindex) { + dev = dev_get_by_index_rcu(net, tmsg->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + if (!netif_is_vxlan(dev)) { + NL_SET_ERR_MSG(cb->extack, + "The device is not a vxlan device"); + err = -EINVAL; + goto out_err; + } + err = vxlan_vnifilter_dump_dev(dev, skb, cb); + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) + goto out_err; + } else { + for_each_netdev_rcu(net, dev) { + if (!netif_is_vxlan(dev)) + continue; + if (idx < s_idx) + goto skip; + err = vxlan_vnifilter_dump_dev(dev, skb, cb); + if (err == -EMSGSIZE) + break; +skip: + idx++; + } + } + cb->args[0] = idx; + rcu_read_unlock(); + + return skb->len; + +out_err: + rcu_read_unlock(); + + return err; +} + +static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = { + [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 }, + [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 }, + [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY, + .len = sizeof_field(struct iphdr, daddr) }, + [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, +}; + +static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = { + [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED }, +}; + +static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, + union vxlan_addr *old_remote_ip, + union vxlan_addr *remote_ip, + struct netlink_ext_ack *extack) +{ + struct vxlan_rdst *dst = &vxlan->default_dst; + u32 hash_index; + int err = 0; + + hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); + spin_lock_bh(&vxlan->hash_lock[hash_index]); + if (remote_ip && !vxlan_addr_any(remote_ip)) { + err = vxlan_fdb_update(vxlan, all_zeros_mac, + remote_ip, + NUD_REACHABLE | NUD_PERMANENT, + NLM_F_APPEND | NLM_F_CREATE, + vxlan->cfg.dst_port, + vni, + vni, + dst->remote_ifindex, + NTF_SELF, 0, true, extack); + if (err) { + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + return err; + } + } + + if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) { + __vxlan_fdb_delete(vxlan, all_zeros_mac, + *old_remote_ip, + vxlan->cfg.dst_port, + vni, vni, + dst->remote_ifindex, + true); + } + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + + return err; +} + +static int vxlan_vni_update_group(struct vxlan_dev *vxlan, + struct vxlan_vni_node *vninode, + union vxlan_addr *group, + bool create, bool *changed, + struct netlink_ext_ack *extack) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_rdst *dst = &vxlan->default_dst; + union vxlan_addr *newrip = NULL, *oldrip = NULL; + union vxlan_addr old_remote_ip; + int ret = 0; + + memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip)); + + /* if per vni remote ip is not present use vxlan dev + * default dst remote ip for fdb entry + */ + if (group && !vxlan_addr_any(group)) { + newrip = group; + } else { + if (!vxlan_addr_any(&dst->remote_ip)) + newrip = &dst->remote_ip; + } + + /* if old rip exists, and no newrip, + * explicitly delete old rip + */ + if (!newrip && !vxlan_addr_any(&old_remote_ip)) + oldrip = &old_remote_ip; + + if (!newrip && !oldrip) + return 0; + + if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip)) + return 0; + + ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni, + oldrip, newrip, + extack); + if (ret) + goto out; + + if (group) + memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip)); + + if (vxlan->dev->flags & IFF_UP) { + if (vxlan_addr_multicast(&old_remote_ip) && + !vxlan_group_used(vn, vxlan, vninode->vni, + &old_remote_ip, + vxlan->default_dst.remote_ifindex)) { + ret = vxlan_igmp_leave(vxlan, &old_remote_ip, + 0); + if (ret) + goto out; + } + + if (vxlan_addr_multicast(&vninode->remote_ip)) { + ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0); + if (ret == -EADDRINUSE) + ret = 0; + if (ret) + goto out; + } + } + + *changed = true; + + return 0; +out: + return ret; +} + +int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, + union vxlan_addr *old_remote_ip, + union vxlan_addr *new_remote_ip, + struct netlink_ext_ack *extack) +{ + struct list_head *headp, *hpos; + struct vxlan_vni_group *vg; + struct vxlan_vni_node *vent; + int ret; + + vg = rtnl_dereference(vxlan->vnigrp); + + headp = &vg->vni_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct vxlan_vni_node, vlist); + if (vxlan_addr_any(&vent->remote_ip)) { + ret = vxlan_update_default_fdb_entry(vxlan, vent->vni, + old_remote_ip, + new_remote_ip, + extack); + if (ret) + return ret; + } + } + + return 0; +} + +static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, + struct vxlan_vni_node *vninode) +{ + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + struct vxlan_rdst *dst = &vxlan->default_dst; + + /* if per vni remote_ip not present, delete the + * default dst remote_ip previously added for this vni + */ + if (!vxlan_addr_any(&vninode->remote_ip) || + !vxlan_addr_any(&dst->remote_ip)) + __vxlan_fdb_delete(vxlan, all_zeros_mac, + (vxlan_addr_any(&vninode->remote_ip) ? + dst->remote_ip : vninode->remote_ip), + vxlan->cfg.dst_port, + vninode->vni, vninode->vni, + dst->remote_ifindex, + true); + + if (vxlan->dev->flags & IFF_UP) { + if (vxlan_addr_multicast(&vninode->remote_ip) && + !vxlan_group_used(vn, vxlan, vninode->vni, + &vninode->remote_ip, + dst->remote_ifindex)) { + vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0); + } + } +} + +static int vxlan_vni_update(struct vxlan_dev *vxlan, + struct vxlan_vni_group *vg, + __be32 vni, union vxlan_addr *group, + bool *changed, + struct netlink_ext_ack *extack) +{ + struct vxlan_vni_node *vninode; + int ret; + + vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni, + vxlan_vni_rht_params); + if (!vninode) + return 0; + + ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed, + extack); + if (ret) + return ret; + + if (changed) + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); + + return 0; +} + +static void __vxlan_vni_add_list(struct vxlan_vni_group *vg, + struct vxlan_vni_node *v) +{ + struct list_head *headp, *hpos; + struct vxlan_vni_node *vent; + + headp = &vg->vni_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct vxlan_vni_node, vlist); + if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni)) + continue; + else + break; + } + list_add_rcu(&v->vlist, hpos); + vg->num_vnis++; +} + +static void __vxlan_vni_del_list(struct vxlan_vni_group *vg, + struct vxlan_vni_node *v) +{ + list_del_rcu(&v->vlist); + vg->num_vnis--; +} + +static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan, + __be32 vni) +{ + struct vxlan_vni_node *vninode; + + vninode = kzalloc(sizeof(*vninode), GFP_KERNEL); + if (!vninode) + return NULL; + vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu); + if (!vninode->stats) { + kfree(vninode); + return NULL; + } + vninode->vni = vni; + vninode->hlist4.vxlan = vxlan; +#if IS_ENABLED(CONFIG_IPV6) + vninode->hlist6.vxlan = vxlan; +#endif + + return vninode; +} + +static void vxlan_vni_free(struct vxlan_vni_node *vninode) +{ + free_percpu(vninode->stats); + kfree(vninode); +} + +static int vxlan_vni_add(struct vxlan_dev *vxlan, + struct vxlan_vni_group *vg, + u32 vni, union vxlan_addr *group, + struct netlink_ext_ack *extack) +{ + struct vxlan_vni_node *vninode; + __be32 v = cpu_to_be32(vni); + bool changed = false; + int err = 0; + + if (vxlan_vnifilter_lookup(vxlan, v)) + return vxlan_vni_update(vxlan, vg, v, group, &changed, extack); + + err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v); + if (err) { + NL_SET_ERR_MSG(extack, "VNI in use"); + return err; + } + + vninode = vxlan_vni_alloc(vxlan, v); + if (!vninode) + return -ENOMEM; + + err = rhashtable_lookup_insert_fast(&vg->vni_hash, + &vninode->vnode, + vxlan_vni_rht_params); + if (err) { + vxlan_vni_free(vninode); + return err; + } + + __vxlan_vni_add_list(vg, vninode); + + if (vxlan->dev->flags & IFF_UP) + vxlan_vs_add_del_vninode(vxlan, vninode, false); + + err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, + extack); + + if (changed) + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); + + return err; +} + +static void vxlan_vni_node_rcu_free(struct rcu_head *rcu) +{ + struct vxlan_vni_node *v; + + v = container_of(rcu, struct vxlan_vni_node, rcu); + vxlan_vni_free(v); +} + +static int vxlan_vni_del(struct vxlan_dev *vxlan, + struct vxlan_vni_group *vg, + u32 vni, struct netlink_ext_ack *extack) +{ + struct vxlan_vni_node *vninode; + __be32 v = cpu_to_be32(vni); + int err = 0; + + vg = rtnl_dereference(vxlan->vnigrp); + + vninode = rhashtable_lookup_fast(&vg->vni_hash, &v, + vxlan_vni_rht_params); + if (!vninode) { + err = -ENOENT; + goto out; + } + + vxlan_vni_delete_group(vxlan, vninode); + + err = rhashtable_remove_fast(&vg->vni_hash, + &vninode->vnode, + vxlan_vni_rht_params); + if (err) + goto out; + + __vxlan_vni_del_list(vg, vninode); + + vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL); + + if (vxlan->dev->flags & IFF_UP) + vxlan_vs_add_del_vninode(vxlan, vninode, true); + + call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free); + + return 0; +out: + return err; +} + +static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni, + __u32 end_vni, union vxlan_addr *group, + int cmd, struct netlink_ext_ack *extack) +{ + struct vxlan_vni_group *vg; + int v, err = 0; + + vg = rtnl_dereference(vxlan->vnigrp); + + for (v = start_vni; v <= end_vni; v++) { + switch (cmd) { + case RTM_NEWTUNNEL: + err = vxlan_vni_add(vxlan, vg, v, group, extack); + break; + case RTM_DELTUNNEL: + err = vxlan_vni_del(vxlan, vg, v, extack); + break; + default: + err = -EOPNOTSUPP; + break; + } + if (err) + goto out; + } + + return 0; +out: + return err; +} + +static int vxlan_process_vni_filter(struct vxlan_dev *vxlan, + struct nlattr *nlvnifilter, + int cmd, struct netlink_ext_ack *extack) +{ + struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1]; + u32 vni_start = 0, vni_end = 0; + union vxlan_addr group; + int err; + + err = nla_parse_nested(vattrs, + VXLAN_VNIFILTER_ENTRY_MAX, + nlvnifilter, vni_filter_entry_policy, + extack); + if (err) + return err; + + if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) { + vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]); + vni_end = vni_start; + } + + if (vattrs[VXLAN_VNIFILTER_ENTRY_END]) + vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]); + + if (!vni_start && !vni_end) { + NL_SET_ERR_MSG_ATTR(extack, nlvnifilter, + "vni start nor end found in vni entry"); + return -EINVAL; + } + + if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) { + group.sin.sin_addr.s_addr = + nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]); + group.sa.sa_family = AF_INET; + } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) { + group.sin6.sin6_addr = + nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]); + group.sa.sa_family = AF_INET6; + } else { + memset(&group, 0, sizeof(group)); + } + + if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) { + NL_SET_ERR_MSG(extack, + "Local interface required for multicast remote group"); + + return -EINVAL; + } + + err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd, + extack); + if (err) + return err; + + return 0; +} + +void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_node *v, *tmp; + struct vxlan_vni_group *vg; + + vg = rtnl_dereference(vxlan->vnigrp); + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { + rhashtable_remove_fast(&vg->vni_hash, &v->vnode, + vxlan_vni_rht_params); + hlist_del_init_rcu(&v->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&v->hlist6.hlist); +#endif + __vxlan_vni_del_list(vg, v); + vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL); + call_rcu(&v->rcu, vxlan_vni_node_rcu_free); + } + rhashtable_destroy(&vg->vni_hash); + kfree(vg); +} + +int vxlan_vnigroup_init(struct vxlan_dev *vxlan) +{ + struct vxlan_vni_group *vg; + int ret; + + vg = kzalloc(sizeof(*vg), GFP_KERNEL); + if (!vg) + return -ENOMEM; + ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params); + if (ret) { + kfree(vg); + return ret; + } + INIT_LIST_HEAD(&vg->vni_list); + rcu_assign_pointer(vxlan->vnigrp, vg); + + return 0; +} + +static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tunnel_msg *tmsg; + struct vxlan_dev *vxlan; + struct net_device *dev; + struct nlattr *attr; + int err, vnis = 0; + int rem; + + /* this should validate the header and check for remaining bytes */ + err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX, + vni_filter_policy, extack); + if (err < 0) + return err; + + tmsg = nlmsg_data(nlh); + dev = __dev_get_by_index(net, tmsg->ifindex); + if (!dev) + return -ENODEV; + + if (!netif_is_vxlan(dev)) { + NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device"); + return -EINVAL; + } + + vxlan = netdev_priv(dev); + + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) + return -EOPNOTSUPP; + + nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) { + switch (nla_type(attr)) { + case VXLAN_VNIFILTER_ENTRY: + err = vxlan_process_vni_filter(vxlan, attr, + nlh->nlmsg_type, extack); + break; + default: + continue; + } + vnis++; + if (err) + break; + } + + if (!vnis) { + NL_SET_ERR_MSG_MOD(extack, "No vnis found to process"); + err = -EINVAL; + } + + return err; +} + +void vxlan_vnifilter_init(void) +{ + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, + vxlan_vnifilter_dump, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, + vxlan_vnifilter_process, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, + vxlan_vnifilter_process, NULL, 0); +} + +void vxlan_vnifilter_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL); + rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL); + rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL); +} |