summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/sfc/tc_encap_actions.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/sfc/tc_encap_actions.c')
-rw-r--r--drivers/net/ethernet/sfc/tc_encap_actions.c751
1 files changed, 751 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c
new file mode 100644
index 0000000000..87443f9dfd
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2023, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "tc_encap_actions.h"
+#include "tc.h"
+#include "mae.h"
+#include <net/vxlan.h>
+#include <net/geneve.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+
+static const struct rhashtable_params efx_neigh_ht_params = {
+ .key_len = offsetof(struct efx_neigh_binder, ha),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_neigh_binder, linkage),
+};
+
+static const struct rhashtable_params efx_tc_encap_ht_params = {
+ .key_len = offsetofend(struct efx_tc_encap_action, key),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_tc_encap_action, linkage),
+};
+
+static void efx_tc_encap_free(void *ptr, void *__unused)
+{
+ struct efx_tc_encap_action *enc = ptr;
+
+ WARN_ON(refcount_read(&enc->ref));
+ kfree(enc);
+}
+
+static void efx_neigh_free(void *ptr, void *__unused)
+{
+ struct efx_neigh_binder *neigh = ptr;
+
+ WARN_ON(refcount_read(&neigh->ref));
+ WARN_ON(!list_empty(&neigh->users));
+ put_net_track(neigh->net, &neigh->ns_tracker);
+ netdev_put(neigh->egdev, &neigh->dev_tracker);
+ kfree(neigh);
+}
+
+int efx_tc_init_encap_actions(struct efx_nic *efx)
+{
+ int rc;
+
+ rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
+ if (rc < 0)
+ goto fail_neigh_ht;
+ rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
+ if (rc < 0)
+ goto fail_encap_ht;
+ return 0;
+fail_encap_ht:
+ rhashtable_destroy(&efx->tc->neigh_ht);
+fail_neigh_ht:
+ return rc;
+}
+
+/* Only call this in init failure teardown.
+ * Normal exit should fini instead as there may be entries in the table.
+ */
+void efx_tc_destroy_encap_actions(struct efx_nic *efx)
+{
+ rhashtable_destroy(&efx->tc->encap_ht);
+ rhashtable_destroy(&efx->tc->neigh_ht);
+}
+
+void efx_tc_fini_encap_actions(struct efx_nic *efx)
+{
+ rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
+ rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
+}
+
+static void efx_neigh_update(struct work_struct *work);
+
+static int efx_bind_neigh(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap, struct net *net,
+ struct netlink_ext_ack *extack)
+{
+ struct efx_neigh_binder *neigh, *old;
+ struct flowi6 flow6 = {};
+ struct flowi4 flow4 = {};
+ int rc;
+
+ /* GCC stupidly thinks that only values explicitly listed in the enum
+ * definition can _possibly_ be sensible case values, so without this
+ * cast it complains about the IPv6 versions.
+ */
+ switch ((int)encap->type) {
+ case EFX_ENCAP_TYPE_VXLAN:
+ case EFX_ENCAP_TYPE_GENEVE:
+ flow4.flowi4_proto = IPPROTO_UDP;
+ flow4.fl4_dport = encap->key.tp_dst;
+ flow4.flowi4_tos = encap->key.tos;
+ flow4.daddr = encap->key.u.ipv4.dst;
+ flow4.saddr = encap->key.u.ipv4.src;
+ break;
+ case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
+ case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
+ flow6.flowi6_proto = IPPROTO_UDP;
+ flow6.fl6_dport = encap->key.tp_dst;
+ flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
+ encap->key.label);
+ flow6.daddr = encap->key.u.ipv6.dst;
+ flow6.saddr = encap->key.u.ipv6.src;
+ break;
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
+ (int)encap->type);
+ return -EOPNOTSUPP;
+ }
+
+ neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
+ if (!neigh)
+ return -ENOMEM;
+ neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
+ neigh->dst_ip = flow4.daddr;
+ neigh->dst_ip6 = flow6.daddr;
+
+ old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
+ &neigh->linkage,
+ efx_neigh_ht_params);
+ if (old) {
+ /* don't need our new entry */
+ put_net_track(neigh->net, &neigh->ns_tracker);
+ kfree(neigh);
+ if (IS_ERR(old)) /* oh dear, it's actually an error */
+ return PTR_ERR(old);
+ if (!refcount_inc_not_zero(&old->ref))
+ return -EAGAIN;
+ /* existing entry found, ref taken */
+ neigh = old;
+ } else {
+ /* New entry. We need to initiate a lookup */
+ struct neighbour *n;
+ struct rtable *rt;
+
+ if (encap->type & EFX_ENCAP_FLAG_IPV6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct dst_entry *dst;
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
+ NULL);
+ rc = PTR_ERR_OR_ZERO(dst);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
+ goto out_free;
+ }
+ neigh->egdev = dst->dev;
+ netdev_hold(neigh->egdev, &neigh->dev_tracker,
+ GFP_KERNEL_ACCOUNT);
+ neigh->ttl = ip6_dst_hoplimit(dst);
+ n = dst_neigh_lookup(dst, &flow6.daddr);
+ dst_release(dst);
+#else
+ /* We shouldn't ever get here, because if IPv6 isn't
+ * enabled how did someone create an IPv6 tunnel_key?
+ */
+ rc = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
+ goto out_free;
+#endif
+ } else {
+ rt = ip_route_output_key(net, &flow4);
+ if (IS_ERR_OR_NULL(rt)) {
+ rc = PTR_ERR_OR_ZERO(rt);
+ if (!rc)
+ rc = -EIO;
+ NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
+ goto out_free;
+ }
+ neigh->egdev = rt->dst.dev;
+ netdev_hold(neigh->egdev, &neigh->dev_tracker,
+ GFP_KERNEL_ACCOUNT);
+ neigh->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
+ ip_rt_put(rt);
+ }
+ if (!n) {
+ rc = -ENETUNREACH;
+ NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
+ netdev_put(neigh->egdev, &neigh->dev_tracker);
+ goto out_free;
+ }
+ refcount_set(&neigh->ref, 1);
+ INIT_LIST_HEAD(&neigh->users);
+ read_lock_bh(&n->lock);
+ ether_addr_copy(neigh->ha, n->ha);
+ neigh->n_valid = n->nud_state & NUD_VALID;
+ read_unlock_bh(&n->lock);
+ rwlock_init(&neigh->lock);
+ INIT_WORK(&neigh->work, efx_neigh_update);
+ neigh->efx = efx;
+ neigh->used = jiffies;
+ if (!neigh->n_valid)
+ /* Prod ARP to find us a neighbour */
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+ /* Add us to this neigh */
+ encap->neigh = neigh;
+ list_add_tail(&encap->list, &neigh->users);
+ return 0;
+
+out_free:
+ /* cleanup common to several error paths */
+ rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
+ efx_neigh_ht_params);
+ synchronize_rcu();
+ put_net_track(net, &neigh->ns_tracker);
+ kfree(neigh);
+ return rc;
+}
+
+static void efx_free_neigh(struct efx_neigh_binder *neigh)
+{
+ struct efx_nic *efx = neigh->efx;
+
+ rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
+ efx_neigh_ht_params);
+ synchronize_rcu();
+ netdev_put(neigh->egdev, &neigh->dev_tracker);
+ put_net_track(neigh->net, &neigh->ns_tracker);
+ kfree(neigh);
+}
+
+static void efx_release_neigh(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+
+ if (!neigh)
+ return;
+ list_del(&encap->list);
+ encap->neigh = NULL;
+ if (!refcount_dec_and_test(&neigh->ref))
+ return; /* still in use */
+ efx_free_neigh(neigh);
+}
+
+static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+ struct ethhdr *eth;
+
+ encap->encap_hdr_len = sizeof(*eth);
+ eth = (struct ethhdr *)encap->encap_hdr;
+
+ if (encap->neigh->n_valid)
+ ether_addr_copy(eth->h_dest, neigh->ha);
+ else
+ eth_zero_addr(eth->h_dest);
+ ether_addr_copy(eth->h_source, neigh->egdev->dev_addr);
+ eth->h_proto = htons(proto);
+}
+
+static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+ struct ip_tunnel_key *key = &encap->key;
+ struct iphdr *ip;
+
+ ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*ip);
+
+ ip->daddr = key->u.ipv4.dst;
+ ip->saddr = key->u.ipv4.src;
+ ip->ttl = neigh->ttl;
+ ip->protocol = ipproto;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
+ ip_send_check(ip);
+}
+
+#ifdef CONFIG_IPV6
+static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+ struct ip_tunnel_key *key = &encap->key;
+ struct ipv6hdr *ip;
+
+ ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*ip);
+
+ ip6_flow_hdr(ip, key->tos, key->label);
+ ip->daddr = key->u.ipv6.dst;
+ ip->saddr = key->u.ipv6.src;
+ ip->hop_limit = neigh->ttl;
+ ip->nexthdr = ipproto;
+ ip->version = 0x6;
+ ip->payload_len = cpu_to_be16(len);
+}
+#endif
+
+static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
+{
+ struct ip_tunnel_key *key = &encap->key;
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*udp);
+
+ udp->dest = key->tp_dst;
+ udp->len = cpu_to_be16(sizeof(*udp) + len);
+}
+
+static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
+{
+ struct ip_tunnel_key *key = &encap->key;
+ struct vxlanhdr *vxlan;
+
+ vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*vxlan);
+
+ vxlan->vx_flags = VXLAN_HF_VNI;
+ vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id));
+}
+
+static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
+{
+ struct ip_tunnel_key *key = &encap->key;
+ struct genevehdr *geneve;
+ u32 vni;
+
+ geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*geneve);
+
+ geneve->proto_type = htons(ETH_P_TEB);
+ /* convert tun_id to host-endian so we can use host arithmetic to
+ * extract individual bytes.
+ */
+ vni = ntohl(tunnel_id_to_key32(key->tun_id));
+ geneve->vni[0] = vni >> 16;
+ geneve->vni[1] = vni >> 8;
+ geneve->vni[2] = vni;
+}
+
+#define vxlan_header_l4_len (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+#define vxlan4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
+static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IP);
+ efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
+ efx_gen_tun_header_vxlan(encap);
+}
+
+#define geneve_header_l4_len (sizeof(struct udphdr) + sizeof(struct genevehdr))
+#define geneve4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
+static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IP);
+ efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
+ efx_gen_tun_header_geneve(encap);
+}
+
+#ifdef CONFIG_IPV6
+#define vxlan6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)
+static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IPV6);
+ efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
+ efx_gen_tun_header_vxlan(encap);
+}
+
+#define geneve6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)
+static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IPV6);
+ efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
+ efx_gen_tun_header_geneve(encap);
+}
+#endif
+
+static void efx_gen_encap_header(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ encap->n_valid = encap->neigh->n_valid;
+
+ /* GCC stupidly thinks that only values explicitly listed in the enum
+ * definition can _possibly_ be sensible case values, so without this
+ * cast it complains about the IPv6 versions.
+ */
+ switch ((int)encap->type) {
+ case EFX_ENCAP_TYPE_VXLAN:
+ efx_gen_vxlan_header_ipv4(encap);
+ break;
+ case EFX_ENCAP_TYPE_GENEVE:
+ efx_gen_geneve_header_ipv4(encap);
+ break;
+#ifdef CONFIG_IPV6
+ case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
+ efx_gen_vxlan_header_ipv6(encap);
+ break;
+ case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
+ efx_gen_geneve_header_ipv6(encap);
+ break;
+#endif
+ default:
+ /* unhandled encap type, can't happen */
+ if (net_ratelimit())
+ netif_err(efx, drv, efx->net_dev,
+ "Bogus encap type %d, can't generate\n",
+ encap->type);
+
+ /* Use fallback action. */
+ encap->n_valid = false;
+ break;
+ }
+}
+
+static void efx_tc_update_encap(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ struct efx_tc_action_set_list *acts, *fallback;
+ struct efx_tc_flow_rule *rule;
+ struct efx_tc_action_set *act;
+ int rc;
+
+ if (encap->n_valid) {
+ /* Make sure no rules are using this encap while we change it */
+ list_for_each_entry(act, &encap->users, encap_user) {
+ acts = act->user;
+ if (WARN_ON(!acts)) /* can't happen */
+ continue;
+ rule = container_of(acts, struct efx_tc_flow_rule, acts);
+ if (rule->fallback)
+ fallback = rule->fallback;
+ else /* fallback fallback: deliver to PF */
+ fallback = &efx->tc->facts.pf;
+ rc = efx_mae_update_rule(efx, fallback->fw_id,
+ rule->fw_id);
+ if (rc)
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to update (f) rule %08x rc %d\n",
+ rule->fw_id, rc);
+ else
+ netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
+ rule->fw_id);
+ }
+ }
+
+ /* Make sure we don't leak arbitrary bytes on the wire;
+ * set an all-0s ethernet header. A successful call to
+ * efx_gen_encap_header() will overwrite this.
+ */
+ memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
+ encap->encap_hdr_len = ETH_HLEN;
+
+ if (encap->neigh) {
+ read_lock_bh(&encap->neigh->lock);
+ efx_gen_encap_header(efx, encap);
+ read_unlock_bh(&encap->neigh->lock);
+ } else {
+ encap->n_valid = false;
+ }
+
+ rc = efx_mae_update_encap_md(efx, encap);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to update encap hdr %08x rc %d\n",
+ encap->fw_id, rc);
+ return;
+ }
+ netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
+ encap->fw_id);
+ if (!encap->n_valid)
+ return;
+ /* Update rule users: use the action if they are now ready */
+ list_for_each_entry(act, &encap->users, encap_user) {
+ acts = act->user;
+ if (WARN_ON(!acts)) /* can't happen */
+ continue;
+ rule = container_of(acts, struct efx_tc_flow_rule, acts);
+ if (!efx_tc_check_ready(efx, rule))
+ continue;
+ rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
+ if (rc)
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to update rule %08x rc %d\n",
+ rule->fw_id, rc);
+ else
+ netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
+ rule->fw_id);
+ }
+}
+
+static void efx_neigh_update(struct work_struct *work)
+{
+ struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
+ struct efx_tc_encap_action *encap;
+ struct efx_nic *efx = neigh->efx;
+
+ mutex_lock(&efx->tc->mutex);
+ list_for_each_entry(encap, &neigh->users, list)
+ efx_tc_update_encap(neigh->efx, encap);
+ /* release ref taken in efx_neigh_event() */
+ if (refcount_dec_and_test(&neigh->ref))
+ efx_free_neigh(neigh);
+ mutex_unlock(&efx->tc->mutex);
+}
+
+static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
+{
+ struct efx_neigh_binder keys = {NULL}, *neigh;
+ bool n_valid, ipv6 = false;
+ char ha[ETH_ALEN];
+ size_t keysize;
+
+ if (WARN_ON(!efx->tc))
+ return NOTIFY_DONE;
+
+ if (n->tbl == &arp_tbl) {
+ keysize = sizeof(keys.dst_ip);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (n->tbl == ipv6_stub->nd_tbl) {
+ ipv6 = true;
+ keysize = sizeof(keys.dst_ip6);
+#endif
+ } else {
+ return NOTIFY_DONE;
+ }
+ if (!n->parms) {
+ netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
+ return NOTIFY_DONE;
+ }
+ keys.net = read_pnet(&n->parms->net);
+ if (n->tbl->key_len != keysize) {
+ netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
+ n->tbl->key_len);
+ return NOTIFY_DONE;
+ }
+ read_lock_bh(&n->lock); /* Get a consistent view */
+ memcpy(ha, n->ha, ETH_ALEN);
+ n_valid = (n->nud_state & NUD_VALID) && !n->dead;
+ read_unlock_bh(&n->lock);
+ if (ipv6)
+ memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
+ else
+ memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
+ rcu_read_lock();
+ neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
+ efx_neigh_ht_params);
+ if (!neigh || neigh->dying)
+ /* We're not interested in this neighbour */
+ goto done;
+ write_lock_bh(&neigh->lock);
+ if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
+ write_unlock_bh(&neigh->lock);
+ /* Nothing has changed; no work to do */
+ goto done;
+ }
+ neigh->n_valid = n_valid;
+ memcpy(neigh->ha, ha, ETH_ALEN);
+ write_unlock_bh(&neigh->lock);
+ if (refcount_inc_not_zero(&neigh->ref)) {
+ rcu_read_unlock();
+ if (!schedule_work(&neigh->work))
+ /* failed to schedule, release the ref we just took */
+ if (refcount_dec_and_test(&neigh->ref))
+ efx_free_neigh(neigh);
+ } else {
+done:
+ rcu_read_unlock();
+ }
+ return NOTIFY_DONE;
+}
+
+bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
+{
+ struct efx_tc_action_set *act;
+
+ /* Encap actions can only be offloaded if they have valid
+ * neighbour info for the outer Ethernet header.
+ */
+ list_for_each_entry(act, &rule->acts.list, list)
+ if (act->encap_md && !act->encap_md->n_valid)
+ return false;
+ return true;
+}
+
+struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
+ struct efx_nic *efx, const struct ip_tunnel_info *info,
+ struct net_device *egdev, struct netlink_ext_ack *extack)
+{
+ enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
+ struct efx_tc_encap_action *encap, *old;
+ struct efx_rep *to_efv;
+ s64 rc;
+
+ if (type == EFX_ENCAP_TYPE_NONE) {
+ /* dest is not an encap device */
+ NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ rc = efx_mae_check_encap_type_supported(efx, type);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
+ return ERR_PTR(rc);
+ }
+ /* No support yet for Geneve options */
+ if (info->options_len) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ switch (info->mode) {
+ case IP_TUNNEL_INFO_TX:
+ break;
+ case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
+ type |= EFX_ENCAP_FLAG_IPV6;
+ break;
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
+ info->mode);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+ encap->type = type;
+ encap->key = info->key;
+ INIT_LIST_HEAD(&encap->users);
+ old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
+ &encap->linkage,
+ efx_tc_encap_ht_params);
+ if (old) {
+ /* don't need our new entry */
+ kfree(encap);
+ if (IS_ERR(old)) /* oh dear, it's actually an error */
+ return ERR_CAST(old);
+ if (!refcount_inc_not_zero(&old->ref))
+ return ERR_PTR(-EAGAIN);
+ /* existing entry found, ref taken */
+ return old;
+ }
+
+ rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
+ if (rc < 0)
+ goto out_remove;
+ to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
+ if (IS_ERR(to_efv)) {
+ /* neigh->egdev isn't ours */
+ NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
+ rc = PTR_ERR(to_efv);
+ goto out_release;
+ }
+ rc = efx_tc_flower_external_mport(efx, to_efv);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
+ goto out_release;
+ }
+ encap->dest_mport = rc;
+ read_lock_bh(&encap->neigh->lock);
+ efx_gen_encap_header(efx, encap);
+ read_unlock_bh(&encap->neigh->lock);
+
+ rc = efx_mae_allocate_encap_md(efx, encap);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
+ goto out_release;
+ }
+
+ /* ref and return */
+ refcount_set(&encap->ref, 1);
+ return encap;
+out_release:
+ efx_release_neigh(efx, encap);
+out_remove:
+ rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
+ efx_tc_encap_ht_params);
+ kfree(encap);
+ return ERR_PTR(rc);
+}
+
+void efx_tc_flower_release_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ if (!refcount_dec_and_test(&encap->ref))
+ return; /* still in use */
+ efx_release_neigh(efx, encap);
+ rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
+ efx_tc_encap_ht_params);
+ efx_mae_free_encap_md(efx, encap);
+ kfree(encap);
+}
+
+static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
+{
+ struct efx_tc_encap_action *encap, *next;
+
+ list_for_each_entry_safe(encap, next, &neigh->users, list) {
+ /* Should cause neigh usage count to fall to zero, freeing it */
+ efx_release_neigh(efx, encap);
+ /* The encap has lost its neigh, so it's now unready */
+ efx_tc_update_encap(efx, encap);
+ }
+}
+
+void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
+{
+ struct efx_neigh_binder *neigh;
+ struct rhashtable_iter walk;
+
+ mutex_lock(&efx->tc->mutex);
+ rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
+ rhashtable_walk_start(&walk);
+ while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
+ if (IS_ERR(neigh))
+ continue;
+ if (neigh->egdev != net_dev)
+ continue;
+ neigh->dying = true;
+ rhashtable_walk_stop(&walk);
+ synchronize_rcu(); /* Make sure any updates see dying flag */
+ efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
+ rhashtable_walk_start(&walk);
+ }
+ rhashtable_walk_stop(&walk);
+ rhashtable_walk_exit(&walk);
+ mutex_unlock(&efx->tc->mutex);
+}
+
+int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
+ void *ptr)
+{
+ if (efx->type->is_vf)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ return efx_neigh_event(efx, ptr);
+ default:
+ return NOTIFY_DONE;
+ }
+}