diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ethernet/netronome/nfp/flower | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/netronome/nfp/flower')
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/action.c | 1307 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 383 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 752 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 2260 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 289 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/lag_conf.c | 723 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/main.c | 1032 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/main.h | 732 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/match.c | 722 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/metadata.c | 726 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/offload.c | 1974 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/qos_conf.c | 887 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 1564 |
13 files changed, 13351 insertions, 0 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c new file mode 100644 index 0000000000..2b383d92d7 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -0,0 +1,1307 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/mpls.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_mpls.h> +#include <net/tc_act/tc_pedit.h> +#include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" + +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. + */ +#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \ + IP_TUNNEL_INFO_IPV6) +#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ + NFP_FL_TUNNEL_KEY | \ + NFP_FL_TUNNEL_GENEVE_OPT) + +static int +nfp_fl_push_mpls(struct nfp_fl_push_mpls *push_mpls, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + size_t act_size = sizeof(struct nfp_fl_push_mpls); + u32 mpls_lse = 0; + + push_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_MPLS; + push_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + /* BOS is optional in the TC action but required for offload. */ + if (act->mpls_push.bos != ACT_MPLS_BOS_NOT_SET) { + mpls_lse |= act->mpls_push.bos << MPLS_LS_S_SHIFT; + } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: BOS field must explicitly be set for MPLS push"); + return -EOPNOTSUPP; + } + + /* Leave MPLS TC as a default value of 0 if not explicitly set. */ + if (act->mpls_push.tc != ACT_MPLS_TC_NOT_SET) + mpls_lse |= act->mpls_push.tc << MPLS_LS_TC_SHIFT; + + /* Proto, label and TTL are enforced and verified for MPLS push. */ + mpls_lse |= act->mpls_push.label << MPLS_LS_LABEL_SHIFT; + mpls_lse |= act->mpls_push.ttl << MPLS_LS_TTL_SHIFT; + push_mpls->ethtype = act->mpls_push.proto; + push_mpls->lse = cpu_to_be32(mpls_lse); + + return 0; +} + +static void +nfp_fl_pop_mpls(struct nfp_fl_pop_mpls *pop_mpls, + const struct flow_action_entry *act) +{ + size_t act_size = sizeof(struct nfp_fl_pop_mpls); + + pop_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_POP_MPLS; + pop_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ; + pop_mpls->ethtype = act->mpls_pop.proto; +} + +static void +nfp_fl_set_mpls(struct nfp_fl_set_mpls *set_mpls, + const struct flow_action_entry *act) +{ + size_t act_size = sizeof(struct nfp_fl_set_mpls); + u32 mpls_lse = 0, mpls_mask = 0; + + set_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_SET_MPLS; + set_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + if (act->mpls_mangle.label != ACT_MPLS_LABEL_NOT_SET) { + mpls_lse |= act->mpls_mangle.label << MPLS_LS_LABEL_SHIFT; + mpls_mask |= MPLS_LS_LABEL_MASK; + } + if (act->mpls_mangle.tc != ACT_MPLS_TC_NOT_SET) { + mpls_lse |= act->mpls_mangle.tc << MPLS_LS_TC_SHIFT; + mpls_mask |= MPLS_LS_TC_MASK; + } + if (act->mpls_mangle.bos != ACT_MPLS_BOS_NOT_SET) { + mpls_lse |= act->mpls_mangle.bos << MPLS_LS_S_SHIFT; + mpls_mask |= MPLS_LS_S_MASK; + } + if (act->mpls_mangle.ttl) { + mpls_lse |= act->mpls_mangle.ttl << MPLS_LS_TTL_SHIFT; + mpls_mask |= MPLS_LS_TTL_MASK; + } + + set_mpls->lse = cpu_to_be32(mpls_lse); + set_mpls->lse_mask = cpu_to_be32(mpls_mask); +} + +static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) +{ + size_t act_size = sizeof(struct nfp_fl_pop_vlan); + + pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN; + pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; + pop_vlan->reserved = 0; +} + +static void +nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, + const struct flow_action_entry *act) +{ + size_t act_size = sizeof(struct nfp_fl_push_vlan); + u16 tmp_push_vlan_tci; + + push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; + push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push_vlan->reserved = 0; + push_vlan->vlan_tpid = act->vlan.proto; + + tmp_push_vlan_tci = + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid); + push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); +} + +static int +nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_flow, int act_len, + struct netlink_ext_ack *extack) +{ + size_t act_size = sizeof(struct nfp_fl_pre_lag); + struct nfp_fl_pre_lag *pre_lag; + struct net_device *out_dev; + int err; + + out_dev = act->dev; + if (!out_dev || !netif_is_lag_master(out_dev)) + return 0; + + if (act_len + act_size > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at LAG action"); + return -EOPNOTSUPP; + } + + /* Pre_lag action must be first on action list. + * If other actions already exist they need to be pushed forward. + */ + if (act_len) + memmove(nfp_flow->action_data + act_size, + nfp_flow->action_data, act_len); + + pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data; + err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag, extack); + if (err) + return err; + + pre_lag->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_LAG; + pre_lag->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + return act_size; +} + +static int +nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, + const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_flow, + bool last, struct net_device *in_dev, + enum nfp_flower_tun_type tun_type, int *tun_out_cnt, + bool pkt_host, struct netlink_ext_ack *extack) +{ + size_t act_size = sizeof(struct nfp_fl_output); + struct nfp_flower_priv *priv = app->priv; + struct net_device *out_dev; + u16 tmp_flags; + + output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT; + output->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + out_dev = act->dev; + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid egress interface for mirred action"); + return -EOPNOTSUPP; + } + + tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0; + + if (tun_type) { + /* Verify the egress netdev matches the tunnel type. */ + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface does not match the required tunnel type"); + return -EOPNOTSUPP; + } + + if (*tun_out_cnt) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot offload more than one tunnel mirred output per filter"); + return -EOPNOTSUPP; + } + (*tun_out_cnt)++; + + output->flags = cpu_to_be16(tmp_flags | + NFP_FL_OUT_FLAGS_USE_TUN); + output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else if (netif_is_lag_master(out_dev) && + priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + int gid; + + output->flags = cpu_to_be16(tmp_flags); + gid = nfp_flower_lag_get_output_id(app, out_dev); + if (gid < 0) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot find group id for LAG action"); + return gid; + } + output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); + } else if (nfp_flower_internal_port_can_offload(app, out_dev)) { + if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES) && + !(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules not supported in loaded firmware"); + return -EOPNOTSUPP; + } + + if (nfp_flow->pre_tun_rule.dev || !pkt_host) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules require single egress dev and ptype HOST action"); + return -EOPNOTSUPP; + } + + nfp_flow->pre_tun_rule.dev = out_dev; + + return 0; + } else { + /* Set action output parameters. */ + output->flags = cpu_to_be16(tmp_flags); + + if (nfp_netdev_is_nfp_repr(in_dev)) { + /* Confirm ingress and egress are on same device. */ + if (!netdev_port_same_parent_id(in_dev, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress and egress interfaces are on different devices"); + return -EOPNOTSUPP; + } + } + + if (!nfp_netdev_is_nfp_repr(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface is not an nfp port"); + return -EOPNOTSUPP; + } + + output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); + if (!output->port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid port id for egress interface"); + return -EOPNOTSUPP; + } + } + nfp_flow->meta.shortcut = output->port; + + return 0; +} + +static bool +nfp_flower_tun_is_gre(struct flow_rule *rule, int start_idx) +{ + struct flow_action_entry *act = rule->action.entries; + int num_act = rule->action.num_entries; + int act_idx; + + /* Preparse action list for next mirred or redirect action */ + for (act_idx = start_idx + 1; act_idx < num_act; act_idx++) + if (act[act_idx].id == FLOW_ACTION_REDIRECT || + act[act_idx].id == FLOW_ACTION_MIRRED) + return netif_is_gretap(act[act_idx].dev) || + netif_is_ip6gretap(act[act_idx].dev); + + return false; +} + +static enum nfp_flower_tun_type +nfp_fl_get_tun_from_act(struct nfp_app *app, + struct flow_rule *rule, + const struct flow_action_entry *act, int act_idx) +{ + const struct ip_tunnel_info *tun = act->tunnel; + struct nfp_flower_priv *priv = app->priv; + + /* Determine the tunnel type based on the egress netdev + * in the mirred action for tunnels without l4. + */ + if (nfp_flower_tun_is_gre(rule, act_idx)) + return NFP_FL_TUNNEL_GRE; + + switch (tun->key.tp_dst) { + case htons(IANA_VXLAN_UDP_PORT): + return NFP_FL_TUNNEL_VXLAN; + case htons(GENEVE_UDP_PORT): + if (priv->flower_ext_feats & NFP_FL_FEATS_GENEVE) + return NFP_FL_TUNNEL_GENEVE; + fallthrough; + default: + return NFP_FL_TUNNEL_NONE; + } +} + +static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) +{ + size_t act_size = sizeof(struct nfp_fl_pre_tunnel); + struct nfp_fl_pre_tunnel *pre_tun_act; + + /* Pre_tunnel action must be first on action list. + * If other actions already exist they need to be pushed forward. + */ + if (act_len) + memmove(act_data + act_size, act_data, act_len); + + pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data; + + memset(pre_tun_act, 0, act_size); + + pre_tun_act->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_TUNNEL; + pre_tun_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return pre_tun_act; +} + +static int +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel; + int opt_len, opt_cnt, act_start, tot_push_len; + u8 *src = ip_tunnel_info_opts(ip_tun); + + /* We need to populate the options in reverse order for HW. + * Therefore we go through the options, calculating the + * number of options and the total size, then we populate + * them in reverse order in the action list. + */ + opt_cnt = 0; + tot_push_len = 0; + opt_len = ip_tun->options_len; + while (opt_len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + opt_cnt++; + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed number of geneve options exceeded"); + return -EOPNOTSUPP; + } + + tot_push_len += sizeof(struct nfp_fl_push_geneve) + + opt->length * 4; + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); + return -EOPNOTSUPP; + } + + opt_len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); + return -EOPNOTSUPP; + } + + act_start = *list_len; + *list_len += tot_push_len; + src = ip_tunnel_info_opts(ip_tun); + while (opt_cnt) { + struct geneve_opt *opt = (struct geneve_opt *)src; + struct nfp_fl_push_geneve *push; + size_t act_size, len; + + opt_cnt--; + act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; + tot_push_len -= act_size; + len = act_start + tot_push_len; + + push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len]; + push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; + push->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push->reserved = 0; + push->class = opt->opt_class; + push->type = opt->type; + push->length = opt->length; + memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +static int +nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) +{ + const struct ip_tunnel_info *ip_tun = act->tunnel; + bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6; + size_t act_size = sizeof(struct nfp_fl_set_tun); + struct nfp_flower_priv *priv = app->priv; + u32 tmp_set_ip_tun_type_index = 0; + /* Currently support one pre-tunnel so index is always 0. */ + int pretun_idx = 0; + + if (!IS_ENABLED(CONFIG_IPV6) && ipv6) + return -EOPNOTSUPP; + + if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) + return -EOPNOTSUPP; + + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || + NFP_FL_TUNNEL_KEY != TUNNEL_KEY || + NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + if (ip_tun->options_len && + (tun_type != NFP_FL_TUNNEL_GENEVE || + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve options offload"); + return -EOPNOTSUPP; + } + + if (ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: loaded firmware does not support tunnel flag offload"); + return -EOPNOTSUPP; + } + + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; + set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + /* Set tunnel type and pre-tunnel index. */ + tmp_set_ip_tun_type_index |= + FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) | + FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); + + set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); + if (ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) + set_tun->tun_id = ip_tun->key.tun_id; + + if (ip_tun->key.ttl) { + set_tun->ttl = ip_tun->key.ttl; +#ifdef CONFIG_IPV6 + } else if (ipv6) { + struct net *net = dev_net(netdev); + struct flowi6 flow = {}; + struct dst_entry *dst; + + flow.daddr = ip_tun->key.u.ipv6.dst; + flow.flowi4_proto = IPPROTO_UDP; + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + if (!IS_ERR(dst)) { + set_tun->ttl = ip6_dst_hoplimit(dst); + dst_release(dst); + } else { + set_tun->ttl = net->ipv6.devconf_all->hop_limit; + } +#endif + } else { + struct net *net = dev_net(netdev); + struct flowi4 flow = {}; + struct rtable *rt; + int err; + + /* Do a route lookup to determine ttl - if fails then use + * default. Note that CONFIG_INET is a requirement of + * CONFIG_NET_SWITCHDEV so must be defined here. + */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + } + } + + set_tun->tos = ip_tun->key.tos; + set_tun->tun_flags = ip_tun->key.tun_flags; + + if (tun_type == NFP_FL_TUNNEL_GENEVE) { + set_tun->tun_proto = htons(ETH_P_TEB); + set_tun->tun_len = ip_tun->options_len / 4; + } + + /* Complete pre_tunnel action. */ + if (ipv6) { + pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6); + pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst; + } else { + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + } + + return 0; +} + +static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) +{ + u32 oldvalue = get_unaligned((u32 *)p_exact); + u32 oldmask = get_unaligned((u32 *)p_mask); + + value &= mask; + value |= oldvalue & ~mask; + + put_unaligned(oldmask | mask, (u32 *)p_mask); + put_unaligned(value, (u32 *)p_exact); +} + +static int +nfp_fl_set_eth(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_eth *set_eth, struct netlink_ext_ack *extack) +{ + u32 exact, mask; + + if (off + 4 > ETH_ALEN * 2) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + exact = act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); + return -EOPNOTSUPP; + } + + nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off], + &set_eth->eth_addr_mask[off]); + + set_eth->reserved = cpu_to_be16(0); + set_eth->head.jump_id = NFP_FL_ACTION_OPCODE_SET_ETHERNET; + set_eth->head.len_lw = sizeof(*set_eth) >> NFP_FL_LW_SIZ; + + return 0; +} + +struct ipv4_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +static int +nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_ip4_addrs *set_ip_addr, + struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos, + struct netlink_ext_ack *extack) +{ + struct ipv4_ttl_word *ttl_word_mask; + struct ipv4_ttl_word *ttl_word; + struct iphdr *tos_word_mask; + struct iphdr *tos_word; + __be32 exact, mask; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~act->mangle.mask; + exact = (__force __be32)act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 action"); + return -EOPNOTSUPP; + } + + switch (off) { + case offsetof(struct iphdr, daddr): + set_ip_addr->ipv4_dst_mask |= mask; + set_ip_addr->ipv4_dst &= ~mask; + set_ip_addr->ipv4_dst |= exact & mask; + set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS; + set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> + NFP_FL_LW_SIZ; + break; + case offsetof(struct iphdr, saddr): + set_ip_addr->ipv4_src_mask |= mask; + set_ip_addr->ipv4_src &= ~mask; + set_ip_addr->ipv4_src |= exact & mask; + set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS; + set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> + NFP_FL_LW_SIZ; + break; + case offsetof(struct iphdr, ttl): + ttl_word_mask = (struct ipv4_ttl_word *)&mask; + ttl_word = (struct ipv4_ttl_word *)&exact; + + if (ttl_word_mask->protocol || ttl_word_mask->check) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 ttl action"); + return -EOPNOTSUPP; + } + + set_ip_ttl_tos->ipv4_ttl_mask |= ttl_word_mask->ttl; + set_ip_ttl_tos->ipv4_ttl &= ~ttl_word_mask->ttl; + set_ip_ttl_tos->ipv4_ttl |= ttl_word->ttl & ttl_word_mask->ttl; + set_ip_ttl_tos->head.jump_id = + NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS; + set_ip_ttl_tos->head.len_lw = sizeof(*set_ip_ttl_tos) >> + NFP_FL_LW_SIZ; + break; + case round_down(offsetof(struct iphdr, tos), 4): + tos_word_mask = (struct iphdr *)&mask; + tos_word = (struct iphdr *)&exact; + + if (tos_word_mask->version || tos_word_mask->ihl || + tos_word_mask->tot_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 tos action"); + return -EOPNOTSUPP; + } + + set_ip_ttl_tos->ipv4_tos_mask |= tos_word_mask->tos; + set_ip_ttl_tos->ipv4_tos &= ~tos_word_mask->tos; + set_ip_ttl_tos->ipv4_tos |= tos_word->tos & tos_word_mask->tos; + set_ip_ttl_tos->head.jump_id = + NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS; + set_ip_ttl_tos->head.len_lw = sizeof(*set_ip_ttl_tos) >> + NFP_FL_LW_SIZ; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv4 header"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void +nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask, + struct nfp_fl_set_ipv6_addr *ip6) +{ + ip6->ipv6[word].mask |= mask; + ip6->ipv6[word].exact &= ~mask; + ip6->ipv6[word].exact |= exact & mask; + + ip6->reserved = cpu_to_be16(0); + ip6->head.jump_id = opcode_tag; + ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ; +} + +struct ipv6_hop_limit_word { + __be16 payload_len; + u8 nexthdr; + u8 hop_limit; +}; + +static int +nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) +{ + struct ipv6_hop_limit_word *fl_hl_mask; + struct ipv6_hop_limit_word *fl_hl; + + switch (off) { + case offsetof(struct ipv6hdr, payload_len): + fl_hl_mask = (struct ipv6_hop_limit_word *)&mask; + fl_hl = (struct ipv6_hop_limit_word *)&exact; + + if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 hop limit action"); + return -EOPNOTSUPP; + } + + ip_hl_fl->ipv6_hop_limit_mask |= fl_hl_mask->hop_limit; + ip_hl_fl->ipv6_hop_limit &= ~fl_hl_mask->hop_limit; + ip_hl_fl->ipv6_hop_limit |= fl_hl->hop_limit & + fl_hl_mask->hop_limit; + break; + case round_down(offsetof(struct ipv6hdr, flow_lbl), 4): + if (mask & ~IPV6_FLOWINFO_MASK || + exact & ~IPV6_FLOWINFO_MASK) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow info action"); + return -EOPNOTSUPP; + } + + ip_hl_fl->ipv6_label_mask |= mask; + ip_hl_fl->ipv6_label &= ~mask; + ip_hl_fl->ipv6_label |= exact & mask; + break; + } + + ip_hl_fl->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL; + ip_hl_fl->head.len_lw = sizeof(*ip_hl_fl) >> NFP_FL_LW_SIZ; + + return 0; +} + +static int +nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_ipv6_addr *ip_dst, + struct nfp_fl_set_ipv6_addr *ip_src, + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) +{ + __be32 exact, mask; + int err = 0; + u8 word; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~act->mangle.mask; + exact = (__force __be32)act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 action"); + return -EOPNOTSUPP; + } + + if (off < offsetof(struct ipv6hdr, saddr)) { + err = nfp_fl_set_ip6_hop_limit_flow_label(off, exact, mask, + ip_hl_fl, extack); + } else if (off < offsetof(struct ipv6hdr, daddr)) { + word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, + exact, mask, ip_src); + } else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) { + word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, + exact, mask, ip_dst); + } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv6 header"); + return -EOPNOTSUPP; + } + + return err; +} + +static int +nfp_fl_set_tport(const struct flow_action_entry *act, u32 off, + struct nfp_fl_set_tport *set_tport, int opcode, + struct netlink_ext_ack *extack) +{ + u32 exact, mask; + + if (off) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of L4 header"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + exact = act->mangle.val; + + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit L4 action"); + return -EOPNOTSUPP; + } + + nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val, + set_tport->tp_port_mask); + + set_tport->reserved = cpu_to_be16(0); + set_tport->head.jump_id = opcode; + set_tport->head.len_lw = sizeof(*set_tport) >> NFP_FL_LW_SIZ; + + return 0; +} + +static u32 nfp_fl_csum_l4_to_flag(u8 ip_proto) +{ + switch (ip_proto) { + case 0: + /* Filter doesn't force proto match, + * both TCP and UDP will be updated if encountered + */ + return TCA_CSUM_UPDATE_FLAG_TCP | TCA_CSUM_UPDATE_FLAG_UDP; + case IPPROTO_TCP: + return TCA_CSUM_UPDATE_FLAG_TCP; + case IPPROTO_UDP: + return TCA_CSUM_UPDATE_FLAG_UDP; + default: + /* All other protocols will be ignored by FW */ + return 0; + } +} + +struct nfp_flower_pedit_acts { + struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; + struct nfp_fl_set_ipv6_tc_hl_fl set_ip6_tc_hl_fl; + struct nfp_fl_set_ip4_ttl_tos set_ip_ttl_tos; + struct nfp_fl_set_ip4_addrs set_ip_addr; + struct nfp_fl_set_tport set_tport; + struct nfp_fl_set_eth set_eth; +}; + +static int +nfp_fl_commit_mangle(struct flow_rule *rule, char *nfp_action, + int *a_len, struct nfp_flower_pedit_acts *set_act, + u32 *csum_updated) +{ + size_t act_size = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; + } + + if (set_act->set_eth.head.len_lw) { + act_size = sizeof(set_act->set_eth); + memcpy(nfp_action, &set_act->set_eth, act_size); + *a_len += act_size; + } + + if (set_act->set_ip_ttl_tos.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip_ttl_tos); + memcpy(nfp_action, &set_act->set_ip_ttl_tos, act_size); + *a_len += act_size; + + /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ + *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | + nfp_fl_csum_l4_to_flag(ip_proto); + } + + if (set_act->set_ip_addr.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip_addr); + memcpy(nfp_action, &set_act->set_ip_addr, act_size); + *a_len += act_size; + + /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ + *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | + nfp_fl_csum_l4_to_flag(ip_proto); + } + + if (set_act->set_ip6_tc_hl_fl.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_tc_hl_fl); + memcpy(nfp_action, &set_act->set_ip6_tc_hl_fl, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } + + if (set_act->set_ip6_dst.head.len_lw && + set_act->set_ip6_src.head.len_lw) { + /* TC compiles set src and dst IPv6 address as a single action, + * the hardware requires this to be 2 separate actions. + */ + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_src); + memcpy(nfp_action, &set_act->set_ip6_src, act_size); + *a_len += act_size; + + act_size = sizeof(set_act->set_ip6_dst); + memcpy(&nfp_action[sizeof(set_act->set_ip6_src)], + &set_act->set_ip6_dst, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } else if (set_act->set_ip6_dst.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_dst); + memcpy(nfp_action, &set_act->set_ip6_dst, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } else if (set_act->set_ip6_src.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_ip6_src); + memcpy(nfp_action, &set_act->set_ip6_src, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } + if (set_act->set_tport.head.len_lw) { + nfp_action += act_size; + act_size = sizeof(set_act->set_tport); + memcpy(nfp_action, &set_act->set_tport, act_size); + *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); + } + + return 0; +} + +static int +nfp_fl_pedit(const struct flow_action_entry *act, + char *nfp_action, int *a_len, + u32 *csum_updated, struct nfp_flower_pedit_acts *set_act, + struct netlink_ext_ack *extack) +{ + enum flow_action_mangle_base htype; + u32 offset; + + htype = act->mangle.htype; + offset = act->mangle.offset; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + return nfp_fl_set_eth(act, offset, &set_act->set_eth, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + return nfp_fl_set_ip4(act, offset, &set_act->set_ip_addr, + &set_act->set_ip_ttl_tos, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + return nfp_fl_set_ip6(act, offset, &set_act->set_ip6_dst, + &set_act->set_ip6_src, + &set_act->set_ip6_tc_hl_fl, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + return nfp_fl_set_tport(act, offset, &set_act->set_tport, + NFP_FL_ACTION_OPCODE_SET_TCP, extack); + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + return nfp_fl_set_tport(act, offset, &set_act->set_tport, + NFP_FL_ACTION_OPCODE_SET_UDP, extack); + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported header"); + return -EOPNOTSUPP; + } +} + +static struct nfp_fl_meter *nfp_fl_meter(char *act_data) +{ + size_t act_size = sizeof(struct nfp_fl_meter); + struct nfp_fl_meter *meter_act; + + meter_act = (struct nfp_fl_meter *)act_data; + + memset(meter_act, 0, act_size); + + meter_act->head.jump_id = NFP_FL_ACTION_OPCODE_METER; + meter_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return meter_act; +} + +static int +nfp_flower_meter_action(struct nfp_app *app, + const struct flow_action_entry *action, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_meter *fl_meter; + u32 meter_id; + + if (*a_len + sizeof(struct nfp_fl_meter) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload:meter action size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + meter_id = action->hw_index; + if (!nfp_flower_search_meter_entry(app, meter_id)) { + NL_SET_ERR_MSG_MOD(extack, + "can not offload flow table with unsupported police action."); + return -EOPNOTSUPP; + } + + fl_meter = nfp_fl_meter(&nfp_fl->action_data[*a_len]); + *a_len += sizeof(struct nfp_fl_meter); + fl_meter->meter_id = cpu_to_be32(meter_id); + + return 0; +} + +static int +nfp_flower_output_action(struct nfp_app *app, + const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, bool last, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, + int *out_cnt, u32 *csum_updated, bool pkt_host, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_output *output; + int err, prelag_size; + + /* If csum_updated has not been reset by now, it means HW will + * incorrectly update csums when they are not requested. + */ + if (*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: set actions without updating checksums are not supported"); + return -EOPNOTSUPP; + } + + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: mirred output increases action list size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_output(app, output, act, nfp_fl, last, netdev, *tun_type, + tun_out_cnt, pkt_host, extack); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_output); + + if (priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + /* nfp_fl_pre_lag returns -err or size of prelag action added. + * This will be 0 if it is not egressing to a lag dev. + */ + prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len, extack); + if (prelag_size < 0) { + return prelag_size; + } else if (prelag_size > 0 && (!last || *out_cnt)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: LAG action has to be last action in action list"); + return -EOPNOTSUPP; + } + + *a_len += prelag_size; + } + (*out_cnt)++; + + return 0; +} + +static int +nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, + struct flow_rule *rule, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, + int *out_cnt, u32 *csum_updated, + struct nfp_flower_pedit_acts *set_act, bool *pkt_host, + struct netlink_ext_ack *extack, int act_idx) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tun *set_tun; + struct nfp_fl_push_vlan *psh_v; + struct nfp_fl_push_mpls *psh_m; + struct nfp_fl_pop_vlan *pop_v; + struct nfp_fl_pop_mpls *pop_m; + struct nfp_fl_set_mpls *set_m; + int err; + + switch (act->id) { + case FLOW_ACTION_DROP: + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP); + break; + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_REDIRECT: + err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, + true, tun_type, tun_out_cnt, + out_cnt, csum_updated, *pkt_host, + extack); + if (err) + return err; + break; + case FLOW_ACTION_MIRRED_INGRESS: + case FLOW_ACTION_MIRRED: + err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, + false, tun_type, tun_out_cnt, + out_cnt, csum_updated, *pkt_host, + extack); + if (err) + return err; + break; + case FLOW_ACTION_VLAN_POP: + if (*a_len + + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop vlan"); + return -EOPNOTSUPP; + } + + pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV); + + nfp_fl_pop_vlan(pop_v); + *a_len += sizeof(struct nfp_fl_pop_vlan); + break; + case FLOW_ACTION_VLAN_PUSH: + if (*a_len + + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push vlan"); + return -EOPNOTSUPP; + } + + psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_push_vlan(psh_v, act); + *a_len += sizeof(struct nfp_fl_push_vlan); + break; + case FLOW_ACTION_TUNNEL_ENCAP: { + const struct ip_tunnel_info *ip_tun = act->tunnel; + + *tun_type = nfp_fl_get_tun_from_act(app, rule, act, act_idx); + if (*tun_type == NFP_FL_TUNNEL_NONE) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list"); + return -EOPNOTSUPP; + } + + if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel flags in action list"); + return -EOPNOTSUPP; + } + + /* Pre-tunnel action is required for tunnel encap. + * This checks for next hop entries on NFP. + * If none, the packet falls back before applying other actions. + */ + if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + + sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); + return -EOPNOTSUPP; + } + + pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + *a_len += sizeof(struct nfp_fl_pre_tunnel); + + err = nfp_fl_push_geneve_options(nfp_fl, a_len, act, extack); + if (err) + return err; + + set_tun = (void *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type, + netdev, extack); + if (err) + return err; + *a_len += sizeof(struct nfp_fl_set_tun); + } + break; + case FLOW_ACTION_TUNNEL_DECAP: + /* Tunnel decap is handled by default so accept action. */ + return 0; + case FLOW_ACTION_MANGLE: + if (nfp_fl_pedit(act, &nfp_fl->action_data[*a_len], + a_len, csum_updated, set_act, extack)) + return -EOPNOTSUPP; + break; + case FLOW_ACTION_CSUM: + /* csum action requests recalc of something we have not fixed */ + if (act->csum_flags & ~*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported csum update action in action list"); + return -EOPNOTSUPP; + } + /* If we will correctly fix the csum we can remove it from the + * csum update list. Which will later be used to check support. + */ + *csum_updated &= ~act->csum_flags; + break; + case FLOW_ACTION_MPLS_PUSH: + if (*a_len + + sizeof(struct nfp_fl_push_mpls) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push MPLS"); + return -EOPNOTSUPP; + } + + psh_m = (struct nfp_fl_push_mpls *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + err = nfp_fl_push_mpls(psh_m, act, extack); + if (err) + return err; + *a_len += sizeof(struct nfp_fl_push_mpls); + break; + case FLOW_ACTION_MPLS_POP: + if (*a_len + + sizeof(struct nfp_fl_pop_mpls) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop MPLS"); + return -EOPNOTSUPP; + } + + pop_m = (struct nfp_fl_pop_mpls *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_pop_mpls(pop_m, act); + *a_len += sizeof(struct nfp_fl_pop_mpls); + break; + case FLOW_ACTION_MPLS_MANGLE: + if (*a_len + + sizeof(struct nfp_fl_set_mpls) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at set MPLS"); + return -EOPNOTSUPP; + } + + set_m = (struct nfp_fl_set_mpls *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_set_mpls(set_m, act); + *a_len += sizeof(struct nfp_fl_set_mpls); + break; + case FLOW_ACTION_PTYPE: + /* TC ptype skbedit sets PACKET_HOST for ingress redirect. */ + if (act->ptype != PACKET_HOST) + return -EOPNOTSUPP; + + *pkt_host = true; + break; + case FLOW_ACTION_POLICE: + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported police action in action list"); + return -EOPNOTSUPP; + } + + err = nfp_flower_meter_action(app, act, nfp_fl, a_len, netdev, + extack); + if (err) + return err; + break; + default: + /* Currently we do not handle any other actions. */ + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool nfp_fl_check_mangle_start(struct flow_action *flow_act, + int current_act_idx) +{ + struct flow_action_entry current_act; + struct flow_action_entry prev_act; + + current_act = flow_act->entries[current_act_idx]; + if (current_act.id != FLOW_ACTION_MANGLE) + return false; + + if (current_act_idx == 0) + return true; + + prev_act = flow_act->entries[current_act_idx - 1]; + + return prev_act.id != FLOW_ACTION_MANGLE; +} + +static bool nfp_fl_check_mangle_end(struct flow_action *flow_act, + int current_act_idx) +{ + struct flow_action_entry current_act; + struct flow_action_entry next_act; + + current_act = flow_act->entries[current_act_idx]; + if (current_act.id != FLOW_ACTION_MANGLE) + return false; + + if (current_act_idx == flow_act->num_entries) + return true; + + next_act = flow_act->entries[current_act_idx + 1]; + + return next_act.id != FLOW_ACTION_MANGLE; +} + +int nfp_flower_compile_action(struct nfp_app *app, + struct flow_rule *rule, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack) +{ + int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; + struct nfp_flower_pedit_acts set_act; + enum nfp_flower_tun_type tun_type; + struct flow_action_entry *act; + bool pkt_host = false; + u32 csum_updated = 0; + + if (!flow_action_hw_stats_check(&rule->action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) + return -EOPNOTSUPP; + + memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); + nfp_flow->meta.act_len = 0; + tun_type = NFP_FL_TUNNEL_NONE; + act_len = 0; + act_cnt = 0; + tun_out_cnt = 0; + out_cnt = 0; + + flow_action_for_each(i, act, &rule->action) { + if (nfp_fl_check_mangle_start(&rule->action, i)) + memset(&set_act, 0, sizeof(set_act)); + err = nfp_flower_loop_action(app, act, rule, nfp_flow, &act_len, + netdev, &tun_type, &tun_out_cnt, + &out_cnt, &csum_updated, + &set_act, &pkt_host, extack, i); + if (err) + return err; + act_cnt++; + if (nfp_fl_check_mangle_end(&rule->action, i)) + nfp_fl_commit_mangle(rule, + &nfp_flow->action_data[act_len], + &act_len, &set_act, &csum_updated); + } + + /* We optimise when the action list is small, this can unfortunately + * not happen once we have more than one action in the action list. + */ + if (act_cnt > 1) + nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_flow->meta.act_len = act_len; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c new file mode 100644 index 0000000000..153533cd8f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/dst_metadata.h> + +#include "main.h" +#include "../nfp_net.h" +#include "../nfp_net_repr.h" +#include "./cmsg.h" + +static struct nfp_flower_cmsg_hdr * +nfp_flower_cmsg_get_hdr(struct sk_buff *skb) +{ + return (struct nfp_flower_cmsg_hdr *)skb->data; +} + +struct sk_buff * +nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, + enum nfp_flower_cmsg_type_port type, gfp_t flag) +{ + struct nfp_flower_cmsg_hdr *ch; + struct sk_buff *skb; + + size += NFP_FLOWER_CMSG_HLEN; + + skb = nfp_app_ctrl_msg_alloc(app, size, flag); + if (!skb) + return NULL; + + ch = nfp_flower_cmsg_get_hdr(skb); + ch->pad = 0; + ch->version = NFP_FLOWER_CMSG_VER1; + ch->type = type; + skb_put(skb, size); + + return skb; +} + +struct sk_buff * +nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports) +{ + struct nfp_flower_cmsg_mac_repr *msg; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports), + NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL); + if (!skb) + return NULL; + + msg = nfp_flower_cmsg_get_data(skb); + memset(msg->reserved, 0, sizeof(msg->reserved)); + msg->num_ports = num_ports; + + return skb; +} + +void +nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx, + unsigned int nbi, unsigned int nbi_port, + unsigned int phys_port) +{ + struct nfp_flower_cmsg_mac_repr *msg; + + msg = nfp_flower_cmsg_get_data(skb); + msg->ports[idx].idx = idx; + msg->ports[idx].info = nbi & NFP_FLOWER_CMSG_MAC_REPR_NBI; + msg->ports[idx].nbi_port = nbi_port; + msg->ports[idx].phys_port = phys_port; +} + +int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok, + unsigned int mtu, bool mtu_only) +{ + struct nfp_flower_cmsg_portmod *msg; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg), + NFP_FLOWER_CMSG_TYPE_PORT_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id); + msg->reserved = 0; + msg->info = carrier_ok; + + if (mtu_only) + msg->info |= NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY; + + msg->mtu = cpu_to_be16(mtu); + + nfp_ctrl_tx(repr->app->ctrl, skb); + + return 0; +} + +int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists) +{ + struct nfp_flower_cmsg_portreify *msg; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg), + NFP_FLOWER_CMSG_TYPE_PORT_REIFY, + GFP_KERNEL); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id); + msg->reserved = 0; + msg->info = cpu_to_be16(exists); + + nfp_ctrl_tx(repr->app->ctrl, skb); + + return 0; +} + +static bool +nfp_flower_process_mtu_ack(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_flower_cmsg_portmod *msg; + + msg = nfp_flower_cmsg_get_data(skb); + + if (!(msg->info & NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY)) + return false; + + spin_lock_bh(&app_priv->mtu_conf.lock); + if (!app_priv->mtu_conf.requested_val || + app_priv->mtu_conf.portnum != be32_to_cpu(msg->portnum) || + be16_to_cpu(msg->mtu) != app_priv->mtu_conf.requested_val) { + /* Not an ack for requested MTU change. */ + spin_unlock_bh(&app_priv->mtu_conf.lock); + return false; + } + + app_priv->mtu_conf.ack = true; + app_priv->mtu_conf.requested_val = 0; + wake_up(&app_priv->mtu_conf.wait_q); + spin_unlock_bh(&app_priv->mtu_conf.lock); + + return true; +} + +static void +nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_portmod *msg; + struct net_device *netdev; + bool link; + + msg = nfp_flower_cmsg_get_data(skb); + link = msg->info & NFP_FLOWER_CMSG_PORTMOD_INFO_LINK; + + rtnl_lock(); + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL); + rcu_read_unlock(); + if (!netdev) { + nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n", + be32_to_cpu(msg->portnum)); + rtnl_unlock(); + return; + } + + if (link) { + u16 mtu = be16_to_cpu(msg->mtu); + + netif_carrier_on(netdev); + + /* An MTU of 0 from the firmware should be ignored */ + if (mtu) + dev_set_mtu(netdev, mtu); + } else { + netif_carrier_off(netdev); + } + rtnl_unlock(); +} + +static void +nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_cmsg_portreify *msg; + bool exists; + + msg = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + exists = !!nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL); + rcu_read_unlock(); + if (!exists) { + nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n", + be32_to_cpu(msg->portnum)); + return; + } + + atomic_inc(&priv->reify_replies); + wake_up(&priv->reify_wait_queue); +} + +static void +nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) +{ + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); + struct nfp_flower_cmsg_merge_hint *msg; + struct nfp_fl_payload *sub_flows[2]; + struct nfp_flower_priv *priv; + int err, i, flow_cnt; + + msg = nfp_flower_cmsg_get_data(skb); + /* msg->count starts at 0 and always assumes at least 1 entry. */ + flow_cnt = msg->count + 1; + + if (msg_len < struct_size(msg, flow, flow_cnt)) { + nfp_flower_cmsg_warn(app, "Merge hint ctrl msg too short - %d bytes but expect %zd\n", + msg_len, struct_size(msg, flow, flow_cnt)); + return; + } + + if (flow_cnt != 2) { + nfp_flower_cmsg_warn(app, "Merge hint contains %d flows - two are expected\n", + flow_cnt); + return; + } + + priv = app->priv; + mutex_lock(&priv->nfp_fl_lock); + for (i = 0; i < flow_cnt; i++) { + u32 ctx = be32_to_cpu(msg->flow[i].host_ctx); + + sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx); + if (!sub_flows[i]) { + nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n"); + goto err_mutex_unlock; + } + } + + err = nfp_flower_merge_offloaded_flows(app, sub_flows[0], sub_flows[1]); + /* Only warn on memory fail. Hint veto will not break functionality. */ + if (err == -ENOMEM) + nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n"); + +err_mutex_unlock: + mutex_unlock(&priv->nfp_fl_lock); +} + +static void +nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_flower_cmsg_hdr *cmsg_hdr; + enum nfp_flower_cmsg_type_port type; + bool skb_stored = false; + + cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); + + type = cmsg_hdr->type; + switch (type) { + case NFP_FLOWER_CMSG_TYPE_PORT_MOD: + nfp_flower_cmsg_portmod_rx(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_MERGE_HINT: + if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) { + nfp_flower_cmsg_merge_hint_rx(app, skb); + break; + } + goto err_default; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: + nfp_tunnel_request_route_v4(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6: + nfp_tunnel_request_route_v6(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: + nfp_tunnel_keep_alive(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6: + nfp_tunnel_keep_alive_v6(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_QOS_STATS: + nfp_flower_stats_rlim_reply(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG: + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + skb_stored = nfp_flower_lag_unprocessed_msg(app, skb); + break; + } + fallthrough; + default: +err_default: + nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", + type); + goto out; + } + + if (!skb_stored) + dev_consume_skb_any(skb); + return; +out: + dev_kfree_skb_any(skb); +} + +void nfp_flower_cmsg_process_rx(struct work_struct *work) +{ + struct sk_buff_head cmsg_joined; + struct nfp_flower_priv *priv; + struct sk_buff *skb; + + priv = container_of(work, struct nfp_flower_priv, cmsg_work); + skb_queue_head_init(&cmsg_joined); + + spin_lock_bh(&priv->cmsg_skbs_high.lock); + skb_queue_splice_tail_init(&priv->cmsg_skbs_high, &cmsg_joined); + spin_unlock_bh(&priv->cmsg_skbs_high.lock); + + spin_lock_bh(&priv->cmsg_skbs_low.lock); + skb_queue_splice_tail_init(&priv->cmsg_skbs_low, &cmsg_joined); + spin_unlock_bh(&priv->cmsg_skbs_low.lock); + + while ((skb = __skb_dequeue(&cmsg_joined))) + nfp_flower_cmsg_process_one_rx(priv->app, skb); +} + +static void +nfp_flower_queue_ctl_msg(struct nfp_app *app, struct sk_buff *skb, int type) +{ + struct nfp_flower_priv *priv = app->priv; + struct sk_buff_head *skb_head; + + if (type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) + skb_head = &priv->cmsg_skbs_high; + else + skb_head = &priv->cmsg_skbs_low; + + if (skb_queue_len(skb_head) >= NFP_FLOWER_WORKQ_MAX_SKBS) { + nfp_flower_cmsg_warn(app, "Dropping queued control messages\n"); + dev_kfree_skb_any(skb); + return; + } + + skb_queue_tail(skb_head, skb); + schedule_work(&priv->cmsg_work); +} + +void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_hdr *cmsg_hdr; + + cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); + + if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) { + nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n", + cmsg_hdr->version); + dev_kfree_skb_any(skb); + return; + } + + if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_FLOW_STATS) { + /* We need to deal with stats updates from HW asap */ + nfp_flower_rx_flow_stats(app, skb); + dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_MOD && + nfp_flower_process_mtu_ack(app, skb)) { + /* Handle MTU acks outside wq to prevent RTNL conflict. */ + dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) { + /* Acks from the NFP that the route is added - ignore. */ + dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { + /* Handle REIFY acks outside wq to prevent RTNL conflict. */ + nfp_flower_cmsg_portreify_rx(app, skb); + dev_consume_skb_any(skb); + } else { + nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h new file mode 100644 index 0000000000..2df2af1da7 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -0,0 +1,752 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef NFP_FLOWER_CMSG_H +#define NFP_FLOWER_CMSG_H + +#include <linux/bitfield.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/geneve.h> +#include <net/gre.h> +#include <net/vxlan.h> + +#include "../nfp_app.h" +#include "../nfpcore/nfp_cpp.h" + +#define NFP_FLOWER_LAYER_EXT_META BIT(0) +#define NFP_FLOWER_LAYER_PORT BIT(1) +#define NFP_FLOWER_LAYER_MAC BIT(2) +#define NFP_FLOWER_LAYER_TP BIT(3) +#define NFP_FLOWER_LAYER_IPV4 BIT(4) +#define NFP_FLOWER_LAYER_IPV6 BIT(5) +#define NFP_FLOWER_LAYER_CT BIT(6) +#define NFP_FLOWER_LAYER_VXLAN BIT(7) + +#define NFP_FLOWER_LAYER2_GRE BIT(0) +#define NFP_FLOWER_LAYER2_QINQ BIT(4) +#define NFP_FLOWER_LAYER2_GENEVE BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) + +#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) +#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) +#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) + +#define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) +#define NFP_FLOWER_MASK_MPLS_TC GENMASK(11, 9) +#define NFP_FLOWER_MASK_MPLS_BOS BIT(8) +#define NFP_FLOWER_MASK_MPLS_Q BIT(0) + +#define NFP_FL_IP_FRAG_FIRST BIT(7) +#define NFP_FL_IP_FRAGMENTED BIT(6) + +/* GRE Tunnel flags */ +#define NFP_FL_GRE_FLAG_KEY BIT(2) + +/* Compressed HW representation of TCP Flags */ +#define NFP_FL_TCP_FLAG_URG BIT(4) +#define NFP_FL_TCP_FLAG_PSH BIT(3) +#define NFP_FL_TCP_FLAG_RST BIT(2) +#define NFP_FL_TCP_FLAG_SYN BIT(1) +#define NFP_FL_TCP_FLAG_FIN BIT(0) + +#define NFP_FL_SC_ACT_DROP 0x80000000 +#define NFP_FL_SC_ACT_USER 0x7D000000 +#define NFP_FL_SC_ACT_POPV 0x6A000000 +#define NFP_FL_SC_ACT_NULL 0x00000000 + +/* The maximum action list size (in bytes) supported by the NFP. + */ +#define NFP_FL_MAX_A_SIZ 1216 +#define NFP_FL_LW_SIZ 2 + +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT 32 +#define NFP_FL_MAX_GENEVE_OPT_CNT 64 +#define NFP_FL_MAX_GENEVE_OPT_KEY 32 +#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8 + +/* Action opcodes */ +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 +#define NFP_FL_ACTION_OPCODE_POP_MPLS 4 +#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 +#define NFP_FL_ACTION_OPCODE_SET_MPLS 8 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS 10 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL 13 +#define NFP_FL_ACTION_OPCODE_SET_UDP 14 +#define NFP_FL_ACTION_OPCODE_SET_TCP 15 +#define NFP_FL_ACTION_OPCODE_PRE_LAG 16 +#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_METER 24 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 +#define NFP_FL_ACTION_OPCODE_NUM 32 + +#define NFP_FL_OUT_FLAGS_LAST BIT(15) +#define NFP_FL_OUT_FLAGS_USE_TUN BIT(4) +#define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) + +#define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) +#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) + +/* LAG ports */ +#define NFP_FL_LAG_OUT 0xC0DE0000 + +/* Tunnel ports */ +#define NFP_FL_PORT_TYPE_TUN 0x50000000 +#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0) + +#define NFP_FLOWER_WORKQ_MAX_SKBS 30000 + +/* Cmesg reply (empirical) timeout*/ +#define NFP_FL_REPLY_TIMEOUT msecs_to_jiffies(40) + +#define nfp_flower_cmsg_warn(app, fmt, args...) \ + do { \ + if (net_ratelimit()) \ + nfp_warn((app)->cpp, fmt, ## args); \ + } while (0) + +enum nfp_flower_tun_type { + NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_GRE = 1, + NFP_FL_TUNNEL_VXLAN = 2, + NFP_FL_TUNNEL_GENEVE = 4, +}; + +struct nfp_fl_act_head { + u8 jump_id; + u8 len_lw; +}; + +struct nfp_fl_set_eth { + struct nfp_fl_act_head head; + __be16 reserved; + u8 eth_addr_mask[ETH_ALEN * 2]; + u8 eth_addr_val[ETH_ALEN * 2]; +}; + +struct nfp_fl_set_ip4_addrs { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 ipv4_src_mask; + __be32 ipv4_src; + __be32 ipv4_dst_mask; + __be32 ipv4_dst; +}; + +struct nfp_fl_set_ip4_ttl_tos { + struct nfp_fl_act_head head; + u8 ipv4_ttl_mask; + u8 ipv4_tos_mask; + u8 ipv4_ttl; + u8 ipv4_tos; + __be16 reserved; +}; + +struct nfp_fl_set_ipv6_tc_hl_fl { + struct nfp_fl_act_head head; + u8 ipv6_tc_mask; + u8 ipv6_hop_limit_mask; + __be16 reserved; + u8 ipv6_tc; + u8 ipv6_hop_limit; + __be32 ipv6_label_mask; + __be32 ipv6_label; +}; + +struct nfp_fl_set_ipv6_addr { + struct nfp_fl_act_head head; + __be16 reserved; + struct { + __be32 mask; + __be32 exact; + } ipv6[4]; +}; + +struct nfp_fl_set_tport { + struct nfp_fl_act_head head; + __be16 reserved; + u8 tp_port_mask[4]; + u8 tp_port_val[4]; +}; + +struct nfp_fl_output { + struct nfp_fl_act_head head; + __be16 flags; + __be32 port; +}; + +struct nfp_fl_push_vlan { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 vlan_tpid; + __be16 vlan_tci; +}; + +struct nfp_fl_pop_vlan { + struct nfp_fl_act_head head; + __be16 reserved; +}; + +struct nfp_fl_pre_lag { + struct nfp_fl_act_head head; + __be16 group_id; + u8 lag_version[3]; + u8 instance; +}; + +#define NFP_FL_PRE_LAG_VER_OFF 8 + +struct nfp_fl_pre_tunnel { + struct nfp_fl_act_head head; + __be16 flags; + union { + __be32 ipv4_dst; + struct in6_addr ipv6_dst; + }; +}; + +#define NFP_FL_PRE_TUN_IPV6 BIT(0) + +struct nfp_fl_set_tun { + struct nfp_fl_act_head head; + __be16 reserved; + __be64 tun_id __packed; + __be32 tun_type_index; + __be16 tun_flags; + u8 ttl; + u8 tos; + __be16 outer_vlan_tpid; + __be16 outer_vlan_tci; + u8 tun_len; + u8 res2; + __be16 tun_proto; +}; + +struct nfp_fl_push_geneve { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 class; + u8 type; + u8 length; + u8 opt_data[]; +}; + +struct nfp_fl_push_mpls { + struct nfp_fl_act_head head; + __be16 ethtype; + __be32 lse; +}; + +struct nfp_fl_pop_mpls { + struct nfp_fl_act_head head; + __be16 ethtype; +}; + +struct nfp_fl_set_mpls { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 lse_mask; + __be32 lse; +}; + +struct nfp_fl_meter { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 meter_id; +}; + +/* Metadata with L2 (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | key_type | mask_id | PCP |p| vlan outermost VID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * ^ ^ + * NOTE: | TCI | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_meta_tci { + u8 nfp_flow_key_layer; + u8 mask_id; + __be16 tci; +}; + +/* Extended metadata for additional key_layers (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | nfp_flow_key_layer2 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ext_meta { + __be32 nfp_flow_key_layer2; +}; + +/* Port details (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | port_ingress | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_in_port { + __be32 in_port; +}; + +/* L2 details (4W/16B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_dst, 47 - 32 | mac_addr_src, 15 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_src, 47 - 16 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mpls outermost label | TC |B| reserved |q| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_mac_mpls { + u8 mac_dst[6]; + u8 mac_src[6]; + __be32 mpls_lse; +}; + +/* VLAN details (2W/8B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | outer_tpid | outer_tci | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | inner_tpid | inner_tci | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_vlan { + __be16 outer_tpid; + __be16 outer_tci; + __be16 inner_tpid; + __be16 inner_tci; +}; + +/* L4 ports (for UDP, TCP, SCTP) (1W/4B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | port_src | port_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_tp_ports { + __be16 port_src; + __be16 port_dst; +}; + +struct nfp_flower_ip_ext { + u8 tos; + u8 proto; + u8 ttl; + u8 flags; +}; + +/* L3 IPv4 details (3W/12B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DSCP |ECN| protocol | ttl | flags | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv4 { + struct nfp_flower_ip_ext ip_ext; + __be32 ipv4_src; + __be32 ipv4_dst; +}; + +/* L3 IPv6 details (10W/40B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DSCP |ECN| protocol | ttl | flags | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_exthdr | res | ipv6_flow_label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6 { + struct nfp_flower_ip_ext ip_ext; + __be32 ipv6_flow_label_exthdr; + struct in6_addr ipv6_src; + struct in6_addr ipv6_dst; +}; + +struct nfp_flower_tun_ipv4 { + __be32 src; + __be32 dst; +}; + +struct nfp_flower_tun_ipv6 { + struct in6_addr src; + struct in6_addr dst; +}; + +struct nfp_flower_tun_ip_ext { + u8 tos; + u8 ttl; +}; + +/* Flow Frame IPv4 UDP TUNNEL --> Tunnel details (4W/16B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv4_udp_tun { + struct nfp_flower_tun_ipv4 ipv4; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + +/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_udp_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + +/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +struct nfp_flower_ipv4_gre_tun { + struct nfp_flower_tun_ipv4 ipv4; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + +/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_gre_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + +struct nfp_flower_geneve_options { + u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; + +#define NFP_FL_TUN_VNI_OFFSET 8 + +/* The base header for a control message packet. + * Defines an 8-bit version, and an 8-bit type, padded + * to a 32-bit word. Rest of the packet is type-specific. + */ +struct nfp_flower_cmsg_hdr { + __be16 pad; + u8 type; + u8 version; +}; + +#define NFP_FLOWER_CMSG_HLEN sizeof(struct nfp_flower_cmsg_hdr) +#define NFP_FLOWER_CMSG_VER1 1 + +/* Types defined for port related control messages */ +enum nfp_flower_cmsg_type_port { + NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0, + NFP_FLOWER_CMSG_TYPE_FLOW_MOD = 1, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, + NFP_FLOWER_CMSG_TYPE_LAG_CONFIG = 4, + NFP_FLOWER_CMSG_TYPE_PORT_REIFY = 6, + NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, + NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_MERGE_HINT = 9, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, + NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, + NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, + NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, + NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, + NFP_FLOWER_CMSG_TYPE_QOS_MOD = 18, + NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19, + NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20, + NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21, + NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25, + NFP_FLOWER_CMSG_TYPE_MAX = 32, +}; + +/* NFP_FLOWER_CMSG_TYPE_MAC_REPR */ +struct nfp_flower_cmsg_mac_repr { + u8 reserved[3]; + u8 num_ports; + struct { + u8 idx; + u8 info; + u8 nbi_port; + u8 phys_port; + } ports[]; +}; + +#define NFP_FLOWER_CMSG_MAC_REPR_NBI GENMASK(1, 0) + +/* NFP_FLOWER_CMSG_TYPE_PORT_MOD */ +struct nfp_flower_cmsg_portmod { + __be32 portnum; + u8 reserved; + u8 info; + __be16 mtu; +}; + +#define NFP_FLOWER_CMSG_PORTMOD_INFO_LINK BIT(0) +#define NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY BIT(1) + +/* NFP_FLOWER_CMSG_TYPE_PORT_REIFY */ +struct nfp_flower_cmsg_portreify { + __be32 portnum; + u16 reserved; + __be16 info; +}; + +#define NFP_FLOWER_CMSG_PORTREIFY_INFO_EXIST BIT(0) + +/* NFP_FLOWER_CMSG_TYPE_FLOW_MERGE_HINT */ +struct nfp_flower_cmsg_merge_hint { + u8 reserved[3]; + u8 count; + struct { + __be32 host_ctx; + __be64 host_cookie; + } __packed flow[]; +}; + +enum nfp_flower_cmsg_port_type { + NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0, + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1, + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3, +}; + +enum nfp_flower_cmsg_port_vnic_type { + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF = 0x0, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF = 0x1, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_CTRL = 0x2, +}; + +#define NFP_FLOWER_CMSG_PORT_TYPE GENMASK(31, 28) +#define NFP_FLOWER_CMSG_PORT_SYS_ID GENMASK(27, 24) +#define NFP_FLOWER_CMSG_PORT_NFP_ID GENMASK(23, 22) +#define NFP_FLOWER_CMSG_PORT_PCI GENMASK(15, 14) +#define NFP_FLOWER_CMSG_PORT_VNIC_TYPE GENMASK(13, 12) +#define NFP_FLOWER_CMSG_PORT_VNIC GENMASK(11, 6) +#define NFP_FLOWER_CMSG_PORT_PCIE_Q GENMASK(5, 0) +#define NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM GENMASK(7, 0) + +static inline u32 nfp_flower_internal_port_get_port_id(u8 internal_port) +{ + return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, internal_port) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT); +} + +static inline u32 nfp_flower_cmsg_phys_port(u8 phys_port) +{ + return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, phys_port) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE, + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT); +} + +static inline u32 +nfp_flower_cmsg_pcie_port(u8 nfp_pcie, enum nfp_flower_cmsg_port_vnic_type type, + u8 vnic, u8 q) +{ + return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PCI, nfp_pcie) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_VNIC_TYPE, type) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_VNIC, vnic) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_PCIE_Q, q) | + FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE, + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT); +} + +static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb) +{ + return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; +} + +static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb) +{ + return skb->len - NFP_FLOWER_CMSG_HLEN; +} + +static inline bool +nfp_fl_netdev_is_tunnel_type(struct net_device *netdev, + enum nfp_flower_tun_type tun_type) +{ + if (netif_is_vxlan(netdev)) + return tun_type == NFP_FL_TUNNEL_VXLAN; + if (netif_is_gretap(netdev) || netif_is_ip6gretap(netdev)) + return tun_type == NFP_FL_TUNNEL_GRE; + if (netif_is_geneve(netdev)) + return tun_type == NFP_FL_TUNNEL_GENEVE; + + return false; +} + +static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev) +{ + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + if (netif_is_vxlan(netdev)) + return true; + if (netif_is_geneve(netdev)) + return true; + if (netif_is_gretap(netdev)) + return true; + if (netif_is_ip6gretap(netdev)) + return true; + + return false; +} + +struct sk_buff * +nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports); +void +nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx, + unsigned int nbi, unsigned int nbi_port, + unsigned int phys_port); +int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok, + unsigned int mtu, bool mtu_only); +int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists); +void nfp_flower_cmsg_process_rx(struct work_struct *work); +void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb); +struct sk_buff * +nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, + enum nfp_flower_cmsg_type_port type, gfp_t flag); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c new file mode 100644 index 0000000000..2967bab725 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -0,0 +1,2260 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2021 Corigine, Inc. */ + +#include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_ct.h> + +#include "conntrack.h" +#include "../nfp_port.h" + +const struct rhashtable_params nfp_tc_ct_merge_params = { + .head_offset = offsetof(struct nfp_fl_ct_tc_merge, + hash_node), + .key_len = sizeof(unsigned long) * 2, + .key_offset = offsetof(struct nfp_fl_ct_tc_merge, cookie), + .automatic_shrinking = true, +}; + +const struct rhashtable_params nfp_nft_ct_merge_params = { + .head_offset = offsetof(struct nfp_fl_nft_tc_merge, + hash_node), + .key_len = sizeof(unsigned long) * 3, + .key_offset = offsetof(struct nfp_fl_nft_tc_merge, cookie), + .automatic_shrinking = true, +}; + +static struct flow_action_entry *get_flow_act(struct flow_rule *rule, + enum flow_action_id act_id); + +/** + * get_hashentry() - Wrapper around hashtable lookup. + * @ht: hashtable where entry could be found + * @key: key to lookup + * @params: hashtable params + * @size: size of entry to allocate if not in table + * + * Returns an entry from a hashtable. If entry does not exist + * yet allocate the memory for it and return the new entry. + */ +static void *get_hashentry(struct rhashtable *ht, void *key, + const struct rhashtable_params params, size_t size) +{ + void *result; + + result = rhashtable_lookup_fast(ht, key, params); + + if (result) + return result; + + result = kzalloc(size, GFP_KERNEL); + if (!result) + return ERR_PTR(-ENOMEM); + + return result; +} + +bool is_pre_ct_flow(struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_action_entry *act; + struct flow_match_ct ct; + int i; + + if (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + if (ct.key->ct_state) + return false; + } + + if (flow->common.chain_index) + return false; + + flow_action_for_each(i, act, &flow->rule->action) { + if (act->id == FLOW_ACTION_CT) { + /* The pre_ct rule only have the ct or ct nat action, cannot + * contains other ct action e.g ct commit and so on. + */ + if ((!act->ct.action || act->ct.action == TCA_CT_ACT_NAT)) + return true; + else + return false; + } + } + + return false; +} + +bool is_post_ct_flow(struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_action_entry *act; + bool exist_ct_clear = false; + struct flow_match_ct ct; + int i; + + if (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) + return true; + } else { + /* post ct entry cannot contains any ct action except ct_clear. */ + flow_action_for_each(i, act, &flow->rule->action) { + if (act->id == FLOW_ACTION_CT) { + /* ignore ct clear action. */ + if (act->ct.action == TCA_CT_ACT_CLEAR) { + exist_ct_clear = true; + continue; + } + + return false; + } + } + /* when do nat with ct, the post ct entry ignore the ct status, + * will match the nat field(sip/dip) instead. In this situation, + * the flow chain index is not zero and contains ct clear action. + */ + if (flow->common.chain_index && exist_ct_clear) + return true; + } + + return false; +} + +/** + * get_mangled_key() - Mangle the key if mangle act exists + * @rule: rule that carries the actions + * @buf: pointer to key to be mangled + * @offset: used to adjust mangled offset in L2/L3/L4 header + * @key_sz: key size + * @htype: mangling type + * + * Returns buf where the mangled key stores. + */ +static void *get_mangled_key(struct flow_rule *rule, void *buf, + u32 offset, size_t key_sz, + enum flow_action_mangle_base htype) +{ + struct flow_action_entry *act; + u32 *val = (u32 *)buf; + u32 off, msk, key; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_MANGLE && + act->mangle.htype == htype) { + off = act->mangle.offset - offset; + msk = act->mangle.mask; + key = act->mangle.val; + + /* Mangling is supposed to be u32 aligned */ + if (off % 4 || off >= key_sz) + continue; + + val[off >> 2] &= msk; + val[off >> 2] |= key; + } + } + + return buf; +} + +/* Only tos and ttl are involved in flow_match_ip structure, which + * doesn't conform to the layout of ip/ipv6 header definition. So + * they need particular process here: fill them into the ip/ipv6 + * header, so that mangling actions can work directly. + */ +#define NFP_IPV4_TOS_MASK GENMASK(23, 16) +#define NFP_IPV4_TTL_MASK GENMASK(31, 24) +#define NFP_IPV6_TCLASS_MASK GENMASK(27, 20) +#define NFP_IPV6_HLIMIT_MASK GENMASK(7, 0) +static void *get_mangled_tos_ttl(struct flow_rule *rule, void *buf, + bool is_v6) +{ + struct flow_match_ip match; + /* IPv4's ttl field is in third dword. */ + __be32 ip_hdr[3]; + u32 tmp, hdr_len; + + flow_rule_match_ip(rule, &match); + + if (is_v6) { + tmp = FIELD_PREP(NFP_IPV6_TCLASS_MASK, match.key->tos); + ip_hdr[0] = cpu_to_be32(tmp); + tmp = FIELD_PREP(NFP_IPV6_HLIMIT_MASK, match.key->ttl); + ip_hdr[1] = cpu_to_be32(tmp); + hdr_len = 2 * sizeof(__be32); + } else { + tmp = FIELD_PREP(NFP_IPV4_TOS_MASK, match.key->tos); + ip_hdr[0] = cpu_to_be32(tmp); + tmp = FIELD_PREP(NFP_IPV4_TTL_MASK, match.key->ttl); + ip_hdr[2] = cpu_to_be32(tmp); + hdr_len = 3 * sizeof(__be32); + } + + get_mangled_key(rule, ip_hdr, 0, hdr_len, + is_v6 ? FLOW_ACT_MANGLE_HDR_TYPE_IP6 : + FLOW_ACT_MANGLE_HDR_TYPE_IP4); + + match.key = buf; + + if (is_v6) { + tmp = be32_to_cpu(ip_hdr[0]); + match.key->tos = FIELD_GET(NFP_IPV6_TCLASS_MASK, tmp); + tmp = be32_to_cpu(ip_hdr[1]); + match.key->ttl = FIELD_GET(NFP_IPV6_HLIMIT_MASK, tmp); + } else { + tmp = be32_to_cpu(ip_hdr[0]); + match.key->tos = FIELD_GET(NFP_IPV4_TOS_MASK, tmp); + tmp = be32_to_cpu(ip_hdr[2]); + match.key->ttl = FIELD_GET(NFP_IPV4_TTL_MASK, tmp); + } + + return buf; +} + +/* Note entry1 and entry2 are not swappable. only skip ip and + * tport merge check for pre_ct and post_ct when pre_ct do nat. + */ +static bool nfp_ct_merge_check_cannot_skip(struct nfp_fl_ct_flow_entry *entry1, + struct nfp_fl_ct_flow_entry *entry2) +{ + /* only pre_ct have NFP_FL_ACTION_DO_NAT flag. */ + if ((entry1->flags & NFP_FL_ACTION_DO_NAT) && + entry2->type == CT_TYPE_POST_CT) + return false; + + return true; +} + +/* Note entry1 and entry2 are not swappable, entry1 should be + * the former flow whose mangle action need be taken into account + * if existed, and entry2 should be the latter flow whose action + * we don't care. + */ +static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1, + struct nfp_fl_ct_flow_entry *entry2) +{ + unsigned long long ovlp_keys; + bool out, is_v6 = false; + u8 ip_proto = 0; + ovlp_keys = entry1->rule->match.dissector->used_keys & + entry2->rule->match.dissector->used_keys; + /* Temporary buffer for mangling keys, 64 is enough to cover max + * struct size of key in various fields that may be mangled. + * Supported fields to mangle: + * mac_src/mac_dst(struct flow_match_eth_addrs, 12B) + * nw_tos/nw_ttl(struct flow_match_ip, 2B) + * nw_src/nw_dst(struct flow_match_ipv4/6_addrs, 32B) + * tp_src/tp_dst(struct flow_match_ports, 4B) + */ + char buf[64]; + + if (entry1->netdev && entry2->netdev && + entry1->netdev != entry2->netdev) + return -EINVAL; + + /* Check the overlapped fields one by one, the unmasked part + * should not conflict with each other. + */ + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match1, match2; + + flow_rule_match_control(entry1->rule, &match1); + flow_rule_match_control(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match1, match2; + + flow_rule_match_basic(entry1->rule, &match1); + flow_rule_match_basic(entry2->rule, &match2); + + /* n_proto field is a must in ct-related flows, + * it should be either ipv4 or ipv6. + */ + is_v6 = match1.key->n_proto == htons(ETH_P_IPV6); + /* ip_proto field is a must when port field is cared */ + ip_proto = match1.key->ip_proto; + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + /* if pre ct entry do nat, the nat ip exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this ip merge check here. + */ + if ((ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + struct flow_match_ipv4_addrs match1, match2; + + flow_rule_match_ipv4_addrs(entry1->rule, &match1); + flow_rule_match_ipv4_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, + offsetof(struct iphdr, saddr), + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_IP4); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + /* if pre ct entry do nat, the nat ip exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this ip merge check here. + */ + if ((ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + struct flow_match_ipv6_addrs match1, match2; + + flow_rule_match_ipv6_addrs(entry1->rule, &match1); + flow_rule_match_ipv6_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, + offsetof(struct ipv6hdr, saddr), + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_IP6); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + /* if pre ct entry do nat, the nat tport exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this tport merge check here. + */ + if ((ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_PORTS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + enum flow_action_mangle_base htype = FLOW_ACT_MANGLE_UNSPEC; + struct flow_match_ports match1, match2; + + flow_rule_match_ports(entry1->rule, &match1); + flow_rule_match_ports(entry2->rule, &match2); + + if (ip_proto == IPPROTO_UDP) + htype = FLOW_ACT_MANGLE_HDR_TYPE_UDP; + else if (ip_proto == IPPROTO_TCP) + htype = FLOW_ACT_MANGLE_HDR_TYPE_TCP; + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, 0, + sizeof(*match1.key), htype); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match1, match2; + + flow_rule_match_eth_addrs(entry1->rule, &match1); + flow_rule_match_eth_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, 0, + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_ETH); + + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match1, match2; + + flow_rule_match_vlan(entry1->rule, &match1); + flow_rule_match_vlan(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_match_mpls match1, match2; + + flow_rule_match_mpls(entry1->rule, &match1); + flow_rule_match_mpls(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match1, match2; + + flow_rule_match_tcp(entry1->rule, &match1); + flow_rule_match_tcp(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match1, match2; + + flow_rule_match_ip(entry1->rule, &match1); + flow_rule_match_ip(entry2->rule, &match2); + + match1.key = get_mangled_tos_ttl(entry1->rule, buf, is_v6); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match1, match2; + + flow_rule_match_enc_keyid(entry1->rule, &match1); + flow_rule_match_enc_keyid(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match1, match2; + + flow_rule_match_enc_ipv4_addrs(entry1->rule, &match1); + flow_rule_match_enc_ipv4_addrs(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match1, match2; + + flow_rule_match_enc_ipv6_addrs(entry1->rule, &match1); + flow_rule_match_enc_ipv6_addrs(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match1, match2; + + flow_rule_match_enc_control(entry1->rule, &match1); + flow_rule_match_enc_control(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match1, match2; + + flow_rule_match_enc_ip(entry1->rule, &match1); + flow_rule_match_enc_ip(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS)) { + struct flow_match_enc_opts match1, match2; + + flow_rule_match_enc_opts(entry1->rule, &match1); + flow_rule_match_enc_opts(entry2->rule, &match2); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); + if (out) + goto check_failed; + } + + return 0; + +check_failed: + return -EINVAL; +} + +static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in, + struct flow_rule *rule) +{ + struct flow_match_vlan match; + + if (unlikely(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN))) + return -EOPNOTSUPP; + + /* post_ct does not match VLAN KEY, can be merged. */ + if (likely(!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN))) + return 0; + + switch (a_in->id) { + /* pre_ct has pop vlan, post_ct cannot match VLAN KEY, cannot be merged. */ + case FLOW_ACTION_VLAN_POP: + return -EOPNOTSUPP; + + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: + flow_rule_match_vlan(rule, &match); + /* different vlan id, cannot be merged. */ + if ((match.key->vlan_id & match.mask->vlan_id) ^ + (a_in->vlan.vid & match.mask->vlan_id)) + return -EOPNOTSUPP; + + /* different tpid, cannot be merged. */ + if ((match.key->vlan_tpid & match.mask->vlan_tpid) ^ + (a_in->vlan.proto & match.mask->vlan_tpid)) + return -EOPNOTSUPP; + + /* different priority, cannot be merged. */ + if ((match.key->vlan_priority & match.mask->vlan_priority) ^ + (a_in->vlan.prio & match.mask->vlan_priority)) + return -EOPNOTSUPP; + + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +/* Extra check for multiple ct-zones merge + * currently surpport nft entries merge check in different zones + */ +static int nfp_ct_merge_extra_check(struct nfp_fl_ct_flow_entry *nft_entry, + struct nfp_fl_ct_tc_merge *tc_m_entry) +{ + struct nfp_fl_nft_tc_merge *prev_nft_m_entry; + struct nfp_fl_ct_flow_entry *pre_ct_entry; + + pre_ct_entry = tc_m_entry->pre_ct_parent; + prev_nft_m_entry = pre_ct_entry->prev_m_entries[pre_ct_entry->num_prev_m_entries - 1]; + + return nfp_ct_merge_check(prev_nft_m_entry->nft_parent, nft_entry); +} + +static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry, + struct nfp_fl_ct_flow_entry *post_ct_entry, + struct nfp_fl_ct_flow_entry *nft_entry) +{ + struct flow_action_entry *act; + int i, err; + + /* Check for pre_ct->action conflicts */ + flow_action_for_each(i, act, &pre_ct_entry->rule->action) { + switch (act->id) { + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_MANGLE: + err = nfp_ct_check_vlan_merge(act, post_ct_entry->rule); + if (err) + return err; + break; + case FLOW_ACTION_MPLS_PUSH: + case FLOW_ACTION_MPLS_POP: + case FLOW_ACTION_MPLS_MANGLE: + return -EOPNOTSUPP; + default: + break; + } + } + + /* Check for nft->action conflicts */ + flow_action_for_each(i, act, &nft_entry->rule->action) { + switch (act->id) { + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_MANGLE: + case FLOW_ACTION_MPLS_PUSH: + case FLOW_ACTION_MPLS_POP: + case FLOW_ACTION_MPLS_MANGLE: + return -EOPNOTSUPP; + default: + break; + } + } + return 0; +} + +static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry, + struct nfp_fl_ct_flow_entry *nft_entry) +{ + struct flow_dissector *dissector = post_ct_entry->rule->match.dissector; + struct flow_action_entry *ct_met; + struct flow_match_ct ct; + int i; + + ct_met = get_flow_act(nft_entry->rule, FLOW_ACTION_CT_METADATA); + if (ct_met && (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT))) { + u32 *act_lbl; + + act_lbl = ct_met->ct_metadata.labels; + flow_rule_match_ct(post_ct_entry->rule, &ct); + for (i = 0; i < 4; i++) { + if ((ct.key->ct_labels[i] & ct.mask->ct_labels[i]) ^ + (act_lbl[i] & ct.mask->ct_labels[i])) + return -EINVAL; + } + + if ((ct.key->ct_mark & ct.mask->ct_mark) ^ + (ct_met->ct_metadata.mark & ct.mask->ct_mark)) + return -EINVAL; + + return 0; + } else { + /* post_ct with ct clear action will not match the + * ct status when nft is nat entry. + */ + if (nft_entry->flags & NFP_FL_ACTION_DO_MANGLE) + return 0; + } + + return -EINVAL; +} + +static int +nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) +{ + int key_size; + + /* This field must always be present */ + key_size = sizeof(struct nfp_flower_meta_tci); + map[FLOW_PAY_META_TCI] = 0; + + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) { + map[FLOW_PAY_EXT_META] = key_size; + key_size += sizeof(struct nfp_flower_ext_meta); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) { + map[FLOW_PAY_INPORT] = key_size; + key_size += sizeof(struct nfp_flower_in_port); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) { + map[FLOW_PAY_MAC_MPLS] = key_size; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) { + map[FLOW_PAY_L4] = key_size; + key_size += sizeof(struct nfp_flower_tp_ports); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) { + map[FLOW_PAY_IPV4] = key_size; + key_size += sizeof(struct nfp_flower_ipv4); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) { + map[FLOW_PAY_IPV6] = key_size; + key_size += sizeof(struct nfp_flower_ipv6); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + map[FLOW_PAY_QINQ] = key_size; + key_size += sizeof(struct nfp_flower_vlan); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + map[FLOW_PAY_GRE] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_gre_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) || + (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) { + map[FLOW_PAY_UDP_TUN] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + map[FLOW_PAY_GENEVE_OPT] = key_size; + key_size += sizeof(struct nfp_flower_geneve_options); + } + + return key_size; +} + +/* get the csum flag according the ip proto and mangle action. */ +static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u32 *csum) +{ + if (a_in->id != FLOW_ACTION_MANGLE) + return; + + switch (a_in->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + *csum |= TCA_CSUM_UPDATE_FLAG_IPV4HDR; + if (ip_proto == IPPROTO_TCP) + *csum |= TCA_CSUM_UPDATE_FLAG_TCP; + else if (ip_proto == IPPROTO_UDP) + *csum |= TCA_CSUM_UPDATE_FLAG_UDP; + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + *csum |= TCA_CSUM_UPDATE_FLAG_TCP; + break; + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + *csum |= TCA_CSUM_UPDATE_FLAG_UDP; + break; + default: + break; + } +} + +static int nfp_fl_merge_actions_offload(struct flow_rule **rules, + struct nfp_flower_priv *priv, + struct net_device *netdev, + struct nfp_fl_payload *flow_pay, + int num_rules) +{ + enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE; + struct flow_action_entry *a_in; + int i, j, id, num_actions = 0; + struct flow_rule *a_rule; + int err = 0, offset = 0; + + for (i = 0; i < num_rules; i++) + num_actions += rules[i]->action.num_entries; + + /* Add one action to make sure there is enough room to add an checksum action + * when do nat. + */ + a_rule = flow_rule_alloc(num_actions + (num_rules / 2)); + if (!a_rule) + return -ENOMEM; + + /* post_ct entry have one action at least. */ + if (rules[num_rules - 1]->action.num_entries != 0) + tmp_stats = rules[num_rules - 1]->action.entries[0].hw_stats; + + /* Actions need a BASIC dissector. */ + a_rule->match = rules[0]->match; + + /* Copy actions */ + for (j = 0; j < num_rules; j++) { + u32 csum_updated = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + /* ip_proto is the only field that is needed in later compile_action, + * needed to set the correct checksum flags. It doesn't really matter + * which input rule's ip_proto field we take as the earlier merge checks + * would have made sure that they don't conflict. We do not know which + * of the subflows would have the ip_proto filled in, so we need to iterate + * through the subflows and assign the proper subflow to a_rule + */ + flow_rule_match_basic(rules[j], &match); + if (match.mask->ip_proto) { + a_rule->match = rules[j]->match; + ip_proto = match.key->ip_proto; + } + } + + for (i = 0; i < rules[j]->action.num_entries; i++) { + a_in = &rules[j]->action.entries[i]; + id = a_in->id; + + /* Ignore CT related actions as these would already have + * been taken care of by previous checks, and we do not send + * any CT actions to the firmware. + */ + switch (id) { + case FLOW_ACTION_CT: + case FLOW_ACTION_GOTO: + case FLOW_ACTION_CT_METADATA: + continue; + default: + /* nft entry is generated by tc ct, which mangle action do not care + * the stats, inherit the post entry stats to meet the + * flow_action_hw_stats_check. + * nft entry flow rules are at odd array index. + */ + if (j & 0x01) { + if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE) + a_in->hw_stats = tmp_stats; + nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated); + } + memcpy(&a_rule->action.entries[offset++], + a_in, sizeof(struct flow_action_entry)); + break; + } + } + /* nft entry have mangle action, but do not have checksum action when do NAT, + * hardware will automatically fix IPv4 and TCP/UDP checksum. so add an csum action + * to meet csum action check. + */ + if (csum_updated) { + struct flow_action_entry *csum_action; + + csum_action = &a_rule->action.entries[offset++]; + csum_action->id = FLOW_ACTION_CSUM; + csum_action->csum_flags = csum_updated; + csum_action->hw_stats = tmp_stats; + } + } + + /* Some actions would have been ignored, so update the num_entries field */ + a_rule->action.num_entries = offset; + err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL); + kfree(a_rule); + + return err; +} + +static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) +{ + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_fl_ct_zone_entry *zt = m_entry->zt; + struct flow_rule *rules[NFP_MAX_ENTRY_RULES]; + struct nfp_fl_ct_flow_entry *pre_ct_entry; + struct nfp_fl_key_ls key_layer, tmp_layer; + struct nfp_flower_priv *priv = zt->priv; + u16 key_map[_FLOW_PAY_LAYERS_MAX]; + struct nfp_fl_payload *flow_pay; + u8 *key, *msk, *kdata, *mdata; + struct nfp_port *port = NULL; + int num_rules, err, i, j = 0; + struct net_device *netdev; + bool qinq_sup; + u32 port_id; + u16 offset; + + netdev = m_entry->netdev; + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + + pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent; + num_rules = pre_ct_entry->num_prev_m_entries * 2 + _CT_TYPE_MAX; + + for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++) { + rules[j++] = pre_ct_entry->prev_m_entries[i]->tc_m_parent->pre_ct_parent->rule; + rules[j++] = pre_ct_entry->prev_m_entries[i]->nft_parent->rule; + } + + rules[j++] = m_entry->tc_m_parent->pre_ct_parent->rule; + rules[j++] = m_entry->nft_parent->rule; + rules[j++] = m_entry->tc_m_parent->post_ct_parent->rule; + + memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls)); + memset(&key_map, 0, sizeof(key_map)); + + /* Calculate the resultant key layer and size for offload */ + for (i = 0; i < num_rules; i++) { + err = nfp_flower_calculate_key_layers(priv->app, + m_entry->netdev, + &tmp_layer, rules[i], + &tun_type, NULL); + if (err) + return err; + + key_layer.key_layer |= tmp_layer.key_layer; + key_layer.key_layer_two |= tmp_layer.key_layer_two; + } + key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map); + + flow_pay = nfp_flower_allocate_new(&key_layer); + if (!flow_pay) + return -ENOMEM; + + memset(flow_pay->unmasked_data, 0, key_layer.key_size); + memset(flow_pay->mask_data, 0, key_layer.key_size); + + kdata = flow_pay->unmasked_data; + mdata = flow_pay->mask_data; + + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + key_layer.key_layer); + + if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) { + offset = key_map[FLOW_PAY_EXT_META]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key, + key_layer.key_layer_two); + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk, + key_layer.key_layer_two); + } + + /* Using in_port from the -trk rule. The tc merge checks should already + * be checking that the ingress netdevs are the same + */ + port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev); + offset = key_map[FLOW_PAY_INPORT]; + key = kdata + offset; + msk = mdata + offset; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)key, + port_id, false, tun_type, NULL); + if (err) + goto ct_offload_err; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + port_id, true, tun_type, NULL); + if (err) + goto ct_offload_err; + + /* This following part works on the assumption that previous checks has + * already filtered out flows that has different values for the different + * layers. Here we iterate through all three rules and merge their respective + * masked value(cared bits), basic method is: + * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask) + * final_mask = r1_mask | r2_mask | r3_mask + * If none of the rules contains a match that is also fine, that simply means + * that the layer is not present. + */ + if (!qinq_sup) { + for (i = 0; i < num_rules; i++) { + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) { + offset = key_map[FLOW_PAY_MAC_MPLS]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i]); + err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i], NULL); + if (err) + goto ct_offload_err; + } + } + + if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key, + (struct nfp_flower_ipv4 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV6]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key, + (struct nfp_flower_ipv6 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) { + offset = key_map[FLOW_PAY_L4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key, + (struct nfp_flower_tp_ports *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) { + offset = key_map[FLOW_PAY_QINQ]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, + (struct nfp_flower_vlan *)msk, + rules[i]); + } + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + offset = key_map[FLOW_PAY_GRE]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_ipv6_gre_tun((void *)key, + (void *)msk, rules[i]); + } + gre_match = (struct nfp_flower_ipv6_gre_tun *)key; + dst = &gre_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) { + err = -ENOMEM; + goto ct_offload_err; + } + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_ipv4_gre_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + } + + if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN || + key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { + offset = key_map[FLOW_PAY_UDP_TUN]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_ipv6_udp_tun((void *)key, + (void *)msk, rules[i]); + } + udp_match = (struct nfp_flower_ipv6_udp_tun *)key; + dst = &udp_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) { + err = -ENOMEM; + goto ct_offload_err; + } + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < num_rules; i++) { + nfp_flower_compile_ipv4_udp_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + offset = key_map[FLOW_PAY_GENEVE_OPT]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < num_rules; i++) + nfp_flower_compile_geneve_opt(key, msk, rules[i]); + } + } + + /* Merge actions into flow_pay */ + err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay, num_rules); + if (err) + goto ct_offload_err; + + /* Use the pointer address as the cookie, but set the last bit to 1. + * This is to avoid the 'is_merge_flow' check from detecting this as + * an already merged flow. This works since address alignment means + * that the last bit for pointer addresses will be 0. + */ + flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1; + err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie, + flow_pay, netdev, NULL); + if (err) + goto ct_offload_err; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, + nfp_flower_table_params); + if (err) + goto ct_release_offload_meta_err; + + err = nfp_flower_xmit_flow(priv->app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + if (err) + goto ct_remove_rhash_err; + + m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie; + m_entry->flow_pay = flow_pay; + + if (port) + port->tc_offload_cnt++; + + return err; + +ct_remove_rhash_err: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +ct_release_offload_meta_err: + nfp_modify_flow_metadata(priv->app, flow_pay); +ct_offload_err: + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr); + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6); + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); + return err; +} + +static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie, + struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *flow_pay; + struct nfp_port *port = NULL; + int err = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + flow_pay = nfp_flower_search_fl_table(app, cookie, netdev); + if (!flow_pay) + return -ENOENT; + + err = nfp_modify_flow_metadata(app, flow_pay); + if (err) + goto err_free_merge_flow; + + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr); + + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); + + if (!flow_pay->in_hw) { + err = 0; + goto err_free_merge_flow; + } + + err = nfp_flower_xmit_flow(app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + +err_free_merge_flow: + nfp_flower_del_linked_merge_flows(app, flow_pay); + if (port) + port->tc_offload_cnt--; + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); + kfree_rcu(flow_pay, rcu); + return err; +} + +static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, + struct nfp_fl_ct_flow_entry *nft_entry, + struct nfp_fl_ct_tc_merge *tc_m_entry) +{ + struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry; + struct nfp_fl_nft_tc_merge *nft_m_entry; + unsigned long new_cookie[3]; + int err; + + pre_ct_entry = tc_m_entry->pre_ct_parent; + post_ct_entry = tc_m_entry->post_ct_parent; + + err = nfp_ct_merge_act_check(pre_ct_entry, post_ct_entry, nft_entry); + if (err) + return err; + + /* Check that the two tc flows are also compatible with + * the nft entry. No need to check the pre_ct and post_ct + * entries as that was already done during pre_merge. + * The nft entry does not have a chain populated, so + * skip this check. + */ + err = nfp_ct_merge_check(pre_ct_entry, nft_entry); + if (err) + return err; + err = nfp_ct_merge_check(nft_entry, post_ct_entry); + if (err) + return err; + err = nfp_ct_check_meta(post_ct_entry, nft_entry); + if (err) + return err; + + if (pre_ct_entry->num_prev_m_entries > 0) { + err = nfp_ct_merge_extra_check(nft_entry, tc_m_entry); + if (err) + return err; + } + + /* Combine tc_merge and nft cookies for this cookie. */ + new_cookie[0] = tc_m_entry->cookie[0]; + new_cookie[1] = tc_m_entry->cookie[1]; + new_cookie[2] = nft_entry->cookie; + nft_m_entry = get_hashentry(&zt->nft_merge_tb, + &new_cookie, + nfp_nft_ct_merge_params, + sizeof(*nft_m_entry)); + + if (IS_ERR(nft_m_entry)) + return PTR_ERR(nft_m_entry); + + /* nft_m_entry already present, not merging again */ + if (!memcmp(&new_cookie, nft_m_entry->cookie, sizeof(new_cookie))) + return 0; + + memcpy(&nft_m_entry->cookie, &new_cookie, sizeof(new_cookie)); + nft_m_entry->zt = zt; + nft_m_entry->tc_m_parent = tc_m_entry; + nft_m_entry->nft_parent = nft_entry; + nft_m_entry->tc_flower_cookie = 0; + /* Copy the netdev from the pre_ct entry. When the tc_m_entry was created + * it only combined them if the netdevs were the same, so can use any of them. + */ + nft_m_entry->netdev = pre_ct_entry->netdev; + + /* Add this entry to the tc_m_list and nft_flow lists */ + list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children); + list_add(&nft_m_entry->nft_flow_list, &nft_entry->children); + + err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node, + nfp_nft_ct_merge_params); + if (err) + goto err_nft_ct_merge_insert; + + zt->nft_merge_count++; + + if (post_ct_entry->goto_chain_index > 0) + return nfp_fl_create_new_pre_ct(nft_m_entry); + + /* Generate offload structure and send to nfp */ + err = nfp_fl_ct_add_offload(nft_m_entry); + if (err) + goto err_nft_ct_offload; + + return err; + +err_nft_ct_offload: + nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie, + nft_m_entry->netdev); +err_nft_ct_merge_insert: + list_del(&nft_m_entry->tc_merge_list); + list_del(&nft_m_entry->nft_flow_list); + kfree(nft_m_entry); + return err; +} + +static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, + struct nfp_fl_ct_flow_entry *ct_entry1, + struct nfp_fl_ct_flow_entry *ct_entry2) +{ + struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry; + struct nfp_fl_ct_flow_entry *nft_entry, *nft_tmp; + struct nfp_fl_ct_tc_merge *m_entry; + unsigned long new_cookie[2]; + int err; + + if (ct_entry1->type == CT_TYPE_PRE_CT) { + pre_ct_entry = ct_entry1; + post_ct_entry = ct_entry2; + } else { + post_ct_entry = ct_entry1; + pre_ct_entry = ct_entry2; + } + + /* Checks that the chain_index of the filter matches the + * chain_index of the GOTO action. + */ + if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) + return -EINVAL; + + err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); + if (err) + return err; + + new_cookie[0] = pre_ct_entry->cookie; + new_cookie[1] = post_ct_entry->cookie; + m_entry = get_hashentry(&zt->tc_merge_tb, &new_cookie, + nfp_tc_ct_merge_params, sizeof(*m_entry)); + if (IS_ERR(m_entry)) + return PTR_ERR(m_entry); + + /* m_entry already present, not merging again */ + if (!memcmp(&new_cookie, m_entry->cookie, sizeof(new_cookie))) + return 0; + + memcpy(&m_entry->cookie, &new_cookie, sizeof(new_cookie)); + m_entry->zt = zt; + m_entry->post_ct_parent = post_ct_entry; + m_entry->pre_ct_parent = pre_ct_entry; + + /* Add this entry to the pre_ct and post_ct lists */ + list_add(&m_entry->post_ct_list, &post_ct_entry->children); + list_add(&m_entry->pre_ct_list, &pre_ct_entry->children); + INIT_LIST_HEAD(&m_entry->children); + + err = rhashtable_insert_fast(&zt->tc_merge_tb, &m_entry->hash_node, + nfp_tc_ct_merge_params); + if (err) + goto err_ct_tc_merge_insert; + zt->tc_merge_count++; + + /* Merge with existing nft flows */ + list_for_each_entry_safe(nft_entry, nft_tmp, &zt->nft_flows_list, + list_node) { + nfp_ct_do_nft_merge(zt, nft_entry, m_entry); + } + + return 0; + +err_ct_tc_merge_insert: + list_del(&m_entry->post_ct_list); + list_del(&m_entry->pre_ct_list); + kfree(m_entry); + return err; +} + +static struct +nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv, + u16 zone, bool wildcarded) +{ + struct nfp_fl_ct_zone_entry *zt; + int err; + + if (wildcarded && priv->ct_zone_wc) + return priv->ct_zone_wc; + + if (!wildcarded) { + zt = get_hashentry(&priv->ct_zone_table, &zone, + nfp_zone_table_params, sizeof(*zt)); + + /* If priv is set this is an existing entry, just return it */ + if (IS_ERR(zt) || zt->priv) + return zt; + } else { + zt = kzalloc(sizeof(*zt), GFP_KERNEL); + if (!zt) + return ERR_PTR(-ENOMEM); + } + + zt->zone = zone; + zt->priv = priv; + zt->nft = NULL; + + /* init the various hash tables and lists */ + INIT_LIST_HEAD(&zt->pre_ct_list); + INIT_LIST_HEAD(&zt->post_ct_list); + INIT_LIST_HEAD(&zt->nft_flows_list); + + err = rhashtable_init(&zt->tc_merge_tb, &nfp_tc_ct_merge_params); + if (err) + goto err_tc_merge_tb_init; + + err = rhashtable_init(&zt->nft_merge_tb, &nfp_nft_ct_merge_params); + if (err) + goto err_nft_merge_tb_init; + + if (wildcarded) { + priv->ct_zone_wc = zt; + } else { + err = rhashtable_insert_fast(&priv->ct_zone_table, + &zt->hash_node, + nfp_zone_table_params); + if (err) + goto err_zone_insert; + } + + return zt; + +err_zone_insert: + rhashtable_destroy(&zt->nft_merge_tb); +err_nft_merge_tb_init: + rhashtable_destroy(&zt->tc_merge_tb); +err_tc_merge_tb_init: + kfree(zt); + return ERR_PTR(err); +} + +static struct net_device *get_netdev_from_rule(struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) + return __dev_get_by_index(&init_net, + match.key->ingress_ifindex); + } + + return NULL; +} + +static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_action) +{ + if (mangle_action->id != FLOW_ACTION_MANGLE) + return; + + switch (mangle_action->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + mangle_action->mangle.val = (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); + return; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); + mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + return; + + default: + return; + } +} + +static int nfp_nft_ct_set_flow_flag(struct flow_action_entry *act, + struct nfp_fl_ct_flow_entry *entry) +{ + switch (act->id) { + case FLOW_ACTION_CT: + if (act->ct.action == TCA_CT_ACT_NAT) + entry->flags |= NFP_FL_ACTION_DO_NAT; + break; + + case FLOW_ACTION_MANGLE: + entry->flags |= NFP_FL_ACTION_DO_MANGLE; + break; + + default: + break; + } + + return 0; +} + +static struct +nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, + struct net_device *netdev, + struct flow_cls_offload *flow, + bool is_nft, struct netlink_ext_ack *extack) +{ + struct nf_flow_match *nft_match = NULL; + struct nfp_fl_ct_flow_entry *entry; + struct nfp_fl_ct_map_entry *map; + struct flow_action_entry *act; + int err, i; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + entry->rule = flow_rule_alloc(flow->rule->action.num_entries); + if (!entry->rule) { + err = -ENOMEM; + goto err_pre_ct_rule; + } + + /* nft flows gets destroyed after callback return, so need + * to do a full copy instead of just a reference. + */ + if (is_nft) { + nft_match = kzalloc(sizeof(*nft_match), GFP_KERNEL); + if (!nft_match) { + err = -ENOMEM; + goto err_pre_ct_act; + } + memcpy(&nft_match->dissector, flow->rule->match.dissector, + sizeof(nft_match->dissector)); + memcpy(&nft_match->mask, flow->rule->match.mask, + sizeof(nft_match->mask)); + memcpy(&nft_match->key, flow->rule->match.key, + sizeof(nft_match->key)); + entry->rule->match.dissector = &nft_match->dissector; + entry->rule->match.mask = &nft_match->mask; + entry->rule->match.key = &nft_match->key; + + if (!netdev) + netdev = get_netdev_from_rule(entry->rule); + } else { + entry->rule->match.dissector = flow->rule->match.dissector; + entry->rule->match.mask = flow->rule->match.mask; + entry->rule->match.key = flow->rule->match.key; + } + + entry->zt = zt; + entry->netdev = netdev; + entry->cookie = flow->cookie > 0 ? flow->cookie : (unsigned long)entry; + entry->chain_index = flow->common.chain_index; + entry->tun_offset = NFP_FL_CT_NO_TUN; + + /* Copy over action data. Unfortunately we do not get a handle to the + * original tcf_action data, and the flow objects gets destroyed, so we + * cannot just save a pointer to this either, so need to copy over the + * data unfortunately. + */ + entry->rule->action.num_entries = flow->rule->action.num_entries; + flow_action_for_each(i, act, &flow->rule->action) { + struct flow_action_entry *new_act; + + new_act = &entry->rule->action.entries[i]; + memcpy(new_act, act, sizeof(struct flow_action_entry)); + /* nft entry mangle field is host byte order, need translate to + * network byte order. + */ + if (is_nft) + nfp_nft_ct_translate_mangle_action(new_act); + + nfp_nft_ct_set_flow_flag(new_act, entry); + /* Entunnel is a special case, need to allocate and copy + * tunnel info. + */ + if (act->id == FLOW_ACTION_TUNNEL_ENCAP) { + struct ip_tunnel_info *tun = act->tunnel; + size_t tun_size = sizeof(*tun) + tun->options_len; + + new_act->tunnel = kmemdup(tun, tun_size, GFP_ATOMIC); + if (!new_act->tunnel) { + err = -ENOMEM; + goto err_pre_ct_tun_cp; + } + entry->tun_offset = i; + } + } + + INIT_LIST_HEAD(&entry->children); + + if (flow->cookie == 0) + return entry; + + /* Now add a ct map entry to flower-priv */ + map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params, sizeof(*map)); + if (IS_ERR(map)) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: ct map entry creation failed"); + err = -ENOMEM; + goto err_ct_flow_insert; + } + map->cookie = flow->cookie; + map->ct_entry = entry; + err = rhashtable_insert_fast(&zt->priv->ct_map_table, + &map->hash_node, + nfp_ct_map_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: ct map entry table add failed"); + goto err_map_insert; + } + + return entry; + +err_map_insert: + kfree(map); +err_ct_flow_insert: + if (entry->tun_offset != NFP_FL_CT_NO_TUN) + kfree(entry->rule->action.entries[entry->tun_offset].tunnel); +err_pre_ct_tun_cp: + kfree(nft_match); +err_pre_ct_act: + kfree(entry->rule); +err_pre_ct_rule: + kfree(entry); + return ERR_PTR(err); +} + +static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry) +{ + struct nfp_fl_ct_zone_entry *zt; + int err; + + zt = m_entry->zt; + + /* Flow is in HW, need to delete */ + if (m_entry->tc_flower_cookie) { + err = nfp_fl_ct_del_offload(zt->priv->app, m_entry->tc_flower_cookie, + m_entry->netdev); + if (err) + return; + } + + WARN_ON_ONCE(rhashtable_remove_fast(&zt->nft_merge_tb, + &m_entry->hash_node, + nfp_nft_ct_merge_params)); + zt->nft_merge_count--; + list_del(&m_entry->tc_merge_list); + list_del(&m_entry->nft_flow_list); + + if (m_entry->next_pre_ct_entry) { + struct nfp_fl_ct_map_entry pre_ct_map_ent; + + pre_ct_map_ent.ct_entry = m_entry->next_pre_ct_entry; + pre_ct_map_ent.cookie = 0; + nfp_fl_ct_del_flow(&pre_ct_map_ent); + } + + kfree(m_entry); +} + +static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow) +{ + struct nfp_fl_nft_tc_merge *m_entry, *tmp; + + /* These post entries are parts of two lists, one is a list of nft_entries + * and the other is of from a list of tc_merge structures. Iterate + * through the relevant list and cleanup the entries. + */ + + if (is_nft_flow) { + /* Need to iterate through list of nft_flow entries */ + struct nfp_fl_ct_flow_entry *ct_entry = entry; + + list_for_each_entry_safe(m_entry, tmp, &ct_entry->children, + nft_flow_list) { + cleanup_nft_merge_entry(m_entry); + } + } else { + /* Need to iterate through list of tc_merged_flow entries */ + struct nfp_fl_ct_tc_merge *ct_entry = entry; + + list_for_each_entry_safe(m_entry, tmp, &ct_entry->children, + tc_merge_list) { + cleanup_nft_merge_entry(m_entry); + } + } +} + +static void nfp_del_tc_merge_entry(struct nfp_fl_ct_tc_merge *m_ent) +{ + struct nfp_fl_ct_zone_entry *zt; + int err; + + zt = m_ent->zt; + err = rhashtable_remove_fast(&zt->tc_merge_tb, + &m_ent->hash_node, + nfp_tc_ct_merge_params); + if (err) + pr_warn("WARNING: could not remove merge_entry from hashtable\n"); + zt->tc_merge_count--; + list_del(&m_ent->post_ct_list); + list_del(&m_ent->pre_ct_list); + + if (!list_empty(&m_ent->children)) + nfp_free_nft_merge_children(m_ent, false); + kfree(m_ent); +} + +static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry) +{ + struct nfp_fl_ct_tc_merge *m_ent, *tmp; + + switch (entry->type) { + case CT_TYPE_PRE_CT: + list_for_each_entry_safe(m_ent, tmp, &entry->children, pre_ct_list) { + nfp_del_tc_merge_entry(m_ent); + } + break; + case CT_TYPE_POST_CT: + list_for_each_entry_safe(m_ent, tmp, &entry->children, post_ct_list) { + nfp_del_tc_merge_entry(m_ent); + } + break; + default: + break; + } +} + +void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry) +{ + list_del(&entry->list_node); + + if (!list_empty(&entry->children)) { + if (entry->type == CT_TYPE_NFT) + nfp_free_nft_merge_children(entry, true); + else + nfp_free_tc_merge_children(entry); + } + + if (entry->tun_offset != NFP_FL_CT_NO_TUN) + kfree(entry->rule->action.entries[entry->tun_offset].tunnel); + + if (entry->type == CT_TYPE_NFT) { + struct nf_flow_match *nft_match; + + nft_match = container_of(entry->rule->match.dissector, + struct nf_flow_match, dissector); + kfree(nft_match); + } + + kfree(entry->rule); + kfree(entry); +} + +static struct flow_action_entry *get_flow_act_ct(struct flow_rule *rule) +{ + struct flow_action_entry *act; + int i; + + /* More than one ct action may be present in a flow rule, + * Return the first one that is not a CT clear action + */ + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_CT && act->ct.action != TCA_CT_ACT_CLEAR) + return act; + } + + return NULL; +} + +static struct flow_action_entry *get_flow_act(struct flow_rule *rule, + enum flow_action_id act_id) +{ + struct flow_action_entry *act = NULL; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == act_id) + return act; + } + return NULL; +} + +static void +nfp_ct_merge_tc_entries(struct nfp_fl_ct_flow_entry *ct_entry1, + struct nfp_fl_ct_zone_entry *zt_src, + struct nfp_fl_ct_zone_entry *zt_dst) +{ + struct nfp_fl_ct_flow_entry *ct_entry2, *ct_tmp; + struct list_head *ct_list; + + if (ct_entry1->type == CT_TYPE_PRE_CT) + ct_list = &zt_src->post_ct_list; + else if (ct_entry1->type == CT_TYPE_POST_CT) + ct_list = &zt_src->pre_ct_list; + else + return; + + list_for_each_entry_safe(ct_entry2, ct_tmp, ct_list, + list_node) { + nfp_ct_do_tc_merge(zt_dst, ct_entry2, ct_entry1); + } +} + +static void +nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry, + struct nfp_fl_ct_zone_entry *zt) +{ + struct nfp_fl_ct_tc_merge *tc_merge_entry; + struct rhashtable_iter iter; + + rhashtable_walk_enter(&zt->tc_merge_tb, &iter); + rhashtable_walk_start(&iter); + while ((tc_merge_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(tc_merge_entry)) + continue; + rhashtable_walk_stop(&iter); + nfp_ct_do_nft_merge(zt, nft_entry, tc_merge_entry); + rhashtable_walk_start(&iter); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack, + struct nfp_fl_nft_tc_merge *m_entry) +{ + struct flow_action_entry *ct_act, *ct_goto; + struct nfp_fl_ct_flow_entry *ct_entry; + struct nfp_fl_ct_zone_entry *zt; + int err; + + ct_act = get_flow_act_ct(flow->rule); + if (!ct_act) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: Conntrack action empty in conntrack offload"); + return -EOPNOTSUPP; + } + + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + if (!ct_goto) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: Conntrack requires ACTION_GOTO"); + return -EOPNOTSUPP; + } + + zt = get_nfp_zone_entry(priv, ct_act->ct.zone, false); + if (IS_ERR(zt)) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: Could not create zone table entry"); + return PTR_ERR(zt); + } + + if (!zt->nft) { + zt->nft = ct_act->ct.flow_table; + err = nf_flow_table_offload_add_cb(zt->nft, nfp_fl_ct_handle_nft_flow, zt); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: Could not register nft_callback"); + return err; + } + } + + /* Add entry to pre_ct_list */ + ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack); + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + ct_entry->type = CT_TYPE_PRE_CT; + ct_entry->chain_index = flow->common.chain_index; + ct_entry->goto_chain_index = ct_goto->chain_index; + + if (m_entry) { + struct nfp_fl_ct_flow_entry *pre_ct_entry; + int i; + + pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent; + for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++) + ct_entry->prev_m_entries[i] = pre_ct_entry->prev_m_entries[i]; + ct_entry->prev_m_entries[i++] = m_entry; + ct_entry->num_prev_m_entries = i; + + m_entry->next_pre_ct_entry = ct_entry; + } + + list_add(&ct_entry->list_node, &zt->pre_ct_list); + zt->pre_ct_count++; + + nfp_ct_merge_tc_entries(ct_entry, zt, zt); + + /* Need to check and merge with tables in the wc_zone as well */ + if (priv->ct_zone_wc) + nfp_ct_merge_tc_entries(ct_entry, priv->ct_zone_wc, zt); + + return 0; +} + +int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct nfp_fl_ct_flow_entry *ct_entry; + struct nfp_fl_ct_zone_entry *zt; + bool wildcarded = false; + struct flow_match_ct ct; + struct flow_action_entry *ct_goto; + + flow_rule_match_ct(rule, &ct); + if (!ct.mask->ct_zone) { + wildcarded = true; + } else if (ct.mask->ct_zone != U16_MAX) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: partially wildcarded ct_zone is not supported"); + return -EOPNOTSUPP; + } + + zt = get_nfp_zone_entry(priv, ct.key->ct_zone, wildcarded); + if (IS_ERR(zt)) { + NL_SET_ERR_MSG_MOD(extack, + "offload error: Could not create zone table entry"); + return PTR_ERR(zt); + } + + /* Add entry to post_ct_list */ + ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack); + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + + ct_entry->type = CT_TYPE_POST_CT; + ct_entry->chain_index = flow->common.chain_index; + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; + list_add(&ct_entry->list_node, &zt->post_ct_list); + zt->post_ct_count++; + + if (wildcarded) { + /* Iterate through all zone tables if not empty, look for merges with + * pre_ct entries and merge them. + */ + struct rhashtable_iter iter; + struct nfp_fl_ct_zone_entry *zone_table; + + rhashtable_walk_enter(&priv->ct_zone_table, &iter); + rhashtable_walk_start(&iter); + while ((zone_table = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(zone_table)) + continue; + rhashtable_walk_stop(&iter); + nfp_ct_merge_tc_entries(ct_entry, zone_table, zone_table); + rhashtable_walk_start(&iter); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + } else { + nfp_ct_merge_tc_entries(ct_entry, zt, zt); + } + + return 0; +} + +int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry) +{ + struct nfp_fl_ct_flow_entry *pre_ct_entry, *post_ct_entry; + struct flow_cls_offload new_pre_ct_flow; + int err; + + pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent; + if (pre_ct_entry->num_prev_m_entries >= NFP_MAX_RECIRC_CT_ZONES - 1) + return -1; + + post_ct_entry = m_entry->tc_m_parent->post_ct_parent; + memset(&new_pre_ct_flow, 0, sizeof(struct flow_cls_offload)); + new_pre_ct_flow.rule = post_ct_entry->rule; + new_pre_ct_flow.common.chain_index = post_ct_entry->chain_index; + + err = nfp_fl_ct_handle_pre_ct(pre_ct_entry->zt->priv, + pre_ct_entry->netdev, + &new_pre_ct_flow, NULL, + m_entry); + return err; +} + +static void +nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge, + enum ct_entry_type type, u64 *m_pkts, + u64 *m_bytes, u64 *m_used) +{ + struct nfp_flower_priv *priv = nft_merge->zt->priv; + struct nfp_fl_payload *nfp_flow; + u32 ctx_id; + + nfp_flow = nft_merge->flow_pay; + if (!nfp_flow) + return; + + ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + *m_pkts += priv->stats[ctx_id].pkts; + *m_bytes += priv->stats[ctx_id].bytes; + *m_used = max_t(u64, *m_used, priv->stats[ctx_id].used); + + /* If request is for a sub_flow which is part of a tunnel merged + * flow then update stats from tunnel merged flows first. + */ + if (!list_empty(&nfp_flow->linked_flows)) + nfp_flower_update_merge_stats(priv->app, nfp_flow); + + if (type != CT_TYPE_NFT) { + /* Update nft cached stats */ + flow_stats_update(&nft_merge->nft_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } else { + /* Update pre_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Update post_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } + + /* Update previous pre_ct/post_ct/nft flow stats */ + if (nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries > 0) { + struct nfp_fl_nft_tc_merge *tmp_nft_merge; + int i; + + for (i = 0; i < nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries; i++) { + tmp_nft_merge = nft_merge->tc_m_parent->pre_ct_parent->prev_m_entries[i]; + flow_stats_update(&tmp_nft_merge->tc_m_parent->pre_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + flow_stats_update(&tmp_nft_merge->tc_m_parent->post_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + flow_stats_update(&tmp_nft_merge->nft_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } + + /* Reset stats from the nfp */ + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; +} + +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent) +{ + struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry; + struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp; + struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp; + + u64 pkts = 0, bytes = 0, used = 0; + u64 m_pkts, m_bytes, m_used; + + spin_lock_bh(&ct_entry->zt->priv->stats_lock); + + if (ct_entry->type == CT_TYPE_PRE_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + pre_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update post_ct partner */ + flow_stats_update(&tc_merge->post_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else if (ct_entry->type == CT_TYPE_POST_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + post_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update pre_ct partner */ + flow_stats_update(&tc_merge->pre_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else { + /* Iterate nft_merge entries associated with this nft flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children, + nft_flow_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT, + &pkts, &bytes, &used); + } + } + + /* Add stats from this request to stats potentially cached by + * previous requests. + */ + flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Finally update the flow stats from the original stats request */ + flow_stats_update(&flow->stats, ct_entry->stats.bytes, + ct_entry->stats.pkts, 0, + ct_entry->stats.lastused, + FLOW_ACTION_HW_STATS_DELAYED); + /* Stats has been synced to original flow, can now clear + * the cache. + */ + ct_entry->stats.pkts = 0; + ct_entry->stats.bytes = 0; + spin_unlock_bh(&ct_entry->zt->priv->stats_lock); + + return 0; +} + +static bool +nfp_fl_ct_offload_nft_supported(struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow->rule; + struct flow_action *flow_action = + &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) { + enum ip_conntrack_info ctinfo = + act->ct_metadata.cookie & NFCT_INFOMASK; + + return ctinfo != IP_CT_NEW; + } + } + + return false; +} + +static int +nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow) +{ + struct nfp_fl_ct_map_entry *ct_map_ent; + struct nfp_fl_ct_flow_entry *ct_entry; + struct netlink_ext_ack *extack = NULL; + + extack = flow->common.extack; + switch (flow->command) { + case FLOW_CLS_REPLACE: + if (!nfp_fl_ct_offload_nft_supported(flow)) + return -EOPNOTSUPP; + + /* Netfilter can request offload multiple times for the same + * flow - protect against adding duplicates. + */ + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (!ct_map_ent) { + ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, true, extack); + if (IS_ERR(ct_entry)) + return PTR_ERR(ct_entry); + ct_entry->type = CT_TYPE_NFT; + list_add(&ct_entry->list_node, &zt->nft_flows_list); + zt->nft_flows_count++; + nfp_ct_merge_nft_with_tc(ct_entry, zt); + } + return 0; + case FLOW_CLS_DESTROY: + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + return nfp_fl_ct_del_flow(ct_map_ent); + case FLOW_CLS_STATS: + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + break; + default: + break; + } + return -EINVAL; +} + +int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct flow_cls_offload *flow = type_data; + struct nfp_fl_ct_zone_entry *zt = cb_priv; + int err = -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + while (!mutex_trylock(&zt->priv->nfp_fl_lock)) { + if (!zt->nft) /* avoid deadlock */ + return err; + msleep(20); + } + err = nfp_fl_ct_offload_nft_flow(zt, flow); + mutex_unlock(&zt->priv->nfp_fl_lock); + break; + default: + return -EOPNOTSUPP; + } + return err; +} + +static void +nfp_fl_ct_clean_nft_entries(struct nfp_fl_ct_zone_entry *zt) +{ + struct nfp_fl_ct_flow_entry *nft_entry, *ct_tmp; + struct nfp_fl_ct_map_entry *ct_map_ent; + + list_for_each_entry_safe(nft_entry, ct_tmp, &zt->nft_flows_list, + list_node) { + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, + &nft_entry->cookie, + nfp_ct_map_params); + nfp_fl_ct_del_flow(ct_map_ent); + } +} + +int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) +{ + struct nfp_fl_ct_flow_entry *ct_entry; + struct nfp_fl_ct_zone_entry *zt; + struct rhashtable *m_table; + struct nf_flowtable *nft; + + if (!ct_map_ent) + return -ENOENT; + + zt = ct_map_ent->ct_entry->zt; + ct_entry = ct_map_ent->ct_entry; + m_table = &zt->priv->ct_map_table; + + switch (ct_entry->type) { + case CT_TYPE_PRE_CT: + zt->pre_ct_count--; + if (ct_map_ent->cookie > 0) + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); + nfp_fl_ct_clean_flow_entry(ct_entry); + if (ct_map_ent->cookie > 0) + kfree(ct_map_ent); + + if (!zt->pre_ct_count && zt->nft) { + nft = zt->nft; + zt->nft = NULL; /* avoid deadlock */ + nf_flow_table_offload_del_cb(nft, + nfp_fl_ct_handle_nft_flow, + zt); + nfp_fl_ct_clean_nft_entries(zt); + } + break; + case CT_TYPE_POST_CT: + zt->post_ct_count--; + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); + nfp_fl_ct_clean_flow_entry(ct_entry); + kfree(ct_map_ent); + break; + case CT_TYPE_NFT: + zt->nft_flows_count--; + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); + nfp_fl_ct_clean_flow_entry(ct_map_ent->ct_entry); + kfree(ct_map_ent); + break; + default: + break; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h new file mode 100644 index 0000000000..c4ec783580 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2021 Corigine, Inc. */ + +#ifndef __NFP_FLOWER_CONNTRACK_H__ +#define __NFP_FLOWER_CONNTRACK_H__ 1 + +#include <net/netfilter/nf_flow_table.h> +#include "main.h" + +#define NFP_FL_CT_NO_TUN 0xff + +#define COMPARE_UNMASKED_FIELDS(__match1, __match2, __out) \ + do { \ + typeof(__match1) _match1 = (__match1); \ + typeof(__match2) _match2 = (__match2); \ + bool *_out = (__out); \ + int i, size = sizeof(*(_match1).key); \ + char *k1, *m1, *k2, *m2; \ + *_out = false; \ + k1 = (char *)_match1.key; \ + m1 = (char *)_match1.mask; \ + k2 = (char *)_match2.key; \ + m2 = (char *)_match2.mask; \ + for (i = 0; i < size; i++) \ + if ((k1[i] & m1[i] & m2[i]) ^ \ + (k2[i] & m1[i] & m2[i])) { \ + *_out = true; \ + break; \ + } \ + } while (0) \ + +extern const struct rhashtable_params nfp_zone_table_params; +extern const struct rhashtable_params nfp_ct_map_params; +extern const struct rhashtable_params nfp_tc_ct_merge_params; +extern const struct rhashtable_params nfp_nft_ct_merge_params; + +/** + * struct nfp_fl_ct_zone_entry - Zone entry containing conntrack flow information + * @zone: The zone number, used as lookup key in hashtable + * @hash_node: Used by the hashtable + * @priv: Pointer to nfp_flower_priv data + * @nft: Pointer to nf_flowtable for this zone + * + * @pre_ct_list: The pre_ct_list of nfp_fl_ct_flow_entry entries + * @pre_ct_count: Keep count of the number of pre_ct entries + * + * @post_ct_list: The post_ct_list of nfp_fl_ct_flow_entry entries + * @post_ct_count: Keep count of the number of post_ct entries + * + * @tc_merge_tb: The table of merged tc flows + * @tc_merge_count: Keep count of the number of merged tc entries + * + * @nft_flows_list: The list of nft relatednfp_fl_ct_flow_entry entries + * @nft_flows_count: Keep count of the number of nft_flow entries + * + * @nft_merge_tb: The table of merged tc+nft flows + * @nft_merge_count: Keep count of the number of merged tc+nft entries + */ +struct nfp_fl_ct_zone_entry { + u16 zone; + struct rhash_head hash_node; + + struct nfp_flower_priv *priv; + struct nf_flowtable *nft; + + struct list_head pre_ct_list; + unsigned int pre_ct_count; + + struct list_head post_ct_list; + unsigned int post_ct_count; + + struct rhashtable tc_merge_tb; + unsigned int tc_merge_count; + + struct list_head nft_flows_list; + unsigned int nft_flows_count; + + struct rhashtable nft_merge_tb; + unsigned int nft_merge_count; +}; + +enum ct_entry_type { + CT_TYPE_PRE_CT, + CT_TYPE_NFT, + CT_TYPE_POST_CT, + _CT_TYPE_MAX, +}; + +#define NFP_MAX_RECIRC_CT_ZONES 4 +#define NFP_MAX_ENTRY_RULES (NFP_MAX_RECIRC_CT_ZONES * 2 + 1) + +enum nfp_nfp_layer_name { + FLOW_PAY_META_TCI = 0, + FLOW_PAY_INPORT, + FLOW_PAY_EXT_META, + FLOW_PAY_MAC_MPLS, + FLOW_PAY_L4, + FLOW_PAY_IPV4, + FLOW_PAY_IPV6, + FLOW_PAY_CT, + FLOW_PAY_GRE, + FLOW_PAY_QINQ, + FLOW_PAY_UDP_TUN, + FLOW_PAY_GENEVE_OPT, + + _FLOW_PAY_LAYERS_MAX +}; + +/* NFP flow entry flags. */ +#define NFP_FL_ACTION_DO_NAT BIT(0) +#define NFP_FL_ACTION_DO_MANGLE BIT(1) + +/** + * struct nfp_fl_ct_flow_entry - Flow entry containing conntrack flow information + * @cookie: Flow cookie, same as original TC flow, used as key + * @list_node: Used by the list + * @chain_index: Chain index of the original flow + * @goto_chain_index: goto chain index of the flow + * @netdev: netdev structure. + * @zt: Reference to the zone table this belongs to + * @children: List of tc_merge flows this flow forms part of + * @rule: Reference to the original TC flow rule + * @stats: Used to cache stats for updating + * @prev_m_entries: Array of all previous nft_tc_merge entries + * @num_prev_m_entries: The number of all previous nft_tc_merge entries + * @tun_offset: Used to indicate tunnel action offset in action list + * @flags: Used to indicate flow flag like NAT which used by merge. + * @type: Type of ct-entry from enum ct_entry_type + */ +struct nfp_fl_ct_flow_entry { + unsigned long cookie; + struct list_head list_node; + u32 chain_index; + u32 goto_chain_index; + struct net_device *netdev; + struct nfp_fl_ct_zone_entry *zt; + struct list_head children; + struct flow_rule *rule; + struct flow_stats stats; + struct nfp_fl_nft_tc_merge *prev_m_entries[NFP_MAX_RECIRC_CT_ZONES - 1]; + u8 num_prev_m_entries; + u8 tun_offset; // Set to NFP_FL_CT_NO_TUN if no tun + u8 flags; + u8 type; +}; + +/** + * struct nfp_fl_ct_tc_merge - Merge of two flows from tc + * @cookie: Flow cookie, combination of pre and post ct cookies + * @hash_node: Used by the hashtable + * @pre_ct_list: This entry is part of a pre_ct_list + * @post_ct_list: This entry is part of a post_ct_list + * @zt: Reference to the zone table this belongs to + * @pre_ct_parent: The pre_ct_parent + * @post_ct_parent: The post_ct_parent + * @children: List of nft merged entries + */ +struct nfp_fl_ct_tc_merge { + unsigned long cookie[2]; + struct rhash_head hash_node; + struct list_head pre_ct_list; + struct list_head post_ct_list; + struct nfp_fl_ct_zone_entry *zt; + struct nfp_fl_ct_flow_entry *pre_ct_parent; + struct nfp_fl_ct_flow_entry *post_ct_parent; + struct list_head children; +}; + +/** + * struct nfp_fl_nft_tc_merge - Merge of tc_merge flows with nft flow + * @netdev: Ingress netdev name + * @cookie: Flow cookie, combination of tc_merge and nft cookies + * @hash_node: Used by the hashtable + * @zt: Reference to the zone table this belongs to + * @nft_flow_list: This entry is part of a nft_flows_list + * @tc_merge_list: This entry is part of a ct_merge_list + * @tc_m_parent: The tc_merge parent + * @nft_parent: The nft_entry parent + * @tc_flower_cookie: The cookie of the flow offloaded to the nfp + * @flow_pay: Reference to the offloaded flow struct + * @next_pre_ct_entry: Reference to the next ct zone pre ct entry + */ +struct nfp_fl_nft_tc_merge { + struct net_device *netdev; + unsigned long cookie[3]; + struct rhash_head hash_node; + struct nfp_fl_ct_zone_entry *zt; + struct list_head nft_flow_list; + struct list_head tc_merge_list; + struct nfp_fl_ct_tc_merge *tc_m_parent; + struct nfp_fl_ct_flow_entry *nft_parent; + unsigned long tc_flower_cookie; + struct nfp_fl_payload *flow_pay; + struct nfp_fl_ct_flow_entry *next_pre_ct_entry; +}; + +/** + * struct nfp_fl_ct_map_entry - Map between flow cookie and specific ct_flow + * @cookie: Flow cookie, same as original TC flow, used as key + * @hash_node: Used by the hashtable + * @ct_entry: Pointer to corresponding ct_entry + */ +struct nfp_fl_ct_map_entry { + unsigned long cookie; + struct rhash_head hash_node; + struct nfp_fl_ct_flow_entry *ct_entry; +}; + +bool is_pre_ct_flow(struct flow_cls_offload *flow); +bool is_post_ct_flow(struct flow_cls_offload *flow); + +/** + * nfp_fl_ct_handle_pre_ct() - Handles -trk conntrack rules + * @priv: Pointer to app priv + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * @extack: Extack pointer for errors + * @m_entry:previous nfp_fl_nft_tc_merge entry + * + * Adds a new entry to the relevant zone table and tries to + * merge with other +trk+est entries and offload if possible. + * + * Return: negative value on error, 0 if configured successfully. + */ +int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack, + struct nfp_fl_nft_tc_merge *m_entry); +/** + * nfp_fl_ct_handle_post_ct() - Handles +trk+est conntrack rules + * @priv: Pointer to app priv + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * @extack: Extack pointer for errors + * + * Adds a new entry to the relevant zone table and tries to + * merge with other -trk entries and offload if possible. + * + * Return: negative value on error, 0 if configured successfully. + */ +int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, + struct net_device *netdev, + struct flow_cls_offload *flow, + struct netlink_ext_ack *extack); + +/** + * nfp_fl_create_new_pre_ct() - create next ct_zone -trk conntrack rules + * @m_entry:previous nfp_fl_nft_tc_merge entry + * + * Create a new pre_ct entry from previous nfp_fl_nft_tc_merge entry + * to the next relevant zone table. Try to merge with other +trk+est + * entries and offload if possible. The created new pre_ct entry is + * linked to the previous nfp_fl_nft_tc_merge entry. + * + * Return: negative value on error, 0 if configured successfully. + */ +int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry); + +/** + * nfp_fl_ct_clean_flow_entry() - Free a nfp_fl_ct_flow_entry + * @entry: Flow entry to cleanup + */ +void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry); + +/** + * nfp_fl_ct_del_flow() - Handle flow_del callbacks for conntrack + * @ct_map_ent: ct map entry for the flow that needs deleting + */ +int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent); + +/** + * nfp_fl_ct_handle_nft_flow() - Handle flower flow callbacks for nft table + * @type: Type provided by callback + * @type_data: Callback data + * @cb_priv: Pointer to data provided when registering the callback, in this + * case it's the zone table. + */ +int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, + void *cb_priv); + +/** + * nfp_fl_ct_stats() - Handle flower stats callbacks for ct flows + * @flow: TC flower classifier offload structure. + * @ct_map_ent: ct map entry for the flow that needs deleting + */ +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c new file mode 100644 index 0000000000..88d6d992e7 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#include "main.h" + +/* LAG group config flags. */ +#define NFP_FL_LAG_LAST BIT(1) +#define NFP_FL_LAG_FIRST BIT(2) +#define NFP_FL_LAG_DATA BIT(3) +#define NFP_FL_LAG_XON BIT(4) +#define NFP_FL_LAG_SYNC BIT(5) +#define NFP_FL_LAG_SWITCH BIT(6) +#define NFP_FL_LAG_RESET BIT(7) + +/* LAG port state flags. */ +#define NFP_PORT_LAG_LINK_UP BIT(0) +#define NFP_PORT_LAG_TX_ENABLED BIT(1) +#define NFP_PORT_LAG_CHANGED BIT(2) + +enum nfp_fl_lag_batch { + NFP_FL_LAG_BATCH_FIRST, + NFP_FL_LAG_BATCH_MEMBER, + NFP_FL_LAG_BATCH_FINISHED +}; + +/** + * struct nfp_flower_cmsg_lag_config - control message payload for LAG config + * @ctrl_flags: Configuration flags + * @reserved: Reserved for future use + * @ttl: Time to live of packet - host always sets to 0xff + * @pkt_number: Config message packet number - increment for each message + * @batch_ver: Batch version of messages - increment for each batch of messages + * @group_id: Group ID applicable + * @group_inst: Group instance number - increment when group is reused + * @members: Array of 32-bit words listing all active group members + */ +struct nfp_flower_cmsg_lag_config { + u8 ctrl_flags; + u8 reserved[2]; + u8 ttl; + __be32 pkt_number; + __be32 batch_ver; + __be32 group_id; + __be32 group_inst; + __be32 members[]; +}; + +/** + * struct nfp_fl_lag_group - list entry for each LAG group + * @group_id: Assigned group ID for host/kernel sync + * @group_inst: Group instance in case of ID reuse + * @list: List entry + * @master_ndev: Group master Netdev + * @dirty: Marked if the group needs synced to HW + * @offloaded: Marked if the group is currently offloaded to NIC + * @to_remove: Marked if the group should be removed from NIC + * @to_destroy: Marked if the group should be removed from driver + * @slave_cnt: Number of slaves in group + */ +struct nfp_fl_lag_group { + unsigned int group_id; + u8 group_inst; + struct list_head list; + struct net_device *master_ndev; + bool dirty; + bool offloaded; + bool to_remove; + bool to_destroy; + unsigned int slave_cnt; +}; + +#define NFP_FL_LAG_PKT_NUMBER_MASK GENMASK(30, 0) +#define NFP_FL_LAG_VERSION_MASK GENMASK(22, 0) +#define NFP_FL_LAG_HOST_TTL 0xff + +/* Use this ID with zero members to ack a batch config */ +#define NFP_FL_LAG_SYNC_ID 0 +#define NFP_FL_LAG_GROUP_MIN 1 /* ID 0 reserved */ +#define NFP_FL_LAG_GROUP_MAX 32 /* IDs 1 to 31 are valid */ + +/* wait for more config */ +#define NFP_FL_LAG_DELAY (msecs_to_jiffies(2)) + +#define NFP_FL_LAG_RETRANS_LIMIT 100 /* max retrans cmsgs to store */ + +static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag) +{ + lag->pkt_num++; + lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK; + + return lag->pkt_num; +} + +static void nfp_fl_increment_version(struct nfp_fl_lag *lag) +{ + /* LSB is not considered by firmware so add 2 for each increment. */ + lag->batch_ver += 2; + lag->batch_ver &= NFP_FL_LAG_VERSION_MASK; + + /* Zero is reserved by firmware. */ + if (!lag->batch_ver) + lag->batch_ver += 2; +} + +static struct nfp_fl_lag_group * +nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master) +{ + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + int id; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN, + NFP_FL_LAG_GROUP_MAX, GFP_KERNEL); + if (id < 0) { + nfp_flower_cmsg_warn(priv->app, + "No more bonding groups available\n"); + return ERR_PTR(id); + } + + group = kmalloc(sizeof(*group), GFP_KERNEL); + if (!group) { + ida_simple_remove(&lag->ida_handle, id); + return ERR_PTR(-ENOMEM); + } + + group->group_id = id; + group->master_ndev = master; + group->dirty = true; + group->offloaded = false; + group->to_remove = false; + group->to_destroy = false; + group->slave_cnt = 0; + group->group_inst = ++lag->global_inst; + list_add_tail(&group->list, &lag->group_list); + + return group; +} + +static struct nfp_fl_lag_group * +nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag, + struct net_device *master) +{ + struct nfp_fl_lag_group *entry; + + if (!master) + return NULL; + + list_for_each_entry(entry, &lag->group_list, list) + if (entry->master_ndev == master) + return entry; + + return NULL; +} + +static int nfp_fl_lag_get_group_info(struct nfp_app *app, + struct net_device *netdev, + __be16 *group_id, + u8 *batch_ver, + u8 *group_inst) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group = NULL; + __be32 temp_vers; + + mutex_lock(&priv->nfp_lag.lock); + group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag, + netdev); + if (!group) { + mutex_unlock(&priv->nfp_lag.lock); + return -ENOENT; + } + + if (group_id) + *group_id = cpu_to_be16(group->group_id); + + if (batch_ver) { + temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver << + NFP_FL_PRE_LAG_VER_OFF); + memcpy(batch_ver, &temp_vers, 3); + } + + if (group_inst) + *group_inst = group->group_inst; + + mutex_unlock(&priv->nfp_lag.lock); + + return 0; +} + +int nfp_flower_lag_populate_pre_action(struct nfp_app *app, + struct net_device *master, + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack) +{ + if (nfp_fl_lag_get_group_info(app, master, &pre_act->group_id, + pre_act->lag_version, + &pre_act->instance)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action"); + return -ENOENT; + } + + return 0; +} + +void nfp_flower_lag_get_info_from_netdev(struct nfp_app *app, + struct net_device *netdev, + struct nfp_tun_neigh_lag *lag) +{ + nfp_fl_lag_get_group_info(app, netdev, NULL, + lag->lag_version, &lag->lag_instance); +} + +int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group = NULL; + int group_id = -ENOENT; + + mutex_lock(&priv->nfp_lag.lock); + group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag, + master); + if (group) + group_id = group->group_id; + mutex_unlock(&priv->nfp_lag.lock); + + return group_id; +} + +static int +nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group, + struct net_device **active_members, + unsigned int member_cnt, enum nfp_fl_lag_batch *batch) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + struct nfp_flower_priv *priv; + unsigned long int flags; + unsigned int size, i; + struct sk_buff *skb; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt; + skb = nfp_flower_cmsg_alloc(priv->app, size, + NFP_FLOWER_CMSG_TYPE_LAG_CONFIG, + GFP_KERNEL); + if (!skb) + return -ENOMEM; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + flags = 0; + + /* Increment batch version for each new batch of config messages. */ + if (*batch == NFP_FL_LAG_BATCH_FIRST) { + flags |= NFP_FL_LAG_FIRST; + nfp_fl_increment_version(lag); + *batch = NFP_FL_LAG_BATCH_MEMBER; + } + + /* If it is a reset msg then it is also the end of the batch. */ + if (lag->rst_cfg) { + flags |= NFP_FL_LAG_RESET; + *batch = NFP_FL_LAG_BATCH_FINISHED; + } + + /* To signal the end of a batch, both the switch and last flags are set + * and the reserved SYNC group ID is used. + */ + if (*batch == NFP_FL_LAG_BATCH_FINISHED) { + flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST; + lag->rst_cfg = false; + cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID); + cmsg_payload->group_inst = 0; + } else { + cmsg_payload->group_id = cpu_to_be32(group->group_id); + cmsg_payload->group_inst = cpu_to_be32(group->group_inst); + } + + cmsg_payload->reserved[0] = 0; + cmsg_payload->reserved[1] = 0; + cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL; + cmsg_payload->ctrl_flags = flags; + cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver); + cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag)); + + for (i = 0; i < member_cnt; i++) + cmsg_payload->members[i] = + cpu_to_be32(nfp_repr_get_port_id(active_members[i])); + + nfp_ctrl_tx(priv->app->ctrl, skb); + return 0; +} + +static void nfp_fl_lag_do_work(struct work_struct *work) +{ + enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST; + struct nfp_fl_lag_group *entry, *storage; + struct delayed_work *delayed_work; + struct nfp_flower_priv *priv; + struct nfp_fl_lag *lag; + int err; + + delayed_work = to_delayed_work(work); + lag = container_of(delayed_work, struct nfp_fl_lag, work); + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + mutex_lock(&lag->lock); + list_for_each_entry_safe(entry, storage, &lag->group_list, list) { + struct net_device *iter_netdev, **acti_netdevs; + struct nfp_flower_repr_priv *repr_priv; + int active_count = 0, slaves = 0; + struct nfp_repr *repr; + unsigned long *flags; + + if (entry->to_remove) { + /* Active count of 0 deletes group on hw. */ + err = nfp_fl_lag_config_group(lag, entry, NULL, 0, + &batch); + if (!err) { + entry->to_remove = false; + entry->offloaded = false; + } else { + nfp_flower_cmsg_warn(priv->app, + "group delete failed\n"); + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } + + if (entry->to_destroy) { + ida_simple_remove(&lag->ida_handle, + entry->group_id); + list_del(&entry->list); + kfree(entry); + } + continue; + } + + acti_netdevs = kmalloc_array(entry->slave_cnt, + sizeof(*acti_netdevs), GFP_KERNEL); + + /* Include sanity check in the loop. It may be that a bond has + * changed between processing the last notification and the + * work queue triggering. If the number of slaves has changed + * or it now contains netdevs that cannot be offloaded, ignore + * the group until pending notifications are processed. + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) { + if (!nfp_netdev_is_nfp_repr(iter_netdev)) { + slaves = 0; + break; + } + + repr = netdev_priv(iter_netdev); + + if (repr->app != priv->app) { + slaves = 0; + break; + } + + slaves++; + if (slaves > entry->slave_cnt) + break; + + /* Check the ports for state changes. */ + repr_priv = repr->app_priv; + flags = &repr_priv->lag_port_flags; + + if (*flags & NFP_PORT_LAG_CHANGED) { + *flags &= ~NFP_PORT_LAG_CHANGED; + entry->dirty = true; + } + + if ((*flags & NFP_PORT_LAG_TX_ENABLED) && + (*flags & NFP_PORT_LAG_LINK_UP)) + acti_netdevs[active_count++] = iter_netdev; + } + rcu_read_unlock(); + + if (slaves != entry->slave_cnt || !entry->dirty) { + kfree(acti_netdevs); + continue; + } + + err = nfp_fl_lag_config_group(lag, entry, acti_netdevs, + active_count, &batch); + if (!err) { + entry->offloaded = true; + entry->dirty = false; + } else { + nfp_flower_cmsg_warn(priv->app, + "group offload failed\n"); + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + } + + kfree(acti_netdevs); + } + + /* End the config batch if at least one packet has been batched. */ + if (batch == NFP_FL_LAG_BATCH_MEMBER) { + batch = NFP_FL_LAG_BATCH_FINISHED; + err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch); + if (err) + nfp_flower_cmsg_warn(priv->app, + "group batch end cmsg failed\n"); + } + + mutex_unlock(&lag->lock); +} + +static int +nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX) + return -EINVAL; + + /* Drop cmsg retrans if storage limit is exceeded to prevent + * overloading. If the fw notices that expected messages have not been + * received in a given time block, it will request a full resync. + */ + if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT) + return -ENOSPC; + + __skb_queue_tail(&lag->retrans_skbs, skb); + + return 0; +} + +static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag) +{ + struct nfp_flower_priv *priv; + struct sk_buff *skb; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + while ((skb = __skb_dequeue(&lag->retrans_skbs))) + nfp_ctrl_tx(priv->app->ctrl, skb); +} + +bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group_entry; + unsigned long int flags; + bool store_skb = false; + int err; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + flags = cmsg_payload->ctrl_flags; + + /* Note the intentional fall through below. If DATA and XON are both + * set, the message will stored and sent again with the rest of the + * unprocessed messages list. + */ + + /* Store */ + if (flags & NFP_FL_LAG_DATA) + if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb)) + store_skb = true; + + /* Send stored */ + if (flags & NFP_FL_LAG_XON) + nfp_fl_send_unprocessed(&priv->nfp_lag); + + /* Resend all */ + if (flags & NFP_FL_LAG_SYNC) { + /* To resend all config: + * 1) Clear all unprocessed messages + * 2) Mark all groups dirty + * 3) Reset NFP group config + * 4) Schedule a LAG config update + */ + + __skb_queue_purge(&priv->nfp_lag.retrans_skbs); + + mutex_lock(&priv->nfp_lag.lock); + list_for_each_entry(group_entry, &priv->nfp_lag.group_list, + list) + group_entry->dirty = true; + + err = nfp_flower_lag_reset(&priv->nfp_lag); + if (err) + nfp_flower_cmsg_warn(priv->app, + "mem err in group reset msg\n"); + mutex_unlock(&priv->nfp_lag.lock); + + schedule_delayed_work(&priv->nfp_lag.work, 0); + } + + return store_skb; +} + +static void +nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag, + struct nfp_fl_lag_group *group) +{ + group->to_remove = true; + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +static void +nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag, + struct net_device *master) +{ + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + if (!netif_is_bond_master(master)) + return; + + mutex_lock(&lag->lock); + group = nfp_fl_lag_find_group_for_master_with_lag(lag, master); + if (!group) { + mutex_unlock(&lag->lock); + nfp_warn(priv->app->cpp, "untracked bond got unregistered %s\n", + netdev_name(master)); + return; + } + + group->to_remove = true; + group->to_destroy = true; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +static int +nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *iter_netdev; + struct netdev_lag_upper_info *lag_upper_info; + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + unsigned int slave_count = 0; + bool can_offload = true; + struct nfp_repr *repr; + + if (!netif_is_lag_master(upper)) + return 0; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, iter_netdev) { + if (!nfp_netdev_is_nfp_repr(iter_netdev)) { + can_offload = false; + break; + } + repr = netdev_priv(iter_netdev); + + /* Ensure all ports are created by the same app/on same card. */ + if (repr->app != priv->app) { + can_offload = false; + break; + } + + slave_count++; + } + rcu_read_unlock(); + + lag_upper_info = info->upper_info; + + /* Firmware supports active/backup and L3/L4 hash bonds. */ + if (lag_upper_info && + lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH || + (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) { + can_offload = false; + nfp_flower_cmsg_warn(priv->app, + "Unable to offload tx_type %u hash %u\n", + lag_upper_info->tx_type, + lag_upper_info->hash_type); + } + + mutex_lock(&lag->lock); + group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper); + + if (slave_count == 0 || !can_offload) { + /* Cannot offload the group - remove if previously offloaded. */ + if (group && group->offloaded) + nfp_fl_lag_schedule_group_remove(lag, group); + + mutex_unlock(&lag->lock); + return 0; + } + + if (!group) { + group = nfp_fl_lag_group_create(lag, upper); + if (IS_ERR(group)) { + mutex_unlock(&lag->lock); + return PTR_ERR(group); + } + } + + group->dirty = true; + group->slave_cnt = slave_count; + + /* Group may have been on queue for removal but is now offloadable. */ + group->to_remove = false; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + return 0; +} + +static void +nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_flower_priv *priv; + struct nfp_repr *repr; + unsigned long *flags; + + if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev)) + return; + + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + repr = netdev_priv(netdev); + + /* Verify that the repr is associated with this app. */ + if (repr->app != priv->app) + return; + + repr_priv = repr->app_priv; + flags = &repr_priv->lag_port_flags; + + mutex_lock(&lag->lock); + if (lag_lower_info->link_up) + *flags |= NFP_PORT_LAG_LINK_UP; + else + *flags &= ~NFP_PORT_LAG_LINK_UP; + + if (lag_lower_info->tx_enabled) + *flags |= NFP_PORT_LAG_TX_ENABLED; + else + *flags &= ~NFP_PORT_LAG_TX_ENABLED; + + *flags |= NFP_PORT_LAG_CHANGED; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv, + struct net_device *netdev, + unsigned long event, void *ptr) +{ + struct nfp_fl_lag *lag = &priv->nfp_lag; + int err; + + switch (event) { + case NETDEV_CHANGEUPPER: + err = nfp_fl_lag_changeupper_event(lag, ptr); + if (err) + return NOTIFY_BAD; + return NOTIFY_OK; + case NETDEV_CHANGELOWERSTATE: + nfp_fl_lag_changels_event(lag, netdev, ptr); + return NOTIFY_OK; + case NETDEV_UNREGISTER: + nfp_fl_lag_schedule_group_delete(lag, netdev); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +int nfp_flower_lag_reset(struct nfp_fl_lag *lag) +{ + enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST; + + lag->rst_cfg = true; + return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch); +} + +void nfp_flower_lag_init(struct nfp_fl_lag *lag) +{ + INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work); + INIT_LIST_HEAD(&lag->group_list); + mutex_init(&lag->lock); + ida_init(&lag->ida_handle); + + __skb_queue_head_init(&lag->retrans_skbs); + + /* 0 is a reserved batch version so increment to first valid value. */ + nfp_fl_increment_version(lag); +} + +void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag) +{ + struct nfp_fl_lag_group *entry, *storage; + + cancel_delayed_work_sync(&lag->work); + + __skb_queue_purge(&lag->retrans_skbs); + + /* Remove all groups. */ + mutex_lock(&lag->lock); + list_for_each_entry_safe(entry, storage, &lag->group_list, list) { + list_del(&entry->list); + kfree(entry); + } + mutex_unlock(&lag->lock); + mutex_destroy(&lag->lock); + ida_destroy(&lag->ida_handle); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c new file mode 100644 index 0000000000..83eaa5ae3c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -0,0 +1,1032 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/etherdevice.h> +#include <linux/lockdep.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <net/devlink.h> +#include <net/dst_metadata.h> + +#include "main.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nffw.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_net_repr.h" +#include "../nfp_port.h" +#include "./cmsg.h" + +#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL + +#define NFP_MIN_INT_PORT_ID 1 +#define NFP_MAX_INT_PORT_ID 256 + +static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn) +{ + return "FLOWER"; +} + +static enum devlink_eswitch_mode eswitch_mode_get(struct nfp_app *app) +{ + return DEVLINK_ESWITCH_MODE_SWITCHDEV; +} + +static int +nfp_flower_lookup_internal_port_id(struct nfp_flower_priv *priv, + struct net_device *netdev) +{ + struct net_device *entry; + int i, id = 0; + + rcu_read_lock(); + idr_for_each_entry(&priv->internal_ports.port_ids, entry, i) + if (entry == netdev) { + id = i; + break; + } + rcu_read_unlock(); + + return id; +} + +static int +nfp_flower_get_internal_port_id(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + int id; + + id = nfp_flower_lookup_internal_port_id(priv, netdev); + if (id > 0) + return id; + + idr_preload(GFP_ATOMIC); + spin_lock_bh(&priv->internal_ports.lock); + id = idr_alloc(&priv->internal_ports.port_ids, netdev, + NFP_MIN_INT_PORT_ID, NFP_MAX_INT_PORT_ID, GFP_ATOMIC); + spin_unlock_bh(&priv->internal_ports.lock); + idr_preload_end(); + + return id; +} + +u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app, + struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + int ext_port; + int gid; + + if (nfp_netdev_is_nfp_repr(netdev)) { + return nfp_repr_get_port_id(netdev); + } else if (nfp_flower_internal_port_can_offload(app, netdev)) { + ext_port = nfp_flower_get_internal_port_id(app, netdev); + if (ext_port < 0) + return 0; + + return nfp_flower_internal_port_get_port_id(ext_port); + } else if (netif_is_lag_master(netdev) && + priv->flower_ext_feats & NFP_FL_FEATS_TUNNEL_NEIGH_LAG) { + gid = nfp_flower_lag_get_output_id(app, netdev); + if (gid < 0) + return 0; + + return (NFP_FL_LAG_OUT | gid); + } + + return 0; +} + +static struct net_device * +nfp_flower_get_netdev_from_internal_port_id(struct nfp_app *app, int port_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct net_device *netdev; + + rcu_read_lock(); + netdev = idr_find(&priv->internal_ports.port_ids, port_id); + rcu_read_unlock(); + + return netdev; +} + +static void +nfp_flower_free_internal_port_id(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + int id; + + id = nfp_flower_lookup_internal_port_id(priv, netdev); + if (!id) + return; + + spin_lock_bh(&priv->internal_ports.lock); + idr_remove(&priv->internal_ports.port_ids, id); + spin_unlock_bh(&priv->internal_ports.lock); +} + +static int +nfp_flower_internal_port_event_handler(struct nfp_app *app, + struct net_device *netdev, + unsigned long event) +{ + if (event == NETDEV_UNREGISTER && + nfp_flower_internal_port_can_offload(app, netdev)) + nfp_flower_free_internal_port_id(app, netdev); + + return NOTIFY_OK; +} + +static void nfp_flower_internal_port_init(struct nfp_flower_priv *priv) +{ + spin_lock_init(&priv->internal_ports.lock); + idr_init(&priv->internal_ports.port_ids); +} + +static void nfp_flower_internal_port_cleanup(struct nfp_flower_priv *priv) +{ + idr_destroy(&priv->internal_ports.port_ids); +} + +static struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_lookup(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_non_repr_priv *entry; + + ASSERT_RTNL(); + + list_for_each_entry(entry, &priv->non_repr_priv, list) + if (entry->netdev == netdev) + return entry; + + return NULL; +} + +void +__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv) +{ + non_repr_priv->ref_count++; +} + +struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_non_repr_priv *entry; + + entry = nfp_flower_non_repr_priv_lookup(app, netdev); + if (entry) + goto inc_ref; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->netdev = netdev; + list_add(&entry->list, &priv->non_repr_priv); + +inc_ref: + __nfp_flower_non_repr_priv_get(entry); + return entry; +} + +void +__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv) +{ + if (--non_repr_priv->ref_count) + return; + + list_del(&non_repr_priv->list); + kfree(non_repr_priv); +} + +void +nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_flower_non_repr_priv *entry; + + entry = nfp_flower_non_repr_priv_lookup(app, netdev); + if (!entry) + return; + + __nfp_flower_non_repr_priv_put(entry); +} + +static enum nfp_repr_type +nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port) +{ + switch (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port_id)) { + case NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT: + *port = FIELD_GET(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, + port_id); + return NFP_REPR_TYPE_PHYS_PORT; + + case NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT: + *port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port_id); + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC_TYPE, port_id) == + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF) + return NFP_REPR_TYPE_PF; + else + return NFP_REPR_TYPE_VF; + } + + return __NFP_REPR_TYPE_MAX; +} + +static struct net_device * +nfp_flower_dev_get(struct nfp_app *app, u32 port_id, bool *redir_egress) +{ + enum nfp_repr_type repr_type; + struct nfp_reprs *reprs; + u8 port = 0; + + /* Check if the port is internal. */ + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port_id) == + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT) { + if (redir_egress) + *redir_egress = true; + port = FIELD_GET(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, port_id); + return nfp_flower_get_netdev_from_internal_port_id(app, port); + } + + repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port); + if (repr_type > NFP_REPR_TYPE_MAX) + return NULL; + + reprs = rcu_dereference(app->reprs[repr_type]); + if (!reprs) + return NULL; + + if (port >= reprs->num_reprs) + return NULL; + + return rcu_dereference(reprs->reprs[port]); +} + +static int +nfp_flower_reprs_reify(struct nfp_app *app, enum nfp_repr_type type, + bool exists) +{ + struct nfp_reprs *reprs; + int i, err, count = 0; + + reprs = rcu_dereference_protected(app->reprs[type], + nfp_app_is_locked(app)); + if (!reprs) + return 0; + + for (i = 0; i < reprs->num_reprs; i++) { + struct net_device *netdev; + + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) { + struct nfp_repr *repr = netdev_priv(netdev); + + err = nfp_flower_cmsg_portreify(repr, exists); + if (err) + return err; + count++; + } + } + + return count; +} + +static int +nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!tot_repl) + return 0; + + assert_nfp_app_locked(app); + if (!wait_event_timeout(priv->reify_wait_queue, + atomic_read(replies) >= tot_repl, + NFP_FL_REPLY_TIMEOUT)) { + nfp_warn(app->cpp, "Not all reprs responded to reify\n"); + return -EIO; + } + + return 0; +} + +static int +nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr) +{ + int err; + + err = nfp_flower_cmsg_portmod(repr, true, repr->netdev->mtu, false); + if (err) + return err; + + netif_tx_wake_all_queues(repr->netdev); + + return 0; +} + +static int +nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr) +{ + netif_tx_disable(repr->netdev); + + return nfp_flower_cmsg_portmod(repr, false, repr->netdev->mtu, false); +} + +static void +nfp_flower_repr_netdev_clean(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + kfree(repr->app_priv); +} + +static void +nfp_flower_repr_netdev_preclean(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_flower_priv *priv = app->priv; + atomic_t *replies = &priv->reify_replies; + int err; + + atomic_set(replies, 0); + err = nfp_flower_cmsg_portreify(repr, false); + if (err) { + nfp_warn(app->cpp, "Failed to notify firmware about repr destruction\n"); + return; + } + + nfp_flower_wait_repr_reify(app, replies, 1); +} + +static void nfp_flower_sriov_disable(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv->nn) + return; + + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF); +} + +static int +nfp_flower_spawn_vnic_reprs(struct nfp_app *app, + enum nfp_flower_cmsg_port_vnic_type vnic_type, + enum nfp_repr_type repr_type, unsigned int cnt) +{ + u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); + struct nfp_flower_priv *priv = app->priv; + atomic_t *replies = &priv->reify_replies; + struct nfp_flower_repr_priv *repr_priv; + enum nfp_port_type port_type; + struct nfp_repr *nfp_repr; + struct nfp_reprs *reprs; + int i, err, reify_cnt; + const u8 queue = 0; + + port_type = repr_type == NFP_REPR_TYPE_PF ? NFP_PORT_PF_PORT : + NFP_PORT_VF_PORT; + + reprs = nfp_reprs_alloc(cnt); + if (!reprs) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + struct net_device *repr; + struct nfp_port *port; + u32 port_id; + + repr = nfp_repr_alloc(app); + if (!repr) { + err = -ENOMEM; + goto err_reprs_clean; + } + + repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL); + if (!repr_priv) { + err = -ENOMEM; + nfp_repr_free(repr); + goto err_reprs_clean; + } + + nfp_repr = netdev_priv(repr); + nfp_repr->app_priv = repr_priv; + repr_priv->nfp_repr = nfp_repr; + + /* For now we only support 1 PF */ + WARN_ON(repr_type == NFP_REPR_TYPE_PF && i); + + port = nfp_port_alloc(app, port_type, repr); + if (IS_ERR(port)) { + err = PTR_ERR(port); + kfree(repr_priv); + nfp_repr_free(repr); + goto err_reprs_clean; + } + if (repr_type == NFP_REPR_TYPE_PF) { + port->pf_id = i; + port->vnic = priv->nn->dp.ctrl_bar; + } else { + port->pf_id = 0; + port->vf_id = i; + port->vnic = + app->pf->vf_cfg_mem + i * NFP_NET_CFG_BAR_SZ; + } + + eth_hw_addr_random(repr); + + port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type, + i, queue); + err = nfp_repr_init(app, repr, + port_id, port, priv->nn->dp.netdev); + if (err) { + kfree(repr_priv); + nfp_port_free(port); + nfp_repr_free(repr); + goto err_reprs_clean; + } + + RCU_INIT_POINTER(reprs->reprs[i], repr); + nfp_info(app->cpp, "%s%d Representor(%s) created\n", + repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i, + repr->name); + } + + nfp_app_reprs_set(app, repr_type, reprs); + + atomic_set(replies, 0); + reify_cnt = nfp_flower_reprs_reify(app, repr_type, true); + if (reify_cnt < 0) { + err = reify_cnt; + nfp_warn(app->cpp, "Failed to notify firmware about repr creation\n"); + goto err_reprs_remove; + } + + err = nfp_flower_wait_repr_reify(app, replies, reify_cnt); + if (err) + goto err_reprs_remove; + + return 0; +err_reprs_remove: + reprs = nfp_app_reprs_set(app, repr_type, NULL); +err_reprs_clean: + nfp_reprs_clean_and_free(app, reprs); + return err; +} + +static int nfp_flower_sriov_enable(struct nfp_app *app, int num_vfs) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv->nn) + return 0; + + return nfp_flower_spawn_vnic_reprs(app, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF, + NFP_REPR_TYPE_VF, num_vfs); +} + +static int +nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) +{ + struct nfp_eth_table *eth_tbl = app->pf->eth_tbl; + atomic_t *replies = &priv->reify_replies; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *nfp_repr; + struct sk_buff *ctrl_skb; + struct nfp_reprs *reprs; + int err, reify_cnt; + unsigned int i; + + ctrl_skb = nfp_flower_cmsg_mac_repr_start(app, eth_tbl->count); + if (!ctrl_skb) + return -ENOMEM; + + reprs = nfp_reprs_alloc(eth_tbl->max_index + 1); + if (!reprs) { + err = -ENOMEM; + goto err_free_ctrl_skb; + } + + for (i = 0; i < eth_tbl->count; i++) { + unsigned int phys_port = eth_tbl->ports[i].index; + struct net_device *repr; + struct nfp_port *port; + u32 cmsg_port_id; + + repr = nfp_repr_alloc(app); + if (!repr) { + err = -ENOMEM; + goto err_reprs_clean; + } + + repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL); + if (!repr_priv) { + err = -ENOMEM; + nfp_repr_free(repr); + goto err_reprs_clean; + } + + nfp_repr = netdev_priv(repr); + nfp_repr->app_priv = repr_priv; + repr_priv->nfp_repr = nfp_repr; + + port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr); + if (IS_ERR(port)) { + err = PTR_ERR(port); + kfree(repr_priv); + nfp_repr_free(repr); + goto err_reprs_clean; + } + err = nfp_port_init_phy_port(app->pf, app, port, i); + if (err) { + kfree(repr_priv); + nfp_port_free(port); + nfp_repr_free(repr); + goto err_reprs_clean; + } + + SET_NETDEV_DEV(repr, &priv->nn->pdev->dev); + nfp_net_get_mac_addr(app->pf, repr, port); + + cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port); + err = nfp_repr_init(app, repr, + cmsg_port_id, port, priv->nn->dp.netdev); + if (err) { + kfree(repr_priv); + nfp_port_free(port); + nfp_repr_free(repr); + goto err_reprs_clean; + } + + nfp_flower_cmsg_mac_repr_add(ctrl_skb, i, + eth_tbl->ports[i].nbi, + eth_tbl->ports[i].base, + phys_port); + + RCU_INIT_POINTER(reprs->reprs[phys_port], repr); + nfp_info(app->cpp, "Phys Port %d Representor(%s) created\n", + phys_port, repr->name); + } + + nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); + + /* The REIFY/MAC_REPR control messages should be sent after the MAC + * representors are registered using nfp_app_reprs_set(). This is + * because the firmware may respond with control messages for the + * MAC representors, f.e. to provide the driver with information + * about their state, and without registration the driver will drop + * any such messages. + */ + atomic_set(replies, 0); + reify_cnt = nfp_flower_reprs_reify(app, NFP_REPR_TYPE_PHYS_PORT, true); + if (reify_cnt < 0) { + err = reify_cnt; + nfp_warn(app->cpp, "Failed to notify firmware about repr creation\n"); + goto err_reprs_remove; + } + + err = nfp_flower_wait_repr_reify(app, replies, reify_cnt); + if (err) + goto err_reprs_remove; + + nfp_ctrl_tx(app->ctrl, ctrl_skb); + + return 0; +err_reprs_remove: + reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, NULL); +err_reprs_clean: + nfp_reprs_clean_and_free(app, reprs); +err_free_ctrl_skb: + kfree_skb(ctrl_skb); + return err; +} + +static int nfp_flower_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, + unsigned int id) +{ + if (id > 0) { + nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n"); + goto err_invalid_port; + } + + eth_hw_addr_random(nn->dp.netdev); + netif_keep_dst(nn->dp.netdev); + nn->vnic_no_name = true; + + return 0; + +err_invalid_port: + nn->port = nfp_port_alloc(app, NFP_PORT_INVALID, nn->dp.netdev); + return PTR_ERR_OR_ZERO(nn->port); +} + +static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_flower_priv *priv = app->priv; + + if (app->pf->num_vfs) + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); + + priv->nn = NULL; +} + +static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_flower_priv *priv = app->priv; + int err; + + priv->nn = nn; + + err = nfp_flower_spawn_phy_reprs(app, app->priv); + if (err) + goto err_clear_nn; + + err = nfp_flower_spawn_vnic_reprs(app, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF, + NFP_REPR_TYPE_PF, 1); + if (err) + goto err_destroy_reprs_phy; + + if (app->pf->num_vfs) { + err = nfp_flower_spawn_vnic_reprs(app, + NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF, + NFP_REPR_TYPE_VF, + app->pf->num_vfs); + if (err) + goto err_destroy_reprs_pf; + } + + return 0; + +err_destroy_reprs_pf: + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF); +err_destroy_reprs_phy: + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); +err_clear_nn: + priv->nn = NULL; + return err; +} + +static void nfp_flower_wait_host_bit(struct nfp_app *app) +{ + unsigned long err_at; + u64 feat; + int err; + + /* Wait for HOST_ACK flag bit to propagate */ + err_at = jiffies + msecs_to_jiffies(100); + do { + feat = nfp_rtsym_read_le(app->pf->rtbl, + "_abi_flower_combined_features_global", + &err); + if (time_is_before_eq_jiffies(err_at)) { + nfp_warn(app->cpp, + "HOST_ACK bit not propagated in FW.\n"); + break; + } + usleep_range(1000, 2000); + } while (!err && !(feat & NFP_FL_FEATS_HOST_ACK)); + + if (err) + nfp_warn(app->cpp, + "Could not read global features entry from FW\n"); +} + +static int nfp_flower_sync_feature_bits(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + int err; + + /* Tell the firmware of the host supported features. */ + err = nfp_rtsym_write_le(app->pf->rtbl, "_abi_flower_host_mask", + app_priv->flower_ext_feats | + NFP_FL_FEATS_HOST_ACK); + if (!err) + nfp_flower_wait_host_bit(app); + else if (err != -ENOENT) + return err; + + /* Tell the firmware that the driver supports lag. */ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_balance_sync_enable", 1); + if (!err) { + app_priv->flower_en_feats |= NFP_FL_ENABLE_LAG; + nfp_flower_lag_init(&app_priv->nfp_lag); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, "LAG not supported by FW.\n"); + } else { + return err; + } + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) { + /* Tell the firmware that the driver supports flow merging. */ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_merge_hint_enable", 1); + if (!err) { + app_priv->flower_en_feats |= NFP_FL_ENABLE_FLOW_MERGE; + nfp_flower_internal_port_init(app_priv); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, + "Flow merge not supported by FW.\n"); + } else { + return err; + } + } else { + nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n"); + } + + return 0; +} + +static int nfp_flower_init(struct nfp_app *app) +{ + u64 version, features, ctx_count, num_mems; + const struct nfp_pf *pf = app->pf; + struct nfp_flower_priv *app_priv; + int err; + + if (!pf->eth_tbl) { + nfp_warn(app->cpp, "FlowerNIC requires eth table\n"); + return -EINVAL; + } + + if (!pf->mac_stats_bar) { + nfp_warn(app->cpp, "FlowerNIC requires mac_stats BAR\n"); + return -EINVAL; + } + + if (!pf->vf_cfg_bar) { + nfp_warn(app->cpp, "FlowerNIC requires vf_cfg BAR\n"); + return -EINVAL; + } + + version = nfp_rtsym_read_le(app->pf->rtbl, "hw_flower_version", &err); + if (err) { + nfp_warn(app->cpp, "FlowerNIC requires hw_flower_version memory symbol\n"); + return err; + } + + num_mems = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_SPLIT", + &err); + if (err) { + nfp_warn(app->cpp, + "FlowerNIC: unsupported host context memory: %d\n", + err); + err = 0; + num_mems = 1; + } + + if (!FIELD_FIT(NFP_FL_STAT_ID_MU_NUM, num_mems) || !num_mems) { + nfp_warn(app->cpp, + "FlowerNIC: invalid host context memory: %llu\n", + num_mems); + return -EINVAL; + } + + ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT", + &err); + if (err) { + nfp_warn(app->cpp, + "FlowerNIC: unsupported host context count: %d\n", + err); + err = 0; + ctx_count = BIT(17); + } + + /* We need to ensure hardware has enough flower capabilities. */ + if (version != NFP_FLOWER_ALLOWED_VER) { + nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n"); + return -EINVAL; + } + + app_priv = vzalloc(sizeof(struct nfp_flower_priv)); + if (!app_priv) + return -ENOMEM; + + app_priv->total_mem_units = num_mems; + app_priv->active_mem_unit = 0; + app_priv->stats_ring_size = roundup_pow_of_two(ctx_count); + app->priv = app_priv; + app_priv->app = app; + skb_queue_head_init(&app_priv->cmsg_skbs_high); + skb_queue_head_init(&app_priv->cmsg_skbs_low); + INIT_WORK(&app_priv->cmsg_work, nfp_flower_cmsg_process_rx); + init_waitqueue_head(&app_priv->reify_wait_queue); + + init_waitqueue_head(&app_priv->mtu_conf.wait_q); + spin_lock_init(&app_priv->mtu_conf.lock); + + err = nfp_flower_metadata_init(app, ctx_count, num_mems); + if (err) + goto err_free_app_priv; + + /* Extract the extra features supported by the firmware. */ + features = nfp_rtsym_read_le(app->pf->rtbl, + "_abi_flower_extra_features", &err); + if (err) + app_priv->flower_ext_feats = 0; + else + app_priv->flower_ext_feats = features & NFP_FL_FEATS_HOST; + + err = nfp_flower_sync_feature_bits(app); + if (err) + goto err_cleanup; + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) + nfp_flower_qos_init(app); + + INIT_LIST_HEAD(&app_priv->indr_block_cb_priv); + INIT_LIST_HEAD(&app_priv->non_repr_priv); + app_priv->pre_tun_rule_cnt = 0; + + return 0; + +err_cleanup: + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) + nfp_flower_lag_cleanup(&app_priv->nfp_lag); + nfp_flower_metadata_cleanup(app); +err_free_app_priv: + vfree(app->priv); + return err; +} + +static void nfp_flower_clean(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + + skb_queue_purge(&app_priv->cmsg_skbs_high); + skb_queue_purge(&app_priv->cmsg_skbs_low); + flush_work(&app_priv->cmsg_work); + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) + nfp_flower_qos_cleanup(app); + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) + nfp_flower_lag_cleanup(&app_priv->nfp_lag); + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) + nfp_flower_internal_port_cleanup(app_priv); + + nfp_flower_metadata_cleanup(app); + vfree(app->priv); + app->priv = NULL; +} + +static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv) +{ + bool ret; + + spin_lock_bh(&app_priv->mtu_conf.lock); + ret = app_priv->mtu_conf.ack; + spin_unlock_bh(&app_priv->mtu_conf.lock); + + return ret; +} + +static int +nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev, + int new_mtu) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_repr *repr = netdev_priv(netdev); + int err; + + /* Only need to config FW for physical port MTU change. */ + if (repr->port->type != NFP_PORT_PHYS_PORT) + return 0; + + if (!(app_priv->flower_ext_feats & NFP_FL_NBI_MTU_SETTING)) { + nfp_err(app->cpp, "Physical port MTU setting not supported\n"); + return -EINVAL; + } + + spin_lock_bh(&app_priv->mtu_conf.lock); + app_priv->mtu_conf.ack = false; + app_priv->mtu_conf.requested_val = new_mtu; + app_priv->mtu_conf.portnum = repr->dst->u.port_info.port_id; + spin_unlock_bh(&app_priv->mtu_conf.lock); + + err = nfp_flower_cmsg_portmod(repr, netif_carrier_ok(netdev), new_mtu, + true); + if (err) { + spin_lock_bh(&app_priv->mtu_conf.lock); + app_priv->mtu_conf.requested_val = 0; + spin_unlock_bh(&app_priv->mtu_conf.lock); + return err; + } + + /* Wait for fw to ack the change. */ + if (!wait_event_timeout(app_priv->mtu_conf.wait_q, + nfp_flower_check_ack(app_priv), + NFP_FL_REPLY_TIMEOUT)) { + spin_lock_bh(&app_priv->mtu_conf.lock); + app_priv->mtu_conf.requested_val = 0; + spin_unlock_bh(&app_priv->mtu_conf.lock); + nfp_warn(app->cpp, "MTU change not verified with fw\n"); + return -EIO; + } + + return 0; +} + +static int nfp_flower_start(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + int err; + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + err = nfp_flower_lag_reset(&app_priv->nfp_lag); + if (err) + return err; + } + + err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app); + if (err) + return err; + + err = nfp_tunnel_config_start(app); + if (err) + goto err_tunnel_config; + + return 0; + +err_tunnel_config: + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); + return err; +} + +static void nfp_flower_stop(struct nfp_app *app) +{ + nfp_tunnel_config_stop(app); + + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); +} + +static int +nfp_flower_netdev_event(struct nfp_app *app, struct net_device *netdev, + unsigned long event, void *ptr) +{ + struct nfp_flower_priv *app_priv = app->priv; + int ret; + + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { + ret = nfp_flower_lag_netdev_event(app_priv, netdev, event, ptr); + if (ret & NOTIFY_STOP_MASK) + return ret; + } + + ret = nfp_flower_internal_port_event_handler(app, netdev, event); + if (ret & NOTIFY_STOP_MASK) + return ret; + + return nfp_tunnel_mac_event_handler(app, netdev, event, ptr); +} + +const struct nfp_app_type app_flower = { + .id = NFP_APP_FLOWER_NIC, + .name = "flower", + + .ctrl_cap_mask = ~0U, + .ctrl_has_meta = true, + + .extra_cap = nfp_flower_extra_cap, + + .init = nfp_flower_init, + .clean = nfp_flower_clean, + + .repr_change_mtu = nfp_flower_repr_change_mtu, + + .vnic_alloc = nfp_flower_vnic_alloc, + .vnic_init = nfp_flower_vnic_init, + .vnic_clean = nfp_flower_vnic_clean, + + .repr_preclean = nfp_flower_repr_netdev_preclean, + .repr_clean = nfp_flower_repr_netdev_clean, + + .repr_open = nfp_flower_repr_netdev_open, + .repr_stop = nfp_flower_repr_netdev_stop, + + .start = nfp_flower_start, + .stop = nfp_flower_stop, + + .netdev_event = nfp_flower_netdev_event, + + .ctrl_msg_rx = nfp_flower_cmsg_rx, + + .sriov_enable = nfp_flower_sriov_enable, + .sriov_disable = nfp_flower_sriov_disable, + + .eswitch_mode_get = eswitch_mode_get, + .dev_get = nfp_flower_dev_get, + + .setup_tc = nfp_flower_setup_tc, +}; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h new file mode 100644 index 0000000000..2b7c947ff4 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -0,0 +1,732 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#ifndef __NFP_FLOWER_H__ +#define __NFP_FLOWER_H__ 1 + +#include "cmsg.h" +#include "../nfp_net.h" + +#include <linux/circ_buf.h> +#include <linux/hashtable.h> +#include <linux/rhashtable.h> +#include <linux/time64.h> +#include <linux/types.h> +#include <net/flow_offload.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> +#include <net/tcp.h> +#include <linux/workqueue.h> +#include <linux/idr.h> + +struct nfp_fl_pre_lag; +struct net_device; +struct nfp_app; + +#define NFP_FL_STAT_ID_MU_NUM GENMASK(31, 22) +#define NFP_FL_STAT_ID_STAT GENMASK(21, 0) + +#define NFP_FL_STATS_ELEM_RS sizeof_field(struct nfp_fl_stats_id, \ + init_unalloc) +#define NFP_FLOWER_MASK_ENTRY_RS 256 +#define NFP_FLOWER_MASK_ELEMENT_RS 1 +#define NFP_FLOWER_MASK_HASH_BITS 10 + +#define NFP_FLOWER_KEY_MAX_LW 32 + +#define NFP_FL_META_FLAG_MANAGE_MASK BIT(7) + +#define NFP_FL_MASK_REUSE_TIME_NS 40000 +#define NFP_FL_MASK_ID_LOCATION 1 + +/* Extra features bitmap. */ +#define NFP_FL_FEATS_GENEVE BIT(0) +#define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT BIT(2) +#define NFP_FL_FEATS_VLAN_PCP BIT(3) +#define NFP_FL_FEATS_VF_RLIM BIT(4) +#define NFP_FL_FEATS_FLOW_MOD BIT(5) +#define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) +#define NFP_FL_FEATS_IPV6_TUN BIT(7) +#define NFP_FL_FEATS_VLAN_QINQ BIT(8) +#define NFP_FL_FEATS_QOS_PPS BIT(9) +#define NFP_FL_FEATS_QOS_METER BIT(10) +#define NFP_FL_FEATS_DECAP_V2 BIT(11) +#define NFP_FL_FEATS_TUNNEL_NEIGH_LAG BIT(12) +#define NFP_FL_FEATS_HOST_ACK BIT(31) + +#define NFP_FL_ENABLE_FLOW_MERGE BIT(0) +#define NFP_FL_ENABLE_LAG BIT(1) + +#define NFP_FL_FEATS_HOST \ + (NFP_FL_FEATS_GENEVE | \ + NFP_FL_NBI_MTU_SETTING | \ + NFP_FL_FEATS_GENEVE_OPT | \ + NFP_FL_FEATS_VLAN_PCP | \ + NFP_FL_FEATS_VF_RLIM | \ + NFP_FL_FEATS_FLOW_MOD | \ + NFP_FL_FEATS_PRE_TUN_RULES | \ + NFP_FL_FEATS_IPV6_TUN | \ + NFP_FL_FEATS_VLAN_QINQ | \ + NFP_FL_FEATS_QOS_PPS | \ + NFP_FL_FEATS_QOS_METER | \ + NFP_FL_FEATS_DECAP_V2 | \ + NFP_FL_FEATS_TUNNEL_NEIGH_LAG) + +struct nfp_fl_mask_id { + struct circ_buf mask_id_free_list; + ktime_t *last_used; + u8 init_unallocated; +}; + +struct nfp_fl_stats_id { + struct circ_buf free_list; + u32 init_unalloc; + u8 repeated_em_count; +}; + +/** + * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads + * @offloaded_macs: Hashtable of the offloaded MAC addresses + * @ipv4_off_list: List of IPv4 addresses to offload + * @ipv6_off_list: List of IPv6 addresses to offload + * @ipv4_off_lock: Lock for the IPv4 address list + * @ipv6_off_lock: Lock for the IPv6 address list + * @mac_off_ids: IDA to manage id assignment for offloaded MACs + * @neigh_nb: Notifier to monitor neighbour state + */ +struct nfp_fl_tunnel_offloads { + struct rhashtable offloaded_macs; + struct list_head ipv4_off_list; + struct list_head ipv6_off_list; + struct mutex ipv4_off_lock; + struct mutex ipv6_off_lock; + struct ida mac_off_ids; + struct notifier_block neigh_nb; +}; + +/** + * struct nfp_tun_neigh_lag - lag info + * @lag_version: lag version + * @lag_instance: lag instance + */ +struct nfp_tun_neigh_lag { + u8 lag_version[3]; + u8 lag_instance; +}; + +/** + * struct nfp_tun_neigh - basic neighbour data + * @dst_addr: Destination MAC address + * @src_addr: Source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_neigh_ext - extended neighbour data + * @vlan_tpid: VLAN_TPID match field + * @vlan_tci: VLAN_TCI match field + * @host_ctx: Host context ID to be saved here + */ +struct nfp_tun_neigh_ext { + __be16 vlan_tpid; + __be16 vlan_tci; + __be32 host_ctx; +}; + +/** + * struct nfp_tun_neigh_v4 - neighbour/route entry on the NFP for IPv4 + * @dst_ipv4: Destination IPv4 address + * @src_ipv4: Source IPv4 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + * @lag: lag port info + */ +struct nfp_tun_neigh_v4 { + __be32 dst_ipv4; + __be32 src_ipv4; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; + struct nfp_tun_neigh_lag lag; +}; + +/** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP for IPv6 + * @dst_ipv6: Destination IPv6 address + * @src_ipv6: Source IPv6 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + * @lag: lag port info + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; + struct nfp_tun_neigh_lag lag; +}; + +/** + * struct nfp_neigh_entry + * @neigh_cookie: Cookie for hashtable lookup + * @ht_node: rhash_head entry for hashtable + * @list_head: Needed as member of linked_nn_entries list + * @payload: The neighbour info payload + * @flow: Linked flow rule + * @is_ipv6: Flag to indicate if payload is ipv6 or ipv4 + */ +struct nfp_neigh_entry { + unsigned long neigh_cookie; + struct rhash_head ht_node; + struct list_head list_head; + char *payload; + struct nfp_predt_entry *flow; + bool is_ipv6; +}; + +/** + * struct nfp_predt_entry + * @list_head: List head to attach to predt_list + * @flow_pay: Direct link to flow_payload + * @nn_list: List of linked nfp_neigh_entries + */ +struct nfp_predt_entry { + struct list_head list_head; + struct nfp_fl_payload *flow_pay; + struct list_head nn_list; +}; + +/** + * struct nfp_mtu_conf - manage MTU setting + * @portnum: NFP port number of repr with requested MTU change + * @requested_val: MTU value requested for repr + * @ack: Received ack that MTU has been correctly set + * @wait_q: Wait queue for MTU acknowledgements + * @lock: Lock for setting/reading MTU variables + */ +struct nfp_mtu_conf { + u32 portnum; + unsigned int requested_val; + bool ack; + wait_queue_head_t wait_q; + spinlock_t lock; +}; + +/** + * struct nfp_fl_lag - Flower APP priv data for link aggregation + * @work: Work queue for writing configs to the HW + * @lock: Lock to protect lag_group_list + * @group_list: List of all master/slave groups offloaded + * @ida_handle: IDA to handle group ids + * @pkt_num: Incremented for each config packet sent + * @batch_ver: Incremented for each batch of config packets + * @global_inst: Instance allocator for groups + * @rst_cfg: Marker to reset HW LAG config + * @retrans_skbs: Cmsgs that could not be processed by HW and require + * retransmission + */ +struct nfp_fl_lag { + struct delayed_work work; + struct mutex lock; + struct list_head group_list; + struct ida ida_handle; + unsigned int pkt_num; + unsigned int batch_ver; + u8 global_inst; + bool rst_cfg; + struct sk_buff_head retrans_skbs; +}; + +/** + * struct nfp_fl_internal_ports - Flower APP priv data for additional ports + * @port_ids: Assignment of ids to any additional ports + * @lock: Lock for extra ports list + */ +struct nfp_fl_internal_ports { + struct idr port_ids; + spinlock_t lock; +}; + +/** + * struct nfp_flower_priv - Flower APP per-vNIC priv data + * @app: Back pointer to app + * @nn: Pointer to vNIC + * @mask_id_seed: Seed used for mask hash table + * @flower_version: HW version of flower + * @flower_ext_feats: Bitmap of extra features the HW supports + * @flower_en_feats: Bitmap of features enabled by HW + * @stats_ids: List of free stats ids + * @mask_ids: List of free mask ids + * @mask_table: Hash table used to store masks + * @stats_ring_size: Maximum number of allowed stats ids + * @flow_table: Hash table used to store flower rules + * @stats: Stored stats updates for flower rules + * @stats_lock: Lock for flower rule stats updates + * @stats_ctx_table: Hash table to map stats contexts to its flow rule + * @cmsg_work: Workqueue for control messages processing + * @cmsg_skbs_high: List of higher priority skbs for control message + * processing + * @cmsg_skbs_low: List of lower priority skbs for control message + * processing + * @tun: Tunnel offload data + * @reify_replies: atomically stores the number of replies received + * from firmware for repr reify + * @reify_wait_queue: wait queue for repr reify response counting + * @mtu_conf: Configuration of repr MTU value + * @nfp_lag: Link aggregation data block + * @indr_block_cb_priv: List of priv data passed to indirect block cbs + * @non_repr_priv: List of offloaded non-repr ports and their priv data + * @active_mem_unit: Current active memory unit for flower rules + * @total_mem_units: Total number of available memory units for flower rules + * @internal_ports: Internal port ids used in offloaded rules + * @qos_stats_work: Workqueue for qos stats processing + * @qos_rate_limiters: Current active qos rate limiters + * @qos_stats_lock: Lock on qos stats updates + * @meter_stats_lock: Lock on meter stats updates + * @meter_table: Hash table used to store the meter table + * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded + * @merge_table: Hash table to store merged flows + * @ct_zone_table: Hash table used to store the different zones + * @ct_zone_wc: Special zone entry for wildcarded zone matches + * @ct_map_table: Hash table used to referennce ct flows + * @predt_list: List to keep track of decap pretun flows + * @neigh_table: Table to keep track of neighbor entries + * @predt_lock: Lock to serialise predt/neigh table updates + * @nfp_fl_lock: Lock to protect the flow offload operation + */ +struct nfp_flower_priv { + struct nfp_app *app; + struct nfp_net *nn; + u32 mask_id_seed; + u64 flower_version; + u64 flower_ext_feats; + u8 flower_en_feats; + struct nfp_fl_stats_id stats_ids; + struct nfp_fl_mask_id mask_ids; + DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS); + u32 stats_ring_size; + struct rhashtable flow_table; + struct nfp_fl_stats *stats; + spinlock_t stats_lock; /* lock stats */ + struct rhashtable stats_ctx_table; + struct work_struct cmsg_work; + struct sk_buff_head cmsg_skbs_high; + struct sk_buff_head cmsg_skbs_low; + struct nfp_fl_tunnel_offloads tun; + atomic_t reify_replies; + wait_queue_head_t reify_wait_queue; + struct nfp_mtu_conf mtu_conf; + struct nfp_fl_lag nfp_lag; + struct list_head indr_block_cb_priv; + struct list_head non_repr_priv; + unsigned int active_mem_unit; + unsigned int total_mem_units; + struct nfp_fl_internal_ports internal_ports; + struct delayed_work qos_stats_work; + unsigned int qos_rate_limiters; + spinlock_t qos_stats_lock; /* Protect the qos stats */ + struct mutex meter_stats_lock; /* Protect the meter stats */ + struct rhashtable meter_table; + int pre_tun_rule_cnt; + struct rhashtable merge_table; + struct rhashtable ct_zone_table; + struct nfp_fl_ct_zone_entry *ct_zone_wc; + struct rhashtable ct_map_table; + struct list_head predt_list; + struct rhashtable neigh_table; + spinlock_t predt_lock; /* Lock to serialise predt/neigh table updates */ + struct mutex nfp_fl_lock; /* Protect the flow operation */ +}; + +/** + * struct nfp_fl_qos - Flower APP priv data for quality of service + * @netdev_port_id: NFP port number of repr with qos info + * @curr_stats: Currently stored stats updates for qos info + * @prev_stats: Previously stored updates for qos info + * @last_update: Stored time when last stats were updated + */ +struct nfp_fl_qos { + u32 netdev_port_id; + struct nfp_stat_pair curr_stats; + struct nfp_stat_pair prev_stats; + u64 last_update; +}; + +/** + * struct nfp_flower_repr_priv - Flower APP per-repr priv data + * @nfp_repr: Back pointer to nfp_repr + * @lag_port_flags: Extended port flags to record lag state of repr + * @mac_offloaded: Flag indicating a MAC address is offloaded for repr + * @offloaded_mac_addr: MAC address that has been offloaded for repr + * @block_shared: Flag indicating if offload applies to shared blocks + * @mac_list: List entry of reprs that share the same offloaded MAC + * @qos_table: Stored info on filters implementing qos + * @on_bridge: Indicates if the repr is attached to a bridge + */ +struct nfp_flower_repr_priv { + struct nfp_repr *nfp_repr; + unsigned long lag_port_flags; + bool mac_offloaded; + u8 offloaded_mac_addr[ETH_ALEN]; + bool block_shared; + struct list_head mac_list; + struct nfp_fl_qos qos_table; + bool on_bridge; +}; + +/** + * struct nfp_flower_non_repr_priv - Priv data for non-repr offloaded ports + * @list: List entry of offloaded reprs + * @netdev: Pointer to non-repr net_device + * @ref_count: Number of references held for this priv data + * @mac_offloaded: Flag indicating a MAC address is offloaded for device + * @offloaded_mac_addr: MAC address that has been offloaded for dev + */ +struct nfp_flower_non_repr_priv { + struct list_head list; + struct net_device *netdev; + int ref_count; + bool mac_offloaded; + u8 offloaded_mac_addr[ETH_ALEN]; +}; + +struct nfp_fl_key_ls { + u32 key_layer_two; + u8 key_layer; + int key_size; +}; + +struct nfp_fl_rule_metadata { + u8 key_len; + u8 mask_len; + u8 act_len; + u8 flags; + __be32 host_ctx_id; + __be64 host_cookie __packed; + __be64 flow_version __packed; + __be32 shortcut; +}; + +struct nfp_fl_stats { + u64 pkts; + u64 bytes; + u64 used; +}; + +/** + * struct nfp_ipv6_addr_entry - cached IPv6 addresses + * @ipv6_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv6_addr_entry { + struct in6_addr ipv6_addr; + int ref_count; + struct list_head list; +}; + +struct nfp_fl_payload { + struct nfp_fl_rule_metadata meta; + unsigned long tc_flower_cookie; + struct rhash_head fl_node; + struct rcu_head rcu; + __be32 nfp_tun_ipv4_addr; + struct nfp_ipv6_addr_entry *nfp_tun_ipv6; + struct net_device *ingress_dev; + char *unmasked_data; + char *mask_data; + char *action_data; + struct list_head linked_flows; + bool in_hw; + struct { + struct nfp_predt_entry *predt; + struct net_device *dev; + __be16 vlan_tpid; + __be16 vlan_tci; + __be16 port_idx; + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; + bool is_ipv6; + } pre_tun_rule; +}; + +struct nfp_fl_payload_link { + /* A link contains a pointer to a merge flow and an associated sub_flow. + * Each merge flow will feature in 2 links to its underlying sub_flows. + * A sub_flow will have at least 1 link to a merge flow or more if it + * has been used to create multiple merge flows. + * + * For a merge flow, 'linked_flows' in its nfp_fl_payload struct lists + * all links to sub_flows (sub_flow.flow) via merge.list. + * For a sub_flow, 'linked_flows' gives all links to merge flows it has + * formed (merge_flow.flow) via sub_flow.list. + */ + struct { + struct list_head list; + struct nfp_fl_payload *flow; + } merge_flow, sub_flow; +}; + +extern const struct rhashtable_params nfp_flower_table_params; +extern const struct rhashtable_params merge_table_params; +extern const struct rhashtable_params neigh_table_params; + +struct nfp_merge_info { + u64 parent_ctx; + struct rhash_head ht_node; +}; + +struct nfp_fl_stats_frame { + __be32 stats_con_id; + __be32 pkt_count; + __be64 byte_count; + __be64 stats_cookie; +}; + +struct nfp_meter_stats_entry { + u64 pkts; + u64 bytes; + u64 drops; +}; + +struct nfp_meter_entry { + struct rhash_head ht_node; + u32 meter_id; + bool bps; + u32 rate; + u32 burst; + u64 used; + struct nfp_meter_stats { + u64 update; + struct nfp_meter_stats_entry curr; + struct nfp_meter_stats_entry prev; + } stats; +}; + +enum nfp_meter_op { + NFP_METER_ADD, + NFP_METER_DEL, +}; + +static inline bool +nfp_flower_internal_port_can_offload(struct nfp_app *app, + struct net_device *netdev) +{ + struct nfp_flower_priv *app_priv = app->priv; + + if (!(app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE)) + return false; + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + + return false; +} + +/* The address of the merged flow acts as its cookie. + * Cookies supplied to us by TC flower are also addresses to allocated + * memory and thus this scheme should not generate any collisions. + */ +static inline bool nfp_flower_is_merge_flow(struct nfp_fl_payload *flow_pay) +{ + return flow_pay->tc_flower_cookie == (unsigned long)flow_pay; +} + +static inline bool nfp_flower_is_supported_bridge(struct net_device *netdev) +{ + return netif_is_ovs_master(netdev); +} + +int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, + unsigned int host_ctx_split); +void nfp_flower_metadata_cleanup(struct nfp_app *app); + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data); +int nfp_flower_merge_offloaded_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2); +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type); +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext); +int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule); +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct flow_rule *rule); +void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, struct flow_rule *rule); +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule); +int nfp_flower_compile_flow_match(struct nfp_app *app, + struct flow_rule *rule, + struct nfp_fl_key_ls *key_ls, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); +int nfp_flower_compile_action(struct nfp_app *app, + struct flow_rule *rule, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack); +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev, + struct netlink_ext_ack *extack); +void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv, + struct nfp_fl_payload *nfp_flow); +int nfp_modify_flow_metadata(struct nfp_app *app, + struct nfp_fl_payload *nfp_flow); + +struct nfp_fl_payload * +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev); +struct nfp_fl_payload * +nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id); +struct nfp_fl_payload * +nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); + +void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); + +int nfp_tunnel_config_start(struct nfp_app *app); +void nfp_tunnel_config_stop(struct nfp_app *app); +int nfp_tunnel_mac_event_handler(struct nfp_app *app, + struct net_device *netdev, + unsigned long event, void *ptr); +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry); +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6); +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb); +void nfp_flower_lag_init(struct nfp_fl_lag *lag); +void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); +int nfp_flower_lag_reset(struct nfp_fl_lag *lag); +int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv, + struct net_device *netdev, + unsigned long event, void *ptr); +bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb); +int nfp_flower_lag_populate_pre_action(struct nfp_app *app, + struct net_device *master, + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack); +int nfp_flower_lag_get_output_id(struct nfp_app *app, + struct net_device *master); +void nfp_flower_lag_get_info_from_netdev(struct nfp_app *app, + struct net_device *netdev, + struct nfp_tun_neigh_lag *lag); +void nfp_flower_qos_init(struct nfp_app *app); +void nfp_flower_qos_cleanup(struct nfp_app *app); +int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow); +void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb); +int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); +void nfp_flower_setup_indr_tc_release(void *cb_priv); + +void +__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv); +struct nfp_flower_non_repr_priv * +nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev); +void +__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv); +void +nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev); +u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app, + struct net_device *netdev); +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); +int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, + struct nfp_fl_payload *flow); +int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, + struct nfp_fl_payload *flow); + +struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer); +int nfp_flower_calculate_key_layers(struct nfp_app *app, + struct net_device *netdev, + struct nfp_fl_key_ls *ret_key_ls, + struct flow_rule *flow, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_del_linked_merge_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); +int +nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, + u8 mtype); +void +nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act); +int nfp_init_meter_table(struct nfp_app *app); +void nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv); +void nfp_act_stats_reply(struct nfp_app *app, void *pmsg); +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst); +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id); +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c new file mode 100644 index 0000000000..e01430139b --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/bitfield.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" + +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type) +{ + /* Populate the metadata frame. */ + ext->nfp_flow_key_layer = key_type; + ext->mask_id = ~0; + + msk->nfp_flow_key_layer = key_type; + msk->mask_id = ~0; +} + +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule) +{ + u16 msk_tci, key_tci; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + /* Populate the tci field. */ + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.key->vlan_id); + + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.mask->vlan_id); + + ext->tci |= cpu_to_be16((key_tci & msk_tci)); + msk->tci |= cpu_to_be16(msk_tci); + } +} + +static void +nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule, u8 key_type, bool qinq_sup) +{ + memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); + memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); + + nfp_flower_compile_meta(ext, msk, key_type); + + if (!qinq_sup) + nfp_flower_compile_tci(ext, msk, rule); +} + +void +nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext) +{ + frame->nfp_flow_key_layer2 = cpu_to_be32(key_ext); +} + +int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) +{ + if (mask_version) { + frame->in_port = cpu_to_be32(~0); + return 0; + } + + if (tun_type) { + frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else { + if (!cmsg_port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid ingress interface for match offload"); + return -EOPNOTSUPP; + } + frame->in_port = cpu_to_be32(cmsg_port); + } + + return 0; +} + +void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + u8 tmp; + int i; + + flow_rule_match_eth_addrs(rule, &match); + /* Populate mac frame. */ + for (i = 0; i < ETH_ALEN; i++) { + tmp = match.key->dst[i] & match.mask->dst[i]; + ext->mac_dst[i] |= tmp & (~msk->mac_dst[i]); + msk->mac_dst[i] |= match.mask->dst[i]; + + tmp = match.key->src[i] & match.mask->src[i]; + ext->mac_src[i] |= tmp & (~msk->mac_src[i]); + msk->mac_src[i] |= match.mask->src[i]; + } + } +} + +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_match_mpls match; + u32 key_mpls, msk_mpls; + + flow_rule_match_mpls(rule, &match); + + /* Only support matching the first LSE */ + if (match.mask->used_lses != 1) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: invalid LSE depth for MPLS match offload"); + return -EOPNOTSUPP; + } + + key_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.key->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.key->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.key->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + msk_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.mask->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.mask->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.mask->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + ext->mpls_lse |= cpu_to_be32((key_mpls & msk_mpls)); + msk->mpls_lse |= cpu_to_be32(msk_mpls); + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q + * bit, which indicates an mpls ether type but without any + * mpls fields. + */ + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || + match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) { + ext->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + msk->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + } + } + + return 0; +} + +static int +nfp_flower_compile_mac_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); + memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); + + nfp_flower_compile_mac(ext, msk, rule); + + return nfp_flower_compile_mpls(ext, msk, rule, extack); +} + +void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + __be16 tmp; + + flow_rule_match_ports(rule, &match); + + tmp = match.key->src & match.mask->src; + ext->port_src |= tmp & (~msk->port_src); + msk->port_src |= match.mask->src; + + tmp = match.key->dst & match.mask->dst; + ext->port_dst |= tmp & (~msk->port_dst); + msk->port_dst |= match.mask->dst; + } +} + +static void +nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, + struct nfp_flower_ip_ext *msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ext->proto |= match.key->ip_proto & match.mask->ip_proto; + msk->proto |= match.mask->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + u8 tmp; + + flow_rule_match_ip(rule, &match); + + tmp = match.key->tos & match.mask->tos; + ext->tos |= tmp & (~msk->tos); + msk->tos |= match.mask->tos; + + tmp = match.key->ttl & match.mask->ttl; + ext->ttl |= tmp & (~msk->ttl); + msk->ttl |= match.mask->ttl; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + u16 tcp_flags, tcp_flags_mask; + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + tcp_flags = be16_to_cpu(match.key->flags); + tcp_flags_mask = be16_to_cpu(match.mask->flags); + + if (tcp_flags & TCPHDR_FIN) + ext->flags |= NFP_FL_TCP_FLAG_FIN; + if (tcp_flags_mask & TCPHDR_FIN) + msk->flags |= NFP_FL_TCP_FLAG_FIN; + + if (tcp_flags & TCPHDR_SYN) + ext->flags |= NFP_FL_TCP_FLAG_SYN; + if (tcp_flags_mask & TCPHDR_SYN) + msk->flags |= NFP_FL_TCP_FLAG_SYN; + + if (tcp_flags & TCPHDR_RST) + ext->flags |= NFP_FL_TCP_FLAG_RST; + if (tcp_flags_mask & TCPHDR_RST) + msk->flags |= NFP_FL_TCP_FLAG_RST; + + if (tcp_flags & TCPHDR_PSH) + ext->flags |= NFP_FL_TCP_FLAG_PSH; + if (tcp_flags_mask & TCPHDR_PSH) + msk->flags |= NFP_FL_TCP_FLAG_PSH; + + if (tcp_flags & TCPHDR_URG) + ext->flags |= NFP_FL_TCP_FLAG_URG; + if (tcp_flags_mask & TCPHDR_URG) + msk->flags |= NFP_FL_TCP_FLAG_URG; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + if (match.key->flags & FLOW_DIS_IS_FRAGMENT) + ext->flags |= NFP_FL_IP_FRAGMENTED; + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) + msk->flags |= NFP_FL_IP_FRAGMENTED; + if (match.key->flags & FLOW_DIS_FIRST_FRAG) + ext->flags |= NFP_FL_IP_FRAG_FIRST; + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) + msk->flags |= NFP_FL_IP_FRAG_FIRST; + } +} + +static void +nfp_flower_fill_vlan(struct flow_match_vlan *match, + struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, bool outer_vlan) +{ + struct flow_dissector_key_vlan *mask = match->mask; + struct flow_dissector_key_vlan *key = match->key; + u16 msk_tci, key_tci; + + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + key->vlan_id); + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + mask->vlan_id); + + if (outer_vlan) { + ext->outer_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->outer_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->outer_tci |= cpu_to_be16(msk_tci); + msk->outer_tpid |= mask->vlan_tpid; + } else { + ext->inner_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->inner_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->inner_tci |= cpu_to_be16(msk_tci); + msk->inner_tpid |= mask->vlan_tpid; + } +} + +void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule) +{ + struct flow_match_vlan match; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + flow_rule_match_vlan(rule, &match); + nfp_flower_fill_vlan(&match, ext, msk, true); + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + flow_rule_match_cvlan(rule, &match); + nfp_flower_fill_vlan(&match, ext, msk, false); + } +} + +void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + __be32 tmp; + + flow_rule_match_ipv4_addrs(rule, &match); + + tmp = match.key->src & match.mask->src; + ext->ipv4_src |= tmp & (~msk->ipv4_src); + msk->ipv4_src |= match.mask->src; + + tmp = match.key->dst & match.mask->dst; + ext->ipv4_dst |= tmp & (~msk->ipv4_dst); + msk->ipv4_dst |= match.mask->dst; + } + + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); +} + +void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + u8 tmp; + int i; + + flow_rule_match_ipv6_addrs(rule, &match); + for (i = 0; i < sizeof(ext->ipv6_src); i++) { + tmp = match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->ipv6_src.s6_addr[i] |= tmp & + (~msk->ipv6_src.s6_addr[i]); + msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i]; + + tmp = match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + ext->ipv6_dst.s6_addr[i] |= tmp & + (~msk->ipv6_dst.s6_addr[i]); + msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } + } + + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); +} + +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule) +{ + struct flow_match_enc_opts match; + int i; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + flow_rule_match_enc_opts(rule, &match); + + for (i = 0; i < match.mask->len; i++) { + ext[i] |= match.key->data[i] & match.mask->data[i]; + msk[i] |= match.mask->data[i]; + } + } +} + +static void +nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, + struct nfp_flower_tun_ipv4 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + ext->src |= match.key->src & match.mask->src; + ext->dst |= match.key->dst & match.mask->dst; + msk->src |= match.mask->src; + msk->dst |= match.mask->dst; + } +} + +static void +nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, + struct nfp_flower_tun_ipv6 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + int i; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + for (i = 0; i < sizeof(ext->src); i++) { + ext->src.s6_addr[i] |= match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->dst.s6_addr[i] |= match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + msk->src.s6_addr[i] |= match.mask->src.s6_addr[i]; + msk->dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } + } +} + +static void +nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, + struct nfp_flower_tun_ip_ext *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + ext->tos |= match.key->tos & match.mask->tos; + ext->ttl |= match.key->ttl & match.mask->ttl; + msk->tos |= match.mask->tos; + msk->ttl |= match.mask->ttl; + } +} + +static void +nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + u32 vni; + + flow_rule_match_enc_keyid(rule, &match); + vni = be32_to_cpu((match.key->keyid & match.mask->keyid)) << + NFP_FL_TUN_VNI_OFFSET; + *key |= cpu_to_be32(vni); + vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key_msk |= cpu_to_be32(vni); + } +} + +static void +nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, + __be16 *flags_msk, struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + + flow_rule_match_enc_keyid(rule, &match); + *key |= match.key->keyid & match.mask->keyid; + *key_msk |= match.mask->keyid; + + *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + } +} + +void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule) +{ + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); + + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); +} + +void +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct flow_rule *rule) +{ + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule) +{ + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule) +{ + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); + + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); +} + +int nfp_flower_compile_flow_match(struct nfp_app *app, + struct flow_rule *rule, + struct nfp_fl_key_ls *key_ls, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + bool qinq_sup; + u32 port_id; + int ext_len; + int err; + u8 *ext; + u8 *msk; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + + memset(nfp_flow->unmasked_data, 0, key_ls->key_size); + memset(nfp_flow->mask_data, 0, key_ls->key_size); + + ext = nfp_flow->unmasked_data; + msk = nfp_flow->mask_data; + + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + + nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, + (struct nfp_flower_meta_tci *)msk, + rule, key_ls->key_layer, qinq_sup); + ext += sizeof(struct nfp_flower_meta_tci); + msk += sizeof(struct nfp_flower_meta_tci); + + /* Populate Extended Metadata if Required. */ + if (NFP_FLOWER_LAYER_EXT_META & key_ls->key_layer) { + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)ext, + key_ls->key_layer_two); + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk, + key_ls->key_layer_two); + ext += sizeof(struct nfp_flower_ext_meta); + msk += sizeof(struct nfp_flower_ext_meta); + } + + /* Populate Exact Port data. */ + err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, + port_id, false, tun_type, extack); + if (err) + return err; + + /* Populate Mask Port Data. */ + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + port_id, true, tun_type, extack); + if (err) + return err; + + ext += sizeof(struct nfp_flower_in_port); + msk += sizeof(struct nfp_flower_in_port); + + if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { + err = nfp_flower_compile_mac_mpls((struct nfp_flower_mac_mpls *)ext, + (struct nfp_flower_mac_mpls *)msk, + rule, extack); + if (err) + return err; + + ext += sizeof(struct nfp_flower_mac_mpls); + msk += sizeof(struct nfp_flower_mac_mpls); + } + + if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, + (struct nfp_flower_tp_ports *)msk, + rule); + ext += sizeof(struct nfp_flower_tp_ports); + msk += sizeof(struct nfp_flower_tp_ports); + } + + if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, + (struct nfp_flower_ipv4 *)msk, + rule); + ext += sizeof(struct nfp_flower_ipv4); + msk += sizeof(struct nfp_flower_ipv4); + } + + if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, + (struct nfp_flower_ipv6 *)msk, + rule); + ext += sizeof(struct nfp_flower_ipv6); + msk += sizeof(struct nfp_flower_ipv6); + } + + if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext, + (struct nfp_flower_vlan *)msk, + rule); + ext += sizeof(struct nfp_flower_vlan); + msk += sizeof(struct nfp_flower_vlan); + } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_gre_tun((void *)ext, + (void *)msk, rule); + gre_match = (struct nfp_flower_ipv6_gre_tun *)ext; + dst = &gre_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_gre_tun); + msk += sizeof(struct nfp_flower_ipv6_gre_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } + } + + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || + key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_udp_tun((void *)ext, + (void *)msk, rule); + udp_match = (struct nfp_flower_ipv6_udp_tun *)ext; + dst = &udp_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_udp_tun); + msk += sizeof(struct nfp_flower_ipv6_udp_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_udp_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_udp_tun); + msk += sizeof(struct nfp_flower_ipv4_udp_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + nfp_flower_compile_geneve_opt(ext, msk, rule); + } + } + + /* Check that the flow key does not exceed the maximum limit. + * All structures in the key is multiples of 4 bytes, so use u32. + */ + ext_len = (u32 *)ext - (u32 *)nfp_flow->unmasked_data; + if (ext_len > NFP_FLOWER_KEY_MAX_LW) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: flow key too long"); + return -EOPNOTSUPP; + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c new file mode 100644 index 0000000000..80e4675582 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -0,0 +1,726 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> +#include <linux/math64.h> +#include <linux/vmalloc.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "conntrack.h" +#include "main.h" +#include "../nfp_app.h" + +struct nfp_mask_id_table { + struct hlist_node link; + u32 hash_key; + u32 ref_cnt; + u8 mask_id; +}; + +struct nfp_fl_flow_table_cmp_arg { + struct net_device *netdev; + unsigned long cookie; +}; + +struct nfp_fl_stats_ctx_to_flow { + struct rhash_head ht_node; + u32 stats_cxt; + struct nfp_fl_payload *flow; +}; + +static const struct rhashtable_params stats_ctx_table_params = { + .key_offset = offsetof(struct nfp_fl_stats_ctx_to_flow, stats_cxt), + .head_offset = offsetof(struct nfp_fl_stats_ctx_to_flow, ht_node), + .key_len = sizeof(u32), +}; + +static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct circ_buf *ring; + + ring = &priv->stats_ids.free_list; + /* Check if buffer is full, stats_ring_size must be power of 2 */ + if (!CIRC_SPACE(ring->head, ring->tail, priv->stats_ring_size)) + return -ENOBUFS; + + /* Each increment of head represents size of NFP_FL_STATS_ELEM_RS */ + memcpy(&ring->buf[ring->head * NFP_FL_STATS_ELEM_RS], + &stats_context_id, NFP_FL_STATS_ELEM_RS); + ring->head = (ring->head + 1) & (priv->stats_ring_size - 1); + + return 0; +} + +static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) +{ + struct nfp_flower_priv *priv = app->priv; + u32 freed_stats_id, temp_stats_id; + struct circ_buf *ring; + + ring = &priv->stats_ids.free_list; + freed_stats_id = priv->stats_ring_size; + /* Check for unallocated entries first. */ + if (priv->stats_ids.init_unalloc > 0) { + *stats_context_id = + FIELD_PREP(NFP_FL_STAT_ID_STAT, + priv->stats_ids.init_unalloc - 1) | + FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, + priv->active_mem_unit); + + if (++priv->active_mem_unit == priv->total_mem_units) { + priv->stats_ids.init_unalloc--; + priv->active_mem_unit = 0; + } + + return 0; + } + + /* Check if buffer is empty. */ + if (ring->head == ring->tail) { + *stats_context_id = freed_stats_id; + return -ENOENT; + } + + /* Each increment of tail represents size of NFP_FL_STATS_ELEM_RS */ + memcpy(&temp_stats_id, &ring->buf[ring->tail * NFP_FL_STATS_ELEM_RS], + NFP_FL_STATS_ELEM_RS); + *stats_context_id = temp_stats_id; + memcpy(&ring->buf[ring->tail * NFP_FL_STATS_ELEM_RS], &freed_stats_id, + NFP_FL_STATS_ELEM_RS); + /* stats_ring_size must be power of 2 */ + ring->tail = (ring->tail + 1) & (priv->stats_ring_size - 1); + + return 0; +} + +/* Must be called with either RTNL or rcu_read_lock */ +struct nfp_fl_payload * +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev) +{ + struct nfp_fl_flow_table_cmp_arg flower_cmp_arg; + struct nfp_flower_priv *priv = app->priv; + + flower_cmp_arg.netdev = netdev; + flower_cmp_arg.cookie = tc_flower_cookie; + + return rhashtable_lookup_fast(&priv->flow_table, &flower_cmp_arg, + nfp_flower_table_params); +} + +void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb) +{ + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_stats_frame *stats; + unsigned char *msg; + u32 ctx_id; + int i; + + msg = nfp_flower_cmsg_get_data(skb); + + spin_lock(&priv->stats_lock); + for (i = 0; i < msg_len / sizeof(*stats); i++) { + stats = (struct nfp_fl_stats_frame *)msg + i; + ctx_id = be32_to_cpu(stats->stats_con_id); + priv->stats[ctx_id].pkts += be32_to_cpu(stats->pkt_count); + priv->stats[ctx_id].bytes += be64_to_cpu(stats->byte_count); + priv->stats[ctx_id].used = jiffies; + } + spin_unlock(&priv->stats_lock); +} + +static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct circ_buf *ring; + + ring = &priv->mask_ids.mask_id_free_list; + /* Checking if buffer is full, + * NFP_FLOWER_MASK_ENTRY_RS must be power of 2 + */ + if (CIRC_SPACE(ring->head, ring->tail, NFP_FLOWER_MASK_ENTRY_RS) == 0) + return -ENOBUFS; + + /* Each increment of head represents size of + * NFP_FLOWER_MASK_ELEMENT_RS + */ + memcpy(&ring->buf[ring->head * NFP_FLOWER_MASK_ELEMENT_RS], &mask_id, + NFP_FLOWER_MASK_ELEMENT_RS); + ring->head = (ring->head + 1) & (NFP_FLOWER_MASK_ENTRY_RS - 1); + + priv->mask_ids.last_used[mask_id] = ktime_get(); + + return 0; +} + +static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) +{ + struct nfp_flower_priv *priv = app->priv; + ktime_t reuse_timeout; + struct circ_buf *ring; + u8 temp_id, freed_id; + + ring = &priv->mask_ids.mask_id_free_list; + freed_id = NFP_FLOWER_MASK_ENTRY_RS - 1; + /* Checking for unallocated entries first. */ + if (priv->mask_ids.init_unallocated > 0) { + *mask_id = priv->mask_ids.init_unallocated; + priv->mask_ids.init_unallocated--; + return 0; + } + + /* Checking if buffer is empty. */ + if (ring->head == ring->tail) + goto err_not_found; + + /* Each increment of tail represents size of + * NFP_FLOWER_MASK_ELEMENT_RS + */ + memcpy(&temp_id, &ring->buf[ring->tail * NFP_FLOWER_MASK_ELEMENT_RS], + NFP_FLOWER_MASK_ELEMENT_RS); + *mask_id = temp_id; + + reuse_timeout = ktime_add_ns(priv->mask_ids.last_used[*mask_id], + NFP_FL_MASK_REUSE_TIME_NS); + + if (ktime_before(ktime_get(), reuse_timeout)) + goto err_not_found; + + memcpy(&ring->buf[ring->tail * NFP_FLOWER_MASK_ELEMENT_RS], &freed_id, + NFP_FLOWER_MASK_ELEMENT_RS); + /* NFP_FLOWER_MASK_ENTRY_RS must be power of 2 */ + ring->tail = (ring->tail + 1) & (NFP_FLOWER_MASK_ENTRY_RS - 1); + + return 0; + +err_not_found: + *mask_id = freed_id; + return -ENOENT; +} + +static int +nfp_add_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_mask_id_table *mask_entry; + unsigned long hash_key; + u8 mask_id; + + if (nfp_mask_alloc(app, &mask_id)) + return -ENOENT; + + mask_entry = kmalloc(sizeof(*mask_entry), GFP_KERNEL); + if (!mask_entry) { + nfp_release_mask_id(app, mask_id); + return -ENOMEM; + } + + INIT_HLIST_NODE(&mask_entry->link); + mask_entry->mask_id = mask_id; + hash_key = jhash(mask_data, mask_len, priv->mask_id_seed); + mask_entry->hash_key = hash_key; + mask_entry->ref_cnt = 1; + hash_add(priv->mask_table, &mask_entry->link, hash_key); + + return mask_id; +} + +static struct nfp_mask_id_table * +nfp_search_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_mask_id_table *mask_entry; + unsigned long hash_key; + + hash_key = jhash(mask_data, mask_len, priv->mask_id_seed); + + hash_for_each_possible(priv->mask_table, mask_entry, link, hash_key) + if (mask_entry->hash_key == hash_key) + return mask_entry; + + return NULL; +} + +static int +nfp_find_in_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_mask_id_table *mask_entry; + + mask_entry = nfp_search_mask_table(app, mask_data, mask_len); + if (!mask_entry) + return -ENOENT; + + mask_entry->ref_cnt++; + + /* Casting u8 to int for later use. */ + return mask_entry->mask_id; +} + +static bool +nfp_check_mask_add(struct nfp_app *app, char *mask_data, u32 mask_len, + u8 *meta_flags, u8 *mask_id) +{ + int id; + + id = nfp_find_in_mask_table(app, mask_data, mask_len); + if (id < 0) { + id = nfp_add_mask_table(app, mask_data, mask_len); + if (id < 0) + return false; + *meta_flags |= NFP_FL_META_FLAG_MANAGE_MASK; + } + *mask_id = id; + + return true; +} + +static bool +nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, + u8 *meta_flags, u8 *mask_id) +{ + struct nfp_mask_id_table *mask_entry; + + mask_entry = nfp_search_mask_table(app, mask_data, mask_len); + if (!mask_entry) + return false; + + *mask_id = mask_entry->mask_id; + mask_entry->ref_cnt--; + if (!mask_entry->ref_cnt) { + hash_del(&mask_entry->link); + nfp_release_mask_id(app, *mask_id); + kfree(mask_entry); + if (meta_flags) + *meta_flags |= NFP_FL_META_FLAG_MANAGE_MASK; + } + + return true; +} + +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_stats_ctx_to_flow *ctx_entry; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *check_entry; + u8 new_mask_id; + u32 stats_cxt; + int err; + + err = nfp_get_stats_entry(app, &stats_cxt); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate new stats context"); + return err; + } + + nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); + nfp_flow->meta.host_cookie = cpu_to_be64(cookie); + nfp_flow->ingress_dev = netdev; + + ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL); + if (!ctx_entry) { + err = -ENOMEM; + goto err_release_stats; + } + + ctx_entry->stats_cxt = stats_cxt; + ctx_entry->flow = nfp_flow; + + if (rhashtable_insert_fast(&priv->stats_ctx_table, &ctx_entry->ht_node, + stats_ctx_table_params)) { + err = -ENOMEM; + goto err_free_ctx_entry; + } + + /* Do not allocate a mask-id for pre_tun_rules. These flows are used to + * configure the pre_tun table and are never actually send to the + * firmware as an add-flow message. This causes the mask-id allocation + * on the firmware to get out of sync if allocated here. + */ + new_mask_id = 0; + if (!nfp_flow->pre_tun_rule.dev && + !nfp_check_mask_add(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + &nfp_flow->meta.flags, &new_mask_id)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); + err = -ENOENT; + goto err_remove_rhash; + } + + nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version); + priv->flower_version++; + + /* Update flow payload with mask ids. */ + nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; + priv->stats[stats_cxt].pkts = 0; + priv->stats[stats_cxt].bytes = 0; + priv->stats[stats_cxt].used = jiffies; + + check_entry = nfp_flower_search_fl_table(app, cookie, netdev); + if (check_entry) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); + err = -EEXIST; + goto err_remove_mask; + } + + return 0; + +err_remove_mask: + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id); +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table, + &ctx_entry->ht_node, + stats_ctx_table_params)); +err_free_ctx_entry: + kfree(ctx_entry); +err_release_stats: + nfp_release_stats_entry(app, stats_cxt); + + return err; +} + +void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv, + struct nfp_fl_payload *nfp_flow) +{ + nfp_flow->meta.flags &= ~NFP_FL_META_FLAG_MANAGE_MASK; + nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version); + priv->flower_version++; +} + +int nfp_modify_flow_metadata(struct nfp_app *app, + struct nfp_fl_payload *nfp_flow) +{ + struct nfp_fl_stats_ctx_to_flow *ctx_entry; + struct nfp_flower_priv *priv = app->priv; + u8 new_mask_id = 0; + u32 temp_ctx_id; + + __nfp_modify_flow_metadata(priv, nfp_flow); + + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, &nfp_flow->meta.flags, + &new_mask_id); + + /* Update flow payload with mask ids. */ + nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; + + /* Release the stats ctx id and ctx to flow table entry. */ + temp_ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + + ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &temp_ctx_id, + stats_ctx_table_params); + if (!ctx_entry) + return -ENOENT; + + WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table, + &ctx_entry->ht_node, + stats_ctx_table_params)); + kfree(ctx_entry); + + return nfp_release_stats_entry(app, temp_ctx_id); +} + +struct nfp_fl_payload * +nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id) +{ + struct nfp_fl_stats_ctx_to_flow *ctx_entry; + struct nfp_flower_priv *priv = app->priv; + + ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &ctx_id, + stats_ctx_table_params); + if (!ctx_entry) + return NULL; + + return ctx_entry->flow; +} + +static int nfp_fl_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct nfp_fl_flow_table_cmp_arg *cmp_arg = arg->key; + const struct nfp_fl_payload *flow_entry = obj; + + if (flow_entry->ingress_dev == cmp_arg->netdev) + return flow_entry->tc_flower_cookie != cmp_arg->cookie; + + return 1; +} + +static u32 nfp_fl_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfp_fl_payload *flower_entry = data; + + return jhash2((u32 *)&flower_entry->tc_flower_cookie, + sizeof(flower_entry->tc_flower_cookie) / sizeof(u32), + seed); +} + +static u32 nfp_fl_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfp_fl_flow_table_cmp_arg *cmp_arg = data; + + return jhash2((u32 *)&cmp_arg->cookie, + sizeof(cmp_arg->cookie) / sizeof(u32), seed); +} + +const struct rhashtable_params nfp_flower_table_params = { + .head_offset = offsetof(struct nfp_fl_payload, fl_node), + .hashfn = nfp_fl_key_hashfn, + .obj_cmpfn = nfp_fl_obj_cmpfn, + .obj_hashfn = nfp_fl_obj_hashfn, + .automatic_shrinking = true, +}; + +const struct rhashtable_params merge_table_params = { + .key_offset = offsetof(struct nfp_merge_info, parent_ctx), + .head_offset = offsetof(struct nfp_merge_info, ht_node), + .key_len = sizeof(u64), +}; + +const struct rhashtable_params nfp_zone_table_params = { + .head_offset = offsetof(struct nfp_fl_ct_zone_entry, hash_node), + .key_len = sizeof(u16), + .key_offset = offsetof(struct nfp_fl_ct_zone_entry, zone), + .automatic_shrinking = false, +}; + +const struct rhashtable_params nfp_ct_map_params = { + .head_offset = offsetof(struct nfp_fl_ct_map_entry, hash_node), + .key_len = sizeof(unsigned long), + .key_offset = offsetof(struct nfp_fl_ct_map_entry, cookie), + .automatic_shrinking = true, +}; + +const struct rhashtable_params neigh_table_params = { + .key_offset = offsetof(struct nfp_neigh_entry, neigh_cookie), + .head_offset = offsetof(struct nfp_neigh_entry, ht_node), + .key_len = sizeof(unsigned long), +}; + +int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, + unsigned int host_num_mems) +{ + struct nfp_flower_priv *priv = app->priv; + int err, stats_size; + + hash_init(priv->mask_table); + + err = rhashtable_init(&priv->flow_table, &nfp_flower_table_params); + if (err) + return err; + + err = rhashtable_init(&priv->stats_ctx_table, &stats_ctx_table_params); + if (err) + goto err_free_flow_table; + + err = rhashtable_init(&priv->merge_table, &merge_table_params); + if (err) + goto err_free_stats_ctx_table; + + mutex_init(&priv->nfp_fl_lock); + + err = rhashtable_init(&priv->ct_zone_table, &nfp_zone_table_params); + if (err) + goto err_free_merge_table; + + err = rhashtable_init(&priv->ct_map_table, &nfp_ct_map_params); + if (err) + goto err_free_ct_zone_table; + + err = rhashtable_init(&priv->neigh_table, &neigh_table_params); + if (err) + goto err_free_ct_map_table; + + INIT_LIST_HEAD(&priv->predt_list); + + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); + + /* Init ring buffer and unallocated mask_ids. */ + priv->mask_ids.mask_id_free_list.buf = + kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, + NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); + if (!priv->mask_ids.mask_id_free_list.buf) + goto err_free_neigh_table; + + priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; + + /* Init timestamps for mask id*/ + priv->mask_ids.last_used = + kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, + sizeof(*priv->mask_ids.last_used), GFP_KERNEL); + if (!priv->mask_ids.last_used) + goto err_free_mask_id; + + /* Init ring buffer and unallocated stats_ids. */ + priv->stats_ids.free_list.buf = + vmalloc(array_size(NFP_FL_STATS_ELEM_RS, + priv->stats_ring_size)); + if (!priv->stats_ids.free_list.buf) + goto err_free_last_used; + + priv->stats_ids.init_unalloc = div_u64(host_ctx_count, host_num_mems); + + stats_size = FIELD_PREP(NFP_FL_STAT_ID_STAT, host_ctx_count) | + FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, host_num_mems - 1); + priv->stats = kvmalloc_array(stats_size, sizeof(struct nfp_fl_stats), + GFP_KERNEL); + if (!priv->stats) + goto err_free_ring_buf; + + spin_lock_init(&priv->stats_lock); + spin_lock_init(&priv->predt_lock); + + return 0; + +err_free_ring_buf: + vfree(priv->stats_ids.free_list.buf); +err_free_last_used: + kfree(priv->mask_ids.last_used); +err_free_mask_id: + kfree(priv->mask_ids.mask_id_free_list.buf); +err_free_neigh_table: + rhashtable_destroy(&priv->neigh_table); +err_free_ct_map_table: + rhashtable_destroy(&priv->ct_map_table); +err_free_ct_zone_table: + rhashtable_destroy(&priv->ct_zone_table); +err_free_merge_table: + rhashtable_destroy(&priv->merge_table); +err_free_stats_ctx_table: + rhashtable_destroy(&priv->stats_ctx_table); +err_free_flow_table: + rhashtable_destroy(&priv->flow_table); + return -ENOMEM; +} + +static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt) +{ + if (!zt) + return; + + if (!list_empty(&zt->pre_ct_list)) { + struct rhashtable *m_table = &zt->priv->ct_map_table; + struct nfp_fl_ct_flow_entry *entry, *tmp; + struct nfp_fl_ct_map_entry *map; + + WARN_ONCE(1, "pre_ct_list not empty as expected, cleaning up\n"); + list_for_each_entry_safe(entry, tmp, &zt->pre_ct_list, + list_node) { + map = rhashtable_lookup_fast(m_table, + &entry->cookie, + nfp_ct_map_params); + WARN_ON_ONCE(rhashtable_remove_fast(m_table, + &map->hash_node, + nfp_ct_map_params)); + nfp_fl_ct_clean_flow_entry(entry); + kfree(map); + } + } + + if (!list_empty(&zt->post_ct_list)) { + struct rhashtable *m_table = &zt->priv->ct_map_table; + struct nfp_fl_ct_flow_entry *entry, *tmp; + struct nfp_fl_ct_map_entry *map; + + WARN_ONCE(1, "post_ct_list not empty as expected, cleaning up\n"); + list_for_each_entry_safe(entry, tmp, &zt->post_ct_list, + list_node) { + map = rhashtable_lookup_fast(m_table, + &entry->cookie, + nfp_ct_map_params); + WARN_ON_ONCE(rhashtable_remove_fast(m_table, + &map->hash_node, + nfp_ct_map_params)); + nfp_fl_ct_clean_flow_entry(entry); + kfree(map); + } + } + + if (zt->nft) { + nf_flow_table_offload_del_cb(zt->nft, + nfp_fl_ct_handle_nft_flow, + zt); + zt->nft = NULL; + } + + if (!list_empty(&zt->nft_flows_list)) { + struct rhashtable *m_table = &zt->priv->ct_map_table; + struct nfp_fl_ct_flow_entry *entry, *tmp; + struct nfp_fl_ct_map_entry *map; + + WARN_ONCE(1, "nft_flows_list not empty as expected, cleaning up\n"); + list_for_each_entry_safe(entry, tmp, &zt->nft_flows_list, + list_node) { + map = rhashtable_lookup_fast(m_table, + &entry->cookie, + nfp_ct_map_params); + WARN_ON_ONCE(rhashtable_remove_fast(m_table, + &map->hash_node, + nfp_ct_map_params)); + nfp_fl_ct_clean_flow_entry(entry); + kfree(map); + } + } + + rhashtable_free_and_destroy(&zt->tc_merge_tb, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&zt->nft_merge_tb, + nfp_check_rhashtable_empty, NULL); + + kfree(zt); +} + +static void nfp_free_zone_table_entry(void *ptr, void *arg) +{ + struct nfp_fl_ct_zone_entry *zt = ptr; + + nfp_zone_table_entry_destroy(zt); +} + +static void nfp_free_map_table_entry(void *ptr, void *arg) +{ + struct nfp_fl_ct_map_entry *map = ptr; + + if (!map) + return; + + kfree(map); +} + +void nfp_flower_metadata_cleanup(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv) + return; + + rhashtable_free_and_destroy(&priv->flow_table, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->stats_ctx_table, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->merge_table, + nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->ct_zone_table, + nfp_free_zone_table_entry, NULL); + nfp_zone_table_entry_destroy(priv->ct_zone_wc); + + rhashtable_free_and_destroy(&priv->ct_map_table, + nfp_free_map_table_entry, NULL); + rhashtable_free_and_destroy(&priv->neigh_table, + nfp_check_rhashtable_empty, NULL); + kvfree(priv->stats); + kfree(priv->mask_ids.mask_id_free_list.buf); + kfree(priv->mask_ids.last_used); + vfree(priv->stats_ids.free_list.buf); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c new file mode 100644 index 0000000000..0aceef9fe5 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -0,0 +1,1974 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/skbuff.h> +#include <net/devlink.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" +#include "conntrack.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_port.h" + +#define NFP_FLOWER_SUPPORTED_TCPFLAGS \ + (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \ + TCPHDR_PSH | TCPHDR_URG) + +#define NFP_FLOWER_SUPPORTED_CTLFLAGS \ + (FLOW_DIS_IS_FRAGMENT | \ + FLOW_DIS_FIRST_FRAG) + +#define NFP_FLOWER_WHITELIST_DISSECTOR \ + (BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_MPLS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_CT) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_META) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_IP)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ + (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ + (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \ + (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) + +#define NFP_FLOWER_MERGE_FIELDS \ + (NFP_FLOWER_LAYER_PORT | \ + NFP_FLOWER_LAYER_MAC | \ + NFP_FLOWER_LAYER_TP | \ + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) + +#define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ + (NFP_FLOWER_LAYER_EXT_META | \ + NFP_FLOWER_LAYER_PORT | \ + NFP_FLOWER_LAYER_MAC | \ + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) + +struct nfp_flower_merge_check { + union { + struct { + __be16 tci; + struct nfp_flower_mac_mpls l2; + struct nfp_flower_tp_ports l4; + union { + struct nfp_flower_ipv4 ipv4; + struct nfp_flower_ipv6 ipv6; + }; + }; + unsigned long vals[8]; + }; +}; + +int +nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, + u8 mtype) +{ + u32 meta_len, key_len, mask_len, act_len, tot_len; + struct sk_buff *skb; + unsigned char *msg; + + meta_len = sizeof(struct nfp_fl_rule_metadata); + key_len = nfp_flow->meta.key_len; + mask_len = nfp_flow->meta.mask_len; + act_len = nfp_flow->meta.act_len; + + tot_len = meta_len + key_len + mask_len + act_len; + + /* Convert to long words as firmware expects + * lengths in units of NFP_FL_LW_SIZ. + */ + nfp_flow->meta.key_len >>= NFP_FL_LW_SIZ; + nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ; + nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ; + + skb = nfp_flower_cmsg_alloc(app, tot_len, mtype, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, &nfp_flow->meta, meta_len); + memcpy(&msg[meta_len], nfp_flow->unmasked_data, key_len); + memcpy(&msg[meta_len + key_len], nfp_flow->mask_data, mask_len); + memcpy(&msg[meta_len + key_len + mask_len], + nfp_flow->action_data, act_len); + + /* Convert back to bytes as software expects + * lengths in units of bytes. + */ + nfp_flow->meta.key_len <<= NFP_FL_LW_SIZ; + nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ; + nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ; + + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static bool nfp_flower_check_higher_than_mac(struct flow_rule *rule) +{ + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); +} + +static bool nfp_flower_check_higher_than_l3(struct flow_rule *rule) +{ + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); +} + +static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size, bool ipv6, + struct netlink_ext_ack *extack) +{ + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY || + (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); + return -EOPNOTSUPP; + } + + if (enc_opts->len > 0) { + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; + *key_size += sizeof(struct nfp_flower_geneve_options); + } + + return 0; +} + +static int +nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, + struct flow_dissector_key_enc_opts *enc_op, + u32 *key_layer_two, u8 *key_layer, int *key_size, + struct nfp_flower_priv *priv, + enum nfp_flower_tun_type *tun_type, bool ipv6, + struct netlink_ext_ack *extack) +{ + int err; + + switch (enc_ports->dst) { + case htons(IANA_VXLAN_UDP_PORT): + *tun_type = NFP_FL_TUNNEL_VXLAN; + *key_layer |= NFP_FLOWER_LAYER_VXLAN; + + if (ipv6) { + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (enc_op) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels"); + return -EOPNOTSUPP; + } + break; + case htons(GENEVE_UDP_PORT): + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload"); + return -EOPNOTSUPP; + } + *tun_type = NFP_FL_TUNNEL_GENEVE; + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; + + if (ipv6) { + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); + return -EOPNOTSUPP; + } + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size, + ipv6, extack); + if (err) + return err; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown"); + return -EOPNOTSUPP; + } + + return 0; +} + +int +nfp_flower_calculate_key_layers(struct nfp_app *app, + struct net_device *netdev, + struct nfp_fl_key_ls *ret_key_ls, + struct flow_rule *rule, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack) +{ + struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_basic basic = { NULL, NULL}; + struct nfp_flower_priv *priv = app->priv; + u32 key_layer_two; + u8 key_layer; + int key_size; + int err; + + if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match not supported"); + return -EOPNOTSUPP; + } + + /* If any tun dissector is used then the required set must be used. */ + if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); + return -EOPNOTSUPP; + } + + key_layer_two = 0; + key_layer = NFP_FLOWER_LAYER_PORT; + key_size = sizeof(struct nfp_flower_meta_tci) + + sizeof(struct nfp_flower_in_port); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { + key_layer |= NFP_FLOWER_LAYER_MAC; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan vlan; + + flow_rule_match_vlan(rule, &vlan); + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) && + vlan.key->vlan_priority) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload"); + return -EOPNOTSUPP; + } + if (priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ && + !(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) { + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_size += sizeof(struct nfp_flower_vlan); + key_layer_two |= NFP_FLOWER_LAYER2_QINQ; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan cvlan; + + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN QinQ offload"); + return -EOPNOTSUPP; + } + + flow_rule_match_vlan(rule, &cvlan); + if (!(key_layer_two & NFP_FLOWER_LAYER2_QINQ)) { + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_size += sizeof(struct nfp_flower_vlan); + key_layer_two |= NFP_FLOWER_LAYER2_QINQ; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_enc_opts enc_op = { NULL, NULL }; + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control enc_ctl; + struct flow_match_ports enc_ports; + bool ipv6_tun = false; + + flow_rule_match_enc_control(rule, &enc_ctl); + + if (enc_ctl.mask->addr_type != 0xffff) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); + return -EOPNOTSUPP; + } + + ipv6_tun = enc_ctl.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (ipv6_tun && + !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels"); + return -EOPNOTSUPP; + } + + if (!ipv6_tun && + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6"); + return -EOPNOTSUPP; + } + + if (ipv6_tun) { + flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs); + if (memchr_inv(&ipv6_addrs.mask->dst, 0xff, + sizeof(ipv6_addrs.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + return -EOPNOTSUPP; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + flow_rule_match_enc_opts(rule, &enc_op); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + /* Check if GRE, which has no enc_ports */ + if (!netif_is_gretap(netdev) && !netif_is_ip6gretap(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + return -EOPNOTSUPP; + } + + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + + if (ipv6_tun) { + key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + key_size += + sizeof(struct nfp_flower_ipv6_gre_tun); + } else { + key_size += + sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ports(rule, &enc_ports); + if (enc_ports.mask->dst != cpu_to_be16(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported"); + return -EOPNOTSUPP; + } + + err = nfp_flower_calc_udp_tun_layer(enc_ports.key, + enc_op.key, + &key_layer_two, + &key_layer, + &key_size, priv, + tun_type, ipv6_tun, + extack); + if (err) + return err; + + /* Ensure the ingress netdev matches the expected + * tun type. + */ + if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type"); + return -EOPNOTSUPP; + } + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) + flow_rule_match_basic(rule, &basic); + + if (basic.mask && basic.mask->n_proto) { + /* Ethernet type is present in the key. */ + switch (basic.key->n_proto) { + case cpu_to_be16(ETH_P_IP): + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + break; + + case cpu_to_be16(ETH_P_IPV6): + key_layer |= NFP_FLOWER_LAYER_IPV6; + key_size += sizeof(struct nfp_flower_ipv6); + break; + + /* Currently we do not offload ARP + * because we rely on it to get to the host. + */ + case cpu_to_be16(ETH_P_ARP): + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ARP not supported"); + return -EOPNOTSUPP; + + case cpu_to_be16(ETH_P_MPLS_UC): + case cpu_to_be16(ETH_P_MPLS_MC): + if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { + key_layer |= NFP_FLOWER_LAYER_MAC; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + break; + + /* Will be included in layer 2. */ + case cpu_to_be16(ETH_P_8021Q): + break; + + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported"); + return -EOPNOTSUPP; + } + } else if (nfp_flower_check_higher_than_mac(rule)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType"); + return -EOPNOTSUPP; + } + + if (basic.mask && basic.mask->ip_proto) { + switch (basic.key->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + key_layer |= NFP_FLOWER_LAYER_TP; + key_size += sizeof(struct nfp_flower_tp_ports); + break; + } + } + + if (!(key_layer & NFP_FLOWER_LAYER_TP) && + nfp_flower_check_higher_than_l3(rule)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp tcp; + u32 tcp_flags; + + flow_rule_match_tcp(rule, &tcp); + tcp_flags = be16_to_cpu(tcp.key->flags); + + if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: no match support for selected TCP flags"); + return -EOPNOTSUPP; + } + + /* We only support PSH and URG flags when either + * FIN, SYN or RST is present as well. + */ + if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) && + !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: PSH and URG is only supported when used with FIN, SYN or RST"); + return -EOPNOTSUPP; + } + + /* We need to store TCP flags in the either the IPv4 or IPv6 key + * space, thus we need to ensure we include a IPv4/IPv6 key + * layer if we have not done so already. + */ + if (!basic.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on L3 protocol"); + return -EOPNOTSUPP; + } + + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + switch (basic.key->n_proto) { + case cpu_to_be16(ETH_P_IP): + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + break; + + case cpu_to_be16(ETH_P_IPV6): + key_layer |= NFP_FLOWER_LAYER_IPV6; + key_size += sizeof(struct nfp_flower_ipv6); + break; + + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on IPv4/IPv6"); + return -EOPNOTSUPP; + } + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control ctl; + + flow_rule_match_control(rule, &ctl); + if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on unknown control flag"); + return -EOPNOTSUPP; + } + } + + ret_key_ls->key_layer = key_layer; + ret_key_ls->key_layer_two = key_layer_two; + ret_key_ls->key_size = key_size; + + return 0; +} + +struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) +{ + struct nfp_fl_payload *flow_pay; + + flow_pay = kmalloc(sizeof(*flow_pay), GFP_KERNEL); + if (!flow_pay) + return NULL; + + flow_pay->meta.key_len = key_layer->key_size; + flow_pay->unmasked_data = kmalloc(key_layer->key_size, GFP_KERNEL); + if (!flow_pay->unmasked_data) + goto err_free_flow; + + flow_pay->meta.mask_len = key_layer->key_size; + flow_pay->mask_data = kmalloc(key_layer->key_size, GFP_KERNEL); + if (!flow_pay->mask_data) + goto err_free_unmasked; + + flow_pay->action_data = kmalloc(NFP_FL_MAX_A_SIZ, GFP_KERNEL); + if (!flow_pay->action_data) + goto err_free_mask; + + flow_pay->nfp_tun_ipv4_addr = 0; + flow_pay->nfp_tun_ipv6 = NULL; + flow_pay->meta.flags = 0; + INIT_LIST_HEAD(&flow_pay->linked_flows); + flow_pay->in_hw = false; + flow_pay->pre_tun_rule.dev = NULL; + + return flow_pay; + +err_free_mask: + kfree(flow_pay->mask_data); +err_free_unmasked: + kfree(flow_pay->unmasked_data); +err_free_flow: + kfree(flow_pay); + return NULL; +} + +static int +nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, + struct nfp_flower_merge_check *merge, + u8 *last_act_id, int *act_out) +{ + struct nfp_fl_set_ipv6_tc_hl_fl *ipv6_tc_hl_fl; + struct nfp_fl_set_ip4_ttl_tos *ipv4_ttl_tos; + struct nfp_fl_set_ip4_addrs *ipv4_add; + struct nfp_fl_set_ipv6_addr *ipv6_add; + struct nfp_fl_push_vlan *push_vlan; + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tport *tport; + struct nfp_fl_set_eth *eth; + struct nfp_fl_act_head *a; + unsigned int act_off = 0; + bool ipv6_tun = false; + u8 act_id = 0; + u8 *ports; + int i; + + while (act_off < flow->meta.act_len) { + a = (struct nfp_fl_act_head *)&flow->action_data[act_off]; + act_id = a->jump_id; + + switch (act_id) { + case NFP_FL_ACTION_OPCODE_OUTPUT: + if (act_out) + (*act_out)++; + break; + case NFP_FL_ACTION_OPCODE_PUSH_VLAN: + push_vlan = (struct nfp_fl_push_vlan *)a; + if (push_vlan->vlan_tci) + merge->tci = cpu_to_be16(0xffff); + break; + case NFP_FL_ACTION_OPCODE_POP_VLAN: + merge->tci = cpu_to_be16(0); + break; + case NFP_FL_ACTION_OPCODE_SET_TUNNEL: + /* New tunnel header means l2 to l4 can be matched. */ + eth_broadcast_addr(&merge->l2.mac_dst[0]); + eth_broadcast_addr(&merge->l2.mac_src[0]); + memset(&merge->l4, 0xff, + sizeof(struct nfp_flower_tp_ports)); + if (ipv6_tun) + memset(&merge->ipv6, 0xff, + sizeof(struct nfp_flower_ipv6)); + else + memset(&merge->ipv4, 0xff, + sizeof(struct nfp_flower_ipv4)); + break; + case NFP_FL_ACTION_OPCODE_SET_ETHERNET: + eth = (struct nfp_fl_set_eth *)a; + for (i = 0; i < ETH_ALEN; i++) + merge->l2.mac_dst[i] |= eth->eth_addr_mask[i]; + for (i = 0; i < ETH_ALEN; i++) + merge->l2.mac_src[i] |= + eth->eth_addr_mask[ETH_ALEN + i]; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS: + ipv4_add = (struct nfp_fl_set_ip4_addrs *)a; + merge->ipv4.ipv4_src |= ipv4_add->ipv4_src_mask; + merge->ipv4.ipv4_dst |= ipv4_add->ipv4_dst_mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS: + ipv4_ttl_tos = (struct nfp_fl_set_ip4_ttl_tos *)a; + merge->ipv4.ip_ext.ttl |= ipv4_ttl_tos->ipv4_ttl_mask; + merge->ipv4.ip_ext.tos |= ipv4_ttl_tos->ipv4_tos_mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV6_SRC: + ipv6_add = (struct nfp_fl_set_ipv6_addr *)a; + for (i = 0; i < 4; i++) + merge->ipv6.ipv6_src.in6_u.u6_addr32[i] |= + ipv6_add->ipv6[i].mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV6_DST: + ipv6_add = (struct nfp_fl_set_ipv6_addr *)a; + for (i = 0; i < 4; i++) + merge->ipv6.ipv6_dst.in6_u.u6_addr32[i] |= + ipv6_add->ipv6[i].mask; + break; + case NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL: + ipv6_tc_hl_fl = (struct nfp_fl_set_ipv6_tc_hl_fl *)a; + merge->ipv6.ip_ext.ttl |= + ipv6_tc_hl_fl->ipv6_hop_limit_mask; + merge->ipv6.ip_ext.tos |= ipv6_tc_hl_fl->ipv6_tc_mask; + merge->ipv6.ipv6_flow_label_exthdr |= + ipv6_tc_hl_fl->ipv6_label_mask; + break; + case NFP_FL_ACTION_OPCODE_SET_UDP: + case NFP_FL_ACTION_OPCODE_SET_TCP: + tport = (struct nfp_fl_set_tport *)a; + ports = (u8 *)&merge->l4.port_src; + for (i = 0; i < 4; i++) + ports[i] |= tport->tp_port_mask[i]; + break; + case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + pre_tun = (struct nfp_fl_pre_tunnel *)a; + ipv6_tun = be16_to_cpu(pre_tun->flags) & + NFP_FL_PRE_TUN_IPV6; + break; + case NFP_FL_ACTION_OPCODE_PRE_LAG: + case NFP_FL_ACTION_OPCODE_PUSH_GENEVE: + break; + default: + return -EOPNOTSUPP; + } + + act_off += a->len_lw << NFP_FL_LW_SIZ; + } + + if (last_act_id) + *last_act_id = act_id; + + return 0; +} + +static int +nfp_flower_populate_merge_match(struct nfp_fl_payload *flow, + struct nfp_flower_merge_check *merge, + bool extra_fields) +{ + struct nfp_flower_meta_tci *meta_tci; + u8 *mask = flow->mask_data; + u8 key_layer, match_size; + + memset(merge, 0, sizeof(struct nfp_flower_merge_check)); + + meta_tci = (struct nfp_flower_meta_tci *)mask; + key_layer = meta_tci->nfp_flow_key_layer; + + if (key_layer & ~NFP_FLOWER_MERGE_FIELDS && !extra_fields) + return -EOPNOTSUPP; + + merge->tci = meta_tci->tci; + mask += sizeof(struct nfp_flower_meta_tci); + + if (key_layer & NFP_FLOWER_LAYER_EXT_META) + mask += sizeof(struct nfp_flower_ext_meta); + + mask += sizeof(struct nfp_flower_in_port); + + if (key_layer & NFP_FLOWER_LAYER_MAC) { + match_size = sizeof(struct nfp_flower_mac_mpls); + memcpy(&merge->l2, mask, match_size); + mask += match_size; + } + + if (key_layer & NFP_FLOWER_LAYER_TP) { + match_size = sizeof(struct nfp_flower_tp_ports); + memcpy(&merge->l4, mask, match_size); + mask += match_size; + } + + if (key_layer & NFP_FLOWER_LAYER_IPV4) { + match_size = sizeof(struct nfp_flower_ipv4); + memcpy(&merge->ipv4, mask, match_size); + } + + if (key_layer & NFP_FLOWER_LAYER_IPV6) { + match_size = sizeof(struct nfp_flower_ipv6); + memcpy(&merge->ipv6, mask, match_size); + } + + return 0; +} + +static int +nfp_flower_can_merge(struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2) +{ + /* Two flows can be merged if sub_flow2 only matches on bits that are + * either matched by sub_flow1 or set by a sub_flow1 action. This + * ensures that every packet that hits sub_flow1 and recirculates is + * guaranteed to hit sub_flow2. + */ + struct nfp_flower_merge_check sub_flow1_merge, sub_flow2_merge; + int err, act_out = 0; + u8 last_act_id = 0; + + err = nfp_flower_populate_merge_match(sub_flow1, &sub_flow1_merge, + true); + if (err) + return err; + + err = nfp_flower_populate_merge_match(sub_flow2, &sub_flow2_merge, + false); + if (err) + return err; + + err = nfp_flower_update_merge_with_actions(sub_flow1, &sub_flow1_merge, + &last_act_id, &act_out); + if (err) + return err; + + /* Must only be 1 output action and it must be the last in sequence. */ + if (act_out != 1 || last_act_id != NFP_FL_ACTION_OPCODE_OUTPUT) + return -EOPNOTSUPP; + + /* Reject merge if sub_flow2 matches on something that is not matched + * on or set in an action by sub_flow1. + */ + err = bitmap_andnot(sub_flow2_merge.vals, sub_flow2_merge.vals, + sub_flow1_merge.vals, + sizeof(struct nfp_flower_merge_check) * 8); + if (err) + return -EINVAL; + + return 0; +} + +static unsigned int +nfp_flower_copy_pre_actions(char *act_dst, char *act_src, int len, + bool *tunnel_act) +{ + unsigned int act_off = 0, act_len; + struct nfp_fl_act_head *a; + u8 act_id = 0; + + while (act_off < len) { + a = (struct nfp_fl_act_head *)&act_src[act_off]; + act_len = a->len_lw << NFP_FL_LW_SIZ; + act_id = a->jump_id; + + switch (act_id) { + case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + if (tunnel_act) + *tunnel_act = true; + fallthrough; + case NFP_FL_ACTION_OPCODE_PRE_LAG: + memcpy(act_dst + act_off, act_src + act_off, act_len); + break; + default: + return act_off; + } + + act_off += act_len; + } + + return act_off; +} + +static int +nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan) +{ + struct nfp_fl_act_head *a; + unsigned int act_off = 0; + + while (act_off < len) { + a = (struct nfp_fl_act_head *)&acts[act_off]; + + if (a->jump_id == NFP_FL_ACTION_OPCODE_PUSH_VLAN && !act_off) + *vlan = (struct nfp_fl_push_vlan *)a; + else if (a->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT) + return -EOPNOTSUPP; + + act_off += a->len_lw << NFP_FL_LW_SIZ; + } + + /* Ensure any VLAN push also has an egress action. */ + if (*vlan && act_off <= sizeof(struct nfp_fl_push_vlan)) + return -EOPNOTSUPP; + + return 0; +} + +static int +nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan) +{ + struct nfp_fl_set_tun *tun; + struct nfp_fl_act_head *a; + unsigned int act_off = 0; + + while (act_off < len) { + a = (struct nfp_fl_act_head *)&acts[act_off]; + + if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) { + tun = (struct nfp_fl_set_tun *)a; + tun->outer_vlan_tpid = vlan->vlan_tpid; + tun->outer_vlan_tci = vlan->vlan_tci; + + return 0; + } + + act_off += a->len_lw << NFP_FL_LW_SIZ; + } + + /* Return error if no tunnel action is found. */ + return -EOPNOTSUPP; +} + +static int +nfp_flower_merge_action(struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2, + struct nfp_fl_payload *merge_flow) +{ + unsigned int sub1_act_len, sub2_act_len, pre_off1, pre_off2; + struct nfp_fl_push_vlan *post_tun_push_vlan = NULL; + bool tunnel_act = false; + char *merge_act; + int err; + + /* The last action of sub_flow1 must be output - do not merge this. */ + sub1_act_len = sub_flow1->meta.act_len - sizeof(struct nfp_fl_output); + sub2_act_len = sub_flow2->meta.act_len; + + if (!sub2_act_len) + return -EINVAL; + + if (sub1_act_len + sub2_act_len > NFP_FL_MAX_A_SIZ) + return -EINVAL; + + /* A shortcut can only be applied if there is a single action. */ + if (sub1_act_len) + merge_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + else + merge_flow->meta.shortcut = sub_flow2->meta.shortcut; + + merge_flow->meta.act_len = sub1_act_len + sub2_act_len; + merge_act = merge_flow->action_data; + + /* Copy any pre-actions to the start of merge flow action list. */ + pre_off1 = nfp_flower_copy_pre_actions(merge_act, + sub_flow1->action_data, + sub1_act_len, &tunnel_act); + merge_act += pre_off1; + sub1_act_len -= pre_off1; + pre_off2 = nfp_flower_copy_pre_actions(merge_act, + sub_flow2->action_data, + sub2_act_len, NULL); + merge_act += pre_off2; + sub2_act_len -= pre_off2; + + /* FW does a tunnel push when egressing, therefore, if sub_flow 1 pushes + * a tunnel, there are restrictions on what sub_flow 2 actions lead to a + * valid merge. + */ + if (tunnel_act) { + char *post_tun_acts = &sub_flow2->action_data[pre_off2]; + + err = nfp_fl_verify_post_tun_acts(post_tun_acts, sub2_act_len, + &post_tun_push_vlan); + if (err) + return err; + + if (post_tun_push_vlan) { + pre_off2 += sizeof(*post_tun_push_vlan); + sub2_act_len -= sizeof(*post_tun_push_vlan); + } + } + + /* Copy remaining actions from sub_flows 1 and 2. */ + memcpy(merge_act, sub_flow1->action_data + pre_off1, sub1_act_len); + + if (post_tun_push_vlan) { + /* Update tunnel action in merge to include VLAN push. */ + err = nfp_fl_push_vlan_after_tun(merge_act, sub1_act_len, + post_tun_push_vlan); + if (err) + return err; + + merge_flow->meta.act_len -= sizeof(*post_tun_push_vlan); + } + + merge_act += sub1_act_len; + memcpy(merge_act, sub_flow2->action_data + pre_off2, sub2_act_len); + + return 0; +} + +/* Flow link code should only be accessed under RTNL. */ +static void nfp_flower_unlink_flow(struct nfp_fl_payload_link *link) +{ + list_del(&link->merge_flow.list); + list_del(&link->sub_flow.list); + kfree(link); +} + +static void nfp_flower_unlink_flows(struct nfp_fl_payload *merge_flow, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link; + + list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list) + if (link->sub_flow.flow == sub_flow) { + nfp_flower_unlink_flow(link); + return; + } +} + +static int nfp_flower_link_flows(struct nfp_fl_payload *merge_flow, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + link->merge_flow.flow = merge_flow; + list_add_tail(&link->merge_flow.list, &merge_flow->linked_flows); + link->sub_flow.flow = sub_flow; + list_add_tail(&link->sub_flow.list, &sub_flow->linked_flows); + + return 0; +} + +/** + * nfp_flower_merge_offloaded_flows() - Merge 2 existing flows to single flow. + * @app: Pointer to the APP handle + * @sub_flow1: Initial flow matched to produce merge hint + * @sub_flow2: Post recirculation flow matched in merge hint + * + * Combines 2 flows (if valid) to a single flow, removing the initial from hw + * and offloading the new, merged flow. + * + * Return: negative value on error, 0 in success. + */ +int nfp_flower_merge_offloaded_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow1, + struct nfp_fl_payload *sub_flow2) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *merge_flow; + struct nfp_fl_key_ls merge_key_ls; + struct nfp_merge_info *merge_info; + u64 parent_ctx = 0; + int err; + + if (sub_flow1 == sub_flow2 || + nfp_flower_is_merge_flow(sub_flow1) || + nfp_flower_is_merge_flow(sub_flow2)) + return -EINVAL; + + /* Check if the two flows are already merged */ + parent_ctx = (u64)(be32_to_cpu(sub_flow1->meta.host_ctx_id)) << 32; + parent_ctx |= (u64)(be32_to_cpu(sub_flow2->meta.host_ctx_id)); + if (rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, merge_table_params)) { + nfp_flower_cmsg_warn(app, "The two flows are already merged.\n"); + return 0; + } + + err = nfp_flower_can_merge(sub_flow1, sub_flow2); + if (err) + return err; + + merge_key_ls.key_size = sub_flow1->meta.key_len; + + merge_flow = nfp_flower_allocate_new(&merge_key_ls); + if (!merge_flow) + return -ENOMEM; + + merge_flow->tc_flower_cookie = (unsigned long)merge_flow; + merge_flow->ingress_dev = sub_flow1->ingress_dev; + + memcpy(merge_flow->unmasked_data, sub_flow1->unmasked_data, + sub_flow1->meta.key_len); + memcpy(merge_flow->mask_data, sub_flow1->mask_data, + sub_flow1->meta.mask_len); + + err = nfp_flower_merge_action(sub_flow1, sub_flow2, merge_flow); + if (err) + goto err_destroy_merge_flow; + + err = nfp_flower_link_flows(merge_flow, sub_flow1); + if (err) + goto err_destroy_merge_flow; + + err = nfp_flower_link_flows(merge_flow, sub_flow2); + if (err) + goto err_unlink_sub_flow1; + + err = nfp_compile_flow_metadata(app, merge_flow->tc_flower_cookie, merge_flow, + merge_flow->ingress_dev, NULL); + if (err) + goto err_unlink_sub_flow2; + + err = rhashtable_insert_fast(&priv->flow_table, &merge_flow->fl_node, + nfp_flower_table_params); + if (err) + goto err_release_metadata; + + merge_info = kmalloc(sizeof(*merge_info), GFP_KERNEL); + if (!merge_info) { + err = -ENOMEM; + goto err_remove_rhash; + } + merge_info->parent_ctx = parent_ctx; + err = rhashtable_insert_fast(&priv->merge_table, &merge_info->ht_node, + merge_table_params); + if (err) + goto err_destroy_merge_info; + + err = nfp_flower_xmit_flow(app, merge_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_MOD); + if (err) + goto err_remove_merge_info; + + merge_flow->in_hw = true; + sub_flow1->in_hw = false; + + return 0; + +err_remove_merge_info: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); +err_destroy_merge_info: + kfree(merge_info); +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &merge_flow->fl_node, + nfp_flower_table_params)); +err_release_metadata: + nfp_modify_flow_metadata(app, merge_flow); +err_unlink_sub_flow2: + nfp_flower_unlink_flows(merge_flow, sub_flow2); +err_unlink_sub_flow1: + nfp_flower_unlink_flows(merge_flow, sub_flow1); +err_destroy_merge_flow: + kfree(merge_flow->action_data); + kfree(merge_flow->mask_data); + kfree(merge_flow->unmasked_data); + kfree(merge_flow); + return err; +} + +/** + * nfp_flower_validate_pre_tun_rule() + * @app: Pointer to the APP handle + * @flow: Pointer to NFP flow representation of rule + * @key_ls: Pointer to NFP key layers structure + * @extack: Netlink extended ACK report + * + * Verifies the flow as a pre-tunnel rule. + * + * Return: negative value on error, 0 if verified. + */ +static int +nfp_flower_validate_pre_tun_rule(struct nfp_app *app, + struct nfp_fl_payload *flow, + struct nfp_fl_key_ls *key_ls, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_meta_tci *meta_tci; + struct nfp_flower_mac_mpls *mac; + u8 *ext = flow->unmasked_data; + struct nfp_fl_act_head *act; + u8 *mask = flow->mask_data; + bool vlan = false; + int act_offset; + u8 key_layer; + + meta_tci = (struct nfp_flower_meta_tci *)flow->unmasked_data; + key_layer = key_ls->key_layer; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) { + u16 vlan_tci = be16_to_cpu(meta_tci->tci); + + vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; + flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + vlan = true; + } else { + flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + } + } + + if (key_layer & ~NFP_FLOWER_PRE_TUN_RULE_FIELDS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: too many match fields"); + return -EOPNOTSUPP; + } else if (key_ls->key_layer_two & ~NFP_FLOWER_LAYER2_QINQ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non-vlan in extended match fields"); + return -EOPNOTSUPP; + } + + if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MAC fields match required"); + return -EOPNOTSUPP; + } + + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on ipv4/ipv6 eth_type must be present"); + return -EOPNOTSUPP; + } + + if (key_layer & NFP_FLOWER_LAYER_IPV6) + flow->pre_tun_rule.is_ipv6 = true; + else + flow->pre_tun_rule.is_ipv6 = false; + + /* Skip fields known to exist. */ + mask += sizeof(struct nfp_flower_meta_tci); + ext += sizeof(struct nfp_flower_meta_tci); + if (key_ls->key_layer_two) { + mask += sizeof(struct nfp_flower_ext_meta); + ext += sizeof(struct nfp_flower_ext_meta); + } + mask += sizeof(struct nfp_flower_in_port); + ext += sizeof(struct nfp_flower_in_port); + + /* Ensure destination MAC address is fully matched. */ + mac = (struct nfp_flower_mac_mpls *)mask; + if (!is_broadcast_ether_addr(&mac->mac_dst[0])) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC field must not be masked"); + return -EOPNOTSUPP; + } + + /* Ensure source MAC address is fully matched. This is only needed + * for firmware with the DECAP_V2 feature enabled. Don't do this + * for firmware without this feature to keep old behaviour. + */ + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + mac = (struct nfp_flower_mac_mpls *)mask; + if (!is_broadcast_ether_addr(&mac->mac_src[0])) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: source MAC field must not be masked"); + return -EOPNOTSUPP; + } + } + + if (mac->mpls_lse) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported"); + return -EOPNOTSUPP; + } + + /* Ensure destination MAC address matches pre_tun_dev. */ + mac = (struct nfp_flower_mac_mpls *)ext; + if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); + return -EOPNOTSUPP; + } + + /* Save mac addresses in pre_tun_rule entry for later use */ + memcpy(&flow->pre_tun_rule.loc_mac, &mac->mac_dst[0], ETH_ALEN); + memcpy(&flow->pre_tun_rule.rem_mac, &mac->mac_src[0], ETH_ALEN); + + mask += sizeof(struct nfp_flower_mac_mpls); + ext += sizeof(struct nfp_flower_mac_mpls); + if (key_layer & NFP_FLOWER_LAYER_IPV4 || + key_layer & NFP_FLOWER_LAYER_IPV6) { + /* Flags and proto fields have same offset in IPv4 and IPv6. */ + int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags); + int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto); + int size; + int i; + + size = key_layer & NFP_FLOWER_LAYER_IPV4 ? + sizeof(struct nfp_flower_ipv4) : + sizeof(struct nfp_flower_ipv6); + + + /* Ensure proto and flags are the only IP layer fields. */ + for (i = 0; i < size; i++) + if (mask[i] && i != ip_flags && i != ip_proto) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); + return -EOPNOTSUPP; + } + ext += size; + mask += size; + } + + if ((priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + struct nfp_flower_vlan *vlan_tags; + u16 vlan_tpid; + u16 vlan_tci; + + vlan_tags = (struct nfp_flower_vlan *)ext; + + vlan_tci = be16_to_cpu(vlan_tags->outer_tci); + vlan_tpid = be16_to_cpu(vlan_tags->outer_tpid); + + vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; + flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(vlan_tpid); + vlan = true; + } else { + flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(0xffff); + } + } + + /* Action must be a single egress or pop_vlan and egress. */ + act_offset = 0; + act = (struct nfp_fl_act_head *)&flow->action_data[act_offset]; + if (vlan) { + if (act->jump_id != NFP_FL_ACTION_OPCODE_POP_VLAN) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on VLAN must have VLAN pop as first action"); + return -EOPNOTSUPP; + } + + act_offset += act->len_lw << NFP_FL_LW_SIZ; + act = (struct nfp_fl_act_head *)&flow->action_data[act_offset]; + } + + if (act->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non egress action detected where egress was expected"); + return -EOPNOTSUPP; + } + + act_offset += act->len_lw << NFP_FL_LW_SIZ; + + /* Ensure there are no more actions after egress. */ + if (act_offset != flow->meta.act_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: egress is not the last action"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool offload_pre_check(struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_ct ct; + + if (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + /* Allow special case where CT match is all 0 */ + if (memchr_inv(ct.key, 0, sizeof(*ct.key))) + return false; + } + + if (flow->common.chain_index) + return false; + + return true; +} + +/** + * nfp_flower_add_offload() - Adds a new flow to hardware. + * @app: Pointer to the APP handle + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * + * Adds a new flow to the repeated hash structure and action payload. + * + * Return: negative value on error, 0 if configured successfully. + */ +static int +nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; + struct nfp_fl_payload *flow_pay; + struct nfp_fl_key_ls *key_layer; + struct nfp_port *port = NULL; + int err; + + extack = flow->common.extack; + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + if (is_pre_ct_flow(flow)) + return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack, NULL); + + if (is_post_ct_flow(flow)) + return nfp_fl_ct_handle_post_ct(priv, netdev, flow, extack); + + if (!offload_pre_check(flow)) + return -EOPNOTSUPP; + + key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL); + if (!key_layer) + return -ENOMEM; + + err = nfp_flower_calculate_key_layers(app, netdev, key_layer, rule, + &tun_type, extack); + if (err) + goto err_free_key_ls; + + flow_pay = nfp_flower_allocate_new(key_layer); + if (!flow_pay) { + err = -ENOMEM; + goto err_free_key_ls; + } + + err = nfp_flower_compile_flow_match(app, rule, key_layer, netdev, + flow_pay, tun_type, extack); + if (err) + goto err_destroy_flow; + + err = nfp_flower_compile_action(app, rule, netdev, flow_pay, extack); + if (err) + goto err_destroy_flow; + + if (flow_pay->pre_tun_rule.dev) { + err = nfp_flower_validate_pre_tun_rule(app, flow_pay, key_layer, extack); + if (err) + goto err_destroy_flow; + } + + err = nfp_compile_flow_metadata(app, flow->cookie, flow_pay, netdev, extack); + if (err) + goto err_destroy_flow; + + flow_pay->tc_flower_cookie = flow->cookie; + err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, + nfp_flower_table_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot insert flow into tables for offloads"); + goto err_release_metadata; + } + + if (flow_pay->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = kzalloc(sizeof(*predt), GFP_KERNEL); + if (!predt) { + err = -ENOMEM; + goto err_remove_rhash; + } + predt->flow_pay = flow_pay; + INIT_LIST_HEAD(&predt->nn_list); + spin_lock_bh(&priv->predt_lock); + list_add(&predt->list_head, &priv->predt_list); + flow_pay->pre_tun_rule.predt = predt; + nfp_tun_link_and_update_nn_entries(app, predt); + spin_unlock_bh(&priv->predt_lock); + } else { + err = nfp_flower_xmit_pre_tun_flow(app, flow_pay); + } + } else { + err = nfp_flower_xmit_flow(app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + } + + if (err) + goto err_remove_rhash; + + if (port) + port->tc_offload_cnt++; + + flow_pay->in_hw = true; + + /* Deallocate flow payload when flower rule has been destroyed. */ + kfree(key_layer); + + return 0; + +err_remove_rhash: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +err_release_metadata: + nfp_modify_flow_metadata(app, flow_pay); +err_destroy_flow: + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); +err_free_key_ls: + kfree(key_layer); + return err; +} + +static void +nfp_flower_remove_merge_flow(struct nfp_app *app, + struct nfp_fl_payload *del_sub_flow, + struct nfp_fl_payload *merge_flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload_link *link, *temp; + struct nfp_merge_info *merge_info; + struct nfp_fl_payload *origin; + u64 parent_ctx = 0; + bool mod = false; + int err; + + link = list_first_entry(&merge_flow->linked_flows, + struct nfp_fl_payload_link, merge_flow.list); + origin = link->sub_flow.flow; + + /* Re-add rule the merge had overwritten if it has not been deleted. */ + if (origin != del_sub_flow) + mod = true; + + err = nfp_modify_flow_metadata(app, merge_flow); + if (err) { + nfp_flower_cmsg_warn(app, "Metadata fail for merge flow delete.\n"); + goto err_free_links; + } + + if (!mod) { + err = nfp_flower_xmit_flow(app, merge_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + if (err) { + nfp_flower_cmsg_warn(app, "Failed to delete merged flow.\n"); + goto err_free_links; + } + } else { + __nfp_modify_flow_metadata(priv, origin); + err = nfp_flower_xmit_flow(app, origin, + NFP_FLOWER_CMSG_TYPE_FLOW_MOD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to revert merge flow.\n"); + origin->in_hw = true; + } + +err_free_links: + /* Clean any links connected with the merged flow. */ + list_for_each_entry_safe(link, temp, &merge_flow->linked_flows, + merge_flow.list) { + u32 ctx_id = be32_to_cpu(link->sub_flow.flow->meta.host_ctx_id); + + parent_ctx = (parent_ctx << 32) | (u64)(ctx_id); + nfp_flower_unlink_flow(link); + } + + merge_info = rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, + merge_table_params); + if (merge_info) { + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); + kfree(merge_info); + } + + kfree(merge_flow->action_data); + kfree(merge_flow->mask_data); + kfree(merge_flow->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &merge_flow->fl_node, + nfp_flower_table_params)); + kfree_rcu(merge_flow, rcu); +} + +void +nfp_flower_del_linked_merge_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link, *temp; + + /* Remove any merge flow formed from the deleted sub_flow. */ + list_for_each_entry_safe(link, temp, &sub_flow->linked_flows, + sub_flow.list) + nfp_flower_remove_merge_flow(app, sub_flow, + link->merge_flow.flow); +} + +/** + * nfp_flower_del_offload() - Removes a flow from hardware. + * @app: Pointer to the APP handle + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure + * + * Removes a flow from the repeated hash structure and clears the + * action payload. Any flows merged from this are also deleted. + * + * Return: negative value on error, 0 if removed successfully. + */ +static int +nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_ct_map_entry *ct_map_ent; + struct netlink_ext_ack *extack = NULL; + struct nfp_fl_payload *nfp_flow; + struct nfp_port *port = NULL; + int err; + + extack = flow->common.extack; + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + /* Check ct_map_table */ + ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) { + err = nfp_fl_ct_del_flow(ct_map_ent); + return err; + } + + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot remove flow that does not exist"); + return -ENOENT; + } + + err = nfp_modify_flow_metadata(app, nfp_flow); + if (err) + goto err_free_merge_flow; + + if (nfp_flow->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + + if (nfp_flow->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6); + + if (!nfp_flow->in_hw) { + err = 0; + goto err_free_merge_flow; + } + + if (nfp_flow->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = nfp_flow->pre_tun_rule.predt; + if (predt) { + spin_lock_bh(&priv->predt_lock); + nfp_tun_unlink_and_update_nn_entries(app, predt); + list_del(&predt->list_head); + spin_unlock_bh(&priv->predt_lock); + kfree(predt); + } + } else { + err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow); + } + } else { + err = nfp_flower_xmit_flow(app, nfp_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + } + /* Fall through on error. */ + +err_free_merge_flow: + nfp_flower_del_linked_merge_flows(app, nfp_flow); + if (port) + port->tc_offload_cnt--; + kfree(nfp_flow->action_data); + kfree(nfp_flow->mask_data); + kfree(nfp_flow->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &nfp_flow->fl_node, + nfp_flower_table_params)); + kfree_rcu(nfp_flow, rcu); + return err; +} + +static void +__nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *merge_flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload_link *link; + struct nfp_fl_payload *sub_flow; + u64 pkts, bytes, used; + u32 ctx_id; + + ctx_id = be32_to_cpu(merge_flow->meta.host_ctx_id); + pkts = priv->stats[ctx_id].pkts; + /* Do not cycle subflows if no stats to distribute. */ + if (!pkts) + return; + bytes = priv->stats[ctx_id].bytes; + used = priv->stats[ctx_id].used; + + /* Reset stats for the merge flow. */ + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; + + /* The merge flow has received stats updates from firmware. + * Distribute these stats to all subflows that form the merge. + * The stats will collected from TC via the subflows. + */ + list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list) { + sub_flow = link->sub_flow.flow; + ctx_id = be32_to_cpu(sub_flow->meta.host_ctx_id); + priv->stats[ctx_id].pkts += pkts; + priv->stats[ctx_id].bytes += bytes; + priv->stats[ctx_id].used = max_t(u64, used, + priv->stats[ctx_id].used); + } +} + +void +nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *sub_flow) +{ + struct nfp_fl_payload_link *link; + + /* Get merge flows that the subflow forms to distribute their stats. */ + list_for_each_entry(link, &sub_flow->linked_flows, sub_flow.list) + __nfp_flower_update_merge_stats(app, link->merge_flow.flow); +} + +/** + * nfp_flower_get_stats() - Populates flow stats obtained from hardware. + * @app: Pointer to the APP handle + * @netdev: Netdev structure. + * @flow: TC flower classifier offload structure + * + * Populates a flow statistics structure which which corresponds to a + * specific flow. + * + * Return: negative value on error, 0 if stats populated successfully. + */ +static int +nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_ct_map_entry *ct_map_ent; + struct netlink_ext_ack *extack = NULL; + struct nfp_fl_payload *nfp_flow; + u32 ctx_id; + + /* Check ct_map table first */ + ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + + extack = flow->common.extack; + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot dump stats for flow that does not exist"); + return -EINVAL; + } + + ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + + spin_lock_bh(&priv->stats_lock); + /* If request is for a sub_flow, update stats from merged flows. */ + if (!list_empty(&nfp_flow->linked_flows)) + nfp_flower_update_merge_stats(app, nfp_flow); + + flow_stats_update(&flow->stats, priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; + spin_unlock_bh(&priv->stats_lock); + + return 0; +} + +static int +nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, + struct flow_cls_offload *flower) +{ + struct nfp_flower_priv *priv = app->priv; + int ret; + + if (!eth_proto_is_802_3(flower->common.protocol)) + return -EOPNOTSUPP; + + mutex_lock(&priv->nfp_fl_lock); + switch (flower->command) { + case FLOW_CLS_REPLACE: + ret = nfp_flower_add_offload(app, netdev, flower); + break; + case FLOW_CLS_DESTROY: + ret = nfp_flower_del_offload(app, netdev, flower); + break; + case FLOW_CLS_STATS: + ret = nfp_flower_get_stats(app, netdev, flower); + break; + default: + ret = -EOPNOTSUPP; + break; + } + mutex_unlock(&priv->nfp_fl_lock); + + return ret; +} + +static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct flow_cls_common_offload *common = type_data; + struct nfp_repr *repr = cb_priv; + + if (!tc_can_offload_extack(repr->netdev, common->extack)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return nfp_flower_repr_offload(repr->app, repr->netdev, + type_data); + case TC_SETUP_CLSMATCHALL: + return nfp_flower_setup_qos_offload(repr->app, repr->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(nfp_block_cb_list); + +static int nfp_flower_setup_tc_block(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_flower_repr_priv *repr_priv; + struct flow_block_cb *block_cb; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + repr_priv = repr->app_priv; + repr_priv->block_shared = f->block_shared; + f->driver_block_list = &nfp_block_cb_list; + f->unlocked_driver_cb = true; + + switch (f->command) { + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(nfp_flower_setup_tc_block_cb, repr, + &nfp_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb, + repr, repr, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f->block, + nfp_flower_setup_tc_block_cb, + repr); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } +} + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return nfp_flower_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +struct nfp_flower_indr_block_cb_priv { + struct net_device *netdev; + struct nfp_app *app; + struct list_head list; +}; + +static struct nfp_flower_indr_block_cb_priv * +nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app, + struct net_device *netdev) +{ + struct nfp_flower_indr_block_cb_priv *cb_priv; + struct nfp_flower_priv *priv = app->priv; + + list_for_each_entry(cb_priv, &priv->indr_block_cb_priv, list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct nfp_flower_indr_block_cb_priv *priv = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return nfp_flower_repr_offload(priv->app, priv->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +void nfp_flower_setup_indr_tc_release(void *cb_priv) +{ + struct nfp_flower_indr_block_cb_priv *priv = cb_priv; + + list_del(&priv->list); + kfree(priv); +} + +static int +nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, struct nfp_app *app, + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct nfp_flower_indr_block_cb_priv *cb_priv; + struct nfp_flower_priv *priv = app->priv; + struct flow_block_cb *block_cb; + + if ((f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + !nfp_flower_internal_port_can_offload(app, netdev)) || + (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + nfp_flower_internal_port_can_offload(app, netdev))) + return -EOPNOTSUPP; + + f->unlocked_driver_cb = true; + + switch (f->command) { + case FLOW_BLOCK_BIND: + cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); + if (cb_priv && + flow_block_cb_is_busy(nfp_flower_setup_indr_block_cb, + cb_priv, + &nfp_block_cb_list)) + return -EBUSY; + + cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL); + if (!cb_priv) + return -ENOMEM; + + cb_priv->netdev = netdev; + cb_priv->app = app; + list_add(&cb_priv->list, &priv->indr_block_cb_priv); + + block_cb = flow_indr_block_cb_alloc(nfp_flower_setup_indr_block_cb, + cb_priv, cb_priv, + nfp_flower_setup_indr_tc_release, + f, netdev, sch, data, app, cleanup); + if (IS_ERR(block_cb)) { + list_del(&cb_priv->list); + kfree(cb_priv); + return PTR_ERR(block_cb); + } + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); + if (!cb_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, + nfp_flower_setup_indr_block_cb, + cb_priv); + if (!block_cb) + return -ENOENT; + + flow_indr_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +nfp_setup_tc_no_dev(struct nfp_app *app, enum tc_setup_type type, void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return nfp_setup_tc_act_offload(app, data); + default: + return -EOPNOTSUPP; + } +} + +int +nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + if (!netdev) + return nfp_setup_tc_no_dev(cb_priv, type, data); + + if (!nfp_fl_is_netdev_to_offload(netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_BLOCK: + return nfp_flower_setup_indr_tc_block(netdev, sch, cb_priv, + type_data, data, cleanup); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c new file mode 100644 index 0000000000..e7180b4793 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -0,0 +1,887 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> +#include <linux/math64.h> +#include <linux/vmalloc.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_port.h" + +#define NFP_FL_QOS_UPDATE msecs_to_jiffies(1000) +#define NFP_FL_QOS_PPS BIT(15) +#define NFP_FL_QOS_METER BIT(10) + +struct nfp_police_cfg_head { + __be32 flags_opts; + union { + __be32 meter_id; + __be32 port; + }; +}; + +enum NFP_FL_QOS_TYPES { + NFP_FL_QOS_TYPE_BPS, + NFP_FL_QOS_TYPE_PPS, + NFP_FL_QOS_TYPE_MAX, +}; + +/* Police cmsg for configuring a trTCM traffic conditioner (8W/32B) + * See RFC 2698 for more details. + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved |p| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Port Ingress | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Token Bucket Peak | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Token Bucket Committed | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Peak Burst Size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Committed Burst Size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Peak Information Rate | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Committed Information Rate | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Word[0](FLag options): + * [15] p(pps) 1 for pps, 0 for bps + * + * Meter control message + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | Reserved |p| Y |TYPE |E|TSHFV |P| PC|R| + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | meter ID | + * +-------------------------------+-------------------------------+ + * + */ +struct nfp_police_config { + struct nfp_police_cfg_head head; + __be32 bkt_tkn_p; + __be32 bkt_tkn_c; + __be32 pbs; + __be32 cbs; + __be32 pir; + __be32 cir; +}; + +struct nfp_police_stats_reply { + struct nfp_police_cfg_head head; + __be64 pass_bytes; + __be64 pass_pkts; + __be64 drop_bytes; + __be64 drop_pkts; +}; + +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst) +{ + struct nfp_police_config *config; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + if (!ingress) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_METER); + + if (ingress) + config->head.port = cpu_to_be32(id); + else + config->head.meter_id = cpu_to_be32(id); + + config->bkt_tkn_p = cpu_to_be32(burst); + config->bkt_tkn_c = cpu_to_be32(burst); + config->pbs = cpu_to_be32(burst); + config->cbs = cpu_to_be32(burst); + config->pir = cpu_to_be32(rate); + config->cir = cpu_to_be32(rate); + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static int nfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack, + bool ingress) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (ingress) { + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue or ok"); + return -EOPNOTSUPP; + } + } else { + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &flow->rule->action.entries[0]; + u32 action_num = flow->rule->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + struct nfp_flower_repr_priv *repr_priv; + u32 netdev_port_id, i; + struct nfp_repr *repr; + bool pps_support; + u32 bps_num = 0; + u32 pps_num = 0; + u32 burst; + bool pps; + u64 rate; + int err; + + if (!nfp_netdev_is_nfp_repr(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); + return -EOPNOTSUPP; + } + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + netdev_port_id = nfp_repr_get_port_id(netdev); + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + if (repr_priv->block_shared) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on shared blocks"); + return -EOPNOTSUPP; + } + + if (repr->port->type != NFP_PORT_VF_PORT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on non-VF ports"); + return -EOPNOTSUPP; + } + + if (pps_support) { + if (action_num > 2 || action_num == 0) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support action number 1 or 2"); + return -EOPNOTSUPP; + } + } else { + if (!flow_offload_has_one_action(&flow->rule->action)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires a single action"); + return -EOPNOTSUPP; + } + } + + if (flow->common.prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority"); + return -EOPNOTSUPP; + } + + for (i = 0 ; i < action_num; i++) { + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + err = nfp_policer_validate(&flow->rule->action, action, extack, true); + if (err) + return err; + + if (action->police.rate_bytes_ps > 0) { + if (bps_num++) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support one BPS action"); + return -EOPNOTSUPP; + } + } + if (action->police.rate_pkt_ps > 0) { + if (!pps_support) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: FW does not support PPS action"); + return -EOPNOTSUPP; + } + if (pps_num++) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support one PPS action"); + return -EOPNOTSUPP; + } + } + } + + for (i = 0 ; i < action_num; i++) { + /* Set QoS data for this interface */ + action = paction + i; + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit is not BPS or PPS"); + continue; + } + + if (rate != 0) { + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(repr->app, true, + pps, netdev_port_id, + rate, burst); + } + } + repr_priv->qos_table.netdev_port_id = netdev_port_id; + fl_priv->qos_rate_limiters++; + if (fl_priv->qos_rate_limiters == 1) + schedule_delayed_work(&fl_priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return 0; +} + +static int +nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_police_config *config; + u32 netdev_port_id, i; + struct nfp_repr *repr; + struct sk_buff *skb; + bool pps_support; + + if (!nfp_netdev_is_nfp_repr(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); + return -EOPNOTSUPP; + } + repr = netdev_priv(netdev); + + netdev_port_id = nfp_repr_get_port_id(netdev); + repr_priv = repr->app_priv; + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + if (!repr_priv->qos_table.netdev_port_id) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot remove qos entry that does not exist"); + return -EOPNOTSUPP; + } + + memset(&repr_priv->qos_table, 0, sizeof(struct nfp_fl_qos)); + fl_priv->qos_rate_limiters--; + if (!fl_priv->qos_rate_limiters) + cancel_delayed_work_sync(&fl_priv->qos_stats_work); + for (i = 0 ; i < NFP_FL_QOS_TYPE_MAX; i++) { + if (i == NFP_FL_QOS_TYPE_PPS && !pps_support) + break; + /* 0:bps 1:pps + * Clear QoS data for this interface. + * There is no need to check if a specific QOS_TYPE was + * configured as the firmware handles clearing a QoS entry + * safely, even if it wasn't explicitly added. + */ + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (i == NFP_FL_QOS_TYPE_PPS) + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); + config->head.port = cpu_to_be32(netdev_port_id); + nfp_ctrl_tx(repr->app->ctrl, skb); + } + + return 0; +} + +void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_police_stats_reply *msg; + struct nfp_stat_pair *curr_stats; + struct nfp_stat_pair *prev_stats; + struct net_device *netdev; + struct nfp_repr *repr; + u32 netdev_port_id; + + msg = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(msg->head.flags_opts) & NFP_FL_QOS_METER) + return nfp_act_stats_reply(app, msg); + + netdev_port_id = be32_to_cpu(msg->head.port); + rcu_read_lock(); + netdev = nfp_app_dev_get(app, netdev_port_id, NULL); + if (!netdev) + goto exit_unlock_rcu; + + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + curr_stats = &repr_priv->qos_table.curr_stats; + prev_stats = &repr_priv->qos_table.prev_stats; + + spin_lock_bh(&fl_priv->qos_stats_lock); + curr_stats->pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + curr_stats->bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + + if (!repr_priv->qos_table.last_update) { + prev_stats->pkts = curr_stats->pkts; + prev_stats->bytes = curr_stats->bytes; + } + + repr_priv->qos_table.last_update = jiffies; + spin_unlock_bh(&fl_priv->qos_stats_lock); + +exit_unlock_rcu: + rcu_read_unlock(); +} + +static void +nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, + u32 id, bool ingress) +{ + struct nfp_police_cfg_head *head; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(fl_priv->app, + sizeof(struct nfp_police_cfg_head), + NFP_FLOWER_CMSG_TYPE_QOS_STATS, + GFP_ATOMIC); + if (!skb) + return; + head = nfp_flower_cmsg_get_data(skb); + + memset(head, 0, sizeof(struct nfp_police_cfg_head)); + if (ingress) { + head->port = cpu_to_be32(id); + } else { + head->flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + head->meter_id = cpu_to_be32(id); + } + + nfp_ctrl_tx(fl_priv->app->ctrl, skb); +} + +static void +nfp_flower_stats_rlim_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_reprs *repr_set; + int i; + + rcu_read_lock(); + repr_set = rcu_dereference(fl_priv->app->reprs[NFP_REPR_TYPE_VF]); + if (!repr_set) + goto exit_unlock_rcu; + + for (i = 0; i < repr_set->num_reprs; i++) { + struct net_device *netdev; + + netdev = rcu_dereference(repr_set->reprs[i]); + if (netdev) { + struct nfp_repr *priv = netdev_priv(netdev); + struct nfp_flower_repr_priv *repr_priv; + u32 netdev_port_id; + + repr_priv = priv->app_priv; + netdev_port_id = repr_priv->qos_table.netdev_port_id; + if (!netdev_port_id) + continue; + + nfp_flower_stats_rlim_request(fl_priv, + netdev_port_id, true); + } + } + +exit_unlock_rcu: + rcu_read_unlock(); +} + +static void update_stats_cache(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct nfp_flower_priv *fl_priv; + + delayed_work = to_delayed_work(work); + fl_priv = container_of(delayed_work, struct nfp_flower_priv, + qos_stats_work); + + nfp_flower_stats_rlim_request_all(fl_priv); + nfp_flower_stats_meter_request_all(fl_priv); + + schedule_delayed_work(&fl_priv->qos_stats_work, NFP_FL_QOS_UPDATE); +} + +static int +nfp_flower_stats_rate_limiter(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_stat_pair *curr_stats; + struct nfp_stat_pair *prev_stats; + u64 diff_bytes, diff_pkts; + struct nfp_repr *repr; + + if (!nfp_netdev_is_nfp_repr(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); + return -EOPNOTSUPP; + } + repr = netdev_priv(netdev); + + repr_priv = repr->app_priv; + if (!repr_priv->qos_table.netdev_port_id) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot find qos entry for stats update"); + return -EOPNOTSUPP; + } + + spin_lock_bh(&fl_priv->qos_stats_lock); + curr_stats = &repr_priv->qos_table.curr_stats; + prev_stats = &repr_priv->qos_table.prev_stats; + diff_pkts = curr_stats->pkts - prev_stats->pkts; + diff_bytes = curr_stats->bytes - prev_stats->bytes; + prev_stats->pkts = curr_stats->pkts; + prev_stats->bytes = curr_stats->bytes; + spin_unlock_bh(&fl_priv->qos_stats_lock); + + flow_stats_update(&flow->stats, diff_bytes, diff_pkts, 0, + repr_priv->qos_table.last_update, + FLOW_ACTION_HW_STATS_DELAYED); + return 0; +} + +void nfp_flower_qos_init(struct nfp_app *app) +{ + struct nfp_flower_priv *fl_priv = app->priv; + + spin_lock_init(&fl_priv->qos_stats_lock); + mutex_init(&fl_priv->meter_stats_lock); + nfp_init_meter_table(app); + + INIT_DELAYED_WORK(&fl_priv->qos_stats_work, &update_stats_cache); +} + +void nfp_flower_qos_cleanup(struct nfp_app *app) +{ + struct nfp_flower_priv *fl_priv = app->priv; + + cancel_delayed_work_sync(&fl_priv->qos_stats_work); +} + +int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_matchall_offload *flow) +{ + struct netlink_ext_ack *extack = flow->common.extack; + struct nfp_flower_priv *fl_priv = app->priv; + int ret; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support qos rate limit offload"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->nfp_fl_lock); + switch (flow->command) { + case TC_CLSMATCHALL_REPLACE: + ret = nfp_flower_install_rate_limiter(app, netdev, flow, extack); + break; + case TC_CLSMATCHALL_DESTROY: + ret = nfp_flower_remove_rate_limiter(app, netdev, flow, extack); + break; + case TC_CLSMATCHALL_STATS: + ret = nfp_flower_stats_rate_limiter(app, netdev, flow, extack); + break; + default: + ret = -EOPNOTSUPP; + break; + } + mutex_unlock(&fl_priv->nfp_fl_lock); + + return ret; +} + +/* Offload tc action, currently only for tc police */ + +static const struct rhashtable_params stats_meter_table_params = { + .key_offset = offsetof(struct nfp_meter_entry, meter_id), + .head_offset = offsetof(struct nfp_meter_entry, ht_node), + .key_len = sizeof(u32), +}; + +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); +} + +static struct nfp_meter_entry * +nfp_flower_add_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, + &meter_id, + stats_meter_table_params); + if (meter_entry) + return meter_entry; + + meter_entry = kzalloc(sizeof(*meter_entry), GFP_KERNEL); + if (!meter_entry) + return NULL; + + meter_entry->meter_id = meter_id; + meter_entry->used = jiffies; + if (rhashtable_insert_fast(&priv->meter_table, &meter_entry->ht_node, + stats_meter_table_params)) { + kfree(meter_entry); + return NULL; + } + + priv->qos_rate_limiters++; + if (priv->qos_rate_limiters == 1) + schedule_delayed_work(&priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return meter_entry; +} + +static void nfp_flower_del_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); + if (!meter_entry) + return; + + rhashtable_remove_fast(&priv->meter_table, + &meter_entry->ht_node, + stats_meter_table_params); + kfree(meter_entry); + priv->qos_rate_limiters--; + if (!priv->qos_rate_limiters) + cancel_delayed_work_sync(&priv->qos_stats_work); +} + +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + int err = 0; + + mutex_lock(&fl_priv->meter_stats_lock); + + switch (op) { + case NFP_METER_DEL: + nfp_flower_del_meter_entry(app, meter_id); + goto exit_unlock; + case NFP_METER_ADD: + meter_entry = nfp_flower_add_meter_entry(app, meter_id); + break; + default: + err = -EOPNOTSUPP; + goto exit_unlock; + } + + if (!meter_entry) { + err = -ENOMEM; + goto exit_unlock; + } + + if (action->police.rate_bytes_ps > 0) { + meter_entry->bps = true; + meter_entry->rate = action->police.rate_bytes_ps; + meter_entry->burst = action->police.burst; + } else { + meter_entry->bps = false; + meter_entry->rate = action->police.rate_pkt_ps; + meter_entry->burst = action->police.burst_pkt; + } + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_init_meter_table(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_init(&priv->meter_table, &stats_meter_table_params); +} + +void +nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct rhashtable_iter iter; + + mutex_lock(&fl_priv->meter_stats_lock); + rhashtable_walk_enter(&fl_priv->meter_table, &iter); + rhashtable_walk_start(&iter); + + while ((meter_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(meter_entry)) + continue; + nfp_flower_stats_rlim_request(fl_priv, + meter_entry->meter_id, false); + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_install_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &fl_act->action.entries[0]; + u32 action_num = fl_act->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + u32 burst, i, meter_id; + bool pps_support, pps; + bool add = false; + u64 rate; + int err; + + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + for (i = 0 ; i < action_num; i++) { + /* Set qos associate data for this interface */ + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + continue; + } + + err = nfp_policer_validate(&fl_act->action, action, extack, false); + if (err) + return err; + + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0 && pps_support) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported qos rate limit"); + continue; + } + + if (rate != 0) { + meter_id = action->hw_index; + if (nfp_flower_setup_meter_entry(app, action, NFP_METER_ADD, meter_id)) + continue; + + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(app, false, pps, meter_id, + rate, burst); + add = true; + } + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +nfp_act_remove_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_config *config; + struct sk_buff *skb; + u32 meter_id; + bool pps; + + /* Delete qos associate data for this interface */ + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + meter_id = fl_act->index; + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) { + NL_SET_ERR_MSG_MOD(extack, + "no meter entry when delete the action index."); + return -ENOENT; + } + pps = !meter_entry->bps; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + config->head.meter_id = cpu_to_be32(meter_id); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + + nfp_ctrl_tx(app->ctrl, skb); + nfp_flower_setup_meter_entry(app, NULL, NFP_METER_DEL, meter_id); + + return 0; +} + +void +nfp_act_stats_reply(struct nfp_app *app, void *pmsg) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_stats_reply *msg = pmsg; + u32 meter_id; + + meter_id = be32_to_cpu(msg->head.meter_id); + mutex_lock(&fl_priv->meter_stats_lock); + + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) + goto exit_unlock; + + meter_entry->stats.curr.pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + meter_entry->stats.curr.bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + meter_entry->stats.curr.drops = be64_to_cpu(msg->drop_pkts); + if (!meter_entry->stats.update) { + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + } + + meter_entry->stats.update = jiffies; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_stats_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + u64 diff_bytes, diff_pkts, diff_drops; + int err = 0; + + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->meter_stats_lock); + meter_entry = nfp_flower_search_meter_entry(app, fl_act->index); + if (!meter_entry) { + err = -ENOENT; + goto exit_unlock; + } + diff_pkts = meter_entry->stats.curr.pkts > meter_entry->stats.prev.pkts ? + meter_entry->stats.curr.pkts - meter_entry->stats.prev.pkts : 0; + diff_bytes = meter_entry->stats.curr.bytes > meter_entry->stats.prev.bytes ? + meter_entry->stats.curr.bytes - meter_entry->stats.prev.bytes : 0; + diff_drops = meter_entry->stats.curr.drops > meter_entry->stats.prev.drops ? + meter_entry->stats.curr.drops - meter_entry->stats.prev.drops : 0; + + flow_stats_update(&fl_act->stats, diff_bytes, diff_pkts, diff_drops, + meter_entry->stats.update, + FLOW_ACTION_HW_STATS_DELAYED); + + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act) +{ + struct netlink_ext_ack *extack = fl_act->extack; + struct nfp_flower_priv *fl_priv = app->priv; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) + return -EOPNOTSUPP; + + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return nfp_act_install_actions(app, fl_act, extack); + case FLOW_ACT_DESTROY: + return nfp_act_remove_actions(app, fl_act, extack); + case FLOW_ACT_STATS: + return nfp_act_stats_actions(app, fl_act, extack); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c new file mode 100644 index 0000000000..e522845c7c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -0,0 +1,1564 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <net/netevent.h> +#include <linux/idr.h> +#include <net/dst_metadata.h> +#include <net/arp.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" +#include "../nfp_net.h" + +#define NFP_FL_MAX_ROUTES 32 + +#define NFP_TUN_PRE_TUN_RULE_LIMIT 32 +#define NFP_TUN_PRE_TUN_RULE_DEL BIT(0) +#define NFP_TUN_PRE_TUN_IDX_BIT BIT(3) +#define NFP_TUN_PRE_TUN_IPV6_BIT BIT(7) + +/** + * struct nfp_tun_pre_tun_rule - rule matched before decap + * @flags: options for the rule offset + * @port_idx: index of destination MAC address for the rule + * @vlan_tci: VLAN info associated with MAC + * @host_ctx_id: stats context of rule to update + */ +struct nfp_tun_pre_tun_rule { + __be32 flags; + __be16 port_idx; + __be16 vlan_tci; + __be32 host_ctx_id; +}; + +/** + * struct nfp_tun_active_tuns - periodic message of active tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv4: dest IPv4 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info.extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info { + __be32 ipv4; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + +/** + * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv6: dest IPv6 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info.extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns_v6 { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info_v6 { + struct in6_addr ipv6; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + +/** + * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv4_addr: destination ipv4 address for route + * @reserved: reserved for future use + */ +struct nfp_tun_req_route_ipv4 { + __be32 ingress_port; + __be32 ipv4_addr; + __be32 reserved[2]; +}; + +/** + * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv6_addr: destination ipv6 address for route + */ +struct nfp_tun_req_route_ipv6 { + __be32 ingress_port; + struct in6_addr ipv6_addr; +}; + +/** + * struct nfp_offloaded_route - routes that are offloaded to the NFP + * @list: list pointer + * @ip_add: destination of route - can be IPv4 or IPv6 + */ +struct nfp_offloaded_route { + struct list_head list; + u8 ip_add[]; +}; + +#define NFP_FL_IPV4_ADDRS_MAX 32 + +/** + * struct nfp_tun_ipv4_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv4_addr: array of IPV4_ADDRS_MAX 32 bit IPv4 addresses + */ +struct nfp_tun_ipv4_addr { + __be32 count; + __be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX]; +}; + +/** + * struct nfp_ipv4_addr_entry - cached IPv4 addresses + * @ipv4_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv4_addr_entry { + __be32 ipv4_addr; + int ref_count; + struct list_head list; +}; + +#define NFP_FL_IPV6_ADDRS_MAX 4 + +/** + * struct nfp_tun_ipv6_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses + */ +struct nfp_tun_ipv6_addr { + __be32 count; + struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX]; +}; + +#define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 + +/** + * struct nfp_tun_mac_addr_offload - configure MAC address of tunnel EP on NFP + * @flags: MAC address offload options + * @count: number of MAC addresses in the message (should be 1) + * @index: index of MAC address in the lookup table + * @addr: interface MAC address + */ +struct nfp_tun_mac_addr_offload { + __be16 flags; + __be16 count; + __be16 index; + u8 addr[ETH_ALEN]; +}; + +/** + * struct nfp_neigh_update_work - update neighbour information to nfp + * @work: Work queue for writing neigh to the nfp + * @n: neighbour entry + * @app: Back pointer to app + */ +struct nfp_neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct nfp_app *app; +}; + +enum nfp_flower_mac_offload_cmd { + NFP_TUNNEL_MAC_OFFLOAD_ADD = 0, + NFP_TUNNEL_MAC_OFFLOAD_DEL = 1, + NFP_TUNNEL_MAC_OFFLOAD_MOD = 2, +}; + +#define NFP_MAX_MAC_INDEX 0xff + +/** + * struct nfp_tun_offloaded_mac - hashtable entry for an offloaded MAC + * @ht_node: Hashtable entry + * @addr: Offloaded MAC address + * @index: Offloaded index for given MAC address + * @ref_count: Number of devs using this MAC address + * @repr_list: List of reprs sharing this MAC address + * @bridge_count: Number of bridge/internal devs with MAC + */ +struct nfp_tun_offloaded_mac { + struct rhash_head ht_node; + u8 addr[ETH_ALEN]; + u16 index; + int ref_count; + struct list_head repr_list; + int bridge_count; +}; + +static const struct rhashtable_params offloaded_macs_params = { + .key_offset = offsetof(struct nfp_tun_offloaded_mac, addr), + .head_offset = offsetof(struct nfp_tun_offloaded_mac, ht_node), + .key_len = ETH_ALEN, + .automatic_shrinking = true, +}; + +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_active_tuns *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + __be32 ipv4_addr; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_MAX_ROUTES) { + nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv4_addr = payload->tun_info[i].ipv4; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +} + +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct nfp_tun_active_tuns_v6 *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + void *ipv6_add; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv6_add = &payload->tun_info[i].ipv6; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&nd_tbl, ipv6_add, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +#endif +} + +static int +nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, + gfp_t flag) +{ + struct nfp_flower_priv *priv = app->priv; + struct sk_buff *skb; + unsigned char *msg; + + if (!(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) && + (mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6)) + plen -= sizeof(struct nfp_tun_neigh_ext); + + if (!(priv->flower_ext_feats & NFP_FL_FEATS_TUNNEL_NEIGH_LAG) && + (mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6)) + plen -= sizeof(struct nfp_tun_neigh_lag); + + skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb)); + + nfp_ctrl_tx(app->ctrl, skb); + return 0; +} + +static void +nfp_tun_mutual_link(struct nfp_predt_entry *predt, + struct nfp_neigh_entry *neigh) +{ + struct nfp_fl_payload *flow_pay = predt->flow_pay; + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; + + if (flow_pay->pre_tun_rule.is_ipv6 != neigh->is_ipv6) + return; + + /* In the case of bonding it is possible that there might already + * be a flow linked (as the MAC address gets shared). If a flow + * is already linked just return. + */ + if (neigh->flow) + return; + + common = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->common : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->common; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + + if (memcmp(flow_pay->pre_tun_rule.loc_mac, + common->src_addr, ETH_ALEN) || + memcmp(flow_pay->pre_tun_rule.rem_mac, + common->dst_addr, ETH_ALEN)) + return; + + list_add(&neigh->list_head, &predt->nn_list); + neigh->flow = predt; + ext->host_ctx = flow_pay->meta.host_ctx_id; + ext->vlan_tci = flow_pay->pre_tun_rule.vlan_tci; + ext->vlan_tpid = flow_pay->pre_tun_rule.vlan_tpid; +} + +static void +nfp_tun_link_predt_entries(struct nfp_app *app, + struct nfp_neigh_entry *nn_entry) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_predt_entry *predt, *tmp; + + list_for_each_entry_safe(predt, tmp, &priv->predt_list, list_head) { + nfp_tun_mutual_link(predt, nn_entry); + } +} + +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *nn_entry; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((nn_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(nn_entry)) + continue; + nfp_tun_mutual_link(predt, nn_entry); + neigh_size = nn_entry->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = nn_entry->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +static void nfp_tun_cleanup_nn_entries(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *neigh; + struct nfp_tun_neigh_ext *ext; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((neigh = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(neigh)) + continue; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + + rhashtable_remove_fast(&priv->neigh_table, &neigh->ht_node, + neigh_table_params); + if (neigh->flow) + list_del(&neigh->list_head); + kfree(neigh); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) +{ + struct nfp_neigh_entry *neigh, *tmp; + struct nfp_tun_neigh_ext *ext; + size_t neigh_size; + u8 type; + + list_for_each_entry_safe(neigh, tmp, &predt->nn_list, list_head) { + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + neigh->flow = NULL; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + list_del(&neigh->list_head); + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + } +} + +static void +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + void *flow, struct neighbour *neigh, bool is_ipv6, + bool override) +{ + bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead; + size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + unsigned long cookie = (unsigned long)neigh; + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_neigh_lag lag_info; + struct nfp_neigh_entry *nn_entry; + u32 port_id; + u8 mtype; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + if (!port_id) + return; + + if ((port_id & NFP_FL_LAG_OUT) == NFP_FL_LAG_OUT) { + memset(&lag_info, 0, sizeof(struct nfp_tun_neigh_lag)); + nfp_flower_lag_get_info_from_netdev(app, netdev, &lag_info); + } + + spin_lock_bh(&priv->predt_lock); + nn_entry = rhashtable_lookup_fast(&priv->neigh_table, &cookie, + neigh_table_params); + if (!nn_entry && !neigh_invalid) { + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh_lag *lag; + struct nfp_tun_neigh *common; + + nn_entry = kzalloc(sizeof(*nn_entry) + neigh_size, + GFP_ATOMIC); + if (!nn_entry) + goto err; + + nn_entry->payload = (char *)&nn_entry[1]; + nn_entry->neigh_cookie = cookie; + nn_entry->is_ipv6 = is_ipv6; + nn_entry->flow = NULL; + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + payload->src_ipv6 = flowi6->saddr; + payload->dst_ipv6 = flowi6->daddr; + common = &payload->common; + ext = &payload->ext; + lag = &payload->lag; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + payload->src_ipv4 = flowi4->saddr; + payload->dst_ipv4 = flowi4->daddr; + common = &payload->common; + ext = &payload->ext; + lag = &payload->lag; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + ether_addr_copy(common->src_addr, netdev->dev_addr); + neigh_ha_snapshot(common->dst_addr, neigh, netdev); + + if ((port_id & NFP_FL_LAG_OUT) == NFP_FL_LAG_OUT) + memcpy(lag, &lag_info, sizeof(struct nfp_tun_neigh_lag)); + common->port_id = cpu_to_be32(port_id); + + if (rhashtable_insert_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params)) + goto err; + + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } else if (nn_entry && neigh_invalid) { + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload->dst_ipv6 = flowi6->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v4)); + payload->dst_ipv4 = flowi4->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + /* Trigger ARP to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + rhashtable_remove_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params); + + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + + if (nn_entry->flow) + list_del(&nn_entry->list_head); + kfree(nn_entry); + } else if (nn_entry && !neigh_invalid) { + struct nfp_tun_neigh *common; + u8 dst_addr[ETH_ALEN]; + bool is_mac_change; + + if (is_ipv6) { + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + common = &payload->common; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + common = &payload->common; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + + ether_addr_copy(dst_addr, common->dst_addr); + neigh_ha_snapshot(common->dst_addr, neigh, netdev); + is_mac_change = !ether_addr_equal(dst_addr, common->dst_addr); + if (override || is_mac_change) { + if (is_mac_change && nn_entry->flow) { + list_del(&nn_entry->list_head); + nn_entry->flow = NULL; + } + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } + } + + spin_unlock_bh(&priv->predt_lock); + return; + +err: + kfree(nn_entry); + spin_unlock_bh(&priv->predt_lock); + nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); +} + +static void +nfp_tun_release_neigh_update_work(struct nfp_neigh_update_work *update_work) +{ + neigh_release(update_work->n); + kfree(update_work); +} + +static void nfp_tun_neigh_update(struct work_struct *work) +{ + struct nfp_neigh_update_work *update_work; + struct nfp_app *app; + struct neighbour *n; + bool neigh_invalid; + int err; + + update_work = container_of(work, struct nfp_neigh_update_work, work); + app = update_work->app; + n = update_work->n; + + if (!nfp_flower_get_port_id_from_netdev(app, n->dev)) + goto out; + +#if IS_ENABLED(CONFIG_INET) + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; + if (n->tbl->family == AF_INET6) { +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 flow6 = {}; + + flow6.daddr = *(struct in6_addr *)n->primary_key; + if (!neigh_invalid) { + struct dst_entry *dst; + /* Use ipv6_dst_lookup_flow to populate flow6->saddr + * and other fields. This information is only needed + * for new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + goto out; + + dst_release(dst); + } + nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); +#endif /* CONFIG_IPV6 */ + } else { + struct flowi4 flow4 = {}; + + flow4.daddr = *(__be32 *)n->primary_key; + if (!neigh_invalid) { + struct rtable *rt; + /* Use ip_route_output_key to populate flow4->saddr and + * other fields. This information is only needed for + * new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + goto out; + + ip_rt_put(rt); + } + nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); + } +#endif /* CONFIG_INET */ +out: + nfp_tun_release_neigh_update_work(update_work); +} + +static struct nfp_neigh_update_work * +nfp_tun_alloc_neigh_update_work(struct nfp_app *app, struct neighbour *n) +{ + struct nfp_neigh_update_work *update_work; + + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (!update_work) + return NULL; + + INIT_WORK(&update_work->work, nfp_tun_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->app = app; + + return update_work; +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_neigh_update_work *update_work; + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct neighbour *n; + struct nfp_app *app; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); + app = app_priv->app; + update_work = nfp_tun_alloc_neigh_update_work(app, n); + if (!update_work) + return NOTIFY_DONE; + + queue_work(system_highpri_wq, &update_work->work); + + return NOTIFY_DONE; +} + +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv4 *payload; + struct net_device *netdev; + struct flowi4 flow = {}; + struct neighbour *n; + struct rtable *rt; + int err; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + dev_hold(netdev); + + flow.daddr = payload->ipv4_addr; + flow.flowi4_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup on same namespace as ingress port. */ + rt = ip_route_output_key(dev_net(netdev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + /* Get the neighbour entry for the lookup */ + n = dst_neigh_lookup(&rt->dst, &flow.daddr); + ip_rt_put(rt); + if (!n) + goto fail_rcu_unlock; + rcu_read_unlock(); + + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); + neigh_release(n); + dev_put(netdev); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + dev_put(netdev); + nfp_flower_cmsg_warn(app, "Requested route not found.\n"); +} + +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv6 *payload; + struct net_device *netdev; + struct flowi6 flow = {}; + struct dst_entry *dst; + struct neighbour *n; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + dev_hold(netdev); + + flow.daddr = payload->ipv6_addr; + flow.flowi6_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, + NULL); + if (IS_ERR(dst)) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + n = dst_neigh_lookup(dst, &flow.daddr); + dst_release(dst); + if (!n) + goto fail_rcu_unlock; + rcu_read_unlock(); + + nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); + neigh_release(n); + dev_put(netdev); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + dev_put(netdev); + nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); +} + +static void nfp_tun_write_ipv4_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct nfp_tun_ipv4_addr payload; + struct list_head *ptr, *storage; + int count; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); + mutex_lock(&priv->tun.ipv4_off_lock); + count = 0; + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + if (count >= NFP_FL_IPV4_ADDRS_MAX) { + mutex_unlock(&priv->tun.ipv4_off_lock); + nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); + return; + } + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + payload.ipv4_addr[count++] = entry->ipv4_addr; + } + payload.count = cpu_to_be32(count); + mutex_unlock(&priv->tun.ipv4_off_lock); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, + sizeof(struct nfp_tun_ipv4_addr), + &payload, GFP_KERNEL); +} + +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->tun.ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv4_off_lock); + return; + } + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv4_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return; + } + entry->ipv4_addr = ipv4; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv4_off_list); + mutex_unlock(&priv->tun.ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->tun.ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count--; + if (!entry->ref_count) { + list_del(&entry->list); + kfree(entry); + } + break; + } + } + mutex_unlock(&priv->tun.ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +static void nfp_tun_write_ipv6_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + struct nfp_tun_ipv6_addr payload; + int count = 0; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr)); + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) { + if (count >= NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n"); + break; + } + payload.ipv6_addr[count++] = entry->ipv6_addr; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + payload.count = cpu_to_be32(count); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6, + sizeof(struct nfp_tun_ipv6_addr), + &payload, GFP_KERNEL); +} + +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) + if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv6_off_lock); + return entry; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv6_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return NULL; + } + entry->ipv6_addr = *ipv6; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv6_off_list); + mutex_unlock(&priv->tun.ipv6_off_lock); + + nfp_tun_write_ipv6_list(app); + + return entry; +} + +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) +{ + struct nfp_flower_priv *priv = app->priv; + bool freed = false; + + mutex_lock(&priv->tun.ipv6_off_lock); + if (!--entry->ref_count) { + list_del(&entry->list); + kfree(entry); + freed = true; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + + if (freed) + nfp_tun_write_ipv6_list(app); +} + +static int +__nfp_tunnel_offload_mac(struct nfp_app *app, const u8 *mac, u16 idx, bool del) +{ + struct nfp_tun_mac_addr_offload payload; + + memset(&payload, 0, sizeof(payload)); + + if (del) + payload.flags = cpu_to_be16(NFP_TUN_MAC_OFFLOAD_DEL_FLAG); + + /* FW supports multiple MACs per cmsg but restrict to single. */ + payload.count = cpu_to_be16(1); + payload.index = cpu_to_be16(idx); + ether_addr_copy(payload.addr, mac); + + return nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, + sizeof(struct nfp_tun_mac_addr_offload), + &payload, GFP_KERNEL); +} + +static bool nfp_tunnel_port_is_phy_repr(int port) +{ + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) + return true; + + return false; +} + +static u16 nfp_tunnel_get_mac_idx_from_phy_port_id(int port) +{ + return port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; +} + +static u16 nfp_tunnel_get_global_mac_idx_from_ida(int id) +{ + return id << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; +} + +static int nfp_tunnel_get_ida_from_global_mac_idx(u16 nfp_mac_idx) +{ + return nfp_mac_idx >> 8; +} + +static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx) +{ + return (nfp_mac_idx & 0xff) == NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; +} + +static struct nfp_tun_offloaded_mac * +nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, const u8 *mac) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->tun.offloaded_macs, mac, + offloaded_macs_params); +} + +static void +nfp_tunnel_offloaded_macs_inc_ref_and_link(struct nfp_tun_offloaded_mac *entry, + struct net_device *netdev, bool mod) +{ + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + + /* If modifing MAC, remove repr from old list first. */ + if (mod) + list_del(&repr_priv->mac_list); + + list_add_tail(&repr_priv->mac_list, &entry->repr_list); + } else if (nfp_flower_is_supported_bridge(netdev)) { + entry->bridge_count++; + } + + entry->ref_count++; +} + +static int +nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, + int port, bool mod) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; + u16 nfp_mac_idx = 0; + + entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); + if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry->bridge_count || + !nfp_flower_is_supported_bridge(netdev)) { + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, + netdev, mod); + return 0; + } + + /* MAC is global but matches need to go to pre_tun table. */ + nfp_mac_idx = entry->index | NFP_TUN_PRE_TUN_IDX_BIT; + } + + if (!nfp_mac_idx) { + /* Assign a global index if non-repr or MAC is now shared. */ + if (entry || !port) { + ida_idx = ida_alloc_max(&priv->tun.mac_off_ids, + NFP_MAX_MAC_INDEX, GFP_KERNEL); + if (ida_idx < 0) + return ida_idx; + + nfp_mac_idx = + nfp_tunnel_get_global_mac_idx_from_ida(ida_idx); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx |= NFP_TUN_PRE_TUN_IDX_BIT; + + } else { + nfp_mac_idx = + nfp_tunnel_get_mac_idx_from_phy_port_id(port); + } + } + + if (!entry) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto err_free_ida; + } + + ether_addr_copy(entry->addr, netdev->dev_addr); + INIT_LIST_HEAD(&entry->repr_list); + + if (rhashtable_insert_fast(&priv->tun.offloaded_macs, + &entry->ht_node, + offloaded_macs_params)) { + err = -ENOMEM; + goto err_free_entry; + } + } + + err = __nfp_tunnel_offload_mac(app, netdev->dev_addr, + nfp_mac_idx, false); + if (err) { + /* If not shared then free. */ + if (!entry->ref_count) + goto err_remove_hash; + goto err_free_ida; + } + + entry->index = nfp_mac_idx; + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod); + + return 0; + +err_remove_hash: + rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, + offloaded_macs_params); +err_free_entry: + kfree(entry); +err_free_ida: + if (ida_idx != -1) + ida_free(&priv->tun.mac_off_ids, ida_idx); + + return err; +} + +static int +nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, + const u8 *mac, bool mod) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_tun_offloaded_mac *entry; + struct nfp_repr *repr; + u16 nfp_mac_idx; + int ida_idx; + + entry = nfp_tunnel_lookup_offloaded_macs(app, mac); + if (!entry) + return 0; + + entry->ref_count--; + /* If del is part of a mod then mac_list is still in use elsewhere. */ + if (nfp_netdev_is_nfp_repr(netdev) && !mod) { + repr = netdev_priv(netdev); + repr_priv = repr->app_priv; + list_del(&repr_priv->mac_list); + } + + if (nfp_flower_is_supported_bridge(netdev)) { + entry->bridge_count--; + + if (!entry->bridge_count && entry->ref_count) { + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, + false)) { + nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n", + netdev_name(netdev)); + return 0; + } + + entry->index = nfp_mac_idx; + return 0; + } + } + + /* If MAC is now used by 1 repr set the offloaded MAC index to port. */ + if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { + int port, err; + + repr_priv = list_first_entry(&entry->repr_list, + struct nfp_flower_repr_priv, + mac_list); + repr = repr_priv->nfp_repr; + port = nfp_repr_get_port_id(repr->netdev); + nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port); + err = __nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false); + if (err) { + nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n", + netdev_name(netdev)); + return 0; + } + + ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); + ida_free(&priv->tun.mac_off_ids, ida_idx); + entry->index = nfp_mac_idx; + return 0; + } + + if (entry->ref_count) + return 0; + + WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, + &entry->ht_node, + offloaded_macs_params)); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + else + nfp_mac_idx = entry->index; + + /* If MAC has global ID then extract and free the ida entry. */ + if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { + ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); + ida_free(&priv->tun.mac_off_ids, ida_idx); + } + + kfree(entry); + + return __nfp_tunnel_offload_mac(app, mac, 0, true); +} + +static int +nfp_tunnel_offload_mac(struct nfp_app *app, struct net_device *netdev, + enum nfp_flower_mac_offload_cmd cmd) +{ + struct nfp_flower_non_repr_priv *nr_priv = NULL; + bool non_repr = false, *mac_offloaded; + u8 *off_mac = NULL; + int err, port = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + repr = netdev_priv(netdev); + if (repr->app != app) + return 0; + + repr_priv = repr->app_priv; + if (repr_priv->on_bridge) + return 0; + + mac_offloaded = &repr_priv->mac_offloaded; + off_mac = &repr_priv->offloaded_mac_addr[0]; + port = nfp_repr_get_port_id(netdev); + if (!nfp_tunnel_port_is_phy_repr(port)) + return 0; + } else if (nfp_fl_is_netdev_to_offload(netdev)) { + nr_priv = nfp_flower_non_repr_priv_get(app, netdev); + if (!nr_priv) + return -ENOMEM; + + mac_offloaded = &nr_priv->mac_offloaded; + off_mac = &nr_priv->offloaded_mac_addr[0]; + non_repr = true; + } else { + return 0; + } + + if (!is_valid_ether_addr(netdev->dev_addr)) { + err = -EINVAL; + goto err_put_non_repr_priv; + } + + if (cmd == NFP_TUNNEL_MAC_OFFLOAD_MOD && !*mac_offloaded) + cmd = NFP_TUNNEL_MAC_OFFLOAD_ADD; + + switch (cmd) { + case NFP_TUNNEL_MAC_OFFLOAD_ADD: + err = nfp_tunnel_add_shared_mac(app, netdev, port, false); + if (err) + goto err_put_non_repr_priv; + + if (non_repr) + __nfp_flower_non_repr_priv_get(nr_priv); + + *mac_offloaded = true; + ether_addr_copy(off_mac, netdev->dev_addr); + break; + case NFP_TUNNEL_MAC_OFFLOAD_DEL: + /* Only attempt delete if add was successful. */ + if (!*mac_offloaded) + break; + + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + *mac_offloaded = false; + + err = nfp_tunnel_del_shared_mac(app, netdev, netdev->dev_addr, + false); + if (err) + goto err_put_non_repr_priv; + + break; + case NFP_TUNNEL_MAC_OFFLOAD_MOD: + /* Ignore if changing to the same address. */ + if (ether_addr_equal(netdev->dev_addr, off_mac)) + break; + + err = nfp_tunnel_add_shared_mac(app, netdev, port, true); + if (err) + goto err_put_non_repr_priv; + + /* Delete the previous MAC address. */ + err = nfp_tunnel_del_shared_mac(app, netdev, off_mac, true); + if (err) + nfp_flower_cmsg_warn(app, "Failed to remove offload of replaced MAC addr on %s.\n", + netdev_name(netdev)); + + ether_addr_copy(off_mac, netdev->dev_addr); + break; + default: + err = -EINVAL; + goto err_put_non_repr_priv; + } + + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + return 0; + +err_put_non_repr_priv: + if (non_repr) + __nfp_flower_non_repr_priv_put(nr_priv); + + return err; +} + +int nfp_tunnel_mac_event_handler(struct nfp_app *app, + struct net_device *netdev, + unsigned long event, void *ptr) +{ + int err; + + if (event == NETDEV_DOWN) { + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_DEL); + if (err) + nfp_flower_cmsg_warn(app, "Failed to delete offload MAC on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_UP) { + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_ADD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_CHANGEADDR) { + /* Only offload addr change if netdev is already up. */ + if (!(netdev->flags & IFF_UP)) + return NOTIFY_OK; + + err = nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_MOD); + if (err) + nfp_flower_cmsg_warn(app, "Failed to offload MAC change on %s.\n", + netdev_name(netdev)); + } else if (event == NETDEV_CHANGEUPPER) { + /* If a repr is attached to a bridge then tunnel packets + * entering the physical port are directed through the bridge + * datapath and cannot be directly detunneled. Therefore, + * associated offloaded MACs and indexes should not be used + * by fw for detunneling. + */ + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *repr; + + if (!nfp_netdev_is_nfp_repr(netdev) || + !nfp_flower_is_supported_bridge(upper)) + return NOTIFY_OK; + + repr = netdev_priv(netdev); + if (repr->app != app) + return NOTIFY_OK; + + repr_priv = repr->app_priv; + + if (info->linking) { + if (nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_DEL)) + nfp_flower_cmsg_warn(app, "Failed to delete offloaded MAC on %s.\n", + netdev_name(netdev)); + repr_priv->on_bridge = true; + } else { + repr_priv->on_bridge = false; + + if (!(netdev->flags & IFF_UP)) + return NOTIFY_OK; + + if (nfp_tunnel_offload_mac(app, netdev, + NFP_TUNNEL_MAC_OFFLOAD_ADD)) + nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n", + netdev_name(netdev)); + } + } + return NOTIFY_OK; +} + +int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, + struct nfp_fl_payload *flow) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_tun_offloaded_mac *mac_entry; + struct nfp_flower_meta_tci *key_meta; + struct nfp_tun_pre_tun_rule payload; + struct net_device *internal_dev; + int err; + + if (app_priv->pre_tun_rule_cnt == NFP_TUN_PRE_TUN_RULE_LIMIT) + return -ENOSPC; + + memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule)); + + internal_dev = flow->pre_tun_rule.dev; + payload.vlan_tci = flow->pre_tun_rule.vlan_tci; + payload.host_ctx_id = flow->meta.host_ctx_id; + + /* Lookup MAC index for the pre-tunnel rule egress device. + * Note that because the device is always an internal port, it will + * have a constant global index so does not need to be tracked. + */ + mac_entry = nfp_tunnel_lookup_offloaded_macs(app, + internal_dev->dev_addr); + if (!mac_entry) + return -ENOENT; + + /* Set/clear IPV6 bit. cpu_to_be16() swap will lead to MSB being + * set/clear for port_idx. + */ + key_meta = (struct nfp_flower_meta_tci *)flow->unmasked_data; + if (key_meta->nfp_flow_key_layer & NFP_FLOWER_LAYER_IPV6) + mac_entry->index |= NFP_TUN_PRE_TUN_IPV6_BIT; + else + mac_entry->index &= ~NFP_TUN_PRE_TUN_IPV6_BIT; + + payload.port_idx = cpu_to_be16(mac_entry->index); + + /* Copy mac id and vlan to flow - dev may not exist at delete time. */ + flow->pre_tun_rule.vlan_tci = payload.vlan_tci; + flow->pre_tun_rule.port_idx = payload.port_idx; + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE, + sizeof(struct nfp_tun_pre_tun_rule), + (unsigned char *)&payload, GFP_KERNEL); + if (err) + return err; + + app_priv->pre_tun_rule_cnt++; + + return 0; +} + +int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, + struct nfp_fl_payload *flow) +{ + struct nfp_flower_priv *app_priv = app->priv; + struct nfp_tun_pre_tun_rule payload; + u32 tmp_flags = 0; + int err; + + memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule)); + + tmp_flags |= NFP_TUN_PRE_TUN_RULE_DEL; + payload.flags = cpu_to_be32(tmp_flags); + payload.vlan_tci = flow->pre_tun_rule.vlan_tci; + payload.port_idx = flow->pre_tun_rule.port_idx; + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE, + sizeof(struct nfp_tun_pre_tun_rule), + (unsigned char *)&payload, GFP_KERNEL); + if (err) + return err; + + app_priv->pre_tun_rule_cnt--; + + return 0; +} + +int nfp_tunnel_config_start(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + int err; + + /* Initialise rhash for MAC offload tracking. */ + err = rhashtable_init(&priv->tun.offloaded_macs, + &offloaded_macs_params); + if (err) + return err; + + ida_init(&priv->tun.mac_off_ids); + + /* Initialise priv data for IPv4/v6 offloading. */ + mutex_init(&priv->tun.ipv4_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv4_off_list); + mutex_init(&priv->tun.ipv6_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv6_off_list); + + /* Initialise priv data for neighbour offloading. */ + priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + + err = register_netevent_notifier(&priv->tun.neigh_nb); + if (err) { + rhashtable_free_and_destroy(&priv->tun.offloaded_macs, + nfp_check_rhashtable_empty, NULL); + return err; + } + + return 0; +} + +void nfp_tunnel_config_stop(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *ip_entry; + struct list_head *ptr, *storage; + + unregister_netevent_notifier(&priv->tun.neigh_nb); + + ida_destroy(&priv->tun.mac_off_ids); + + /* Free any memory that may be occupied by ipv4 list. */ + list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) { + ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + list_del(&ip_entry->list); + kfree(ip_entry); + } + + mutex_destroy(&priv->tun.ipv6_off_lock); + + /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ + rhashtable_free_and_destroy(&priv->tun.offloaded_macs, + nfp_check_rhashtable_empty, NULL); + + nfp_tun_cleanup_nn_entries(app); +} |